S/390: Set the preferred mode for float vectors
[official-gcc.git] / gcc / config / s390 / s390.c
blob0ceeef47ec3947f119d60e3627ce470a9dca8858
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "expmed.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "diagnostic.h"
47 #include "alias.h"
48 #include "fold-const.h"
49 #include "print-tree.h"
50 #include "stor-layout.h"
51 #include "varasm.h"
52 #include "calls.h"
53 #include "conditions.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "flags.h"
57 #include "except.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "stmt.h"
61 #include "expr.h"
62 #include "reload.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "debug.h"
69 #include "langhooks.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimplify.h"
74 #include "params.h"
75 #include "opts.h"
76 #include "tree-pass.h"
77 #include "context.h"
78 #include "builtins.h"
79 #include "rtl-iter.h"
80 #include "intl.h"
81 #include "tm-constrs.h"
82 #include "tree-vrp.h"
83 #include "symbol-summary.h"
84 #include "ipa-prop.h"
85 #include "ipa-fnsummary.h"
87 /* This file should be included last. */
88 #include "target-def.h"
90 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
92 /* Remember the last target of s390_set_current_function. */
93 static GTY(()) tree s390_previous_fndecl;
95 /* Define the specific costs for a given cpu. */
97 struct processor_costs
99 /* multiplication */
100 const int m; /* cost of an M instruction. */
101 const int mghi; /* cost of an MGHI instruction. */
102 const int mh; /* cost of an MH instruction. */
103 const int mhi; /* cost of an MHI instruction. */
104 const int ml; /* cost of an ML instruction. */
105 const int mr; /* cost of an MR instruction. */
106 const int ms; /* cost of an MS instruction. */
107 const int msg; /* cost of an MSG instruction. */
108 const int msgf; /* cost of an MSGF instruction. */
109 const int msgfr; /* cost of an MSGFR instruction. */
110 const int msgr; /* cost of an MSGR instruction. */
111 const int msr; /* cost of an MSR instruction. */
112 const int mult_df; /* cost of multiplication in DFmode. */
113 const int mxbr;
114 /* square root */
115 const int sqxbr; /* cost of square root in TFmode. */
116 const int sqdbr; /* cost of square root in DFmode. */
117 const int sqebr; /* cost of square root in SFmode. */
118 /* multiply and add */
119 const int madbr; /* cost of multiply and add in DFmode. */
120 const int maebr; /* cost of multiply and add in SFmode. */
121 /* division */
122 const int dxbr;
123 const int ddbr;
124 const int debr;
125 const int dlgr;
126 const int dlr;
127 const int dr;
128 const int dsgfr;
129 const int dsgr;
132 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
134 static const
135 struct processor_costs z900_cost =
137 COSTS_N_INSNS (5), /* M */
138 COSTS_N_INSNS (10), /* MGHI */
139 COSTS_N_INSNS (5), /* MH */
140 COSTS_N_INSNS (4), /* MHI */
141 COSTS_N_INSNS (5), /* ML */
142 COSTS_N_INSNS (5), /* MR */
143 COSTS_N_INSNS (4), /* MS */
144 COSTS_N_INSNS (15), /* MSG */
145 COSTS_N_INSNS (7), /* MSGF */
146 COSTS_N_INSNS (7), /* MSGFR */
147 COSTS_N_INSNS (10), /* MSGR */
148 COSTS_N_INSNS (4), /* MSR */
149 COSTS_N_INSNS (7), /* multiplication in DFmode */
150 COSTS_N_INSNS (13), /* MXBR */
151 COSTS_N_INSNS (136), /* SQXBR */
152 COSTS_N_INSNS (44), /* SQDBR */
153 COSTS_N_INSNS (35), /* SQEBR */
154 COSTS_N_INSNS (18), /* MADBR */
155 COSTS_N_INSNS (13), /* MAEBR */
156 COSTS_N_INSNS (134), /* DXBR */
157 COSTS_N_INSNS (30), /* DDBR */
158 COSTS_N_INSNS (27), /* DEBR */
159 COSTS_N_INSNS (220), /* DLGR */
160 COSTS_N_INSNS (34), /* DLR */
161 COSTS_N_INSNS (34), /* DR */
162 COSTS_N_INSNS (32), /* DSGFR */
163 COSTS_N_INSNS (32), /* DSGR */
166 static const
167 struct processor_costs z990_cost =
169 COSTS_N_INSNS (4), /* M */
170 COSTS_N_INSNS (2), /* MGHI */
171 COSTS_N_INSNS (2), /* MH */
172 COSTS_N_INSNS (2), /* MHI */
173 COSTS_N_INSNS (4), /* ML */
174 COSTS_N_INSNS (4), /* MR */
175 COSTS_N_INSNS (5), /* MS */
176 COSTS_N_INSNS (6), /* MSG */
177 COSTS_N_INSNS (4), /* MSGF */
178 COSTS_N_INSNS (4), /* MSGFR */
179 COSTS_N_INSNS (4), /* MSGR */
180 COSTS_N_INSNS (4), /* MSR */
181 COSTS_N_INSNS (1), /* multiplication in DFmode */
182 COSTS_N_INSNS (28), /* MXBR */
183 COSTS_N_INSNS (130), /* SQXBR */
184 COSTS_N_INSNS (66), /* SQDBR */
185 COSTS_N_INSNS (38), /* SQEBR */
186 COSTS_N_INSNS (1), /* MADBR */
187 COSTS_N_INSNS (1), /* MAEBR */
188 COSTS_N_INSNS (60), /* DXBR */
189 COSTS_N_INSNS (40), /* DDBR */
190 COSTS_N_INSNS (26), /* DEBR */
191 COSTS_N_INSNS (176), /* DLGR */
192 COSTS_N_INSNS (31), /* DLR */
193 COSTS_N_INSNS (31), /* DR */
194 COSTS_N_INSNS (31), /* DSGFR */
195 COSTS_N_INSNS (31), /* DSGR */
198 static const
199 struct processor_costs z9_109_cost =
201 COSTS_N_INSNS (4), /* M */
202 COSTS_N_INSNS (2), /* MGHI */
203 COSTS_N_INSNS (2), /* MH */
204 COSTS_N_INSNS (2), /* MHI */
205 COSTS_N_INSNS (4), /* ML */
206 COSTS_N_INSNS (4), /* MR */
207 COSTS_N_INSNS (5), /* MS */
208 COSTS_N_INSNS (6), /* MSG */
209 COSTS_N_INSNS (4), /* MSGF */
210 COSTS_N_INSNS (4), /* MSGFR */
211 COSTS_N_INSNS (4), /* MSGR */
212 COSTS_N_INSNS (4), /* MSR */
213 COSTS_N_INSNS (1), /* multiplication in DFmode */
214 COSTS_N_INSNS (28), /* MXBR */
215 COSTS_N_INSNS (130), /* SQXBR */
216 COSTS_N_INSNS (66), /* SQDBR */
217 COSTS_N_INSNS (38), /* SQEBR */
218 COSTS_N_INSNS (1), /* MADBR */
219 COSTS_N_INSNS (1), /* MAEBR */
220 COSTS_N_INSNS (60), /* DXBR */
221 COSTS_N_INSNS (40), /* DDBR */
222 COSTS_N_INSNS (26), /* DEBR */
223 COSTS_N_INSNS (30), /* DLGR */
224 COSTS_N_INSNS (23), /* DLR */
225 COSTS_N_INSNS (23), /* DR */
226 COSTS_N_INSNS (24), /* DSGFR */
227 COSTS_N_INSNS (24), /* DSGR */
230 static const
231 struct processor_costs z10_cost =
233 COSTS_N_INSNS (10), /* M */
234 COSTS_N_INSNS (10), /* MGHI */
235 COSTS_N_INSNS (10), /* MH */
236 COSTS_N_INSNS (10), /* MHI */
237 COSTS_N_INSNS (10), /* ML */
238 COSTS_N_INSNS (10), /* MR */
239 COSTS_N_INSNS (10), /* MS */
240 COSTS_N_INSNS (10), /* MSG */
241 COSTS_N_INSNS (10), /* MSGF */
242 COSTS_N_INSNS (10), /* MSGFR */
243 COSTS_N_INSNS (10), /* MSGR */
244 COSTS_N_INSNS (10), /* MSR */
245 COSTS_N_INSNS (1) , /* multiplication in DFmode */
246 COSTS_N_INSNS (50), /* MXBR */
247 COSTS_N_INSNS (120), /* SQXBR */
248 COSTS_N_INSNS (52), /* SQDBR */
249 COSTS_N_INSNS (38), /* SQEBR */
250 COSTS_N_INSNS (1), /* MADBR */
251 COSTS_N_INSNS (1), /* MAEBR */
252 COSTS_N_INSNS (111), /* DXBR */
253 COSTS_N_INSNS (39), /* DDBR */
254 COSTS_N_INSNS (32), /* DEBR */
255 COSTS_N_INSNS (160), /* DLGR */
256 COSTS_N_INSNS (71), /* DLR */
257 COSTS_N_INSNS (71), /* DR */
258 COSTS_N_INSNS (71), /* DSGFR */
259 COSTS_N_INSNS (71), /* DSGR */
262 static const
263 struct processor_costs z196_cost =
265 COSTS_N_INSNS (7), /* M */
266 COSTS_N_INSNS (5), /* MGHI */
267 COSTS_N_INSNS (5), /* MH */
268 COSTS_N_INSNS (5), /* MHI */
269 COSTS_N_INSNS (7), /* ML */
270 COSTS_N_INSNS (7), /* MR */
271 COSTS_N_INSNS (6), /* MS */
272 COSTS_N_INSNS (8), /* MSG */
273 COSTS_N_INSNS (6), /* MSGF */
274 COSTS_N_INSNS (6), /* MSGFR */
275 COSTS_N_INSNS (8), /* MSGR */
276 COSTS_N_INSNS (6), /* MSR */
277 COSTS_N_INSNS (1) , /* multiplication in DFmode */
278 COSTS_N_INSNS (40), /* MXBR B+40 */
279 COSTS_N_INSNS (100), /* SQXBR B+100 */
280 COSTS_N_INSNS (42), /* SQDBR B+42 */
281 COSTS_N_INSNS (28), /* SQEBR B+28 */
282 COSTS_N_INSNS (1), /* MADBR B */
283 COSTS_N_INSNS (1), /* MAEBR B */
284 COSTS_N_INSNS (101), /* DXBR B+101 */
285 COSTS_N_INSNS (29), /* DDBR */
286 COSTS_N_INSNS (22), /* DEBR */
287 COSTS_N_INSNS (160), /* DLGR cracked */
288 COSTS_N_INSNS (160), /* DLR cracked */
289 COSTS_N_INSNS (160), /* DR expanded */
290 COSTS_N_INSNS (160), /* DSGFR cracked */
291 COSTS_N_INSNS (160), /* DSGR cracked */
294 static const
295 struct processor_costs zEC12_cost =
297 COSTS_N_INSNS (7), /* M */
298 COSTS_N_INSNS (5), /* MGHI */
299 COSTS_N_INSNS (5), /* MH */
300 COSTS_N_INSNS (5), /* MHI */
301 COSTS_N_INSNS (7), /* ML */
302 COSTS_N_INSNS (7), /* MR */
303 COSTS_N_INSNS (6), /* MS */
304 COSTS_N_INSNS (8), /* MSG */
305 COSTS_N_INSNS (6), /* MSGF */
306 COSTS_N_INSNS (6), /* MSGFR */
307 COSTS_N_INSNS (8), /* MSGR */
308 COSTS_N_INSNS (6), /* MSR */
309 COSTS_N_INSNS (1) , /* multiplication in DFmode */
310 COSTS_N_INSNS (40), /* MXBR B+40 */
311 COSTS_N_INSNS (100), /* SQXBR B+100 */
312 COSTS_N_INSNS (42), /* SQDBR B+42 */
313 COSTS_N_INSNS (28), /* SQEBR B+28 */
314 COSTS_N_INSNS (1), /* MADBR B */
315 COSTS_N_INSNS (1), /* MAEBR B */
316 COSTS_N_INSNS (131), /* DXBR B+131 */
317 COSTS_N_INSNS (29), /* DDBR */
318 COSTS_N_INSNS (22), /* DEBR */
319 COSTS_N_INSNS (160), /* DLGR cracked */
320 COSTS_N_INSNS (160), /* DLR cracked */
321 COSTS_N_INSNS (160), /* DR expanded */
322 COSTS_N_INSNS (160), /* DSGFR cracked */
323 COSTS_N_INSNS (160), /* DSGR cracked */
326 static struct
328 /* The preferred name to be used in user visible output. */
329 const char *const name;
330 /* CPU name as it should be passed to Binutils via .machine */
331 const char *const binutils_name;
332 const enum processor_type processor;
333 const struct processor_costs *cost;
335 const processor_table[] =
337 { "g5", "g5", PROCESSOR_9672_G5, &z900_cost },
338 { "g6", "g6", PROCESSOR_9672_G6, &z900_cost },
339 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost },
340 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost },
341 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
342 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
343 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost },
344 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost },
345 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
346 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost },
347 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost },
348 { "native", "", PROCESSOR_NATIVE, NULL }
351 extern int reload_completed;
353 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
354 static rtx_insn *last_scheduled_insn;
355 #define MAX_SCHED_UNITS 3
356 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
358 /* The maximum score added for an instruction whose unit hasn't been
359 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
360 give instruction mix scheduling more priority over instruction
361 grouping. */
362 #define MAX_SCHED_MIX_SCORE 8
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
366 Increase this with the OOO windows size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 100
369 /* Structure used to hold the components of a S/390 memory
370 address. A legitimate address on S/390 is of the general
371 form
372 base + index + displacement
373 where any of the components is optional.
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
378 struct s390_address
380 rtx base;
381 rtx indx;
382 rtx disp;
383 bool pointer;
384 bool literal_pool;
387 /* The following structure is embedded in the machine
388 specific part of struct function. */
390 struct GTY (()) s390_frame_layout
392 /* Offset within stack frame. */
393 HOST_WIDE_INT gprs_offset;
394 HOST_WIDE_INT f0_offset;
395 HOST_WIDE_INT f4_offset;
396 HOST_WIDE_INT f8_offset;
397 HOST_WIDE_INT backchain_offset;
399 /* Number of first and last gpr where slots in the register
400 save area are reserved for. */
401 int first_save_gpr_slot;
402 int last_save_gpr_slot;
404 /* Location (FP register number) where GPRs (r0-r15) should
405 be saved to.
406 0 - does not need to be saved at all
407 -1 - stack slot */
408 #define SAVE_SLOT_NONE 0
409 #define SAVE_SLOT_STACK -1
410 signed char gpr_save_slots[16];
412 /* Number of first and last gpr to be saved, restored. */
413 int first_save_gpr;
414 int first_restore_gpr;
415 int last_save_gpr;
416 int last_restore_gpr;
418 /* Bits standing for floating point registers. Set, if the
419 respective register has to be saved. Starting with reg 16 (f0)
420 at the rightmost bit.
421 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
422 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
423 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
424 unsigned int fpr_bitmap;
426 /* Number of floating point registers f8-f15 which must be saved. */
427 int high_fprs;
429 /* Set if return address needs to be saved.
430 This flag is set by s390_return_addr_rtx if it could not use
431 the initial value of r14 and therefore depends on r14 saved
432 to the stack. */
433 bool save_return_addr_p;
435 /* Size of stack frame. */
436 HOST_WIDE_INT frame_size;
439 /* Define the structure for the machine field in struct function. */
441 struct GTY(()) machine_function
443 struct s390_frame_layout frame_layout;
445 /* Literal pool base register. */
446 rtx base_reg;
448 /* True if we may need to perform branch splitting. */
449 bool split_branches_pending_p;
451 bool has_landing_pad_p;
453 /* True if the current function may contain a tbegin clobbering
454 FPRs. */
455 bool tbegin_p;
457 /* For -fsplit-stack support: A stack local which holds a pointer to
458 the stack arguments for a function with a variable number of
459 arguments. This is set at the start of the function and is used
460 to initialize the overflow_arg_area field of the va_list
461 structure. */
462 rtx split_stack_varargs_pointer;
465 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
467 #define cfun_frame_layout (cfun->machine->frame_layout)
468 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
469 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
470 ? cfun_frame_layout.fpr_bitmap & 0x0f \
471 : cfun_frame_layout.fpr_bitmap & 0x03))
472 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
473 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
474 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
475 (1 << (REGNO - FPR0_REGNUM)))
476 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
477 (1 << (REGNO - FPR0_REGNUM))))
478 #define cfun_gpr_save_slot(REGNO) \
479 cfun->machine->frame_layout.gpr_save_slots[REGNO]
481 /* Number of GPRs and FPRs used for argument passing. */
482 #define GP_ARG_NUM_REG 5
483 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
484 #define VEC_ARG_NUM_REG 8
486 /* A couple of shortcuts. */
487 #define CONST_OK_FOR_J(x) \
488 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
489 #define CONST_OK_FOR_K(x) \
490 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
491 #define CONST_OK_FOR_Os(x) \
492 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
493 #define CONST_OK_FOR_Op(x) \
494 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
495 #define CONST_OK_FOR_On(x) \
496 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
498 #define REGNO_PAIR_OK(REGNO, MODE) \
499 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
501 /* That's the read ahead of the dynamic branch prediction unit in
502 bytes on a z10 (or higher) CPU. */
503 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
506 /* Indicate which ABI has been used for passing vector args.
507 0 - no vector type arguments have been passed where the ABI is relevant
508 1 - the old ABI has been used
509 2 - a vector type argument has been passed either in a vector register
510 or on the stack by value */
511 static int s390_vector_abi = 0;
513 /* Set the vector ABI marker if TYPE is subject to the vector ABI
514 switch. The vector ABI affects only vector data types. There are
515 two aspects of the vector ABI relevant here:
517 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
518 ABI and natural alignment with the old.
520 2. vector <= 16 bytes are passed in VRs or by value on the stack
521 with the new ABI but by reference on the stack with the old.
523 If ARG_P is true TYPE is used for a function argument or return
524 value. The ABI marker then is set for all vector data types. If
525 ARG_P is false only type 1 vectors are being checked. */
527 static void
528 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
530 static hash_set<const_tree> visited_types_hash;
532 if (s390_vector_abi)
533 return;
535 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
536 return;
538 if (visited_types_hash.contains (type))
539 return;
541 visited_types_hash.add (type);
543 if (VECTOR_TYPE_P (type))
545 int type_size = int_size_in_bytes (type);
547 /* Outside arguments only the alignment is changing and this
548 only happens for vector types >= 16 bytes. */
549 if (!arg_p && type_size < 16)
550 return;
552 /* In arguments vector types > 16 are passed as before (GCC
553 never enforced the bigger alignment for arguments which was
554 required by the old vector ABI). However, it might still be
555 ABI relevant due to the changed alignment if it is a struct
556 member. */
557 if (arg_p && type_size > 16 && !in_struct_p)
558 return;
560 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
562 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
564 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
565 natural alignment there will never be ABI dependent padding
566 in an array type. That's why we do not set in_struct_p to
567 true here. */
568 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
570 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
572 tree arg_chain;
574 /* Check the return type. */
575 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
577 for (arg_chain = TYPE_ARG_TYPES (type);
578 arg_chain;
579 arg_chain = TREE_CHAIN (arg_chain))
580 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
582 else if (RECORD_OR_UNION_TYPE_P (type))
584 tree field;
586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
588 if (TREE_CODE (field) != FIELD_DECL)
589 continue;
591 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
597 /* System z builtins. */
599 #include "s390-builtins.h"
601 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
603 #undef B_DEF
604 #undef OB_DEF
605 #undef OB_DEF_VAR
606 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
607 #define OB_DEF(...)
608 #define OB_DEF_VAR(...)
609 #include "s390-builtins.def"
613 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
615 #undef B_DEF
616 #undef OB_DEF
617 #undef OB_DEF_VAR
618 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
619 #define OB_DEF(...)
620 #define OB_DEF_VAR(...)
621 #include "s390-builtins.def"
625 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
627 #undef B_DEF
628 #undef OB_DEF
629 #undef OB_DEF_VAR
630 #define B_DEF(...)
631 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
632 #define OB_DEF_VAR(...)
633 #include "s390-builtins.def"
637 const unsigned int
638 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
640 #undef B_DEF
641 #undef OB_DEF
642 #undef OB_DEF_VAR
643 #define B_DEF(...)
644 #define OB_DEF(...)
645 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
646 #include "s390-builtins.def"
650 const unsigned int
651 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
653 #undef B_DEF
654 #undef OB_DEF
655 #undef OB_DEF_VAR
656 #define B_DEF(...)
657 #define OB_DEF(...)
658 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
659 #include "s390-builtins.def"
663 tree s390_builtin_types[BT_MAX];
664 tree s390_builtin_fn_types[BT_FN_MAX];
665 tree s390_builtin_decls[S390_BUILTIN_MAX +
666 S390_OVERLOADED_BUILTIN_MAX +
667 S390_OVERLOADED_BUILTIN_VAR_MAX];
669 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
670 #undef B_DEF
671 #undef OB_DEF
672 #undef OB_DEF_VAR
673 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
674 #define OB_DEF(...)
675 #define OB_DEF_VAR(...)
677 #include "s390-builtins.def"
678 CODE_FOR_nothing
681 static void
682 s390_init_builtins (void)
684 /* These definitions are being used in s390-builtins.def. */
685 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
686 NULL, NULL);
687 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
688 tree c_uint64_type_node;
690 /* The uint64_type_node from tree.c is not compatible to the C99
691 uint64_t data type. What we want is c_uint64_type_node from
692 c-common.c. But since backend code is not supposed to interface
693 with the frontend we recreate it here. */
694 if (TARGET_64BIT)
695 c_uint64_type_node = long_unsigned_type_node;
696 else
697 c_uint64_type_node = long_long_unsigned_type_node;
699 #undef DEF_TYPE
700 #define DEF_TYPE(INDEX, NODE, CONST_P) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = (!CONST_P) ? \
703 (NODE) : build_type_variant ((NODE), 1, 0);
705 #undef DEF_POINTER_TYPE
706 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
707 if (s390_builtin_types[INDEX] == NULL) \
708 s390_builtin_types[INDEX] = \
709 build_pointer_type (s390_builtin_types[INDEX_BASE]);
711 #undef DEF_DISTINCT_TYPE
712 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
713 if (s390_builtin_types[INDEX] == NULL) \
714 s390_builtin_types[INDEX] = \
715 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
717 #undef DEF_VECTOR_TYPE
718 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
719 if (s390_builtin_types[INDEX] == NULL) \
720 s390_builtin_types[INDEX] = \
721 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
723 #undef DEF_OPAQUE_VECTOR_TYPE
724 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
725 if (s390_builtin_types[INDEX] == NULL) \
726 s390_builtin_types[INDEX] = \
727 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
729 #undef DEF_FN_TYPE
730 #define DEF_FN_TYPE(INDEX, args...) \
731 if (s390_builtin_fn_types[INDEX] == NULL) \
732 s390_builtin_fn_types[INDEX] = \
733 build_function_type_list (args, NULL_TREE);
734 #undef DEF_OV_TYPE
735 #define DEF_OV_TYPE(...)
736 #include "s390-builtin-types.def"
738 #undef B_DEF
739 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
740 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
741 s390_builtin_decls[S390_BUILTIN_##NAME] = \
742 add_builtin_function ("__builtin_" #NAME, \
743 s390_builtin_fn_types[FNTYPE], \
744 S390_BUILTIN_##NAME, \
745 BUILT_IN_MD, \
746 NULL, \
747 ATTRS);
748 #undef OB_DEF
749 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
750 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
751 == NULL) \
752 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
753 add_builtin_function ("__builtin_" #NAME, \
754 s390_builtin_fn_types[FNTYPE], \
755 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
756 BUILT_IN_MD, \
757 NULL, \
759 #undef OB_DEF_VAR
760 #define OB_DEF_VAR(...)
761 #include "s390-builtins.def"
765 /* Return true if ARG is appropriate as argument number ARGNUM of
766 builtin DECL. The operand flags from s390-builtins.def have to
767 passed as OP_FLAGS. */
768 bool
769 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
771 if (O_UIMM_P (op_flags))
773 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
774 int bitwidth = bitwidths[op_flags - O_U1];
776 if (!tree_fits_uhwi_p (arg)
777 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
779 error("constant argument %d for builtin %qF is out of range (0.."
780 HOST_WIDE_INT_PRINT_UNSIGNED ")",
781 argnum, decl,
782 (HOST_WIDE_INT_1U << bitwidth) - 1);
783 return false;
787 if (O_SIMM_P (op_flags))
789 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
790 int bitwidth = bitwidths[op_flags - O_S2];
792 if (!tree_fits_shwi_p (arg)
793 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
794 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
796 error("constant argument %d for builtin %qF is out of range ("
797 HOST_WIDE_INT_PRINT_DEC ".."
798 HOST_WIDE_INT_PRINT_DEC ")",
799 argnum, decl,
800 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
801 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
802 return false;
805 return true;
808 /* Expand an expression EXP that calls a built-in function,
809 with result going to TARGET if that's convenient
810 (and in mode MODE if that's convenient).
811 SUBTARGET may be used as the target for computing one of EXP's operands.
812 IGNORE is nonzero if the value is to be ignored. */
814 static rtx
815 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
816 machine_mode mode ATTRIBUTE_UNUSED,
817 int ignore ATTRIBUTE_UNUSED)
819 #define MAX_ARGS 6
821 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
822 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
823 enum insn_code icode;
824 rtx op[MAX_ARGS], pat;
825 int arity;
826 bool nonvoid;
827 tree arg;
828 call_expr_arg_iterator iter;
829 unsigned int all_op_flags = opflags_for_builtin (fcode);
830 machine_mode last_vec_mode = VOIDmode;
832 if (TARGET_DEBUG_ARG)
834 fprintf (stderr,
835 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
836 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
837 bflags_for_builtin (fcode));
840 if (S390_USE_TARGET_ATTRIBUTE)
842 unsigned int bflags;
844 bflags = bflags_for_builtin (fcode);
845 if ((bflags & B_HTM) && !TARGET_HTM)
847 error ("builtin %qF is not supported without -mhtm "
848 "(default with -march=zEC12 and higher).", fndecl);
849 return const0_rtx;
851 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
853 error ("builtin %qF requires -mvx "
854 "(default with -march=z13 and higher).", fndecl);
855 return const0_rtx;
858 if ((bflags & B_VXE) && !TARGET_VXE)
860 error ("Builtin %qF requires z14 or higher.", fndecl);
861 return const0_rtx;
864 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
865 && fcode < S390_ALL_BUILTIN_MAX)
867 gcc_unreachable ();
869 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
871 icode = code_for_builtin[fcode];
872 /* Set a flag in the machine specific cfun part in order to support
873 saving/restoring of FPRs. */
874 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
875 cfun->machine->tbegin_p = true;
877 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
879 error ("unresolved overloaded builtin");
880 return const0_rtx;
882 else
883 internal_error ("bad builtin fcode");
885 if (icode == 0)
886 internal_error ("bad builtin icode");
888 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
890 if (nonvoid)
892 machine_mode tmode = insn_data[icode].operand[0].mode;
893 if (!target
894 || GET_MODE (target) != tmode
895 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
896 target = gen_reg_rtx (tmode);
898 /* There are builtins (e.g. vec_promote) with no vector
899 arguments but an element selector. So we have to also look
900 at the vector return type when emitting the modulo
901 operation. */
902 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
903 last_vec_mode = insn_data[icode].operand[0].mode;
906 arity = 0;
907 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
909 rtx tmp_rtx;
910 const struct insn_operand_data *insn_op;
911 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
913 all_op_flags = all_op_flags >> O_SHIFT;
915 if (arg == error_mark_node)
916 return NULL_RTX;
917 if (arity >= MAX_ARGS)
918 return NULL_RTX;
920 if (O_IMM_P (op_flags)
921 && TREE_CODE (arg) != INTEGER_CST)
923 error ("constant value required for builtin %qF argument %d",
924 fndecl, arity + 1);
925 return const0_rtx;
928 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
929 return const0_rtx;
931 insn_op = &insn_data[icode].operand[arity + nonvoid];
932 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
934 /* expand_expr truncates constants to the target mode only if it
935 is "convenient". However, our checks below rely on this
936 being done. */
937 if (CONST_INT_P (op[arity])
938 && SCALAR_INT_MODE_P (insn_op->mode)
939 && GET_MODE (op[arity]) != insn_op->mode)
940 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
941 insn_op->mode));
943 /* Wrap the expanded RTX for pointer types into a MEM expr with
944 the proper mode. This allows us to use e.g. (match_operand
945 "memory_operand"..) in the insn patterns instead of (mem
946 (match_operand "address_operand)). This is helpful for
947 patterns not just accepting MEMs. */
948 if (POINTER_TYPE_P (TREE_TYPE (arg))
949 && insn_op->predicate != address_operand)
950 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
952 /* Expand the module operation required on element selectors. */
953 if (op_flags == O_ELEM)
955 gcc_assert (last_vec_mode != VOIDmode);
956 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
957 op[arity],
958 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
959 NULL_RTX, 1, OPTAB_DIRECT);
962 /* Record the vector mode used for an element selector. This assumes:
963 1. There is no builtin with two different vector modes and an element selector
964 2. The element selector comes after the vector type it is referring to.
965 This currently the true for all the builtins but FIXME we
966 should better check for that. */
967 if (VECTOR_MODE_P (insn_op->mode))
968 last_vec_mode = insn_op->mode;
970 if (insn_op->predicate (op[arity], insn_op->mode))
972 arity++;
973 continue;
976 if (MEM_P (op[arity])
977 && insn_op->predicate == memory_operand
978 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
979 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
981 op[arity] = replace_equiv_address (op[arity],
982 copy_to_mode_reg (Pmode,
983 XEXP (op[arity], 0)));
985 /* Some of the builtins require different modes/types than the
986 pattern in order to implement a specific API. Instead of
987 adding many expanders which do the mode change we do it here.
988 E.g. s390_vec_add_u128 required to have vector unsigned char
989 arguments is mapped to addti3. */
990 else if (insn_op->mode != VOIDmode
991 && GET_MODE (op[arity]) != VOIDmode
992 && GET_MODE (op[arity]) != insn_op->mode
993 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
994 GET_MODE (op[arity]), 0))
995 != NULL_RTX))
997 op[arity] = tmp_rtx;
999 else if (GET_MODE (op[arity]) == insn_op->mode
1000 || GET_MODE (op[arity]) == VOIDmode
1001 || (insn_op->predicate == address_operand
1002 && GET_MODE (op[arity]) == Pmode))
1004 /* An address_operand usually has VOIDmode in the expander
1005 so we cannot use this. */
1006 machine_mode target_mode =
1007 (insn_op->predicate == address_operand
1008 ? (machine_mode) Pmode : insn_op->mode);
1009 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1012 if (!insn_op->predicate (op[arity], insn_op->mode))
1014 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1015 return const0_rtx;
1017 arity++;
1020 switch (arity)
1022 case 0:
1023 pat = GEN_FCN (icode) (target);
1024 break;
1025 case 1:
1026 if (nonvoid)
1027 pat = GEN_FCN (icode) (target, op[0]);
1028 else
1029 pat = GEN_FCN (icode) (op[0]);
1030 break;
1031 case 2:
1032 if (nonvoid)
1033 pat = GEN_FCN (icode) (target, op[0], op[1]);
1034 else
1035 pat = GEN_FCN (icode) (op[0], op[1]);
1036 break;
1037 case 3:
1038 if (nonvoid)
1039 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1040 else
1041 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1042 break;
1043 case 4:
1044 if (nonvoid)
1045 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1046 else
1047 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1048 break;
1049 case 5:
1050 if (nonvoid)
1051 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1052 else
1053 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1054 break;
1055 case 6:
1056 if (nonvoid)
1057 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1058 else
1059 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1060 break;
1061 default:
1062 gcc_unreachable ();
1064 if (!pat)
1065 return NULL_RTX;
1066 emit_insn (pat);
1068 if (nonvoid)
1069 return target;
1070 else
1071 return const0_rtx;
1075 static const int s390_hotpatch_hw_max = 1000000;
1076 static int s390_hotpatch_hw_before_label = 0;
1077 static int s390_hotpatch_hw_after_label = 0;
1079 /* Check whether the hotpatch attribute is applied to a function and, if it has
1080 an argument, the argument is valid. */
1082 static tree
1083 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1084 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1086 tree expr;
1087 tree expr2;
1088 int err;
1090 if (TREE_CODE (*node) != FUNCTION_DECL)
1092 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1093 name);
1094 *no_add_attrs = true;
1096 if (args != NULL && TREE_CHAIN (args) != NULL)
1098 expr = TREE_VALUE (args);
1099 expr2 = TREE_VALUE (TREE_CHAIN (args));
1101 if (args == NULL || TREE_CHAIN (args) == NULL)
1102 err = 1;
1103 else if (TREE_CODE (expr) != INTEGER_CST
1104 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1105 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1106 err = 1;
1107 else if (TREE_CODE (expr2) != INTEGER_CST
1108 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1109 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1110 err = 1;
1111 else
1112 err = 0;
1113 if (err)
1115 error ("requested %qE attribute is not a comma separated pair of"
1116 " non-negative integer constants or too large (max. %d)", name,
1117 s390_hotpatch_hw_max);
1118 *no_add_attrs = true;
1121 return NULL_TREE;
1124 /* Expand the s390_vector_bool type attribute. */
1126 static tree
1127 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1128 tree args ATTRIBUTE_UNUSED,
1129 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1131 tree type = *node, result = NULL_TREE;
1132 machine_mode mode;
1134 while (POINTER_TYPE_P (type)
1135 || TREE_CODE (type) == FUNCTION_TYPE
1136 || TREE_CODE (type) == METHOD_TYPE
1137 || TREE_CODE (type) == ARRAY_TYPE)
1138 type = TREE_TYPE (type);
1140 mode = TYPE_MODE (type);
1141 switch (mode)
1143 case E_DImode: case E_V2DImode:
1144 result = s390_builtin_types[BT_BV2DI];
1145 break;
1146 case E_SImode: case E_V4SImode:
1147 result = s390_builtin_types[BT_BV4SI];
1148 break;
1149 case E_HImode: case E_V8HImode:
1150 result = s390_builtin_types[BT_BV8HI];
1151 break;
1152 case E_QImode: case E_V16QImode:
1153 result = s390_builtin_types[BT_BV16QI];
1154 break;
1155 default:
1156 break;
1159 *no_add_attrs = true; /* No need to hang on to the attribute. */
1161 if (result)
1162 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1164 return NULL_TREE;
1167 static const struct attribute_spec s390_attribute_table[] = {
1168 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1169 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1170 /* End element. */
1171 { NULL, 0, 0, false, false, false, NULL, false }
1174 /* Return the alignment for LABEL. We default to the -falign-labels
1175 value except for the literal pool base label. */
1177 s390_label_align (rtx_insn *label)
1179 rtx_insn *prev_insn = prev_active_insn (label);
1180 rtx set, src;
1182 if (prev_insn == NULL_RTX)
1183 goto old;
1185 set = single_set (prev_insn);
1187 if (set == NULL_RTX)
1188 goto old;
1190 src = SET_SRC (set);
1192 /* Don't align literal pool base labels. */
1193 if (GET_CODE (src) == UNSPEC
1194 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1195 return 0;
1197 old:
1198 return align_labels_log;
1201 static GTY(()) rtx got_symbol;
1203 /* Return the GOT table symbol. The symbol will be created when the
1204 function is invoked for the first time. */
1206 static rtx
1207 s390_got_symbol (void)
1209 if (!got_symbol)
1211 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1212 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1215 return got_symbol;
1218 static scalar_int_mode
1219 s390_libgcc_cmp_return_mode (void)
1221 return TARGET_64BIT ? DImode : SImode;
1224 static scalar_int_mode
1225 s390_libgcc_shift_count_mode (void)
1227 return TARGET_64BIT ? DImode : SImode;
1230 static scalar_int_mode
1231 s390_unwind_word_mode (void)
1233 return TARGET_64BIT ? DImode : SImode;
1236 /* Return true if the back end supports mode MODE. */
1237 static bool
1238 s390_scalar_mode_supported_p (scalar_mode mode)
1240 /* In contrast to the default implementation reject TImode constants on 31bit
1241 TARGET_ZARCH for ABI compliance. */
1242 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1243 return false;
1245 if (DECIMAL_FLOAT_MODE_P (mode))
1246 return default_decimal_float_supported_p ();
1248 return default_scalar_mode_supported_p (mode);
1251 /* Return true if the back end supports vector mode MODE. */
1252 static bool
1253 s390_vector_mode_supported_p (machine_mode mode)
1255 machine_mode inner;
1257 if (!VECTOR_MODE_P (mode)
1258 || !TARGET_VX
1259 || GET_MODE_SIZE (mode) > 16)
1260 return false;
1262 inner = GET_MODE_INNER (mode);
1264 switch (inner)
1266 case E_QImode:
1267 case E_HImode:
1268 case E_SImode:
1269 case E_DImode:
1270 case E_TImode:
1271 case E_SFmode:
1272 case E_DFmode:
1273 case E_TFmode:
1274 return true;
1275 default:
1276 return false;
1280 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1282 void
1283 s390_set_has_landing_pad_p (bool value)
1285 cfun->machine->has_landing_pad_p = value;
1288 /* If two condition code modes are compatible, return a condition code
1289 mode which is compatible with both. Otherwise, return
1290 VOIDmode. */
1292 static machine_mode
1293 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1295 if (m1 == m2)
1296 return m1;
1298 switch (m1)
1300 case E_CCZmode:
1301 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1302 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1303 return m2;
1304 return VOIDmode;
1306 case E_CCSmode:
1307 case E_CCUmode:
1308 case E_CCTmode:
1309 case E_CCSRmode:
1310 case E_CCURmode:
1311 case E_CCZ1mode:
1312 if (m2 == CCZmode)
1313 return m1;
1315 return VOIDmode;
1317 default:
1318 return VOIDmode;
1320 return VOIDmode;
1323 /* Return true if SET either doesn't set the CC register, or else
1324 the source and destination have matching CC modes and that
1325 CC mode is at least as constrained as REQ_MODE. */
1327 static bool
1328 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1330 machine_mode set_mode;
1332 gcc_assert (GET_CODE (set) == SET);
1334 /* These modes are supposed to be used only in CC consumer
1335 patterns. */
1336 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1337 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1339 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1340 return 1;
1342 set_mode = GET_MODE (SET_DEST (set));
1343 switch (set_mode)
1345 case E_CCZ1mode:
1346 case E_CCSmode:
1347 case E_CCSRmode:
1348 case E_CCUmode:
1349 case E_CCURmode:
1350 case E_CCLmode:
1351 case E_CCL1mode:
1352 case E_CCL2mode:
1353 case E_CCL3mode:
1354 case E_CCT1mode:
1355 case E_CCT2mode:
1356 case E_CCT3mode:
1357 case E_CCVEQmode:
1358 case E_CCVIHmode:
1359 case E_CCVIHUmode:
1360 case E_CCVFHmode:
1361 case E_CCVFHEmode:
1362 if (req_mode != set_mode)
1363 return 0;
1364 break;
1366 case E_CCZmode:
1367 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1368 && req_mode != CCSRmode && req_mode != CCURmode
1369 && req_mode != CCZ1mode)
1370 return 0;
1371 break;
1373 case E_CCAPmode:
1374 case E_CCANmode:
1375 if (req_mode != CCAmode)
1376 return 0;
1377 break;
1379 default:
1380 gcc_unreachable ();
1383 return (GET_MODE (SET_SRC (set)) == set_mode);
1386 /* Return true if every SET in INSN that sets the CC register
1387 has source and destination with matching CC modes and that
1388 CC mode is at least as constrained as REQ_MODE.
1389 If REQ_MODE is VOIDmode, always return false. */
1391 bool
1392 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1394 int i;
1396 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1397 if (req_mode == VOIDmode)
1398 return false;
1400 if (GET_CODE (PATTERN (insn)) == SET)
1401 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1403 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1404 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1406 rtx set = XVECEXP (PATTERN (insn), 0, i);
1407 if (GET_CODE (set) == SET)
1408 if (!s390_match_ccmode_set (set, req_mode))
1409 return false;
1412 return true;
1415 /* If a test-under-mask instruction can be used to implement
1416 (compare (and ... OP1) OP2), return the CC mode required
1417 to do that. Otherwise, return VOIDmode.
1418 MIXED is true if the instruction can distinguish between
1419 CC1 and CC2 for mixed selected bits (TMxx), it is false
1420 if the instruction cannot (TM). */
1422 machine_mode
1423 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1425 int bit0, bit1;
1427 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1428 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1429 return VOIDmode;
1431 /* Selected bits all zero: CC0.
1432 e.g.: int a; if ((a & (16 + 128)) == 0) */
1433 if (INTVAL (op2) == 0)
1434 return CCTmode;
1436 /* Selected bits all one: CC3.
1437 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1438 if (INTVAL (op2) == INTVAL (op1))
1439 return CCT3mode;
1441 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1442 int a;
1443 if ((a & (16 + 128)) == 16) -> CCT1
1444 if ((a & (16 + 128)) == 128) -> CCT2 */
1445 if (mixed)
1447 bit1 = exact_log2 (INTVAL (op2));
1448 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1449 if (bit0 != -1 && bit1 != -1)
1450 return bit0 > bit1 ? CCT1mode : CCT2mode;
1453 return VOIDmode;
1456 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1457 OP0 and OP1 of a COMPARE, return the mode to be used for the
1458 comparison. */
1460 machine_mode
1461 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1463 switch (code)
1465 case EQ:
1466 case NE:
1467 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1468 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1469 return CCAPmode;
1470 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1471 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1472 return CCAPmode;
1473 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1474 || GET_CODE (op1) == NEG)
1475 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1476 return CCLmode;
1478 if (GET_CODE (op0) == AND)
1480 /* Check whether we can potentially do it via TM. */
1481 machine_mode ccmode;
1482 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1483 if (ccmode != VOIDmode)
1485 /* Relax CCTmode to CCZmode to allow fall-back to AND
1486 if that turns out to be beneficial. */
1487 return ccmode == CCTmode ? CCZmode : ccmode;
1491 if (register_operand (op0, HImode)
1492 && GET_CODE (op1) == CONST_INT
1493 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1494 return CCT3mode;
1495 if (register_operand (op0, QImode)
1496 && GET_CODE (op1) == CONST_INT
1497 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1498 return CCT3mode;
1500 return CCZmode;
1502 case LE:
1503 case LT:
1504 case GE:
1505 case GT:
1506 /* The only overflow condition of NEG and ABS happens when
1507 -INT_MAX is used as parameter, which stays negative. So
1508 we have an overflow from a positive value to a negative.
1509 Using CCAP mode the resulting cc can be used for comparisons. */
1510 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1511 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1512 return CCAPmode;
1514 /* If constants are involved in an add instruction it is possible to use
1515 the resulting cc for comparisons with zero. Knowing the sign of the
1516 constant the overflow behavior gets predictable. e.g.:
1517 int a, b; if ((b = a + c) > 0)
1518 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1519 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1520 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1521 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1522 /* Avoid INT32_MIN on 32 bit. */
1523 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1525 if (INTVAL (XEXP((op0), 1)) < 0)
1526 return CCANmode;
1527 else
1528 return CCAPmode;
1530 /* Fall through. */
1531 case UNORDERED:
1532 case ORDERED:
1533 case UNEQ:
1534 case UNLE:
1535 case UNLT:
1536 case UNGE:
1537 case UNGT:
1538 case LTGT:
1539 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1540 && GET_CODE (op1) != CONST_INT)
1541 return CCSRmode;
1542 return CCSmode;
1544 case LTU:
1545 case GEU:
1546 if (GET_CODE (op0) == PLUS
1547 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1548 return CCL1mode;
1550 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1551 && GET_CODE (op1) != CONST_INT)
1552 return CCURmode;
1553 return CCUmode;
1555 case LEU:
1556 case GTU:
1557 if (GET_CODE (op0) == MINUS
1558 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1559 return CCL2mode;
1561 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1562 && GET_CODE (op1) != CONST_INT)
1563 return CCURmode;
1564 return CCUmode;
1566 default:
1567 gcc_unreachable ();
1571 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1572 that we can implement more efficiently. */
1574 static void
1575 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1576 bool op0_preserve_value)
1578 if (op0_preserve_value)
1579 return;
1581 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1582 if ((*code == EQ || *code == NE)
1583 && *op1 == const0_rtx
1584 && GET_CODE (*op0) == ZERO_EXTRACT
1585 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1586 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1587 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1589 rtx inner = XEXP (*op0, 0);
1590 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1591 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1592 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1594 if (len > 0 && len < modesize
1595 && pos >= 0 && pos + len <= modesize
1596 && modesize <= HOST_BITS_PER_WIDE_INT)
1598 unsigned HOST_WIDE_INT block;
1599 block = (HOST_WIDE_INT_1U << len) - 1;
1600 block <<= modesize - pos - len;
1602 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1603 gen_int_mode (block, GET_MODE (inner)));
1607 /* Narrow AND of memory against immediate to enable TM. */
1608 if ((*code == EQ || *code == NE)
1609 && *op1 == const0_rtx
1610 && GET_CODE (*op0) == AND
1611 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1612 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1614 rtx inner = XEXP (*op0, 0);
1615 rtx mask = XEXP (*op0, 1);
1617 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1618 if (GET_CODE (inner) == SUBREG
1619 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1620 && (GET_MODE_SIZE (GET_MODE (inner))
1621 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1622 && ((INTVAL (mask)
1623 & GET_MODE_MASK (GET_MODE (inner))
1624 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1625 == 0))
1626 inner = SUBREG_REG (inner);
1628 /* Do not change volatile MEMs. */
1629 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1631 int part = s390_single_part (XEXP (*op0, 1),
1632 GET_MODE (inner), QImode, 0);
1633 if (part >= 0)
1635 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1636 inner = adjust_address_nv (inner, QImode, part);
1637 *op0 = gen_rtx_AND (QImode, inner, mask);
1642 /* Narrow comparisons against 0xffff to HImode if possible. */
1643 if ((*code == EQ || *code == NE)
1644 && GET_CODE (*op1) == CONST_INT
1645 && INTVAL (*op1) == 0xffff
1646 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1647 && (nonzero_bits (*op0, GET_MODE (*op0))
1648 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1650 *op0 = gen_lowpart (HImode, *op0);
1651 *op1 = constm1_rtx;
1654 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1655 if (GET_CODE (*op0) == UNSPEC
1656 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1657 && XVECLEN (*op0, 0) == 1
1658 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1659 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1660 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1661 && *op1 == const0_rtx)
1663 enum rtx_code new_code = UNKNOWN;
1664 switch (*code)
1666 case EQ: new_code = EQ; break;
1667 case NE: new_code = NE; break;
1668 case LT: new_code = GTU; break;
1669 case GT: new_code = LTU; break;
1670 case LE: new_code = GEU; break;
1671 case GE: new_code = LEU; break;
1672 default: break;
1675 if (new_code != UNKNOWN)
1677 *op0 = XVECEXP (*op0, 0, 0);
1678 *code = new_code;
1682 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1683 if (GET_CODE (*op0) == UNSPEC
1684 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1685 && XVECLEN (*op0, 0) == 1
1686 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1687 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1688 && CONST_INT_P (*op1))
1690 enum rtx_code new_code = UNKNOWN;
1691 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1693 case E_CCZmode:
1694 case E_CCRAWmode:
1695 switch (*code)
1697 case EQ: new_code = EQ; break;
1698 case NE: new_code = NE; break;
1699 default: break;
1701 break;
1702 default: break;
1705 if (new_code != UNKNOWN)
1707 /* For CCRAWmode put the required cc mask into the second
1708 operand. */
1709 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1710 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1711 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1712 *op0 = XVECEXP (*op0, 0, 0);
1713 *code = new_code;
1717 /* Simplify cascaded EQ, NE with const0_rtx. */
1718 if ((*code == NE || *code == EQ)
1719 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1720 && GET_MODE (*op0) == SImode
1721 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1722 && REG_P (XEXP (*op0, 0))
1723 && XEXP (*op0, 1) == const0_rtx
1724 && *op1 == const0_rtx)
1726 if ((*code == EQ && GET_CODE (*op0) == NE)
1727 || (*code == NE && GET_CODE (*op0) == EQ))
1728 *code = EQ;
1729 else
1730 *code = NE;
1731 *op0 = XEXP (*op0, 0);
1734 /* Prefer register over memory as first operand. */
1735 if (MEM_P (*op0) && REG_P (*op1))
1737 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1738 *code = (int)swap_condition ((enum rtx_code)*code);
1741 /* A comparison result is compared against zero. Replace it with
1742 the (perhaps inverted) original comparison.
1743 This probably should be done by simplify_relational_operation. */
1744 if ((*code == EQ || *code == NE)
1745 && *op1 == const0_rtx
1746 && COMPARISON_P (*op0)
1747 && CC_REG_P (XEXP (*op0, 0)))
1749 enum rtx_code new_code;
1751 if (*code == EQ)
1752 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1753 XEXP (*op0, 0),
1754 XEXP (*op1, 0), NULL);
1755 else
1756 new_code = GET_CODE (*op0);
1758 if (new_code != UNKNOWN)
1760 *code = new_code;
1761 *op1 = XEXP (*op0, 1);
1762 *op0 = XEXP (*op0, 0);
1768 /* Emit a compare instruction suitable to implement the comparison
1769 OP0 CODE OP1. Return the correct condition RTL to be placed in
1770 the IF_THEN_ELSE of the conditional branch testing the result. */
1773 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1775 machine_mode mode = s390_select_ccmode (code, op0, op1);
1776 rtx cc;
1778 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1780 /* Do not output a redundant compare instruction if a
1781 compare_and_swap pattern already computed the result and the
1782 machine modes are compatible. */
1783 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1784 == GET_MODE (op0));
1785 cc = op0;
1787 else
1789 cc = gen_rtx_REG (mode, CC_REGNUM);
1790 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1793 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1796 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1797 matches CMP.
1798 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1799 conditional branch testing the result. */
1801 static rtx
1802 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1803 rtx cmp, rtx new_rtx, machine_mode ccmode)
1805 rtx cc;
1807 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1808 switch (GET_MODE (mem))
1810 case E_SImode:
1811 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1812 new_rtx, cc));
1813 break;
1814 case E_DImode:
1815 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1816 new_rtx, cc));
1817 break;
1818 case E_TImode:
1819 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1820 new_rtx, cc));
1821 break;
1822 case E_QImode:
1823 case E_HImode:
1824 default:
1825 gcc_unreachable ();
1827 return s390_emit_compare (code, cc, const0_rtx);
1830 /* Emit a jump instruction to TARGET and return it. If COND is
1831 NULL_RTX, emit an unconditional jump, else a conditional jump under
1832 condition COND. */
1834 rtx_insn *
1835 s390_emit_jump (rtx target, rtx cond)
1837 rtx insn;
1839 target = gen_rtx_LABEL_REF (VOIDmode, target);
1840 if (cond)
1841 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1843 insn = gen_rtx_SET (pc_rtx, target);
1844 return emit_jump_insn (insn);
1847 /* Return branch condition mask to implement a branch
1848 specified by CODE. Return -1 for invalid comparisons. */
1851 s390_branch_condition_mask (rtx code)
1853 const int CC0 = 1 << 3;
1854 const int CC1 = 1 << 2;
1855 const int CC2 = 1 << 1;
1856 const int CC3 = 1 << 0;
1858 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1859 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1860 gcc_assert (XEXP (code, 1) == const0_rtx
1861 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1862 && CONST_INT_P (XEXP (code, 1))));
1865 switch (GET_MODE (XEXP (code, 0)))
1867 case E_CCZmode:
1868 case E_CCZ1mode:
1869 switch (GET_CODE (code))
1871 case EQ: return CC0;
1872 case NE: return CC1 | CC2 | CC3;
1873 default: return -1;
1875 break;
1877 case E_CCT1mode:
1878 switch (GET_CODE (code))
1880 case EQ: return CC1;
1881 case NE: return CC0 | CC2 | CC3;
1882 default: return -1;
1884 break;
1886 case E_CCT2mode:
1887 switch (GET_CODE (code))
1889 case EQ: return CC2;
1890 case NE: return CC0 | CC1 | CC3;
1891 default: return -1;
1893 break;
1895 case E_CCT3mode:
1896 switch (GET_CODE (code))
1898 case EQ: return CC3;
1899 case NE: return CC0 | CC1 | CC2;
1900 default: return -1;
1902 break;
1904 case E_CCLmode:
1905 switch (GET_CODE (code))
1907 case EQ: return CC0 | CC2;
1908 case NE: return CC1 | CC3;
1909 default: return -1;
1911 break;
1913 case E_CCL1mode:
1914 switch (GET_CODE (code))
1916 case LTU: return CC2 | CC3; /* carry */
1917 case GEU: return CC0 | CC1; /* no carry */
1918 default: return -1;
1920 break;
1922 case E_CCL2mode:
1923 switch (GET_CODE (code))
1925 case GTU: return CC0 | CC1; /* borrow */
1926 case LEU: return CC2 | CC3; /* no borrow */
1927 default: return -1;
1929 break;
1931 case E_CCL3mode:
1932 switch (GET_CODE (code))
1934 case EQ: return CC0 | CC2;
1935 case NE: return CC1 | CC3;
1936 case LTU: return CC1;
1937 case GTU: return CC3;
1938 case LEU: return CC1 | CC2;
1939 case GEU: return CC2 | CC3;
1940 default: return -1;
1943 case E_CCUmode:
1944 switch (GET_CODE (code))
1946 case EQ: return CC0;
1947 case NE: return CC1 | CC2 | CC3;
1948 case LTU: return CC1;
1949 case GTU: return CC2;
1950 case LEU: return CC0 | CC1;
1951 case GEU: return CC0 | CC2;
1952 default: return -1;
1954 break;
1956 case E_CCURmode:
1957 switch (GET_CODE (code))
1959 case EQ: return CC0;
1960 case NE: return CC2 | CC1 | CC3;
1961 case LTU: return CC2;
1962 case GTU: return CC1;
1963 case LEU: return CC0 | CC2;
1964 case GEU: return CC0 | CC1;
1965 default: return -1;
1967 break;
1969 case E_CCAPmode:
1970 switch (GET_CODE (code))
1972 case EQ: return CC0;
1973 case NE: return CC1 | CC2 | CC3;
1974 case LT: return CC1 | CC3;
1975 case GT: return CC2;
1976 case LE: return CC0 | CC1 | CC3;
1977 case GE: return CC0 | CC2;
1978 default: return -1;
1980 break;
1982 case E_CCANmode:
1983 switch (GET_CODE (code))
1985 case EQ: return CC0;
1986 case NE: return CC1 | CC2 | CC3;
1987 case LT: return CC1;
1988 case GT: return CC2 | CC3;
1989 case LE: return CC0 | CC1;
1990 case GE: return CC0 | CC2 | CC3;
1991 default: return -1;
1993 break;
1995 case E_CCSmode:
1996 switch (GET_CODE (code))
1998 case EQ: return CC0;
1999 case NE: return CC1 | CC2 | CC3;
2000 case LT: return CC1;
2001 case GT: return CC2;
2002 case LE: return CC0 | CC1;
2003 case GE: return CC0 | CC2;
2004 case UNORDERED: return CC3;
2005 case ORDERED: return CC0 | CC1 | CC2;
2006 case UNEQ: return CC0 | CC3;
2007 case UNLT: return CC1 | CC3;
2008 case UNGT: return CC2 | CC3;
2009 case UNLE: return CC0 | CC1 | CC3;
2010 case UNGE: return CC0 | CC2 | CC3;
2011 case LTGT: return CC1 | CC2;
2012 default: return -1;
2014 break;
2016 case E_CCSRmode:
2017 switch (GET_CODE (code))
2019 case EQ: return CC0;
2020 case NE: return CC2 | CC1 | CC3;
2021 case LT: return CC2;
2022 case GT: return CC1;
2023 case LE: return CC0 | CC2;
2024 case GE: return CC0 | CC1;
2025 case UNORDERED: return CC3;
2026 case ORDERED: return CC0 | CC2 | CC1;
2027 case UNEQ: return CC0 | CC3;
2028 case UNLT: return CC2 | CC3;
2029 case UNGT: return CC1 | CC3;
2030 case UNLE: return CC0 | CC2 | CC3;
2031 case UNGE: return CC0 | CC1 | CC3;
2032 case LTGT: return CC2 | CC1;
2033 default: return -1;
2035 break;
2037 /* Vector comparison modes. */
2038 /* CC2 will never be set. It however is part of the negated
2039 masks. */
2040 case E_CCVIALLmode:
2041 switch (GET_CODE (code))
2043 case EQ:
2044 case GTU:
2045 case GT:
2046 case GE: return CC0;
2047 /* The inverted modes are in fact *any* modes. */
2048 case NE:
2049 case LEU:
2050 case LE:
2051 case LT: return CC3 | CC1 | CC2;
2052 default: return -1;
2055 case E_CCVIANYmode:
2056 switch (GET_CODE (code))
2058 case EQ:
2059 case GTU:
2060 case GT:
2061 case GE: return CC0 | CC1;
2062 /* The inverted modes are in fact *all* modes. */
2063 case NE:
2064 case LEU:
2065 case LE:
2066 case LT: return CC3 | CC2;
2067 default: return -1;
2069 case E_CCVFALLmode:
2070 switch (GET_CODE (code))
2072 case EQ:
2073 case GT:
2074 case GE: return CC0;
2075 /* The inverted modes are in fact *any* modes. */
2076 case NE:
2077 case UNLE:
2078 case UNLT: return CC3 | CC1 | CC2;
2079 default: return -1;
2082 case E_CCVFANYmode:
2083 switch (GET_CODE (code))
2085 case EQ:
2086 case GT:
2087 case GE: return CC0 | CC1;
2088 /* The inverted modes are in fact *all* modes. */
2089 case NE:
2090 case UNLE:
2091 case UNLT: return CC3 | CC2;
2092 default: return -1;
2095 case E_CCRAWmode:
2096 switch (GET_CODE (code))
2098 case EQ:
2099 return INTVAL (XEXP (code, 1));
2100 case NE:
2101 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2102 default:
2103 gcc_unreachable ();
2106 default:
2107 return -1;
2112 /* Return branch condition mask to implement a compare and branch
2113 specified by CODE. Return -1 for invalid comparisons. */
2116 s390_compare_and_branch_condition_mask (rtx code)
2118 const int CC0 = 1 << 3;
2119 const int CC1 = 1 << 2;
2120 const int CC2 = 1 << 1;
2122 switch (GET_CODE (code))
2124 case EQ:
2125 return CC0;
2126 case NE:
2127 return CC1 | CC2;
2128 case LT:
2129 case LTU:
2130 return CC1;
2131 case GT:
2132 case GTU:
2133 return CC2;
2134 case LE:
2135 case LEU:
2136 return CC0 | CC1;
2137 case GE:
2138 case GEU:
2139 return CC0 | CC2;
2140 default:
2141 gcc_unreachable ();
2143 return -1;
2146 /* If INV is false, return assembler mnemonic string to implement
2147 a branch specified by CODE. If INV is true, return mnemonic
2148 for the corresponding inverted branch. */
2150 static const char *
2151 s390_branch_condition_mnemonic (rtx code, int inv)
2153 int mask;
2155 static const char *const mnemonic[16] =
2157 NULL, "o", "h", "nle",
2158 "l", "nhe", "lh", "ne",
2159 "e", "nlh", "he", "nl",
2160 "le", "nh", "no", NULL
2163 if (GET_CODE (XEXP (code, 0)) == REG
2164 && REGNO (XEXP (code, 0)) == CC_REGNUM
2165 && (XEXP (code, 1) == const0_rtx
2166 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2167 && CONST_INT_P (XEXP (code, 1)))))
2168 mask = s390_branch_condition_mask (code);
2169 else
2170 mask = s390_compare_and_branch_condition_mask (code);
2172 gcc_assert (mask >= 0);
2174 if (inv)
2175 mask ^= 15;
2177 gcc_assert (mask >= 1 && mask <= 14);
2179 return mnemonic[mask];
2182 /* Return the part of op which has a value different from def.
2183 The size of the part is determined by mode.
2184 Use this function only if you already know that op really
2185 contains such a part. */
2187 unsigned HOST_WIDE_INT
2188 s390_extract_part (rtx op, machine_mode mode, int def)
2190 unsigned HOST_WIDE_INT value = 0;
2191 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2192 int part_bits = GET_MODE_BITSIZE (mode);
2193 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2194 int i;
2196 for (i = 0; i < max_parts; i++)
2198 if (i == 0)
2199 value = UINTVAL (op);
2200 else
2201 value >>= part_bits;
2203 if ((value & part_mask) != (def & part_mask))
2204 return value & part_mask;
2207 gcc_unreachable ();
2210 /* If OP is an integer constant of mode MODE with exactly one
2211 part of mode PART_MODE unequal to DEF, return the number of that
2212 part. Otherwise, return -1. */
2215 s390_single_part (rtx op,
2216 machine_mode mode,
2217 machine_mode part_mode,
2218 int def)
2220 unsigned HOST_WIDE_INT value = 0;
2221 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2222 unsigned HOST_WIDE_INT part_mask
2223 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2224 int i, part = -1;
2226 if (GET_CODE (op) != CONST_INT)
2227 return -1;
2229 for (i = 0; i < n_parts; i++)
2231 if (i == 0)
2232 value = UINTVAL (op);
2233 else
2234 value >>= GET_MODE_BITSIZE (part_mode);
2236 if ((value & part_mask) != (def & part_mask))
2238 if (part != -1)
2239 return -1;
2240 else
2241 part = i;
2244 return part == -1 ? -1 : n_parts - 1 - part;
2247 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2248 bits and no other bits are set in (the lower SIZE bits of) IN.
2250 PSTART and PEND can be used to obtain the start and end
2251 position (inclusive) of the bitfield relative to 64
2252 bits. *PSTART / *PEND gives the position of the first/last bit
2253 of the bitfield counting from the highest order bit starting
2254 with zero. */
2256 bool
2257 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2258 int *pstart, int *pend)
2260 int start;
2261 int end = -1;
2262 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2263 int highbit = HOST_BITS_PER_WIDE_INT - size;
2264 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2266 gcc_assert (!!pstart == !!pend);
2267 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2268 if (end == -1)
2270 /* Look for the rightmost bit of a contiguous range of ones. */
2271 if (bitmask & in)
2272 /* Found it. */
2273 end = start;
2275 else
2277 /* Look for the firt zero bit after the range of ones. */
2278 if (! (bitmask & in))
2279 /* Found it. */
2280 break;
2282 /* We're one past the last one-bit. */
2283 start++;
2285 if (end == -1)
2286 /* No one bits found. */
2287 return false;
2289 if (start > highbit)
2291 unsigned HOST_WIDE_INT mask;
2293 /* Calculate a mask for all bits beyond the contiguous bits. */
2294 mask = ((~HOST_WIDE_INT_0U >> highbit)
2295 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2296 if (mask & in)
2297 /* There are more bits set beyond the first range of one bits. */
2298 return false;
2301 if (pstart)
2303 *pstart = start;
2304 *pend = end;
2307 return true;
2310 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2311 if ~IN contains a contiguous bitfield. In that case, *END is <
2312 *START.
2314 If WRAP_P is true, a bitmask that wraps around is also tested.
2315 When a wraparoud occurs *START is greater than *END (in
2316 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2317 part of the range. If WRAP_P is false, no wraparound is
2318 tested. */
2320 bool
2321 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2322 int size, int *start, int *end)
2324 int bs = HOST_BITS_PER_WIDE_INT;
2325 bool b;
2327 gcc_assert (!!start == !!end);
2328 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2329 /* This cannot be expressed as a contiguous bitmask. Exit early because
2330 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2331 a valid bitmask. */
2332 return false;
2333 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2334 if (b)
2335 return true;
2336 if (! wrap_p)
2337 return false;
2338 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2339 if (b && start)
2341 int s = *start;
2342 int e = *end;
2344 gcc_assert (s >= 1);
2345 *start = ((e + 1) & (bs - 1));
2346 *end = ((s - 1 + bs) & (bs - 1));
2349 return b;
2352 /* Return true if OP contains the same contiguous bitfield in *all*
2353 its elements. START and END can be used to obtain the start and
2354 end position of the bitfield.
2356 START/STOP give the position of the first/last bit of the bitfield
2357 counting from the lowest order bit starting with zero. In order to
2358 use these values for S/390 instructions this has to be converted to
2359 "bits big endian" style. */
2361 bool
2362 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2364 unsigned HOST_WIDE_INT mask;
2365 int size;
2366 rtx elt;
2367 bool b;
2369 gcc_assert (!!start == !!end);
2370 if (!const_vec_duplicate_p (op, &elt)
2371 || !CONST_INT_P (elt))
2372 return false;
2374 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2376 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2377 if (size > 64)
2378 return false;
2380 mask = UINTVAL (elt);
2382 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2383 if (b)
2385 if (start)
2387 *start -= (HOST_BITS_PER_WIDE_INT - size);
2388 *end -= (HOST_BITS_PER_WIDE_INT - size);
2390 return true;
2392 else
2393 return false;
2396 /* Return true if C consists only of byte chunks being either 0 or
2397 0xff. If MASK is !=NULL a byte mask is generated which is
2398 appropriate for the vector generate byte mask instruction. */
2400 bool
2401 s390_bytemask_vector_p (rtx op, unsigned *mask)
2403 int i;
2404 unsigned tmp_mask = 0;
2405 int nunit, unit_size;
2407 if (!VECTOR_MODE_P (GET_MODE (op))
2408 || GET_CODE (op) != CONST_VECTOR
2409 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2410 return false;
2412 nunit = GET_MODE_NUNITS (GET_MODE (op));
2413 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2415 for (i = 0; i < nunit; i++)
2417 unsigned HOST_WIDE_INT c;
2418 int j;
2420 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2421 return false;
2423 c = UINTVAL (XVECEXP (op, 0, i));
2424 for (j = 0; j < unit_size; j++)
2426 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2427 return false;
2428 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2429 c = c >> BITS_PER_UNIT;
2433 if (mask != NULL)
2434 *mask = tmp_mask;
2436 return true;
2439 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2440 equivalent to a shift followed by the AND. In particular, CONTIG
2441 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2442 for ROTL indicate a rotate to the right. */
2444 bool
2445 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2447 int start, end;
2448 bool ok;
2450 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2451 gcc_assert (ok);
2453 if (rotl >= 0)
2454 return (64 - end >= rotl);
2455 else
2457 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2458 DIMode. */
2459 rotl = -rotl + (64 - bitsize);
2460 return (start >= rotl);
2464 /* Check whether we can (and want to) split a double-word
2465 move in mode MODE from SRC to DST into two single-word
2466 moves, moving the subword FIRST_SUBWORD first. */
2468 bool
2469 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2471 /* Floating point and vector registers cannot be split. */
2472 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2473 return false;
2475 /* Non-offsettable memory references cannot be split. */
2476 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2477 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2478 return false;
2480 /* Moving the first subword must not clobber a register
2481 needed to move the second subword. */
2482 if (register_operand (dst, mode))
2484 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2485 if (reg_overlap_mentioned_p (subreg, src))
2486 return false;
2489 return true;
2492 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2493 and [MEM2, MEM2 + SIZE] do overlap and false
2494 otherwise. */
2496 bool
2497 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2499 rtx addr1, addr2, addr_delta;
2500 HOST_WIDE_INT delta;
2502 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2503 return true;
2505 if (size == 0)
2506 return false;
2508 addr1 = XEXP (mem1, 0);
2509 addr2 = XEXP (mem2, 0);
2511 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2513 /* This overlapping check is used by peepholes merging memory block operations.
2514 Overlapping operations would otherwise be recognized by the S/390 hardware
2515 and would fall back to a slower implementation. Allowing overlapping
2516 operations would lead to slow code but not to wrong code. Therefore we are
2517 somewhat optimistic if we cannot prove that the memory blocks are
2518 overlapping.
2519 That's why we return false here although this may accept operations on
2520 overlapping memory areas. */
2521 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2522 return false;
2524 delta = INTVAL (addr_delta);
2526 if (delta == 0
2527 || (delta > 0 && delta < size)
2528 || (delta < 0 && -delta < size))
2529 return true;
2531 return false;
2534 /* Check whether the address of memory reference MEM2 equals exactly
2535 the address of memory reference MEM1 plus DELTA. Return true if
2536 we can prove this to be the case, false otherwise. */
2538 bool
2539 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2541 rtx addr1, addr2, addr_delta;
2543 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2544 return false;
2546 addr1 = XEXP (mem1, 0);
2547 addr2 = XEXP (mem2, 0);
2549 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2550 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2551 return false;
2553 return true;
2556 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2558 void
2559 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2560 rtx *operands)
2562 machine_mode wmode = mode;
2563 rtx dst = operands[0];
2564 rtx src1 = operands[1];
2565 rtx src2 = operands[2];
2566 rtx op, clob, tem;
2568 /* If we cannot handle the operation directly, use a temp register. */
2569 if (!s390_logical_operator_ok_p (operands))
2570 dst = gen_reg_rtx (mode);
2572 /* QImode and HImode patterns make sense only if we have a destination
2573 in memory. Otherwise perform the operation in SImode. */
2574 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2575 wmode = SImode;
2577 /* Widen operands if required. */
2578 if (mode != wmode)
2580 if (GET_CODE (dst) == SUBREG
2581 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2582 dst = tem;
2583 else if (REG_P (dst))
2584 dst = gen_rtx_SUBREG (wmode, dst, 0);
2585 else
2586 dst = gen_reg_rtx (wmode);
2588 if (GET_CODE (src1) == SUBREG
2589 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2590 src1 = tem;
2591 else if (GET_MODE (src1) != VOIDmode)
2592 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2594 if (GET_CODE (src2) == SUBREG
2595 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2596 src2 = tem;
2597 else if (GET_MODE (src2) != VOIDmode)
2598 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2601 /* Emit the instruction. */
2602 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2603 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2604 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2606 /* Fix up the destination if needed. */
2607 if (dst != operands[0])
2608 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2611 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2613 bool
2614 s390_logical_operator_ok_p (rtx *operands)
2616 /* If the destination operand is in memory, it needs to coincide
2617 with one of the source operands. After reload, it has to be
2618 the first source operand. */
2619 if (GET_CODE (operands[0]) == MEM)
2620 return rtx_equal_p (operands[0], operands[1])
2621 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2623 return true;
2626 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2627 operand IMMOP to switch from SS to SI type instructions. */
2629 void
2630 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2632 int def = code == AND ? -1 : 0;
2633 HOST_WIDE_INT mask;
2634 int part;
2636 gcc_assert (GET_CODE (*memop) == MEM);
2637 gcc_assert (!MEM_VOLATILE_P (*memop));
2639 mask = s390_extract_part (*immop, QImode, def);
2640 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2641 gcc_assert (part >= 0);
2643 *memop = adjust_address (*memop, QImode, part);
2644 *immop = gen_int_mode (mask, QImode);
2648 /* How to allocate a 'struct machine_function'. */
2650 static struct machine_function *
2651 s390_init_machine_status (void)
2653 return ggc_cleared_alloc<machine_function> ();
2656 /* Map for smallest class containing reg regno. */
2658 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2659 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2660 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2661 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2662 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2663 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2664 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2665 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2666 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2667 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2668 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2669 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2670 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2671 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2672 VEC_REGS, VEC_REGS /* 52 */
2675 /* Return attribute type of insn. */
2677 static enum attr_type
2678 s390_safe_attr_type (rtx_insn *insn)
2680 if (recog_memoized (insn) >= 0)
2681 return get_attr_type (insn);
2682 else
2683 return TYPE_NONE;
2686 /* Return true if DISP is a valid short displacement. */
2688 static bool
2689 s390_short_displacement (rtx disp)
2691 /* No displacement is OK. */
2692 if (!disp)
2693 return true;
2695 /* Without the long displacement facility we don't need to
2696 distingiush between long and short displacement. */
2697 if (!TARGET_LONG_DISPLACEMENT)
2698 return true;
2700 /* Integer displacement in range. */
2701 if (GET_CODE (disp) == CONST_INT)
2702 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2704 /* GOT offset is not OK, the GOT can be large. */
2705 if (GET_CODE (disp) == CONST
2706 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2707 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2708 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2709 return false;
2711 /* All other symbolic constants are literal pool references,
2712 which are OK as the literal pool must be small. */
2713 if (GET_CODE (disp) == CONST)
2714 return true;
2716 return false;
2719 /* Decompose a RTL expression ADDR for a memory address into
2720 its components, returned in OUT.
2722 Returns false if ADDR is not a valid memory address, true
2723 otherwise. If OUT is NULL, don't return the components,
2724 but check for validity only.
2726 Note: Only addresses in canonical form are recognized.
2727 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2728 canonical form so that they will be recognized. */
2730 static int
2731 s390_decompose_address (rtx addr, struct s390_address *out)
2733 HOST_WIDE_INT offset = 0;
2734 rtx base = NULL_RTX;
2735 rtx indx = NULL_RTX;
2736 rtx disp = NULL_RTX;
2737 rtx orig_disp;
2738 bool pointer = false;
2739 bool base_ptr = false;
2740 bool indx_ptr = false;
2741 bool literal_pool = false;
2743 /* We may need to substitute the literal pool base register into the address
2744 below. However, at this point we do not know which register is going to
2745 be used as base, so we substitute the arg pointer register. This is going
2746 to be treated as holding a pointer below -- it shouldn't be used for any
2747 other purpose. */
2748 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2750 /* Decompose address into base + index + displacement. */
2752 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2753 base = addr;
2755 else if (GET_CODE (addr) == PLUS)
2757 rtx op0 = XEXP (addr, 0);
2758 rtx op1 = XEXP (addr, 1);
2759 enum rtx_code code0 = GET_CODE (op0);
2760 enum rtx_code code1 = GET_CODE (op1);
2762 if (code0 == REG || code0 == UNSPEC)
2764 if (code1 == REG || code1 == UNSPEC)
2766 indx = op0; /* index + base */
2767 base = op1;
2770 else
2772 base = op0; /* base + displacement */
2773 disp = op1;
2777 else if (code0 == PLUS)
2779 indx = XEXP (op0, 0); /* index + base + disp */
2780 base = XEXP (op0, 1);
2781 disp = op1;
2784 else
2786 return false;
2790 else
2791 disp = addr; /* displacement */
2793 /* Extract integer part of displacement. */
2794 orig_disp = disp;
2795 if (disp)
2797 if (GET_CODE (disp) == CONST_INT)
2799 offset = INTVAL (disp);
2800 disp = NULL_RTX;
2802 else if (GET_CODE (disp) == CONST
2803 && GET_CODE (XEXP (disp, 0)) == PLUS
2804 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2806 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2807 disp = XEXP (XEXP (disp, 0), 0);
2811 /* Strip off CONST here to avoid special case tests later. */
2812 if (disp && GET_CODE (disp) == CONST)
2813 disp = XEXP (disp, 0);
2815 /* We can convert literal pool addresses to
2816 displacements by basing them off the base register. */
2817 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2819 if (base || indx)
2820 return false;
2822 base = fake_pool_base, literal_pool = true;
2824 /* Mark up the displacement. */
2825 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2826 UNSPEC_LTREL_OFFSET);
2829 /* Validate base register. */
2830 if (base)
2832 if (GET_CODE (base) == UNSPEC)
2833 switch (XINT (base, 1))
2835 case UNSPEC_LTREF:
2836 if (!disp)
2837 disp = gen_rtx_UNSPEC (Pmode,
2838 gen_rtvec (1, XVECEXP (base, 0, 0)),
2839 UNSPEC_LTREL_OFFSET);
2840 else
2841 return false;
2843 base = XVECEXP (base, 0, 1);
2844 break;
2846 case UNSPEC_LTREL_BASE:
2847 if (XVECLEN (base, 0) == 1)
2848 base = fake_pool_base, literal_pool = true;
2849 else
2850 base = XVECEXP (base, 0, 1);
2851 break;
2853 default:
2854 return false;
2857 if (!REG_P (base) || GET_MODE (base) != Pmode)
2858 return false;
2860 if (REGNO (base) == STACK_POINTER_REGNUM
2861 || REGNO (base) == FRAME_POINTER_REGNUM
2862 || ((reload_completed || reload_in_progress)
2863 && frame_pointer_needed
2864 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2865 || REGNO (base) == ARG_POINTER_REGNUM
2866 || (flag_pic
2867 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2868 pointer = base_ptr = true;
2870 if ((reload_completed || reload_in_progress)
2871 && base == cfun->machine->base_reg)
2872 pointer = base_ptr = literal_pool = true;
2875 /* Validate index register. */
2876 if (indx)
2878 if (GET_CODE (indx) == UNSPEC)
2879 switch (XINT (indx, 1))
2881 case UNSPEC_LTREF:
2882 if (!disp)
2883 disp = gen_rtx_UNSPEC (Pmode,
2884 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2885 UNSPEC_LTREL_OFFSET);
2886 else
2887 return false;
2889 indx = XVECEXP (indx, 0, 1);
2890 break;
2892 case UNSPEC_LTREL_BASE:
2893 if (XVECLEN (indx, 0) == 1)
2894 indx = fake_pool_base, literal_pool = true;
2895 else
2896 indx = XVECEXP (indx, 0, 1);
2897 break;
2899 default:
2900 return false;
2903 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2904 return false;
2906 if (REGNO (indx) == STACK_POINTER_REGNUM
2907 || REGNO (indx) == FRAME_POINTER_REGNUM
2908 || ((reload_completed || reload_in_progress)
2909 && frame_pointer_needed
2910 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2911 || REGNO (indx) == ARG_POINTER_REGNUM
2912 || (flag_pic
2913 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2914 pointer = indx_ptr = true;
2916 if ((reload_completed || reload_in_progress)
2917 && indx == cfun->machine->base_reg)
2918 pointer = indx_ptr = literal_pool = true;
2921 /* Prefer to use pointer as base, not index. */
2922 if (base && indx && !base_ptr
2923 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2925 rtx tmp = base;
2926 base = indx;
2927 indx = tmp;
2930 /* Validate displacement. */
2931 if (!disp)
2933 /* If virtual registers are involved, the displacement will change later
2934 anyway as the virtual registers get eliminated. This could make a
2935 valid displacement invalid, but it is more likely to make an invalid
2936 displacement valid, because we sometimes access the register save area
2937 via negative offsets to one of those registers.
2938 Thus we don't check the displacement for validity here. If after
2939 elimination the displacement turns out to be invalid after all,
2940 this is fixed up by reload in any case. */
2941 /* LRA maintains always displacements up to date and we need to
2942 know the displacement is right during all LRA not only at the
2943 final elimination. */
2944 if (lra_in_progress
2945 || (base != arg_pointer_rtx
2946 && indx != arg_pointer_rtx
2947 && base != return_address_pointer_rtx
2948 && indx != return_address_pointer_rtx
2949 && base != frame_pointer_rtx
2950 && indx != frame_pointer_rtx
2951 && base != virtual_stack_vars_rtx
2952 && indx != virtual_stack_vars_rtx))
2953 if (!DISP_IN_RANGE (offset))
2954 return false;
2956 else
2958 /* All the special cases are pointers. */
2959 pointer = true;
2961 /* In the small-PIC case, the linker converts @GOT
2962 and @GOTNTPOFF offsets to possible displacements. */
2963 if (GET_CODE (disp) == UNSPEC
2964 && (XINT (disp, 1) == UNSPEC_GOT
2965 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2966 && flag_pic == 1)
2971 /* Accept pool label offsets. */
2972 else if (GET_CODE (disp) == UNSPEC
2973 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2976 /* Accept literal pool references. */
2977 else if (GET_CODE (disp) == UNSPEC
2978 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2980 /* In case CSE pulled a non literal pool reference out of
2981 the pool we have to reject the address. This is
2982 especially important when loading the GOT pointer on non
2983 zarch CPUs. In this case the literal pool contains an lt
2984 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2985 will most likely exceed the displacement. */
2986 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2987 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2988 return false;
2990 orig_disp = gen_rtx_CONST (Pmode, disp);
2991 if (offset)
2993 /* If we have an offset, make sure it does not
2994 exceed the size of the constant pool entry. */
2995 rtx sym = XVECEXP (disp, 0, 0);
2996 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2997 return false;
2999 orig_disp = plus_constant (Pmode, orig_disp, offset);
3003 else
3004 return false;
3007 if (!base && !indx)
3008 pointer = true;
3010 if (out)
3012 out->base = base;
3013 out->indx = indx;
3014 out->disp = orig_disp;
3015 out->pointer = pointer;
3016 out->literal_pool = literal_pool;
3019 return true;
3022 /* Decompose a RTL expression OP for an address style operand into its
3023 components, and return the base register in BASE and the offset in
3024 OFFSET. While OP looks like an address it is never supposed to be
3025 used as such.
3027 Return true if OP is a valid address operand, false if not. */
3029 bool
3030 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3031 HOST_WIDE_INT *offset)
3033 rtx off = NULL_RTX;
3035 /* We can have an integer constant, an address register,
3036 or a sum of the two. */
3037 if (CONST_SCALAR_INT_P (op))
3039 off = op;
3040 op = NULL_RTX;
3042 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3044 off = XEXP (op, 1);
3045 op = XEXP (op, 0);
3047 while (op && GET_CODE (op) == SUBREG)
3048 op = SUBREG_REG (op);
3050 if (op && GET_CODE (op) != REG)
3051 return false;
3053 if (offset)
3055 if (off == NULL_RTX)
3056 *offset = 0;
3057 else if (CONST_INT_P (off))
3058 *offset = INTVAL (off);
3059 else if (CONST_WIDE_INT_P (off))
3060 /* The offset will anyway be cut down to 12 bits so take just
3061 the lowest order chunk of the wide int. */
3062 *offset = CONST_WIDE_INT_ELT (off, 0);
3063 else
3064 gcc_unreachable ();
3066 if (base)
3067 *base = op;
3069 return true;
3073 /* Return true if CODE is a valid address without index. */
3075 bool
3076 s390_legitimate_address_without_index_p (rtx op)
3078 struct s390_address addr;
3080 if (!s390_decompose_address (XEXP (op, 0), &addr))
3081 return false;
3082 if (addr.indx)
3083 return false;
3085 return true;
3089 /* Return TRUE if ADDR is an operand valid for a load/store relative
3090 instruction. Be aware that the alignment of the operand needs to
3091 be checked separately.
3092 Valid addresses are single references or a sum of a reference and a
3093 constant integer. Return these parts in SYMREF and ADDEND. You can
3094 pass NULL in REF and/or ADDEND if you are not interested in these
3095 values. Literal pool references are *not* considered symbol
3096 references. */
3098 static bool
3099 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3101 HOST_WIDE_INT tmpaddend = 0;
3103 if (GET_CODE (addr) == CONST)
3104 addr = XEXP (addr, 0);
3106 if (GET_CODE (addr) == PLUS)
3108 if (!CONST_INT_P (XEXP (addr, 1)))
3109 return false;
3111 tmpaddend = INTVAL (XEXP (addr, 1));
3112 addr = XEXP (addr, 0);
3115 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3116 || (GET_CODE (addr) == UNSPEC
3117 && (XINT (addr, 1) == UNSPEC_GOTENT
3118 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3120 if (symref)
3121 *symref = addr;
3122 if (addend)
3123 *addend = tmpaddend;
3125 return true;
3127 return false;
3130 /* Return true if the address in OP is valid for constraint letter C
3131 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3132 pool MEMs should be accepted. Only the Q, R, S, T constraint
3133 letters are allowed for C. */
3135 static int
3136 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3138 struct s390_address addr;
3139 bool decomposed = false;
3141 if (!address_operand (op, GET_MODE (op)))
3142 return 0;
3144 /* This check makes sure that no symbolic address (except literal
3145 pool references) are accepted by the R or T constraints. */
3146 if (s390_loadrelative_operand_p (op, NULL, NULL))
3147 return 0;
3149 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3150 if (!lit_pool_ok)
3152 if (!s390_decompose_address (op, &addr))
3153 return 0;
3154 if (addr.literal_pool)
3155 return 0;
3156 decomposed = true;
3159 /* With reload, we sometimes get intermediate address forms that are
3160 actually invalid as-is, but we need to accept them in the most
3161 generic cases below ('R' or 'T'), since reload will in fact fix
3162 them up. LRA behaves differently here; we never see such forms,
3163 but on the other hand, we need to strictly reject every invalid
3164 address form. Perform this check right up front. */
3165 if (lra_in_progress)
3167 if (!decomposed && !s390_decompose_address (op, &addr))
3168 return 0;
3169 decomposed = true;
3172 switch (c)
3174 case 'Q': /* no index short displacement */
3175 if (!decomposed && !s390_decompose_address (op, &addr))
3176 return 0;
3177 if (addr.indx)
3178 return 0;
3179 if (!s390_short_displacement (addr.disp))
3180 return 0;
3181 break;
3183 case 'R': /* with index short displacement */
3184 if (TARGET_LONG_DISPLACEMENT)
3186 if (!decomposed && !s390_decompose_address (op, &addr))
3187 return 0;
3188 if (!s390_short_displacement (addr.disp))
3189 return 0;
3191 /* Any invalid address here will be fixed up by reload,
3192 so accept it for the most generic constraint. */
3193 break;
3195 case 'S': /* no index long displacement */
3196 if (!decomposed && !s390_decompose_address (op, &addr))
3197 return 0;
3198 if (addr.indx)
3199 return 0;
3200 break;
3202 case 'T': /* with index long displacement */
3203 /* Any invalid address here will be fixed up by reload,
3204 so accept it for the most generic constraint. */
3205 break;
3207 default:
3208 return 0;
3210 return 1;
3214 /* Evaluates constraint strings described by the regular expression
3215 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3216 the constraint given in STR, or 0 else. */
3219 s390_mem_constraint (const char *str, rtx op)
3221 char c = str[0];
3223 switch (c)
3225 case 'A':
3226 /* Check for offsettable variants of memory constraints. */
3227 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3228 return 0;
3229 if ((reload_completed || reload_in_progress)
3230 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3231 return 0;
3232 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3233 case 'B':
3234 /* Check for non-literal-pool variants of memory constraints. */
3235 if (!MEM_P (op))
3236 return 0;
3237 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3238 case 'Q':
3239 case 'R':
3240 case 'S':
3241 case 'T':
3242 if (GET_CODE (op) != MEM)
3243 return 0;
3244 return s390_check_qrst_address (c, XEXP (op, 0), true);
3245 case 'Y':
3246 /* Simply check for the basic form of a shift count. Reload will
3247 take care of making sure we have a proper base register. */
3248 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3249 return 0;
3250 break;
3251 case 'Z':
3252 return s390_check_qrst_address (str[1], op, true);
3253 default:
3254 return 0;
3256 return 1;
3260 /* Evaluates constraint strings starting with letter O. Input
3261 parameter C is the second letter following the "O" in the constraint
3262 string. Returns 1 if VALUE meets the respective constraint and 0
3263 otherwise. */
3266 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3268 if (!TARGET_EXTIMM)
3269 return 0;
3271 switch (c)
3273 case 's':
3274 return trunc_int_for_mode (value, SImode) == value;
3276 case 'p':
3277 return value == 0
3278 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3280 case 'n':
3281 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3283 default:
3284 gcc_unreachable ();
3289 /* Evaluates constraint strings starting with letter N. Parameter STR
3290 contains the letters following letter "N" in the constraint string.
3291 Returns true if VALUE matches the constraint. */
3294 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3296 machine_mode mode, part_mode;
3297 int def;
3298 int part, part_goal;
3301 if (str[0] == 'x')
3302 part_goal = -1;
3303 else
3304 part_goal = str[0] - '0';
3306 switch (str[1])
3308 case 'Q':
3309 part_mode = QImode;
3310 break;
3311 case 'H':
3312 part_mode = HImode;
3313 break;
3314 case 'S':
3315 part_mode = SImode;
3316 break;
3317 default:
3318 return 0;
3321 switch (str[2])
3323 case 'H':
3324 mode = HImode;
3325 break;
3326 case 'S':
3327 mode = SImode;
3328 break;
3329 case 'D':
3330 mode = DImode;
3331 break;
3332 default:
3333 return 0;
3336 switch (str[3])
3338 case '0':
3339 def = 0;
3340 break;
3341 case 'F':
3342 def = -1;
3343 break;
3344 default:
3345 return 0;
3348 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3349 return 0;
3351 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3352 if (part < 0)
3353 return 0;
3354 if (part_goal != -1 && part_goal != part)
3355 return 0;
3357 return 1;
3361 /* Returns true if the input parameter VALUE is a float zero. */
3364 s390_float_const_zero_p (rtx value)
3366 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3367 && value == CONST0_RTX (GET_MODE (value)));
3370 /* Implement TARGET_REGISTER_MOVE_COST. */
3372 static int
3373 s390_register_move_cost (machine_mode mode,
3374 reg_class_t from, reg_class_t to)
3376 /* On s390, copy between fprs and gprs is expensive. */
3378 /* It becomes somewhat faster having ldgr/lgdr. */
3379 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3381 /* ldgr is single cycle. */
3382 if (reg_classes_intersect_p (from, GENERAL_REGS)
3383 && reg_classes_intersect_p (to, FP_REGS))
3384 return 1;
3385 /* lgdr needs 3 cycles. */
3386 if (reg_classes_intersect_p (to, GENERAL_REGS)
3387 && reg_classes_intersect_p (from, FP_REGS))
3388 return 3;
3391 /* Otherwise copying is done via memory. */
3392 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3393 && reg_classes_intersect_p (to, FP_REGS))
3394 || (reg_classes_intersect_p (from, FP_REGS)
3395 && reg_classes_intersect_p (to, GENERAL_REGS)))
3396 return 10;
3398 return 1;
3401 /* Implement TARGET_MEMORY_MOVE_COST. */
3403 static int
3404 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3405 reg_class_t rclass ATTRIBUTE_UNUSED,
3406 bool in ATTRIBUTE_UNUSED)
3408 return 2;
3411 /* Compute a (partial) cost for rtx X. Return true if the complete
3412 cost has been computed, and false if subexpressions should be
3413 scanned. In either case, *TOTAL contains the cost result. The
3414 initial value of *TOTAL is the default value computed by
3415 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3416 code of the superexpression of x. */
3418 static bool
3419 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3420 int opno ATTRIBUTE_UNUSED,
3421 int *total, bool speed ATTRIBUTE_UNUSED)
3423 int code = GET_CODE (x);
3424 switch (code)
3426 case CONST:
3427 case CONST_INT:
3428 case LABEL_REF:
3429 case SYMBOL_REF:
3430 case CONST_DOUBLE:
3431 case CONST_WIDE_INT:
3432 case MEM:
3433 *total = 0;
3434 return true;
3436 case SET:
3438 /* Without this a conditional move instruction would be
3439 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3440 comparison operator). That's a bit pessimistic. */
3442 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3443 return false;
3445 rtx cond = XEXP (SET_SRC (x), 0);
3447 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3448 return false;
3450 /* It is going to be a load/store on condition. Make it
3451 slightly more expensive than a normal load. */
3452 *total = COSTS_N_INSNS (1) + 1;
3454 rtx dst = SET_DEST (x);
3455 rtx then = XEXP (SET_SRC (x), 1);
3456 rtx els = XEXP (SET_SRC (x), 2);
3458 /* It is a real IF-THEN-ELSE. An additional move will be
3459 needed to implement that. */
3460 if (reload_completed
3461 && !rtx_equal_p (dst, then)
3462 && !rtx_equal_p (dst, els))
3463 *total += COSTS_N_INSNS (1) / 2;
3465 /* A minor penalty for constants we cannot directly handle. */
3466 if ((CONST_INT_P (then) || CONST_INT_P (els))
3467 && (!TARGET_Z13 || MEM_P (dst)
3468 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3469 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3470 *total += COSTS_N_INSNS (1) / 2;
3472 /* A store on condition can only handle register src operands. */
3473 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3474 *total += COSTS_N_INSNS (1) / 2;
3476 return true;
3478 case IOR:
3479 /* risbg */
3480 if (GET_CODE (XEXP (x, 0)) == AND
3481 && GET_CODE (XEXP (x, 1)) == ASHIFT
3482 && REG_P (XEXP (XEXP (x, 0), 0))
3483 && REG_P (XEXP (XEXP (x, 1), 0))
3484 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3485 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3486 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3487 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3489 *total = COSTS_N_INSNS (2);
3490 return true;
3493 /* ~AND on a 128 bit mode. This can be done using a vector
3494 instruction. */
3495 if (TARGET_VXE
3496 && GET_CODE (XEXP (x, 0)) == NOT
3497 && GET_CODE (XEXP (x, 1)) == NOT
3498 && REG_P (XEXP (XEXP (x, 0), 0))
3499 && REG_P (XEXP (XEXP (x, 1), 0))
3500 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3501 && s390_hard_regno_mode_ok (VR0_REGNUM,
3502 GET_MODE (XEXP (XEXP (x, 0), 0))))
3504 *total = COSTS_N_INSNS (1);
3505 return true;
3507 /* fallthrough */
3508 case ASHIFT:
3509 case ASHIFTRT:
3510 case LSHIFTRT:
3511 case ROTATE:
3512 case ROTATERT:
3513 case AND:
3514 case XOR:
3515 case NEG:
3516 case NOT:
3517 *total = COSTS_N_INSNS (1);
3518 return false;
3520 case PLUS:
3521 case MINUS:
3522 *total = COSTS_N_INSNS (1);
3523 return false;
3525 case MULT:
3526 switch (mode)
3528 case E_SImode:
3530 rtx left = XEXP (x, 0);
3531 rtx right = XEXP (x, 1);
3532 if (GET_CODE (right) == CONST_INT
3533 && CONST_OK_FOR_K (INTVAL (right)))
3534 *total = s390_cost->mhi;
3535 else if (GET_CODE (left) == SIGN_EXTEND)
3536 *total = s390_cost->mh;
3537 else
3538 *total = s390_cost->ms; /* msr, ms, msy */
3539 break;
3541 case E_DImode:
3543 rtx left = XEXP (x, 0);
3544 rtx right = XEXP (x, 1);
3545 if (TARGET_ZARCH)
3547 if (GET_CODE (right) == CONST_INT
3548 && CONST_OK_FOR_K (INTVAL (right)))
3549 *total = s390_cost->mghi;
3550 else if (GET_CODE (left) == SIGN_EXTEND)
3551 *total = s390_cost->msgf;
3552 else
3553 *total = s390_cost->msg; /* msgr, msg */
3555 else /* TARGET_31BIT */
3557 if (GET_CODE (left) == SIGN_EXTEND
3558 && GET_CODE (right) == SIGN_EXTEND)
3559 /* mulsidi case: mr, m */
3560 *total = s390_cost->m;
3561 else if (GET_CODE (left) == ZERO_EXTEND
3562 && GET_CODE (right) == ZERO_EXTEND
3563 && TARGET_CPU_ZARCH)
3564 /* umulsidi case: ml, mlr */
3565 *total = s390_cost->ml;
3566 else
3567 /* Complex calculation is required. */
3568 *total = COSTS_N_INSNS (40);
3570 break;
3572 case E_SFmode:
3573 case E_DFmode:
3574 *total = s390_cost->mult_df;
3575 break;
3576 case E_TFmode:
3577 *total = s390_cost->mxbr;
3578 break;
3579 default:
3580 return false;
3582 return false;
3584 case FMA:
3585 switch (mode)
3587 case E_DFmode:
3588 *total = s390_cost->madbr;
3589 break;
3590 case E_SFmode:
3591 *total = s390_cost->maebr;
3592 break;
3593 default:
3594 return false;
3596 /* Negate in the third argument is free: FMSUB. */
3597 if (GET_CODE (XEXP (x, 2)) == NEG)
3599 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3600 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3601 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3602 return true;
3604 return false;
3606 case UDIV:
3607 case UMOD:
3608 if (mode == TImode) /* 128 bit division */
3609 *total = s390_cost->dlgr;
3610 else if (mode == DImode)
3612 rtx right = XEXP (x, 1);
3613 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3614 *total = s390_cost->dlr;
3615 else /* 64 by 64 bit division */
3616 *total = s390_cost->dlgr;
3618 else if (mode == SImode) /* 32 bit division */
3619 *total = s390_cost->dlr;
3620 return false;
3622 case DIV:
3623 case MOD:
3624 if (mode == DImode)
3626 rtx right = XEXP (x, 1);
3627 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3628 if (TARGET_ZARCH)
3629 *total = s390_cost->dsgfr;
3630 else
3631 *total = s390_cost->dr;
3632 else /* 64 by 64 bit division */
3633 *total = s390_cost->dsgr;
3635 else if (mode == SImode) /* 32 bit division */
3636 *total = s390_cost->dlr;
3637 else if (mode == SFmode)
3639 *total = s390_cost->debr;
3641 else if (mode == DFmode)
3643 *total = s390_cost->ddbr;
3645 else if (mode == TFmode)
3647 *total = s390_cost->dxbr;
3649 return false;
3651 case SQRT:
3652 if (mode == SFmode)
3653 *total = s390_cost->sqebr;
3654 else if (mode == DFmode)
3655 *total = s390_cost->sqdbr;
3656 else /* TFmode */
3657 *total = s390_cost->sqxbr;
3658 return false;
3660 case SIGN_EXTEND:
3661 case ZERO_EXTEND:
3662 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3663 || outer_code == PLUS || outer_code == MINUS
3664 || outer_code == COMPARE)
3665 *total = 0;
3666 return false;
3668 case COMPARE:
3669 *total = COSTS_N_INSNS (1);
3670 if (GET_CODE (XEXP (x, 0)) == AND
3671 && GET_CODE (XEXP (x, 1)) == CONST_INT
3672 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3674 rtx op0 = XEXP (XEXP (x, 0), 0);
3675 rtx op1 = XEXP (XEXP (x, 0), 1);
3676 rtx op2 = XEXP (x, 1);
3678 if (memory_operand (op0, GET_MODE (op0))
3679 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3680 return true;
3681 if (register_operand (op0, GET_MODE (op0))
3682 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3683 return true;
3685 return false;
3687 default:
3688 return false;
3692 /* Return the cost of an address rtx ADDR. */
3694 static int
3695 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3696 addr_space_t as ATTRIBUTE_UNUSED,
3697 bool speed ATTRIBUTE_UNUSED)
3699 struct s390_address ad;
3700 if (!s390_decompose_address (addr, &ad))
3701 return 1000;
3703 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3706 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3707 static int
3708 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3709 tree vectype,
3710 int misalign ATTRIBUTE_UNUSED)
3712 switch (type_of_cost)
3714 case scalar_stmt:
3715 case scalar_load:
3716 case scalar_store:
3717 case vector_stmt:
3718 case vector_load:
3719 case vector_store:
3720 case vec_to_scalar:
3721 case scalar_to_vec:
3722 case cond_branch_not_taken:
3723 case vec_perm:
3724 case vec_promote_demote:
3725 case unaligned_load:
3726 case unaligned_store:
3727 return 1;
3729 case cond_branch_taken:
3730 return 3;
3732 case vec_construct:
3733 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3735 default:
3736 gcc_unreachable ();
3740 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3741 otherwise return 0. */
3744 tls_symbolic_operand (rtx op)
3746 if (GET_CODE (op) != SYMBOL_REF)
3747 return 0;
3748 return SYMBOL_REF_TLS_MODEL (op);
3751 /* Split DImode access register reference REG (on 64-bit) into its constituent
3752 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3753 gen_highpart cannot be used as they assume all registers are word-sized,
3754 while our access registers have only half that size. */
3756 void
3757 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3759 gcc_assert (TARGET_64BIT);
3760 gcc_assert (ACCESS_REG_P (reg));
3761 gcc_assert (GET_MODE (reg) == DImode);
3762 gcc_assert (!(REGNO (reg) & 1));
3764 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3765 *hi = gen_rtx_REG (SImode, REGNO (reg));
3768 /* Return true if OP contains a symbol reference */
3770 bool
3771 symbolic_reference_mentioned_p (rtx op)
3773 const char *fmt;
3774 int i;
3776 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3777 return 1;
3779 fmt = GET_RTX_FORMAT (GET_CODE (op));
3780 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3782 if (fmt[i] == 'E')
3784 int j;
3786 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3787 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3788 return 1;
3791 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3792 return 1;
3795 return 0;
3798 /* Return true if OP contains a reference to a thread-local symbol. */
3800 bool
3801 tls_symbolic_reference_mentioned_p (rtx op)
3803 const char *fmt;
3804 int i;
3806 if (GET_CODE (op) == SYMBOL_REF)
3807 return tls_symbolic_operand (op);
3809 fmt = GET_RTX_FORMAT (GET_CODE (op));
3810 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3812 if (fmt[i] == 'E')
3814 int j;
3816 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3817 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3818 return true;
3821 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3822 return true;
3825 return false;
3829 /* Return true if OP is a legitimate general operand when
3830 generating PIC code. It is given that flag_pic is on
3831 and that OP satisfies CONSTANT_P. */
3834 legitimate_pic_operand_p (rtx op)
3836 /* Accept all non-symbolic constants. */
3837 if (!SYMBOLIC_CONST (op))
3838 return 1;
3840 /* Reject everything else; must be handled
3841 via emit_symbolic_move. */
3842 return 0;
3845 /* Returns true if the constant value OP is a legitimate general operand.
3846 It is given that OP satisfies CONSTANT_P. */
3848 static bool
3849 s390_legitimate_constant_p (machine_mode mode, rtx op)
3851 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3853 if (GET_MODE_SIZE (mode) != 16)
3854 return 0;
3856 if (!satisfies_constraint_j00 (op)
3857 && !satisfies_constraint_jm1 (op)
3858 && !satisfies_constraint_jKK (op)
3859 && !satisfies_constraint_jxx (op)
3860 && !satisfies_constraint_jyy (op))
3861 return 0;
3864 /* Accept all non-symbolic constants. */
3865 if (!SYMBOLIC_CONST (op))
3866 return 1;
3868 /* Accept immediate LARL operands. */
3869 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3870 return 1;
3872 /* Thread-local symbols are never legal constants. This is
3873 so that emit_call knows that computing such addresses
3874 might require a function call. */
3875 if (TLS_SYMBOLIC_CONST (op))
3876 return 0;
3878 /* In the PIC case, symbolic constants must *not* be
3879 forced into the literal pool. We accept them here,
3880 so that they will be handled by emit_symbolic_move. */
3881 if (flag_pic)
3882 return 1;
3884 /* All remaining non-PIC symbolic constants are
3885 forced into the literal pool. */
3886 return 0;
3889 /* Determine if it's legal to put X into the constant pool. This
3890 is not possible if X contains the address of a symbol that is
3891 not constant (TLS) or not known at final link time (PIC). */
3893 static bool
3894 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3896 switch (GET_CODE (x))
3898 case CONST_INT:
3899 case CONST_DOUBLE:
3900 case CONST_WIDE_INT:
3901 case CONST_VECTOR:
3902 /* Accept all non-symbolic constants. */
3903 return false;
3905 case LABEL_REF:
3906 /* Labels are OK iff we are non-PIC. */
3907 return flag_pic != 0;
3909 case SYMBOL_REF:
3910 /* 'Naked' TLS symbol references are never OK,
3911 non-TLS symbols are OK iff we are non-PIC. */
3912 if (tls_symbolic_operand (x))
3913 return true;
3914 else
3915 return flag_pic != 0;
3917 case CONST:
3918 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3919 case PLUS:
3920 case MINUS:
3921 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3922 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3924 case UNSPEC:
3925 switch (XINT (x, 1))
3927 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3928 case UNSPEC_LTREL_OFFSET:
3929 case UNSPEC_GOT:
3930 case UNSPEC_GOTOFF:
3931 case UNSPEC_PLTOFF:
3932 case UNSPEC_TLSGD:
3933 case UNSPEC_TLSLDM:
3934 case UNSPEC_NTPOFF:
3935 case UNSPEC_DTPOFF:
3936 case UNSPEC_GOTNTPOFF:
3937 case UNSPEC_INDNTPOFF:
3938 return false;
3940 /* If the literal pool shares the code section, be put
3941 execute template placeholders into the pool as well. */
3942 case UNSPEC_INSN:
3943 return TARGET_CPU_ZARCH;
3945 default:
3946 return true;
3948 break;
3950 default:
3951 gcc_unreachable ();
3955 /* Returns true if the constant value OP is a legitimate general
3956 operand during and after reload. The difference to
3957 legitimate_constant_p is that this function will not accept
3958 a constant that would need to be forced to the literal pool
3959 before it can be used as operand.
3960 This function accepts all constants which can be loaded directly
3961 into a GPR. */
3963 bool
3964 legitimate_reload_constant_p (rtx op)
3966 /* Accept la(y) operands. */
3967 if (GET_CODE (op) == CONST_INT
3968 && DISP_IN_RANGE (INTVAL (op)))
3969 return true;
3971 /* Accept l(g)hi/l(g)fi operands. */
3972 if (GET_CODE (op) == CONST_INT
3973 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3974 return true;
3976 /* Accept lliXX operands. */
3977 if (TARGET_ZARCH
3978 && GET_CODE (op) == CONST_INT
3979 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3980 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3981 return true;
3983 if (TARGET_EXTIMM
3984 && GET_CODE (op) == CONST_INT
3985 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3986 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3987 return true;
3989 /* Accept larl operands. */
3990 if (TARGET_CPU_ZARCH
3991 && larl_operand (op, VOIDmode))
3992 return true;
3994 /* Accept floating-point zero operands that fit into a single GPR. */
3995 if (GET_CODE (op) == CONST_DOUBLE
3996 && s390_float_const_zero_p (op)
3997 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3998 return true;
4000 /* Accept double-word operands that can be split. */
4001 if (GET_CODE (op) == CONST_WIDE_INT
4002 || (GET_CODE (op) == CONST_INT
4003 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4005 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4006 rtx hi = operand_subword (op, 0, 0, dword_mode);
4007 rtx lo = operand_subword (op, 1, 0, dword_mode);
4008 return legitimate_reload_constant_p (hi)
4009 && legitimate_reload_constant_p (lo);
4012 /* Everything else cannot be handled without reload. */
4013 return false;
4016 /* Returns true if the constant value OP is a legitimate fp operand
4017 during and after reload.
4018 This function accepts all constants which can be loaded directly
4019 into an FPR. */
4021 static bool
4022 legitimate_reload_fp_constant_p (rtx op)
4024 /* Accept floating-point zero operands if the load zero instruction
4025 can be used. Prior to z196 the load fp zero instruction caused a
4026 performance penalty if the result is used as BFP number. */
4027 if (TARGET_Z196
4028 && GET_CODE (op) == CONST_DOUBLE
4029 && s390_float_const_zero_p (op))
4030 return true;
4032 return false;
4035 /* Returns true if the constant value OP is a legitimate vector operand
4036 during and after reload.
4037 This function accepts all constants which can be loaded directly
4038 into an VR. */
4040 static bool
4041 legitimate_reload_vector_constant_p (rtx op)
4043 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4044 && (satisfies_constraint_j00 (op)
4045 || satisfies_constraint_jm1 (op)
4046 || satisfies_constraint_jKK (op)
4047 || satisfies_constraint_jxx (op)
4048 || satisfies_constraint_jyy (op)))
4049 return true;
4051 return false;
4054 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4055 return the class of reg to actually use. */
4057 static reg_class_t
4058 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4060 switch (GET_CODE (op))
4062 /* Constants we cannot reload into general registers
4063 must be forced into the literal pool. */
4064 case CONST_VECTOR:
4065 case CONST_DOUBLE:
4066 case CONST_INT:
4067 case CONST_WIDE_INT:
4068 if (reg_class_subset_p (GENERAL_REGS, rclass)
4069 && legitimate_reload_constant_p (op))
4070 return GENERAL_REGS;
4071 else if (reg_class_subset_p (ADDR_REGS, rclass)
4072 && legitimate_reload_constant_p (op))
4073 return ADDR_REGS;
4074 else if (reg_class_subset_p (FP_REGS, rclass)
4075 && legitimate_reload_fp_constant_p (op))
4076 return FP_REGS;
4077 else if (reg_class_subset_p (VEC_REGS, rclass)
4078 && legitimate_reload_vector_constant_p (op))
4079 return VEC_REGS;
4081 return NO_REGS;
4083 /* If a symbolic constant or a PLUS is reloaded,
4084 it is most likely being used as an address, so
4085 prefer ADDR_REGS. If 'class' is not a superset
4086 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4087 case CONST:
4088 /* Symrefs cannot be pushed into the literal pool with -fPIC
4089 so we *MUST NOT* return NO_REGS for these cases
4090 (s390_cannot_force_const_mem will return true).
4092 On the other hand we MUST return NO_REGS for symrefs with
4093 invalid addend which might have been pushed to the literal
4094 pool (no -fPIC). Usually we would expect them to be
4095 handled via secondary reload but this does not happen if
4096 they are used as literal pool slot replacement in reload
4097 inheritance (see emit_input_reload_insns). */
4098 if (TARGET_CPU_ZARCH
4099 && GET_CODE (XEXP (op, 0)) == PLUS
4100 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4101 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4103 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4104 return ADDR_REGS;
4105 else
4106 return NO_REGS;
4108 /* fallthrough */
4109 case LABEL_REF:
4110 case SYMBOL_REF:
4111 if (!legitimate_reload_constant_p (op))
4112 return NO_REGS;
4113 /* fallthrough */
4114 case PLUS:
4115 /* load address will be used. */
4116 if (reg_class_subset_p (ADDR_REGS, rclass))
4117 return ADDR_REGS;
4118 else
4119 return NO_REGS;
4121 default:
4122 break;
4125 return rclass;
4128 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4129 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4130 aligned. */
4132 bool
4133 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4135 HOST_WIDE_INT addend;
4136 rtx symref;
4138 /* The "required alignment" might be 0 (e.g. for certain structs
4139 accessed via BLKmode). Early abort in this case, as well as when
4140 an alignment > 8 is required. */
4141 if (alignment < 2 || alignment > 8)
4142 return false;
4144 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4145 return false;
4147 if (addend & (alignment - 1))
4148 return false;
4150 if (GET_CODE (symref) == SYMBOL_REF)
4152 /* We have load-relative instructions for 2-byte, 4-byte, and
4153 8-byte alignment so allow only these. */
4154 switch (alignment)
4156 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4157 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4158 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4159 default: return false;
4163 if (GET_CODE (symref) == UNSPEC
4164 && alignment <= UNITS_PER_LONG)
4165 return true;
4167 return false;
4170 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4171 operand SCRATCH is used to reload the even part of the address and
4172 adding one. */
4174 void
4175 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4177 HOST_WIDE_INT addend;
4178 rtx symref;
4180 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4181 gcc_unreachable ();
4183 if (!(addend & 1))
4184 /* Easy case. The addend is even so larl will do fine. */
4185 emit_move_insn (reg, addr);
4186 else
4188 /* We can leave the scratch register untouched if the target
4189 register is a valid base register. */
4190 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4191 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4192 scratch = reg;
4194 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4195 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4197 if (addend != 1)
4198 emit_move_insn (scratch,
4199 gen_rtx_CONST (Pmode,
4200 gen_rtx_PLUS (Pmode, symref,
4201 GEN_INT (addend - 1))));
4202 else
4203 emit_move_insn (scratch, symref);
4205 /* Increment the address using la in order to avoid clobbering cc. */
4206 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4210 /* Generate what is necessary to move between REG and MEM using
4211 SCRATCH. The direction is given by TOMEM. */
4213 void
4214 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4216 /* Reload might have pulled a constant out of the literal pool.
4217 Force it back in. */
4218 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4219 || GET_CODE (mem) == CONST_WIDE_INT
4220 || GET_CODE (mem) == CONST_VECTOR
4221 || GET_CODE (mem) == CONST)
4222 mem = force_const_mem (GET_MODE (reg), mem);
4224 gcc_assert (MEM_P (mem));
4226 /* For a load from memory we can leave the scratch register
4227 untouched if the target register is a valid base register. */
4228 if (!tomem
4229 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4230 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4231 && GET_MODE (reg) == GET_MODE (scratch))
4232 scratch = reg;
4234 /* Load address into scratch register. Since we can't have a
4235 secondary reload for a secondary reload we have to cover the case
4236 where larl would need a secondary reload here as well. */
4237 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4239 /* Now we can use a standard load/store to do the move. */
4240 if (tomem)
4241 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4242 else
4243 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4246 /* Inform reload about cases where moving X with a mode MODE to a register in
4247 RCLASS requires an extra scratch or immediate register. Return the class
4248 needed for the immediate register. */
4250 static reg_class_t
4251 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4252 machine_mode mode, secondary_reload_info *sri)
4254 enum reg_class rclass = (enum reg_class) rclass_i;
4256 /* Intermediate register needed. */
4257 if (reg_classes_intersect_p (CC_REGS, rclass))
4258 return GENERAL_REGS;
4260 if (TARGET_VX)
4262 /* The vst/vl vector move instructions allow only for short
4263 displacements. */
4264 if (MEM_P (x)
4265 && GET_CODE (XEXP (x, 0)) == PLUS
4266 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4267 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4268 && reg_class_subset_p (rclass, VEC_REGS)
4269 && (!reg_class_subset_p (rclass, FP_REGS)
4270 || (GET_MODE_SIZE (mode) > 8
4271 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4273 if (in_p)
4274 sri->icode = (TARGET_64BIT ?
4275 CODE_FOR_reloaddi_la_in :
4276 CODE_FOR_reloadsi_la_in);
4277 else
4278 sri->icode = (TARGET_64BIT ?
4279 CODE_FOR_reloaddi_la_out :
4280 CODE_FOR_reloadsi_la_out);
4284 if (TARGET_Z10)
4286 HOST_WIDE_INT offset;
4287 rtx symref;
4289 /* On z10 several optimizer steps may generate larl operands with
4290 an odd addend. */
4291 if (in_p
4292 && s390_loadrelative_operand_p (x, &symref, &offset)
4293 && mode == Pmode
4294 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4295 && (offset & 1) == 1)
4296 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4297 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4299 /* Handle all the (mem (symref)) accesses we cannot use the z10
4300 instructions for. */
4301 if (MEM_P (x)
4302 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4303 && (mode == QImode
4304 || !reg_class_subset_p (rclass, GENERAL_REGS)
4305 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4306 || !s390_check_symref_alignment (XEXP (x, 0),
4307 GET_MODE_SIZE (mode))))
4309 #define __SECONDARY_RELOAD_CASE(M,m) \
4310 case E_##M##mode: \
4311 if (TARGET_64BIT) \
4312 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4313 CODE_FOR_reload##m##di_tomem_z10; \
4314 else \
4315 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4316 CODE_FOR_reload##m##si_tomem_z10; \
4317 break;
4319 switch (GET_MODE (x))
4321 __SECONDARY_RELOAD_CASE (QI, qi);
4322 __SECONDARY_RELOAD_CASE (HI, hi);
4323 __SECONDARY_RELOAD_CASE (SI, si);
4324 __SECONDARY_RELOAD_CASE (DI, di);
4325 __SECONDARY_RELOAD_CASE (TI, ti);
4326 __SECONDARY_RELOAD_CASE (SF, sf);
4327 __SECONDARY_RELOAD_CASE (DF, df);
4328 __SECONDARY_RELOAD_CASE (TF, tf);
4329 __SECONDARY_RELOAD_CASE (SD, sd);
4330 __SECONDARY_RELOAD_CASE (DD, dd);
4331 __SECONDARY_RELOAD_CASE (TD, td);
4332 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4333 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4334 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4335 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4336 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4337 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4338 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4339 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4340 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4341 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4342 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4343 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4344 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4345 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4346 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4347 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4348 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4349 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4350 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4351 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4352 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4353 default:
4354 gcc_unreachable ();
4356 #undef __SECONDARY_RELOAD_CASE
4360 /* We need a scratch register when loading a PLUS expression which
4361 is not a legitimate operand of the LOAD ADDRESS instruction. */
4362 /* LRA can deal with transformation of plus op very well -- so we
4363 don't need to prompt LRA in this case. */
4364 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4365 sri->icode = (TARGET_64BIT ?
4366 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4368 /* Performing a multiword move from or to memory we have to make sure the
4369 second chunk in memory is addressable without causing a displacement
4370 overflow. If that would be the case we calculate the address in
4371 a scratch register. */
4372 if (MEM_P (x)
4373 && GET_CODE (XEXP (x, 0)) == PLUS
4374 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4375 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4376 + GET_MODE_SIZE (mode) - 1))
4378 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4379 in a s_operand address since we may fallback to lm/stm. So we only
4380 have to care about overflows in the b+i+d case. */
4381 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4382 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4383 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4384 /* For FP_REGS no lm/stm is available so this check is triggered
4385 for displacement overflows in b+i+d and b+d like addresses. */
4386 || (reg_classes_intersect_p (FP_REGS, rclass)
4387 && s390_class_max_nregs (FP_REGS, mode) > 1))
4389 if (in_p)
4390 sri->icode = (TARGET_64BIT ?
4391 CODE_FOR_reloaddi_la_in :
4392 CODE_FOR_reloadsi_la_in);
4393 else
4394 sri->icode = (TARGET_64BIT ?
4395 CODE_FOR_reloaddi_la_out :
4396 CODE_FOR_reloadsi_la_out);
4400 /* A scratch address register is needed when a symbolic constant is
4401 copied to r0 compiling with -fPIC. In other cases the target
4402 register might be used as temporary (see legitimize_pic_address). */
4403 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4404 sri->icode = (TARGET_64BIT ?
4405 CODE_FOR_reloaddi_PIC_addr :
4406 CODE_FOR_reloadsi_PIC_addr);
4408 /* Either scratch or no register needed. */
4409 return NO_REGS;
4412 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4414 We need secondary memory to move data between GPRs and FPRs.
4416 - With DFP the ldgr lgdr instructions are available. Due to the
4417 different alignment we cannot use them for SFmode. For 31 bit a
4418 64 bit value in GPR would be a register pair so here we still
4419 need to go via memory.
4421 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4422 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4423 in full VRs so as before also on z13 we do these moves via
4424 memory.
4426 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4428 static bool
4429 s390_secondary_memory_needed (machine_mode mode,
4430 reg_class_t class1, reg_class_t class2)
4432 return (((reg_classes_intersect_p (class1, VEC_REGS)
4433 && reg_classes_intersect_p (class2, GENERAL_REGS))
4434 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4435 && reg_classes_intersect_p (class2, VEC_REGS)))
4436 && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4437 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4438 && GET_MODE_SIZE (mode) > 8)));
4441 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4443 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4444 because the movsi and movsf patterns don't handle r/f moves. */
4446 static machine_mode
4447 s390_secondary_memory_needed_mode (machine_mode mode)
4449 if (GET_MODE_BITSIZE (mode) < 32)
4450 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4451 return mode;
4454 /* Generate code to load SRC, which is PLUS that is not a
4455 legitimate operand for the LA instruction, into TARGET.
4456 SCRATCH may be used as scratch register. */
4458 void
4459 s390_expand_plus_operand (rtx target, rtx src,
4460 rtx scratch)
4462 rtx sum1, sum2;
4463 struct s390_address ad;
4465 /* src must be a PLUS; get its two operands. */
4466 gcc_assert (GET_CODE (src) == PLUS);
4467 gcc_assert (GET_MODE (src) == Pmode);
4469 /* Check if any of the two operands is already scheduled
4470 for replacement by reload. This can happen e.g. when
4471 float registers occur in an address. */
4472 sum1 = find_replacement (&XEXP (src, 0));
4473 sum2 = find_replacement (&XEXP (src, 1));
4474 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4476 /* If the address is already strictly valid, there's nothing to do. */
4477 if (!s390_decompose_address (src, &ad)
4478 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4479 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4481 /* Otherwise, one of the operands cannot be an address register;
4482 we reload its value into the scratch register. */
4483 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4485 emit_move_insn (scratch, sum1);
4486 sum1 = scratch;
4488 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4490 emit_move_insn (scratch, sum2);
4491 sum2 = scratch;
4494 /* According to the way these invalid addresses are generated
4495 in reload.c, it should never happen (at least on s390) that
4496 *neither* of the PLUS components, after find_replacements
4497 was applied, is an address register. */
4498 if (sum1 == scratch && sum2 == scratch)
4500 debug_rtx (src);
4501 gcc_unreachable ();
4504 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4507 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4508 is only ever performed on addresses, so we can mark the
4509 sum as legitimate for LA in any case. */
4510 s390_load_address (target, src);
4514 /* Return true if ADDR is a valid memory address.
4515 STRICT specifies whether strict register checking applies. */
4517 static bool
4518 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4520 struct s390_address ad;
4522 if (TARGET_Z10
4523 && larl_operand (addr, VOIDmode)
4524 && (mode == VOIDmode
4525 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4526 return true;
4528 if (!s390_decompose_address (addr, &ad))
4529 return false;
4531 if (strict)
4533 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4534 return false;
4536 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4537 return false;
4539 else
4541 if (ad.base
4542 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4543 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4544 return false;
4546 if (ad.indx
4547 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4548 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4549 return false;
4551 return true;
4554 /* Return true if OP is a valid operand for the LA instruction.
4555 In 31-bit, we need to prove that the result is used as an
4556 address, as LA performs only a 31-bit addition. */
4558 bool
4559 legitimate_la_operand_p (rtx op)
4561 struct s390_address addr;
4562 if (!s390_decompose_address (op, &addr))
4563 return false;
4565 return (TARGET_64BIT || addr.pointer);
4568 /* Return true if it is valid *and* preferable to use LA to
4569 compute the sum of OP1 and OP2. */
4571 bool
4572 preferred_la_operand_p (rtx op1, rtx op2)
4574 struct s390_address addr;
4576 if (op2 != const0_rtx)
4577 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4579 if (!s390_decompose_address (op1, &addr))
4580 return false;
4581 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4582 return false;
4583 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4584 return false;
4586 /* Avoid LA instructions with index register on z196; it is
4587 preferable to use regular add instructions when possible.
4588 Starting with zEC12 the la with index register is "uncracked"
4589 again. */
4590 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4591 return false;
4593 if (!TARGET_64BIT && !addr.pointer)
4594 return false;
4596 if (addr.pointer)
4597 return true;
4599 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4600 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4601 return true;
4603 return false;
4606 /* Emit a forced load-address operation to load SRC into DST.
4607 This will use the LOAD ADDRESS instruction even in situations
4608 where legitimate_la_operand_p (SRC) returns false. */
4610 void
4611 s390_load_address (rtx dst, rtx src)
4613 if (TARGET_64BIT)
4614 emit_move_insn (dst, src);
4615 else
4616 emit_insn (gen_force_la_31 (dst, src));
4619 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4621 bool
4622 s390_rel_address_ok_p (rtx symbol_ref)
4624 tree decl;
4626 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4627 return true;
4629 decl = SYMBOL_REF_DECL (symbol_ref);
4631 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4632 return (s390_pic_data_is_text_relative
4633 || (decl
4634 && TREE_CODE (decl) == FUNCTION_DECL));
4636 return false;
4639 /* Return a legitimate reference for ORIG (an address) using the
4640 register REG. If REG is 0, a new pseudo is generated.
4642 There are two types of references that must be handled:
4644 1. Global data references must load the address from the GOT, via
4645 the PIC reg. An insn is emitted to do this load, and the reg is
4646 returned.
4648 2. Static data references, constant pool addresses, and code labels
4649 compute the address as an offset from the GOT, whose base is in
4650 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4651 differentiate them from global data objects. The returned
4652 address is the PIC reg + an unspec constant.
4654 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4655 reg also appears in the address. */
4658 legitimize_pic_address (rtx orig, rtx reg)
4660 rtx addr = orig;
4661 rtx addend = const0_rtx;
4662 rtx new_rtx = orig;
4664 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4666 if (GET_CODE (addr) == CONST)
4667 addr = XEXP (addr, 0);
4669 if (GET_CODE (addr) == PLUS)
4671 addend = XEXP (addr, 1);
4672 addr = XEXP (addr, 0);
4675 if ((GET_CODE (addr) == LABEL_REF
4676 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4677 || (GET_CODE (addr) == UNSPEC &&
4678 (XINT (addr, 1) == UNSPEC_GOTENT
4679 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4680 && GET_CODE (addend) == CONST_INT)
4682 /* This can be locally addressed. */
4684 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4685 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4686 gen_rtx_CONST (Pmode, addr) : addr);
4688 if (TARGET_CPU_ZARCH
4689 && larl_operand (const_addr, VOIDmode)
4690 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4691 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4693 if (INTVAL (addend) & 1)
4695 /* LARL can't handle odd offsets, so emit a pair of LARL
4696 and LA. */
4697 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4699 if (!DISP_IN_RANGE (INTVAL (addend)))
4701 HOST_WIDE_INT even = INTVAL (addend) - 1;
4702 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4703 addr = gen_rtx_CONST (Pmode, addr);
4704 addend = const1_rtx;
4707 emit_move_insn (temp, addr);
4708 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4710 if (reg != 0)
4712 s390_load_address (reg, new_rtx);
4713 new_rtx = reg;
4716 else
4718 /* If the offset is even, we can just use LARL. This
4719 will happen automatically. */
4722 else
4724 /* No larl - Access local symbols relative to the GOT. */
4726 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4728 if (reload_in_progress || reload_completed)
4729 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4731 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4732 if (addend != const0_rtx)
4733 addr = gen_rtx_PLUS (Pmode, addr, addend);
4734 addr = gen_rtx_CONST (Pmode, addr);
4735 addr = force_const_mem (Pmode, addr);
4736 emit_move_insn (temp, addr);
4738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4739 if (reg != 0)
4741 s390_load_address (reg, new_rtx);
4742 new_rtx = reg;
4746 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4748 /* A non-local symbol reference without addend.
4750 The symbol ref is wrapped into an UNSPEC to make sure the
4751 proper operand modifier (@GOT or @GOTENT) will be emitted.
4752 This will tell the linker to put the symbol into the GOT.
4754 Additionally the code dereferencing the GOT slot is emitted here.
4756 An addend to the symref needs to be added afterwards.
4757 legitimize_pic_address calls itself recursively to handle
4758 that case. So no need to do it here. */
4760 if (reg == 0)
4761 reg = gen_reg_rtx (Pmode);
4763 if (TARGET_Z10)
4765 /* Use load relative if possible.
4766 lgrl <target>, sym@GOTENT */
4767 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4768 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4769 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4771 emit_move_insn (reg, new_rtx);
4772 new_rtx = reg;
4774 else if (flag_pic == 1)
4776 /* Assume GOT offset is a valid displacement operand (< 4k
4777 or < 512k with z990). This is handled the same way in
4778 both 31- and 64-bit code (@GOT).
4779 lg <target>, sym@GOT(r12) */
4781 if (reload_in_progress || reload_completed)
4782 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4784 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4785 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4786 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4787 new_rtx = gen_const_mem (Pmode, new_rtx);
4788 emit_move_insn (reg, new_rtx);
4789 new_rtx = reg;
4791 else if (TARGET_CPU_ZARCH)
4793 /* If the GOT offset might be >= 4k, we determine the position
4794 of the GOT entry via a PC-relative LARL (@GOTENT).
4795 larl temp, sym@GOTENT
4796 lg <target>, 0(temp) */
4798 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4800 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4801 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4803 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4804 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4805 emit_move_insn (temp, new_rtx);
4807 new_rtx = gen_const_mem (Pmode, temp);
4808 emit_move_insn (reg, new_rtx);
4810 new_rtx = reg;
4812 else
4814 /* If the GOT offset might be >= 4k, we have to load it
4815 from the literal pool (@GOT).
4817 lg temp, lit-litbase(r13)
4818 lg <target>, 0(temp)
4819 lit: .long sym@GOT */
4821 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4823 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4824 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4826 if (reload_in_progress || reload_completed)
4827 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4829 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4830 addr = gen_rtx_CONST (Pmode, addr);
4831 addr = force_const_mem (Pmode, addr);
4832 emit_move_insn (temp, addr);
4834 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4835 new_rtx = gen_const_mem (Pmode, new_rtx);
4836 emit_move_insn (reg, new_rtx);
4837 new_rtx = reg;
4840 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4842 gcc_assert (XVECLEN (addr, 0) == 1);
4843 switch (XINT (addr, 1))
4845 /* These address symbols (or PLT slots) relative to the GOT
4846 (not GOT slots!). In general this will exceed the
4847 displacement range so these value belong into the literal
4848 pool. */
4849 case UNSPEC_GOTOFF:
4850 case UNSPEC_PLTOFF:
4851 new_rtx = force_const_mem (Pmode, orig);
4852 break;
4854 /* For -fPIC the GOT size might exceed the displacement
4855 range so make sure the value is in the literal pool. */
4856 case UNSPEC_GOT:
4857 if (flag_pic == 2)
4858 new_rtx = force_const_mem (Pmode, orig);
4859 break;
4861 /* For @GOTENT larl is used. This is handled like local
4862 symbol refs. */
4863 case UNSPEC_GOTENT:
4864 gcc_unreachable ();
4865 break;
4867 /* @PLT is OK as is on 64-bit, must be converted to
4868 GOT-relative @PLTOFF on 31-bit. */
4869 case UNSPEC_PLT:
4870 if (!TARGET_CPU_ZARCH)
4872 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4874 if (reload_in_progress || reload_completed)
4875 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4877 addr = XVECEXP (addr, 0, 0);
4878 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4879 UNSPEC_PLTOFF);
4880 if (addend != const0_rtx)
4881 addr = gen_rtx_PLUS (Pmode, addr, addend);
4882 addr = gen_rtx_CONST (Pmode, addr);
4883 addr = force_const_mem (Pmode, addr);
4884 emit_move_insn (temp, addr);
4886 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4887 if (reg != 0)
4889 s390_load_address (reg, new_rtx);
4890 new_rtx = reg;
4893 else
4894 /* On 64 bit larl can be used. This case is handled like
4895 local symbol refs. */
4896 gcc_unreachable ();
4897 break;
4899 /* Everything else cannot happen. */
4900 default:
4901 gcc_unreachable ();
4904 else if (addend != const0_rtx)
4906 /* Otherwise, compute the sum. */
4908 rtx base = legitimize_pic_address (addr, reg);
4909 new_rtx = legitimize_pic_address (addend,
4910 base == reg ? NULL_RTX : reg);
4911 if (GET_CODE (new_rtx) == CONST_INT)
4912 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4913 else
4915 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4917 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4918 new_rtx = XEXP (new_rtx, 1);
4920 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4923 if (GET_CODE (new_rtx) == CONST)
4924 new_rtx = XEXP (new_rtx, 0);
4925 new_rtx = force_operand (new_rtx, 0);
4928 return new_rtx;
4931 /* Load the thread pointer into a register. */
4934 s390_get_thread_pointer (void)
4936 rtx tp = gen_reg_rtx (Pmode);
4938 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4939 mark_reg_pointer (tp, BITS_PER_WORD);
4941 return tp;
4944 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4945 in s390_tls_symbol which always refers to __tls_get_offset.
4946 The returned offset is written to RESULT_REG and an USE rtx is
4947 generated for TLS_CALL. */
4949 static GTY(()) rtx s390_tls_symbol;
4951 static void
4952 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4954 rtx insn;
4956 if (!flag_pic)
4957 emit_insn (s390_load_got ());
4959 if (!s390_tls_symbol)
4960 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4962 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4963 gen_rtx_REG (Pmode, RETURN_REGNUM));
4965 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4966 RTL_CONST_CALL_P (insn) = 1;
4969 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4970 this (thread-local) address. REG may be used as temporary. */
4972 static rtx
4973 legitimize_tls_address (rtx addr, rtx reg)
4975 rtx new_rtx, tls_call, temp, base, r2;
4976 rtx_insn *insn;
4978 if (GET_CODE (addr) == SYMBOL_REF)
4979 switch (tls_symbolic_operand (addr))
4981 case TLS_MODEL_GLOBAL_DYNAMIC:
4982 start_sequence ();
4983 r2 = gen_rtx_REG (Pmode, 2);
4984 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4985 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4986 new_rtx = force_const_mem (Pmode, new_rtx);
4987 emit_move_insn (r2, new_rtx);
4988 s390_emit_tls_call_insn (r2, tls_call);
4989 insn = get_insns ();
4990 end_sequence ();
4992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4993 temp = gen_reg_rtx (Pmode);
4994 emit_libcall_block (insn, temp, r2, new_rtx);
4996 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4997 if (reg != 0)
4999 s390_load_address (reg, new_rtx);
5000 new_rtx = reg;
5002 break;
5004 case TLS_MODEL_LOCAL_DYNAMIC:
5005 start_sequence ();
5006 r2 = gen_rtx_REG (Pmode, 2);
5007 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5008 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5009 new_rtx = force_const_mem (Pmode, new_rtx);
5010 emit_move_insn (r2, new_rtx);
5011 s390_emit_tls_call_insn (r2, tls_call);
5012 insn = get_insns ();
5013 end_sequence ();
5015 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5016 temp = gen_reg_rtx (Pmode);
5017 emit_libcall_block (insn, temp, r2, new_rtx);
5019 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5020 base = gen_reg_rtx (Pmode);
5021 s390_load_address (base, new_rtx);
5023 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5024 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5025 new_rtx = force_const_mem (Pmode, new_rtx);
5026 temp = gen_reg_rtx (Pmode);
5027 emit_move_insn (temp, new_rtx);
5029 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5030 if (reg != 0)
5032 s390_load_address (reg, new_rtx);
5033 new_rtx = reg;
5035 break;
5037 case TLS_MODEL_INITIAL_EXEC:
5038 if (flag_pic == 1)
5040 /* Assume GOT offset < 4k. This is handled the same way
5041 in both 31- and 64-bit code. */
5043 if (reload_in_progress || reload_completed)
5044 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5046 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5047 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5048 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5049 new_rtx = gen_const_mem (Pmode, new_rtx);
5050 temp = gen_reg_rtx (Pmode);
5051 emit_move_insn (temp, new_rtx);
5053 else if (TARGET_CPU_ZARCH)
5055 /* If the GOT offset might be >= 4k, we determine the position
5056 of the GOT entry via a PC-relative LARL. */
5058 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5059 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5060 temp = gen_reg_rtx (Pmode);
5061 emit_move_insn (temp, new_rtx);
5063 new_rtx = gen_const_mem (Pmode, temp);
5064 temp = gen_reg_rtx (Pmode);
5065 emit_move_insn (temp, new_rtx);
5067 else if (flag_pic)
5069 /* If the GOT offset might be >= 4k, we have to load it
5070 from the literal pool. */
5072 if (reload_in_progress || reload_completed)
5073 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5075 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5076 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5077 new_rtx = force_const_mem (Pmode, new_rtx);
5078 temp = gen_reg_rtx (Pmode);
5079 emit_move_insn (temp, new_rtx);
5081 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5082 new_rtx = gen_const_mem (Pmode, new_rtx);
5084 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5085 temp = gen_reg_rtx (Pmode);
5086 emit_insn (gen_rtx_SET (temp, new_rtx));
5088 else
5090 /* In position-dependent code, load the absolute address of
5091 the GOT entry from the literal pool. */
5093 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5094 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5095 new_rtx = force_const_mem (Pmode, new_rtx);
5096 temp = gen_reg_rtx (Pmode);
5097 emit_move_insn (temp, new_rtx);
5099 new_rtx = temp;
5100 new_rtx = gen_const_mem (Pmode, new_rtx);
5101 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5102 temp = gen_reg_rtx (Pmode);
5103 emit_insn (gen_rtx_SET (temp, new_rtx));
5106 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5107 if (reg != 0)
5109 s390_load_address (reg, new_rtx);
5110 new_rtx = reg;
5112 break;
5114 case TLS_MODEL_LOCAL_EXEC:
5115 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5116 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5117 new_rtx = force_const_mem (Pmode, new_rtx);
5118 temp = gen_reg_rtx (Pmode);
5119 emit_move_insn (temp, new_rtx);
5121 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5122 if (reg != 0)
5124 s390_load_address (reg, new_rtx);
5125 new_rtx = reg;
5127 break;
5129 default:
5130 gcc_unreachable ();
5133 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5135 switch (XINT (XEXP (addr, 0), 1))
5137 case UNSPEC_INDNTPOFF:
5138 gcc_assert (TARGET_CPU_ZARCH);
5139 new_rtx = addr;
5140 break;
5142 default:
5143 gcc_unreachable ();
5147 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5148 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5150 new_rtx = XEXP (XEXP (addr, 0), 0);
5151 if (GET_CODE (new_rtx) != SYMBOL_REF)
5152 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5154 new_rtx = legitimize_tls_address (new_rtx, reg);
5155 new_rtx = plus_constant (Pmode, new_rtx,
5156 INTVAL (XEXP (XEXP (addr, 0), 1)));
5157 new_rtx = force_operand (new_rtx, 0);
5160 else
5161 gcc_unreachable (); /* for now ... */
5163 return new_rtx;
5166 /* Emit insns making the address in operands[1] valid for a standard
5167 move to operands[0]. operands[1] is replaced by an address which
5168 should be used instead of the former RTX to emit the move
5169 pattern. */
5171 void
5172 emit_symbolic_move (rtx *operands)
5174 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5176 if (GET_CODE (operands[0]) == MEM)
5177 operands[1] = force_reg (Pmode, operands[1]);
5178 else if (TLS_SYMBOLIC_CONST (operands[1]))
5179 operands[1] = legitimize_tls_address (operands[1], temp);
5180 else if (flag_pic)
5181 operands[1] = legitimize_pic_address (operands[1], temp);
5184 /* Try machine-dependent ways of modifying an illegitimate address X
5185 to be legitimate. If we find one, return the new, valid address.
5187 OLDX is the address as it was before break_out_memory_refs was called.
5188 In some cases it is useful to look at this to decide what needs to be done.
5190 MODE is the mode of the operand pointed to by X.
5192 When -fpic is used, special handling is needed for symbolic references.
5193 See comments by legitimize_pic_address for details. */
5195 static rtx
5196 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5197 machine_mode mode ATTRIBUTE_UNUSED)
5199 rtx constant_term = const0_rtx;
5201 if (TLS_SYMBOLIC_CONST (x))
5203 x = legitimize_tls_address (x, 0);
5205 if (s390_legitimate_address_p (mode, x, FALSE))
5206 return x;
5208 else if (GET_CODE (x) == PLUS
5209 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5210 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5212 return x;
5214 else if (flag_pic)
5216 if (SYMBOLIC_CONST (x)
5217 || (GET_CODE (x) == PLUS
5218 && (SYMBOLIC_CONST (XEXP (x, 0))
5219 || SYMBOLIC_CONST (XEXP (x, 1)))))
5220 x = legitimize_pic_address (x, 0);
5222 if (s390_legitimate_address_p (mode, x, FALSE))
5223 return x;
5226 x = eliminate_constant_term (x, &constant_term);
5228 /* Optimize loading of large displacements by splitting them
5229 into the multiple of 4K and the rest; this allows the
5230 former to be CSE'd if possible.
5232 Don't do this if the displacement is added to a register
5233 pointing into the stack frame, as the offsets will
5234 change later anyway. */
5236 if (GET_CODE (constant_term) == CONST_INT
5237 && !TARGET_LONG_DISPLACEMENT
5238 && !DISP_IN_RANGE (INTVAL (constant_term))
5239 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5241 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5242 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5244 rtx temp = gen_reg_rtx (Pmode);
5245 rtx val = force_operand (GEN_INT (upper), temp);
5246 if (val != temp)
5247 emit_move_insn (temp, val);
5249 x = gen_rtx_PLUS (Pmode, x, temp);
5250 constant_term = GEN_INT (lower);
5253 if (GET_CODE (x) == PLUS)
5255 if (GET_CODE (XEXP (x, 0)) == REG)
5257 rtx temp = gen_reg_rtx (Pmode);
5258 rtx val = force_operand (XEXP (x, 1), temp);
5259 if (val != temp)
5260 emit_move_insn (temp, val);
5262 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5265 else if (GET_CODE (XEXP (x, 1)) == REG)
5267 rtx temp = gen_reg_rtx (Pmode);
5268 rtx val = force_operand (XEXP (x, 0), temp);
5269 if (val != temp)
5270 emit_move_insn (temp, val);
5272 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5276 if (constant_term != const0_rtx)
5277 x = gen_rtx_PLUS (Pmode, x, constant_term);
5279 return x;
5282 /* Try a machine-dependent way of reloading an illegitimate address AD
5283 operand. If we find one, push the reload and return the new address.
5285 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5286 and TYPE is the reload type of the current reload. */
5289 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5290 int opnum, int type)
5292 if (!optimize || TARGET_LONG_DISPLACEMENT)
5293 return NULL_RTX;
5295 if (GET_CODE (ad) == PLUS)
5297 rtx tem = simplify_binary_operation (PLUS, Pmode,
5298 XEXP (ad, 0), XEXP (ad, 1));
5299 if (tem)
5300 ad = tem;
5303 if (GET_CODE (ad) == PLUS
5304 && GET_CODE (XEXP (ad, 0)) == REG
5305 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5306 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5308 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5309 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5310 rtx cst, tem, new_rtx;
5312 cst = GEN_INT (upper);
5313 if (!legitimate_reload_constant_p (cst))
5314 cst = force_const_mem (Pmode, cst);
5316 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5317 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5319 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5320 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5321 opnum, (enum reload_type) type);
5322 return new_rtx;
5325 return NULL_RTX;
5328 /* Emit code to move LEN bytes from DST to SRC. */
5330 bool
5331 s390_expand_movmem (rtx dst, rtx src, rtx len)
5333 /* When tuning for z10 or higher we rely on the Glibc functions to
5334 do the right thing. Only for constant lengths below 64k we will
5335 generate inline code. */
5336 if (s390_tune >= PROCESSOR_2097_Z10
5337 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5338 return false;
5340 /* Expand memcpy for constant length operands without a loop if it
5341 is shorter that way.
5343 With a constant length argument a
5344 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5345 if (GET_CODE (len) == CONST_INT
5346 && INTVAL (len) >= 0
5347 && INTVAL (len) <= 256 * 6
5348 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5350 HOST_WIDE_INT o, l;
5352 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5354 rtx newdst = adjust_address (dst, BLKmode, o);
5355 rtx newsrc = adjust_address (src, BLKmode, o);
5356 emit_insn (gen_movmem_short (newdst, newsrc,
5357 GEN_INT (l > 256 ? 255 : l - 1)));
5361 else if (TARGET_MVCLE)
5363 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5366 else
5368 rtx dst_addr, src_addr, count, blocks, temp;
5369 rtx_code_label *loop_start_label = gen_label_rtx ();
5370 rtx_code_label *loop_end_label = gen_label_rtx ();
5371 rtx_code_label *end_label = gen_label_rtx ();
5372 machine_mode mode;
5374 mode = GET_MODE (len);
5375 if (mode == VOIDmode)
5376 mode = Pmode;
5378 dst_addr = gen_reg_rtx (Pmode);
5379 src_addr = gen_reg_rtx (Pmode);
5380 count = gen_reg_rtx (mode);
5381 blocks = gen_reg_rtx (mode);
5383 convert_move (count, len, 1);
5384 emit_cmp_and_jump_insns (count, const0_rtx,
5385 EQ, NULL_RTX, mode, 1, end_label);
5387 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5388 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5389 dst = change_address (dst, VOIDmode, dst_addr);
5390 src = change_address (src, VOIDmode, src_addr);
5392 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5393 OPTAB_DIRECT);
5394 if (temp != count)
5395 emit_move_insn (count, temp);
5397 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5398 OPTAB_DIRECT);
5399 if (temp != blocks)
5400 emit_move_insn (blocks, temp);
5402 emit_cmp_and_jump_insns (blocks, const0_rtx,
5403 EQ, NULL_RTX, mode, 1, loop_end_label);
5405 emit_label (loop_start_label);
5407 if (TARGET_Z10
5408 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5410 rtx prefetch;
5412 /* Issue a read prefetch for the +3 cache line. */
5413 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5414 const0_rtx, const0_rtx);
5415 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5416 emit_insn (prefetch);
5418 /* Issue a write prefetch for the +3 cache line. */
5419 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5420 const1_rtx, const0_rtx);
5421 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5422 emit_insn (prefetch);
5425 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5426 s390_load_address (dst_addr,
5427 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5428 s390_load_address (src_addr,
5429 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5431 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5432 OPTAB_DIRECT);
5433 if (temp != blocks)
5434 emit_move_insn (blocks, temp);
5436 emit_cmp_and_jump_insns (blocks, const0_rtx,
5437 EQ, NULL_RTX, mode, 1, loop_end_label);
5439 emit_jump (loop_start_label);
5440 emit_label (loop_end_label);
5442 emit_insn (gen_movmem_short (dst, src,
5443 convert_to_mode (Pmode, count, 1)));
5444 emit_label (end_label);
5446 return true;
5449 /* Emit code to set LEN bytes at DST to VAL.
5450 Make use of clrmem if VAL is zero. */
5452 void
5453 s390_expand_setmem (rtx dst, rtx len, rtx val)
5455 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5456 return;
5458 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5460 /* Expand setmem/clrmem for a constant length operand without a
5461 loop if it will be shorter that way.
5462 With a constant length and without pfd argument a
5463 clrmem loop is 32 bytes -> 5.3 * xc
5464 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5465 if (GET_CODE (len) == CONST_INT
5466 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5467 || INTVAL (len) <= 257 * 3)
5468 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5470 HOST_WIDE_INT o, l;
5472 if (val == const0_rtx)
5473 /* clrmem: emit 256 byte blockwise XCs. */
5474 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5476 rtx newdst = adjust_address (dst, BLKmode, o);
5477 emit_insn (gen_clrmem_short (newdst,
5478 GEN_INT (l > 256 ? 255 : l - 1)));
5480 else
5481 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5482 setting first byte to val and using a 256 byte mvc with one
5483 byte overlap to propagate the byte. */
5484 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5486 rtx newdst = adjust_address (dst, BLKmode, o);
5487 emit_move_insn (adjust_address (dst, QImode, o), val);
5488 if (l > 1)
5490 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5491 emit_insn (gen_movmem_short (newdstp1, newdst,
5492 GEN_INT (l > 257 ? 255 : l - 2)));
5497 else if (TARGET_MVCLE)
5499 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5500 if (TARGET_64BIT)
5501 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5502 val));
5503 else
5504 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5505 val));
5508 else
5510 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5511 rtx_code_label *loop_start_label = gen_label_rtx ();
5512 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5513 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5514 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5515 machine_mode mode;
5517 mode = GET_MODE (len);
5518 if (mode == VOIDmode)
5519 mode = Pmode;
5521 dst_addr = gen_reg_rtx (Pmode);
5522 count = gen_reg_rtx (mode);
5523 blocks = gen_reg_rtx (mode);
5525 convert_move (count, len, 1);
5526 emit_cmp_and_jump_insns (count, const0_rtx,
5527 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5528 profile_probability::very_unlikely ());
5530 /* We need to make a copy of the target address since memset is
5531 supposed to return it unmodified. We have to make it here
5532 already since the new reg is used at onebyte_end_label. */
5533 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5534 dst = change_address (dst, VOIDmode, dst_addr);
5536 if (val != const0_rtx)
5538 /* When using the overlapping mvc the original target
5539 address is only accessed as single byte entity (even by
5540 the mvc reading this value). */
5541 set_mem_size (dst, 1);
5542 dstp1 = adjust_address (dst, VOIDmode, 1);
5543 emit_cmp_and_jump_insns (count,
5544 const1_rtx, EQ, NULL_RTX, mode, 1,
5545 onebyte_end_label,
5546 profile_probability::very_unlikely ());
5549 /* There is one unconditional (mvi+mvc)/xc after the loop
5550 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5551 or one (xc) here leaves this number of bytes to be handled by
5552 it. */
5553 temp = expand_binop (mode, add_optab, count,
5554 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5555 count, 1, OPTAB_DIRECT);
5556 if (temp != count)
5557 emit_move_insn (count, temp);
5559 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5560 OPTAB_DIRECT);
5561 if (temp != blocks)
5562 emit_move_insn (blocks, temp);
5564 emit_cmp_and_jump_insns (blocks, const0_rtx,
5565 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5567 emit_jump (loop_start_label);
5569 if (val != const0_rtx)
5571 /* The 1 byte != 0 special case. Not handled efficiently
5572 since we require two jumps for that. However, this
5573 should be very rare. */
5574 emit_label (onebyte_end_label);
5575 emit_move_insn (adjust_address (dst, QImode, 0), val);
5576 emit_jump (zerobyte_end_label);
5579 emit_label (loop_start_label);
5581 if (TARGET_Z10
5582 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5584 /* Issue a write prefetch for the +4 cache line. */
5585 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5586 GEN_INT (1024)),
5587 const1_rtx, const0_rtx);
5588 emit_insn (prefetch);
5589 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5592 if (val == const0_rtx)
5593 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5594 else
5596 /* Set the first byte in the block to the value and use an
5597 overlapping mvc for the block. */
5598 emit_move_insn (adjust_address (dst, QImode, 0), val);
5599 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5601 s390_load_address (dst_addr,
5602 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5604 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5605 OPTAB_DIRECT);
5606 if (temp != blocks)
5607 emit_move_insn (blocks, temp);
5609 emit_cmp_and_jump_insns (blocks, const0_rtx,
5610 NE, NULL_RTX, mode, 1, loop_start_label);
5612 emit_label (restbyte_end_label);
5614 if (val == const0_rtx)
5615 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5616 else
5618 /* Set the first byte in the block to the value and use an
5619 overlapping mvc for the block. */
5620 emit_move_insn (adjust_address (dst, QImode, 0), val);
5621 /* execute only uses the lowest 8 bits of count that's
5622 exactly what we need here. */
5623 emit_insn (gen_movmem_short (dstp1, dst,
5624 convert_to_mode (Pmode, count, 1)));
5627 emit_label (zerobyte_end_label);
5631 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5632 and return the result in TARGET. */
5634 bool
5635 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5637 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5638 rtx tmp;
5640 /* When tuning for z10 or higher we rely on the Glibc functions to
5641 do the right thing. Only for constant lengths below 64k we will
5642 generate inline code. */
5643 if (s390_tune >= PROCESSOR_2097_Z10
5644 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5645 return false;
5647 /* As the result of CMPINT is inverted compared to what we need,
5648 we have to swap the operands. */
5649 tmp = op0; op0 = op1; op1 = tmp;
5651 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5653 if (INTVAL (len) > 0)
5655 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5656 emit_insn (gen_cmpint (target, ccreg));
5658 else
5659 emit_move_insn (target, const0_rtx);
5661 else if (TARGET_MVCLE)
5663 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5664 emit_insn (gen_cmpint (target, ccreg));
5666 else
5668 rtx addr0, addr1, count, blocks, temp;
5669 rtx_code_label *loop_start_label = gen_label_rtx ();
5670 rtx_code_label *loop_end_label = gen_label_rtx ();
5671 rtx_code_label *end_label = gen_label_rtx ();
5672 machine_mode mode;
5674 mode = GET_MODE (len);
5675 if (mode == VOIDmode)
5676 mode = Pmode;
5678 addr0 = gen_reg_rtx (Pmode);
5679 addr1 = gen_reg_rtx (Pmode);
5680 count = gen_reg_rtx (mode);
5681 blocks = gen_reg_rtx (mode);
5683 convert_move (count, len, 1);
5684 emit_cmp_and_jump_insns (count, const0_rtx,
5685 EQ, NULL_RTX, mode, 1, end_label);
5687 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5688 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5689 op0 = change_address (op0, VOIDmode, addr0);
5690 op1 = change_address (op1, VOIDmode, addr1);
5692 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5693 OPTAB_DIRECT);
5694 if (temp != count)
5695 emit_move_insn (count, temp);
5697 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5698 OPTAB_DIRECT);
5699 if (temp != blocks)
5700 emit_move_insn (blocks, temp);
5702 emit_cmp_and_jump_insns (blocks, const0_rtx,
5703 EQ, NULL_RTX, mode, 1, loop_end_label);
5705 emit_label (loop_start_label);
5707 if (TARGET_Z10
5708 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5710 rtx prefetch;
5712 /* Issue a read prefetch for the +2 cache line of operand 1. */
5713 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5714 const0_rtx, const0_rtx);
5715 emit_insn (prefetch);
5716 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5718 /* Issue a read prefetch for the +2 cache line of operand 2. */
5719 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5720 const0_rtx, const0_rtx);
5721 emit_insn (prefetch);
5722 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5725 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5726 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5727 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5728 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5729 temp = gen_rtx_SET (pc_rtx, temp);
5730 emit_jump_insn (temp);
5732 s390_load_address (addr0,
5733 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5734 s390_load_address (addr1,
5735 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5737 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5738 OPTAB_DIRECT);
5739 if (temp != blocks)
5740 emit_move_insn (blocks, temp);
5742 emit_cmp_and_jump_insns (blocks, const0_rtx,
5743 EQ, NULL_RTX, mode, 1, loop_end_label);
5745 emit_jump (loop_start_label);
5746 emit_label (loop_end_label);
5748 emit_insn (gen_cmpmem_short (op0, op1,
5749 convert_to_mode (Pmode, count, 1)));
5750 emit_label (end_label);
5752 emit_insn (gen_cmpint (target, ccreg));
5754 return true;
5757 /* Emit a conditional jump to LABEL for condition code mask MASK using
5758 comparsion operator COMPARISON. Return the emitted jump insn. */
5760 static rtx_insn *
5761 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5763 rtx temp;
5765 gcc_assert (comparison == EQ || comparison == NE);
5766 gcc_assert (mask > 0 && mask < 15);
5768 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5769 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5770 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5771 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5772 temp = gen_rtx_SET (pc_rtx, temp);
5773 return emit_jump_insn (temp);
5776 /* Emit the instructions to implement strlen of STRING and store the
5777 result in TARGET. The string has the known ALIGNMENT. This
5778 version uses vector instructions and is therefore not appropriate
5779 for targets prior to z13. */
5781 void
5782 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5784 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5785 rtx str_reg = gen_reg_rtx (V16QImode);
5786 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5787 rtx str_idx_reg = gen_reg_rtx (Pmode);
5788 rtx result_reg = gen_reg_rtx (V16QImode);
5789 rtx is_aligned_label = gen_label_rtx ();
5790 rtx into_loop_label = NULL_RTX;
5791 rtx loop_start_label = gen_label_rtx ();
5792 rtx temp;
5793 rtx len = gen_reg_rtx (QImode);
5794 rtx cond;
5796 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5797 emit_move_insn (str_idx_reg, const0_rtx);
5799 if (INTVAL (alignment) < 16)
5801 /* Check whether the address happens to be aligned properly so
5802 jump directly to the aligned loop. */
5803 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5804 str_addr_base_reg, GEN_INT (15)),
5805 const0_rtx, EQ, NULL_RTX,
5806 Pmode, 1, is_aligned_label);
5808 temp = gen_reg_rtx (Pmode);
5809 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5810 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5811 gcc_assert (REG_P (temp));
5812 highest_index_to_load_reg =
5813 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5814 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5815 gcc_assert (REG_P (highest_index_to_load_reg));
5816 emit_insn (gen_vllv16qi (str_reg,
5817 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5818 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5820 into_loop_label = gen_label_rtx ();
5821 s390_emit_jump (into_loop_label, NULL_RTX);
5822 emit_barrier ();
5825 emit_label (is_aligned_label);
5826 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5828 /* Reaching this point we are only performing 16 bytes aligned
5829 loads. */
5830 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5832 emit_label (loop_start_label);
5833 LABEL_NUSES (loop_start_label) = 1;
5835 /* Load 16 bytes of the string into VR. */
5836 emit_move_insn (str_reg,
5837 gen_rtx_MEM (V16QImode,
5838 gen_rtx_PLUS (Pmode, str_idx_reg,
5839 str_addr_base_reg)));
5840 if (into_loop_label != NULL_RTX)
5842 emit_label (into_loop_label);
5843 LABEL_NUSES (into_loop_label) = 1;
5846 /* Increment string index by 16 bytes. */
5847 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5848 str_idx_reg, 1, OPTAB_DIRECT);
5850 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5851 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5853 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5854 REG_BR_PROB,
5855 profile_probability::very_likely ().to_reg_br_prob_note ());
5856 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5858 /* If the string pointer wasn't aligned we have loaded less then 16
5859 bytes and the remaining bytes got filled with zeros (by vll).
5860 Now we have to check whether the resulting index lies within the
5861 bytes actually part of the string. */
5863 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5864 highest_index_to_load_reg);
5865 s390_load_address (highest_index_to_load_reg,
5866 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5867 const1_rtx));
5868 if (TARGET_64BIT)
5869 emit_insn (gen_movdicc (str_idx_reg, cond,
5870 highest_index_to_load_reg, str_idx_reg));
5871 else
5872 emit_insn (gen_movsicc (str_idx_reg, cond,
5873 highest_index_to_load_reg, str_idx_reg));
5875 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5876 profile_probability::very_unlikely ());
5878 expand_binop (Pmode, add_optab, str_idx_reg,
5879 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5880 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5881 here. */
5882 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5883 convert_to_mode (Pmode, len, 1),
5884 target, 1, OPTAB_DIRECT);
5885 if (temp != target)
5886 emit_move_insn (target, temp);
5889 void
5890 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5892 rtx temp = gen_reg_rtx (Pmode);
5893 rtx src_addr = XEXP (src, 0);
5894 rtx dst_addr = XEXP (dst, 0);
5895 rtx src_addr_reg = gen_reg_rtx (Pmode);
5896 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5897 rtx offset = gen_reg_rtx (Pmode);
5898 rtx vsrc = gen_reg_rtx (V16QImode);
5899 rtx vpos = gen_reg_rtx (V16QImode);
5900 rtx loadlen = gen_reg_rtx (SImode);
5901 rtx gpos_qi = gen_reg_rtx(QImode);
5902 rtx gpos = gen_reg_rtx (SImode);
5903 rtx done_label = gen_label_rtx ();
5904 rtx loop_label = gen_label_rtx ();
5905 rtx exit_label = gen_label_rtx ();
5906 rtx full_label = gen_label_rtx ();
5908 /* Perform a quick check for string ending on the first up to 16
5909 bytes and exit early if successful. */
5911 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5912 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5913 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5914 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5915 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5916 /* gpos is the byte index if a zero was found and 16 otherwise.
5917 So if it is lower than the loaded bytes we have a hit. */
5918 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5919 full_label);
5920 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5922 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5923 1, OPTAB_DIRECT);
5924 emit_jump (exit_label);
5925 emit_barrier ();
5927 emit_label (full_label);
5928 LABEL_NUSES (full_label) = 1;
5930 /* Calculate `offset' so that src + offset points to the last byte
5931 before 16 byte alignment. */
5933 /* temp = src_addr & 0xf */
5934 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5935 1, OPTAB_DIRECT);
5937 /* offset = 0xf - temp */
5938 emit_move_insn (offset, GEN_INT (15));
5939 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5940 1, OPTAB_DIRECT);
5942 /* Store `offset' bytes in the dstination string. The quick check
5943 has loaded at least `offset' bytes into vsrc. */
5945 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5947 /* Advance to the next byte to be loaded. */
5948 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5949 1, OPTAB_DIRECT);
5951 /* Make sure the addresses are single regs which can be used as a
5952 base. */
5953 emit_move_insn (src_addr_reg, src_addr);
5954 emit_move_insn (dst_addr_reg, dst_addr);
5956 /* MAIN LOOP */
5958 emit_label (loop_label);
5959 LABEL_NUSES (loop_label) = 1;
5961 emit_move_insn (vsrc,
5962 gen_rtx_MEM (V16QImode,
5963 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5965 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5966 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5967 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5968 REG_BR_PROB, profile_probability::very_unlikely ()
5969 .to_reg_br_prob_note ());
5971 emit_move_insn (gen_rtx_MEM (V16QImode,
5972 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5973 vsrc);
5974 /* offset += 16 */
5975 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5976 offset, 1, OPTAB_DIRECT);
5978 emit_jump (loop_label);
5979 emit_barrier ();
5981 /* REGULAR EXIT */
5983 /* We are done. Add the offset of the zero character to the dst_addr
5984 pointer to get the result. */
5986 emit_label (done_label);
5987 LABEL_NUSES (done_label) = 1;
5989 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5990 1, OPTAB_DIRECT);
5992 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5993 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5995 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5997 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5998 1, OPTAB_DIRECT);
6000 /* EARLY EXIT */
6002 emit_label (exit_label);
6003 LABEL_NUSES (exit_label) = 1;
6007 /* Expand conditional increment or decrement using alc/slb instructions.
6008 Should generate code setting DST to either SRC or SRC + INCREMENT,
6009 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6010 Returns true if successful, false otherwise.
6012 That makes it possible to implement some if-constructs without jumps e.g.:
6013 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6014 unsigned int a, b, c;
6015 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6016 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6017 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6018 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6020 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6021 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6022 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6023 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6024 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6026 bool
6027 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6028 rtx dst, rtx src, rtx increment)
6030 machine_mode cmp_mode;
6031 machine_mode cc_mode;
6032 rtx op_res;
6033 rtx insn;
6034 rtvec p;
6035 int ret;
6037 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6038 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6039 cmp_mode = SImode;
6040 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6041 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6042 cmp_mode = DImode;
6043 else
6044 return false;
6046 /* Try ADD LOGICAL WITH CARRY. */
6047 if (increment == const1_rtx)
6049 /* Determine CC mode to use. */
6050 if (cmp_code == EQ || cmp_code == NE)
6052 if (cmp_op1 != const0_rtx)
6054 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6055 NULL_RTX, 0, OPTAB_WIDEN);
6056 cmp_op1 = const0_rtx;
6059 cmp_code = cmp_code == EQ ? LEU : GTU;
6062 if (cmp_code == LTU || cmp_code == LEU)
6064 rtx tem = cmp_op0;
6065 cmp_op0 = cmp_op1;
6066 cmp_op1 = tem;
6067 cmp_code = swap_condition (cmp_code);
6070 switch (cmp_code)
6072 case GTU:
6073 cc_mode = CCUmode;
6074 break;
6076 case GEU:
6077 cc_mode = CCL3mode;
6078 break;
6080 default:
6081 return false;
6084 /* Emit comparison instruction pattern. */
6085 if (!register_operand (cmp_op0, cmp_mode))
6086 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6088 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6089 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6090 /* We use insn_invalid_p here to add clobbers if required. */
6091 ret = insn_invalid_p (emit_insn (insn), false);
6092 gcc_assert (!ret);
6094 /* Emit ALC instruction pattern. */
6095 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6096 gen_rtx_REG (cc_mode, CC_REGNUM),
6097 const0_rtx);
6099 if (src != const0_rtx)
6101 if (!register_operand (src, GET_MODE (dst)))
6102 src = force_reg (GET_MODE (dst), src);
6104 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6105 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6108 p = rtvec_alloc (2);
6109 RTVEC_ELT (p, 0) =
6110 gen_rtx_SET (dst, op_res);
6111 RTVEC_ELT (p, 1) =
6112 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6113 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6115 return true;
6118 /* Try SUBTRACT LOGICAL WITH BORROW. */
6119 if (increment == constm1_rtx)
6121 /* Determine CC mode to use. */
6122 if (cmp_code == EQ || cmp_code == NE)
6124 if (cmp_op1 != const0_rtx)
6126 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6127 NULL_RTX, 0, OPTAB_WIDEN);
6128 cmp_op1 = const0_rtx;
6131 cmp_code = cmp_code == EQ ? LEU : GTU;
6134 if (cmp_code == GTU || cmp_code == GEU)
6136 rtx tem = cmp_op0;
6137 cmp_op0 = cmp_op1;
6138 cmp_op1 = tem;
6139 cmp_code = swap_condition (cmp_code);
6142 switch (cmp_code)
6144 case LEU:
6145 cc_mode = CCUmode;
6146 break;
6148 case LTU:
6149 cc_mode = CCL3mode;
6150 break;
6152 default:
6153 return false;
6156 /* Emit comparison instruction pattern. */
6157 if (!register_operand (cmp_op0, cmp_mode))
6158 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6160 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6161 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6162 /* We use insn_invalid_p here to add clobbers if required. */
6163 ret = insn_invalid_p (emit_insn (insn), false);
6164 gcc_assert (!ret);
6166 /* Emit SLB instruction pattern. */
6167 if (!register_operand (src, GET_MODE (dst)))
6168 src = force_reg (GET_MODE (dst), src);
6170 op_res = gen_rtx_MINUS (GET_MODE (dst),
6171 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6172 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6173 gen_rtx_REG (cc_mode, CC_REGNUM),
6174 const0_rtx));
6175 p = rtvec_alloc (2);
6176 RTVEC_ELT (p, 0) =
6177 gen_rtx_SET (dst, op_res);
6178 RTVEC_ELT (p, 1) =
6179 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6180 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6182 return true;
6185 return false;
6188 /* Expand code for the insv template. Return true if successful. */
6190 bool
6191 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6193 int bitsize = INTVAL (op1);
6194 int bitpos = INTVAL (op2);
6195 machine_mode mode = GET_MODE (dest);
6196 machine_mode smode;
6197 int smode_bsize, mode_bsize;
6198 rtx op, clobber;
6200 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6201 return false;
6203 /* Generate INSERT IMMEDIATE (IILL et al). */
6204 /* (set (ze (reg)) (const_int)). */
6205 if (TARGET_ZARCH
6206 && register_operand (dest, word_mode)
6207 && (bitpos % 16) == 0
6208 && (bitsize % 16) == 0
6209 && const_int_operand (src, VOIDmode))
6211 HOST_WIDE_INT val = INTVAL (src);
6212 int regpos = bitpos + bitsize;
6214 while (regpos > bitpos)
6216 machine_mode putmode;
6217 int putsize;
6219 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6220 putmode = SImode;
6221 else
6222 putmode = HImode;
6224 putsize = GET_MODE_BITSIZE (putmode);
6225 regpos -= putsize;
6226 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6227 GEN_INT (putsize),
6228 GEN_INT (regpos)),
6229 gen_int_mode (val, putmode));
6230 val >>= putsize;
6232 gcc_assert (regpos == bitpos);
6233 return true;
6236 smode = smallest_int_mode_for_size (bitsize);
6237 smode_bsize = GET_MODE_BITSIZE (smode);
6238 mode_bsize = GET_MODE_BITSIZE (mode);
6240 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6241 if (bitpos == 0
6242 && (bitsize % BITS_PER_UNIT) == 0
6243 && MEM_P (dest)
6244 && (register_operand (src, word_mode)
6245 || const_int_operand (src, VOIDmode)))
6247 /* Emit standard pattern if possible. */
6248 if (smode_bsize == bitsize)
6250 emit_move_insn (adjust_address (dest, smode, 0),
6251 gen_lowpart (smode, src));
6252 return true;
6255 /* (set (ze (mem)) (const_int)). */
6256 else if (const_int_operand (src, VOIDmode))
6258 int size = bitsize / BITS_PER_UNIT;
6259 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6260 BLKmode,
6261 UNITS_PER_WORD - size);
6263 dest = adjust_address (dest, BLKmode, 0);
6264 set_mem_size (dest, size);
6265 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6266 return true;
6269 /* (set (ze (mem)) (reg)). */
6270 else if (register_operand (src, word_mode))
6272 if (bitsize <= 32)
6273 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6274 const0_rtx), src);
6275 else
6277 /* Emit st,stcmh sequence. */
6278 int stcmh_width = bitsize - 32;
6279 int size = stcmh_width / BITS_PER_UNIT;
6281 emit_move_insn (adjust_address (dest, SImode, size),
6282 gen_lowpart (SImode, src));
6283 set_mem_size (dest, size);
6284 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6285 GEN_INT (stcmh_width),
6286 const0_rtx),
6287 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6289 return true;
6293 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6294 if ((bitpos % BITS_PER_UNIT) == 0
6295 && (bitsize % BITS_PER_UNIT) == 0
6296 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6297 && MEM_P (src)
6298 && (mode == DImode || mode == SImode)
6299 && register_operand (dest, mode))
6301 /* Emit a strict_low_part pattern if possible. */
6302 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6304 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6305 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6306 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6307 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6308 return true;
6311 /* ??? There are more powerful versions of ICM that are not
6312 completely represented in the md file. */
6315 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6316 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6318 machine_mode mode_s = GET_MODE (src);
6320 if (CONSTANT_P (src))
6322 /* For constant zero values the representation with AND
6323 appears to be folded in more situations than the (set
6324 (zero_extract) ...).
6325 We only do this when the start and end of the bitfield
6326 remain in the same SImode chunk. That way nihf or nilf
6327 can be used.
6328 The AND patterns might still generate a risbg for this. */
6329 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6330 return false;
6331 else
6332 src = force_reg (mode, src);
6334 else if (mode_s != mode)
6336 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6337 src = force_reg (mode_s, src);
6338 src = gen_lowpart (mode, src);
6341 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6342 op = gen_rtx_SET (op, src);
6344 if (!TARGET_ZEC12)
6346 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6347 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6349 emit_insn (op);
6351 return true;
6354 return false;
6357 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6358 register that holds VAL of mode MODE shifted by COUNT bits. */
6360 static inline rtx
6361 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6363 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6364 NULL_RTX, 1, OPTAB_DIRECT);
6365 return expand_simple_binop (SImode, ASHIFT, val, count,
6366 NULL_RTX, 1, OPTAB_DIRECT);
6369 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6370 the result in TARGET. */
6372 void
6373 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6374 rtx cmp_op1, rtx cmp_op2)
6376 machine_mode mode = GET_MODE (target);
6377 bool neg_p = false, swap_p = false;
6378 rtx tmp;
6380 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6382 switch (cond)
6384 /* NE a != b -> !(a == b) */
6385 case NE: cond = EQ; neg_p = true; break;
6386 /* UNGT a u> b -> !(b >= a) */
6387 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6388 /* UNGE a u>= b -> !(b > a) */
6389 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6390 /* LE: a <= b -> b >= a */
6391 case LE: cond = GE; swap_p = true; break;
6392 /* UNLE: a u<= b -> !(a > b) */
6393 case UNLE: cond = GT; neg_p = true; break;
6394 /* LT: a < b -> b > a */
6395 case LT: cond = GT; swap_p = true; break;
6396 /* UNLT: a u< b -> !(a >= b) */
6397 case UNLT: cond = GE; neg_p = true; break;
6398 case UNEQ:
6399 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6400 return;
6401 case LTGT:
6402 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6403 return;
6404 case ORDERED:
6405 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6406 return;
6407 case UNORDERED:
6408 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6409 return;
6410 default: break;
6413 else
6415 switch (cond)
6417 /* NE: a != b -> !(a == b) */
6418 case NE: cond = EQ; neg_p = true; break;
6419 /* GE: a >= b -> !(b > a) */
6420 case GE: cond = GT; neg_p = true; swap_p = true; break;
6421 /* GEU: a >= b -> !(b > a) */
6422 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6423 /* LE: a <= b -> !(a > b) */
6424 case LE: cond = GT; neg_p = true; break;
6425 /* LEU: a <= b -> !(a > b) */
6426 case LEU: cond = GTU; neg_p = true; break;
6427 /* LT: a < b -> b > a */
6428 case LT: cond = GT; swap_p = true; break;
6429 /* LTU: a < b -> b > a */
6430 case LTU: cond = GTU; swap_p = true; break;
6431 default: break;
6435 if (swap_p)
6437 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6440 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6441 mode,
6442 cmp_op1, cmp_op2)));
6443 if (neg_p)
6444 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6447 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6448 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6449 elements in CMP1 and CMP2 fulfill the comparison.
6450 This function is only used to emit patterns for the vx builtins and
6451 therefore only handles comparison codes required by the
6452 builtins. */
6453 void
6454 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6455 rtx cmp1, rtx cmp2, bool all_p)
6457 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6458 rtx tmp_reg = gen_reg_rtx (SImode);
6459 bool swap_p = false;
6461 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6463 switch (code)
6465 case EQ:
6466 case NE:
6467 cc_producer_mode = CCVEQmode;
6468 break;
6469 case GE:
6470 case LT:
6471 code = swap_condition (code);
6472 swap_p = true;
6473 /* fallthrough */
6474 case GT:
6475 case LE:
6476 cc_producer_mode = CCVIHmode;
6477 break;
6478 case GEU:
6479 case LTU:
6480 code = swap_condition (code);
6481 swap_p = true;
6482 /* fallthrough */
6483 case GTU:
6484 case LEU:
6485 cc_producer_mode = CCVIHUmode;
6486 break;
6487 default:
6488 gcc_unreachable ();
6491 scratch_mode = GET_MODE (cmp1);
6492 /* These codes represent inverted CC interpretations. Inverting
6493 an ALL CC mode results in an ANY CC mode and the other way
6494 around. Invert the all_p flag here to compensate for
6495 that. */
6496 if (code == NE || code == LE || code == LEU)
6497 all_p = !all_p;
6499 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6501 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6503 bool inv_p = false;
6505 switch (code)
6507 case EQ: cc_producer_mode = CCVEQmode; break;
6508 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6509 case GT: cc_producer_mode = CCVFHmode; break;
6510 case GE: cc_producer_mode = CCVFHEmode; break;
6511 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6512 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6513 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6514 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6515 default: gcc_unreachable ();
6517 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6519 if (inv_p)
6520 all_p = !all_p;
6522 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6524 else
6525 gcc_unreachable ();
6527 if (swap_p)
6529 rtx tmp = cmp2;
6530 cmp2 = cmp1;
6531 cmp1 = tmp;
6534 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6535 gen_rtvec (2, gen_rtx_SET (
6536 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6537 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6538 gen_rtx_CLOBBER (VOIDmode,
6539 gen_rtx_SCRATCH (scratch_mode)))));
6540 emit_move_insn (target, const0_rtx);
6541 emit_move_insn (tmp_reg, const1_rtx);
6543 emit_move_insn (target,
6544 gen_rtx_IF_THEN_ELSE (SImode,
6545 gen_rtx_fmt_ee (code, VOIDmode,
6546 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6547 const0_rtx),
6548 tmp_reg, target));
6551 /* Invert the comparison CODE applied to a CC mode. This is only safe
6552 if we know whether there result was created by a floating point
6553 compare or not. For the CCV modes this is encoded as part of the
6554 mode. */
6555 enum rtx_code
6556 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6558 /* Reversal of FP compares takes care -- an ordered compare
6559 becomes an unordered compare and vice versa. */
6560 if (mode == CCVFALLmode || mode == CCVFANYmode)
6561 return reverse_condition_maybe_unordered (code);
6562 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6563 return reverse_condition (code);
6564 else
6565 gcc_unreachable ();
6568 /* Generate a vector comparison expression loading either elements of
6569 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6570 and CMP_OP2. */
6572 void
6573 s390_expand_vcond (rtx target, rtx then, rtx els,
6574 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6576 rtx tmp;
6577 machine_mode result_mode;
6578 rtx result_target;
6580 machine_mode target_mode = GET_MODE (target);
6581 machine_mode cmp_mode = GET_MODE (cmp_op1);
6582 rtx op = (cond == LT) ? els : then;
6584 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6585 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6586 for short and byte (x >> 15 and x >> 7 respectively). */
6587 if ((cond == LT || cond == GE)
6588 && target_mode == cmp_mode
6589 && cmp_op2 == CONST0_RTX (cmp_mode)
6590 && op == CONST0_RTX (target_mode)
6591 && s390_vector_mode_supported_p (target_mode)
6592 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6594 rtx negop = (cond == LT) ? then : els;
6596 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6598 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6599 if (negop == CONST1_RTX (target_mode))
6601 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6602 GEN_INT (shift), target,
6603 1, OPTAB_DIRECT);
6604 if (res != target)
6605 emit_move_insn (target, res);
6606 return;
6609 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6610 else if (all_ones_operand (negop, target_mode))
6612 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6613 GEN_INT (shift), target,
6614 0, OPTAB_DIRECT);
6615 if (res != target)
6616 emit_move_insn (target, res);
6617 return;
6621 /* We always use an integral type vector to hold the comparison
6622 result. */
6623 result_mode = mode_for_int_vector (cmp_mode).require ();
6624 result_target = gen_reg_rtx (result_mode);
6626 /* We allow vector immediates as comparison operands that
6627 can be handled by the optimization above but not by the
6628 following code. Hence, force them into registers here. */
6629 if (!REG_P (cmp_op1))
6630 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6632 if (!REG_P (cmp_op2))
6633 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6635 s390_expand_vec_compare (result_target, cond,
6636 cmp_op1, cmp_op2);
6638 /* If the results are supposed to be either -1 or 0 we are done
6639 since this is what our compare instructions generate anyway. */
6640 if (all_ones_operand (then, GET_MODE (then))
6641 && const0_operand (els, GET_MODE (els)))
6643 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6644 result_target, 0));
6645 return;
6648 /* Otherwise we will do a vsel afterwards. */
6649 /* This gets triggered e.g.
6650 with gcc.c-torture/compile/pr53410-1.c */
6651 if (!REG_P (then))
6652 then = force_reg (target_mode, then);
6654 if (!REG_P (els))
6655 els = force_reg (target_mode, els);
6657 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6658 result_target,
6659 CONST0_RTX (result_mode));
6661 /* We compared the result against zero above so we have to swap then
6662 and els here. */
6663 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6665 gcc_assert (target_mode == GET_MODE (then));
6666 emit_insn (gen_rtx_SET (target, tmp));
6669 /* Emit the RTX necessary to initialize the vector TARGET with values
6670 in VALS. */
6671 void
6672 s390_expand_vec_init (rtx target, rtx vals)
6674 machine_mode mode = GET_MODE (target);
6675 machine_mode inner_mode = GET_MODE_INNER (mode);
6676 int n_elts = GET_MODE_NUNITS (mode);
6677 bool all_same = true, all_regs = true, all_const_int = true;
6678 rtx x;
6679 int i;
6681 for (i = 0; i < n_elts; ++i)
6683 x = XVECEXP (vals, 0, i);
6685 if (!CONST_INT_P (x))
6686 all_const_int = false;
6688 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6689 all_same = false;
6691 if (!REG_P (x))
6692 all_regs = false;
6695 /* Use vector gen mask or vector gen byte mask if possible. */
6696 if (all_same && all_const_int
6697 && (XVECEXP (vals, 0, 0) == const0_rtx
6698 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6699 NULL, NULL)
6700 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6702 emit_insn (gen_rtx_SET (target,
6703 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6704 return;
6707 if (all_same)
6709 emit_insn (gen_rtx_SET (target,
6710 gen_rtx_VEC_DUPLICATE (mode,
6711 XVECEXP (vals, 0, 0))));
6712 return;
6715 if (all_regs
6716 && REG_P (target)
6717 && n_elts == 2
6718 && GET_MODE_SIZE (inner_mode) == 8)
6720 /* Use vector load pair. */
6721 emit_insn (gen_rtx_SET (target,
6722 gen_rtx_VEC_CONCAT (mode,
6723 XVECEXP (vals, 0, 0),
6724 XVECEXP (vals, 0, 1))));
6725 return;
6728 /* Use vector load logical element and zero. */
6729 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6731 bool found = true;
6733 x = XVECEXP (vals, 0, 0);
6734 if (memory_operand (x, inner_mode))
6736 for (i = 1; i < n_elts; ++i)
6737 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6739 if (found)
6741 machine_mode half_mode = (inner_mode == SFmode
6742 ? V2SFmode : V2SImode);
6743 emit_insn (gen_rtx_SET (target,
6744 gen_rtx_VEC_CONCAT (mode,
6745 gen_rtx_VEC_CONCAT (half_mode,
6747 const0_rtx),
6748 gen_rtx_VEC_CONCAT (half_mode,
6749 const0_rtx,
6750 const0_rtx))));
6751 return;
6756 /* We are about to set the vector elements one by one. Zero out the
6757 full register first in order to help the data flow framework to
6758 detect it as full VR set. */
6759 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6761 /* Unfortunately the vec_init expander is not allowed to fail. So
6762 we have to implement the fallback ourselves. */
6763 for (i = 0; i < n_elts; i++)
6765 rtx elem = XVECEXP (vals, 0, i);
6766 if (!general_operand (elem, GET_MODE (elem)))
6767 elem = force_reg (inner_mode, elem);
6769 emit_insn (gen_rtx_SET (target,
6770 gen_rtx_UNSPEC (mode,
6771 gen_rtvec (3, elem,
6772 GEN_INT (i), target),
6773 UNSPEC_VEC_SET)));
6777 /* Structure to hold the initial parameters for a compare_and_swap operation
6778 in HImode and QImode. */
6780 struct alignment_context
6782 rtx memsi; /* SI aligned memory location. */
6783 rtx shift; /* Bit offset with regard to lsb. */
6784 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6785 rtx modemaski; /* ~modemask */
6786 bool aligned; /* True if memory is aligned, false else. */
6789 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6790 structure AC for transparent simplifying, if the memory alignment is known
6791 to be at least 32bit. MEM is the memory location for the actual operation
6792 and MODE its mode. */
6794 static void
6795 init_alignment_context (struct alignment_context *ac, rtx mem,
6796 machine_mode mode)
6798 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6799 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6801 if (ac->aligned)
6802 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6803 else
6805 /* Alignment is unknown. */
6806 rtx byteoffset, addr, align;
6808 /* Force the address into a register. */
6809 addr = force_reg (Pmode, XEXP (mem, 0));
6811 /* Align it to SImode. */
6812 align = expand_simple_binop (Pmode, AND, addr,
6813 GEN_INT (-GET_MODE_SIZE (SImode)),
6814 NULL_RTX, 1, OPTAB_DIRECT);
6815 /* Generate MEM. */
6816 ac->memsi = gen_rtx_MEM (SImode, align);
6817 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6818 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6819 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6821 /* Calculate shiftcount. */
6822 byteoffset = expand_simple_binop (Pmode, AND, addr,
6823 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6824 NULL_RTX, 1, OPTAB_DIRECT);
6825 /* As we already have some offset, evaluate the remaining distance. */
6826 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6827 NULL_RTX, 1, OPTAB_DIRECT);
6830 /* Shift is the byte count, but we need the bitcount. */
6831 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6832 NULL_RTX, 1, OPTAB_DIRECT);
6834 /* Calculate masks. */
6835 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6836 GEN_INT (GET_MODE_MASK (mode)),
6837 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6838 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6839 NULL_RTX, 1);
6842 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6843 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6844 perform the merge in SEQ2. */
6846 static rtx
6847 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6848 machine_mode mode, rtx val, rtx ins)
6850 rtx tmp;
6852 if (ac->aligned)
6854 start_sequence ();
6855 tmp = copy_to_mode_reg (SImode, val);
6856 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6857 const0_rtx, ins))
6859 *seq1 = NULL;
6860 *seq2 = get_insns ();
6861 end_sequence ();
6862 return tmp;
6864 end_sequence ();
6867 /* Failed to use insv. Generate a two part shift and mask. */
6868 start_sequence ();
6869 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6870 *seq1 = get_insns ();
6871 end_sequence ();
6873 start_sequence ();
6874 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6875 *seq2 = get_insns ();
6876 end_sequence ();
6878 return tmp;
6881 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6882 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6883 value to set if CMP == MEM. */
6885 static void
6886 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6887 rtx cmp, rtx new_rtx, bool is_weak)
6889 struct alignment_context ac;
6890 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6891 rtx res = gen_reg_rtx (SImode);
6892 rtx_code_label *csloop = NULL, *csend = NULL;
6894 gcc_assert (MEM_P (mem));
6896 init_alignment_context (&ac, mem, mode);
6898 /* Load full word. Subsequent loads are performed by CS. */
6899 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6900 NULL_RTX, 1, OPTAB_DIRECT);
6902 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6903 possible, we try to use insv to make this happen efficiently. If
6904 that fails we'll generate code both inside and outside the loop. */
6905 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6906 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6908 if (seq0)
6909 emit_insn (seq0);
6910 if (seq1)
6911 emit_insn (seq1);
6913 /* Start CS loop. */
6914 if (!is_weak)
6916 /* Begin assuming success. */
6917 emit_move_insn (btarget, const1_rtx);
6919 csloop = gen_label_rtx ();
6920 csend = gen_label_rtx ();
6921 emit_label (csloop);
6924 /* val = "<mem>00..0<mem>"
6925 * cmp = "00..0<cmp>00..0"
6926 * new = "00..0<new>00..0"
6929 emit_insn (seq2);
6930 emit_insn (seq3);
6932 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6933 if (is_weak)
6934 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6935 else
6937 rtx tmp;
6939 /* Jump to end if we're done (likely?). */
6940 s390_emit_jump (csend, cc);
6942 /* Check for changes outside mode, and loop internal if so.
6943 Arrange the moves so that the compare is adjacent to the
6944 branch so that we can generate CRJ. */
6945 tmp = copy_to_reg (val);
6946 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6947 1, OPTAB_DIRECT);
6948 cc = s390_emit_compare (NE, val, tmp);
6949 s390_emit_jump (csloop, cc);
6951 /* Failed. */
6952 emit_move_insn (btarget, const0_rtx);
6953 emit_label (csend);
6956 /* Return the correct part of the bitfield. */
6957 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6958 NULL_RTX, 1, OPTAB_DIRECT), 1);
6961 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6962 static void
6963 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6964 rtx cmp, rtx new_rtx, bool is_weak)
6966 rtx output = vtarget;
6967 rtx_code_label *skip_cs_label = NULL;
6968 bool do_const_opt = false;
6970 if (!register_operand (output, mode))
6971 output = gen_reg_rtx (mode);
6973 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6974 with the constant first and skip the compare_and_swap because its very
6975 expensive and likely to fail anyway.
6976 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6977 cause spurious in that case.
6978 Note 2: It may be useful to do this also for non-constant INPUT.
6979 Note 3: Currently only targets with "load on condition" are supported
6980 (z196 and newer). */
6982 if (TARGET_Z196
6983 && (mode == SImode || mode == DImode))
6984 do_const_opt = (is_weak && CONST_INT_P (cmp));
6986 if (do_const_opt)
6988 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6990 skip_cs_label = gen_label_rtx ();
6991 emit_move_insn (btarget, const0_rtx);
6992 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
6994 rtvec lt = rtvec_alloc (2);
6996 /* Load-and-test + conditional jump. */
6997 RTVEC_ELT (lt, 0)
6998 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
6999 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7000 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7002 else
7004 emit_move_insn (output, mem);
7005 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7007 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7008 add_reg_br_prob_note (get_last_insn (),
7009 profile_probability::very_unlikely ());
7010 /* If the jump is not taken, OUTPUT is the expected value. */
7011 cmp = output;
7012 /* Reload newval to a register manually, *after* the compare and jump
7013 above. Otherwise Reload might place it before the jump. */
7015 else
7016 cmp = force_reg (mode, cmp);
7017 new_rtx = force_reg (mode, new_rtx);
7018 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7019 (do_const_opt) ? CCZmode : CCZ1mode);
7020 if (skip_cs_label != NULL)
7021 emit_label (skip_cs_label);
7023 /* We deliberately accept non-register operands in the predicate
7024 to ensure the write back to the output operand happens *before*
7025 the store-flags code below. This makes it easier for combine
7026 to merge the store-flags code with a potential test-and-branch
7027 pattern following (immediately!) afterwards. */
7028 if (output != vtarget)
7029 emit_move_insn (vtarget, output);
7031 if (do_const_opt)
7033 rtx cc, cond, ite;
7035 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7036 btarget has already been initialized with 0 above. */
7037 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7038 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7039 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7040 emit_insn (gen_rtx_SET (btarget, ite));
7042 else
7044 rtx cc, cond;
7046 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7047 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7048 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7052 /* Expand an atomic compare and swap operation. MEM is the memory location,
7053 CMP the old value to compare MEM with and NEW_RTX the value to set if
7054 CMP == MEM. */
7056 void
7057 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7058 rtx cmp, rtx new_rtx, bool is_weak)
7060 switch (mode)
7062 case E_TImode:
7063 case E_DImode:
7064 case E_SImode:
7065 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7066 break;
7067 case E_HImode:
7068 case E_QImode:
7069 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7070 break;
7071 default:
7072 gcc_unreachable ();
7076 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7077 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7078 of MEM. */
7080 void
7081 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7083 machine_mode mode = GET_MODE (mem);
7084 rtx_code_label *csloop;
7086 if (TARGET_Z196
7087 && (mode == DImode || mode == SImode)
7088 && CONST_INT_P (input) && INTVAL (input) == 0)
7090 emit_move_insn (output, const0_rtx);
7091 if (mode == DImode)
7092 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7093 else
7094 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7095 return;
7098 input = force_reg (mode, input);
7099 emit_move_insn (output, mem);
7100 csloop = gen_label_rtx ();
7101 emit_label (csloop);
7102 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7103 input, CCZ1mode));
7106 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7107 and VAL the value to play with. If AFTER is true then store the value
7108 MEM holds after the operation, if AFTER is false then store the value MEM
7109 holds before the operation. If TARGET is zero then discard that value, else
7110 store it to TARGET. */
7112 void
7113 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7114 rtx target, rtx mem, rtx val, bool after)
7116 struct alignment_context ac;
7117 rtx cmp;
7118 rtx new_rtx = gen_reg_rtx (SImode);
7119 rtx orig = gen_reg_rtx (SImode);
7120 rtx_code_label *csloop = gen_label_rtx ();
7122 gcc_assert (!target || register_operand (target, VOIDmode));
7123 gcc_assert (MEM_P (mem));
7125 init_alignment_context (&ac, mem, mode);
7127 /* Shift val to the correct bit positions.
7128 Preserve "icm", but prevent "ex icm". */
7129 if (!(ac.aligned && code == SET && MEM_P (val)))
7130 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7132 /* Further preparation insns. */
7133 if (code == PLUS || code == MINUS)
7134 emit_move_insn (orig, val);
7135 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7136 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7137 NULL_RTX, 1, OPTAB_DIRECT);
7139 /* Load full word. Subsequent loads are performed by CS. */
7140 cmp = force_reg (SImode, ac.memsi);
7142 /* Start CS loop. */
7143 emit_label (csloop);
7144 emit_move_insn (new_rtx, cmp);
7146 /* Patch new with val at correct position. */
7147 switch (code)
7149 case PLUS:
7150 case MINUS:
7151 val = expand_simple_binop (SImode, code, new_rtx, orig,
7152 NULL_RTX, 1, OPTAB_DIRECT);
7153 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7154 NULL_RTX, 1, OPTAB_DIRECT);
7155 /* FALLTHRU */
7156 case SET:
7157 if (ac.aligned && MEM_P (val))
7158 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7159 0, 0, SImode, val, false);
7160 else
7162 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7163 NULL_RTX, 1, OPTAB_DIRECT);
7164 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7165 NULL_RTX, 1, OPTAB_DIRECT);
7167 break;
7168 case AND:
7169 case IOR:
7170 case XOR:
7171 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7172 NULL_RTX, 1, OPTAB_DIRECT);
7173 break;
7174 case MULT: /* NAND */
7175 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7176 NULL_RTX, 1, OPTAB_DIRECT);
7177 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7178 NULL_RTX, 1, OPTAB_DIRECT);
7179 break;
7180 default:
7181 gcc_unreachable ();
7184 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7185 ac.memsi, cmp, new_rtx,
7186 CCZ1mode));
7188 /* Return the correct part of the bitfield. */
7189 if (target)
7190 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7191 after ? new_rtx : cmp, ac.shift,
7192 NULL_RTX, 1, OPTAB_DIRECT), 1);
7195 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7196 We need to emit DTP-relative relocations. */
7198 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7200 static void
7201 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7203 switch (size)
7205 case 4:
7206 fputs ("\t.long\t", file);
7207 break;
7208 case 8:
7209 fputs ("\t.quad\t", file);
7210 break;
7211 default:
7212 gcc_unreachable ();
7214 output_addr_const (file, x);
7215 fputs ("@DTPOFF", file);
7218 /* Return the proper mode for REGNO being represented in the dwarf
7219 unwind table. */
7220 machine_mode
7221 s390_dwarf_frame_reg_mode (int regno)
7223 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7225 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7226 if (GENERAL_REGNO_P (regno))
7227 save_mode = Pmode;
7229 /* The rightmost 64 bits of vector registers are call-clobbered. */
7230 if (GET_MODE_SIZE (save_mode) > 8)
7231 save_mode = DImode;
7233 return save_mode;
7236 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7237 /* Implement TARGET_MANGLE_TYPE. */
7239 static const char *
7240 s390_mangle_type (const_tree type)
7242 type = TYPE_MAIN_VARIANT (type);
7244 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7245 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7246 return NULL;
7248 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7249 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7250 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7251 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7253 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7254 && TARGET_LONG_DOUBLE_128)
7255 return "g";
7257 /* For all other types, use normal C++ mangling. */
7258 return NULL;
7260 #endif
7262 /* In the name of slightly smaller debug output, and to cater to
7263 general assembler lossage, recognize various UNSPEC sequences
7264 and turn them back into a direct symbol reference. */
7266 static rtx
7267 s390_delegitimize_address (rtx orig_x)
7269 rtx x, y;
7271 orig_x = delegitimize_mem_from_attrs (orig_x);
7272 x = orig_x;
7274 /* Extract the symbol ref from:
7275 (plus:SI (reg:SI 12 %r12)
7276 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7277 UNSPEC_GOTOFF/PLTOFF)))
7279 (plus:SI (reg:SI 12 %r12)
7280 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7281 UNSPEC_GOTOFF/PLTOFF)
7282 (const_int 4 [0x4])))) */
7283 if (GET_CODE (x) == PLUS
7284 && REG_P (XEXP (x, 0))
7285 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7286 && GET_CODE (XEXP (x, 1)) == CONST)
7288 HOST_WIDE_INT offset = 0;
7290 /* The const operand. */
7291 y = XEXP (XEXP (x, 1), 0);
7293 if (GET_CODE (y) == PLUS
7294 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7296 offset = INTVAL (XEXP (y, 1));
7297 y = XEXP (y, 0);
7300 if (GET_CODE (y) == UNSPEC
7301 && (XINT (y, 1) == UNSPEC_GOTOFF
7302 || XINT (y, 1) == UNSPEC_PLTOFF))
7303 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7306 if (GET_CODE (x) != MEM)
7307 return orig_x;
7309 x = XEXP (x, 0);
7310 if (GET_CODE (x) == PLUS
7311 && GET_CODE (XEXP (x, 1)) == CONST
7312 && GET_CODE (XEXP (x, 0)) == REG
7313 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7315 y = XEXP (XEXP (x, 1), 0);
7316 if (GET_CODE (y) == UNSPEC
7317 && XINT (y, 1) == UNSPEC_GOT)
7318 y = XVECEXP (y, 0, 0);
7319 else
7320 return orig_x;
7322 else if (GET_CODE (x) == CONST)
7324 /* Extract the symbol ref from:
7325 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7326 UNSPEC_PLT/GOTENT))) */
7328 y = XEXP (x, 0);
7329 if (GET_CODE (y) == UNSPEC
7330 && (XINT (y, 1) == UNSPEC_GOTENT
7331 || XINT (y, 1) == UNSPEC_PLT))
7332 y = XVECEXP (y, 0, 0);
7333 else
7334 return orig_x;
7336 else
7337 return orig_x;
7339 if (GET_MODE (orig_x) != Pmode)
7341 if (GET_MODE (orig_x) == BLKmode)
7342 return orig_x;
7343 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7344 if (y == NULL_RTX)
7345 return orig_x;
7347 return y;
7350 /* Output operand OP to stdio stream FILE.
7351 OP is an address (register + offset) which is not used to address data;
7352 instead the rightmost bits are interpreted as the value. */
7354 static void
7355 print_addrstyle_operand (FILE *file, rtx op)
7357 HOST_WIDE_INT offset;
7358 rtx base;
7360 /* Extract base register and offset. */
7361 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7362 gcc_unreachable ();
7364 /* Sanity check. */
7365 if (base)
7367 gcc_assert (GET_CODE (base) == REG);
7368 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7369 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7372 /* Offsets are constricted to twelve bits. */
7373 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7374 if (base)
7375 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7378 /* Assigns the number of NOP halfwords to be emitted before and after the
7379 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7380 If hotpatching is disabled for the function, the values are set to zero.
7383 static void
7384 s390_function_num_hotpatch_hw (tree decl,
7385 int *hw_before,
7386 int *hw_after)
7388 tree attr;
7390 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7392 /* Handle the arguments of the hotpatch attribute. The values
7393 specified via attribute might override the cmdline argument
7394 values. */
7395 if (attr)
7397 tree args = TREE_VALUE (attr);
7399 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7400 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7402 else
7404 /* Use the values specified by the cmdline arguments. */
7405 *hw_before = s390_hotpatch_hw_before_label;
7406 *hw_after = s390_hotpatch_hw_after_label;
7410 /* Write the current .machine and .machinemode specification to the assembler
7411 file. */
7413 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7414 static void
7415 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7417 fprintf (asm_out_file, "\t.machinemode %s\n",
7418 (TARGET_ZARCH) ? "zarch" : "esa");
7419 fprintf (asm_out_file, "\t.machine \"%s",
7420 processor_table[s390_arch].binutils_name);
7421 if (S390_USE_ARCHITECTURE_MODIFIERS)
7423 int cpu_flags;
7425 cpu_flags = processor_flags_table[(int) s390_arch];
7426 if (TARGET_HTM && !(cpu_flags & PF_TX))
7427 fprintf (asm_out_file, "+htm");
7428 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7429 fprintf (asm_out_file, "+nohtm");
7430 if (TARGET_VX && !(cpu_flags & PF_VX))
7431 fprintf (asm_out_file, "+vx");
7432 else if (!TARGET_VX && (cpu_flags & PF_VX))
7433 fprintf (asm_out_file, "+novx");
7435 fprintf (asm_out_file, "\"\n");
7438 /* Write an extra function header before the very start of the function. */
7440 void
7441 s390_asm_output_function_prefix (FILE *asm_out_file,
7442 const char *fnname ATTRIBUTE_UNUSED)
7444 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7445 return;
7446 /* Since only the function specific options are saved but not the indications
7447 which options are set, it's too much work here to figure out which options
7448 have actually changed. Thus, generate .machine and .machinemode whenever a
7449 function has the target attribute or pragma. */
7450 fprintf (asm_out_file, "\t.machinemode push\n");
7451 fprintf (asm_out_file, "\t.machine push\n");
7452 s390_asm_output_machine_for_arch (asm_out_file);
7455 /* Write an extra function footer after the very end of the function. */
7457 void
7458 s390_asm_declare_function_size (FILE *asm_out_file,
7459 const char *fnname, tree decl)
7461 if (!flag_inhibit_size_directive)
7462 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7463 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7464 return;
7465 fprintf (asm_out_file, "\t.machine pop\n");
7466 fprintf (asm_out_file, "\t.machinemode pop\n");
7468 #endif
7470 /* Write the extra assembler code needed to declare a function properly. */
7472 void
7473 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7474 tree decl)
7476 int hw_before, hw_after;
7478 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7479 if (hw_before > 0)
7481 unsigned int function_alignment;
7482 int i;
7484 /* Add a trampoline code area before the function label and initialize it
7485 with two-byte nop instructions. This area can be overwritten with code
7486 that jumps to a patched version of the function. */
7487 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7488 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7489 hw_before);
7490 for (i = 1; i < hw_before; i++)
7491 fputs ("\tnopr\t%r0\n", asm_out_file);
7493 /* Note: The function label must be aligned so that (a) the bytes of the
7494 following nop do not cross a cacheline boundary, and (b) a jump address
7495 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7496 stored directly before the label without crossing a cacheline
7497 boundary. All this is necessary to make sure the trampoline code can
7498 be changed atomically.
7499 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7500 if there are NOPs before the function label, the alignment is placed
7501 before them. So it is necessary to duplicate the alignment after the
7502 NOPs. */
7503 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7504 if (! DECL_USER_ALIGN (decl))
7505 function_alignment = MAX (function_alignment,
7506 (unsigned int) align_functions);
7507 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7508 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7511 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7513 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7514 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7515 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7516 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7517 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7518 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7519 s390_warn_framesize);
7520 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7521 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7522 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7523 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7524 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7525 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7526 TARGET_PACKED_STACK);
7527 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7528 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7529 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7530 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7531 s390_warn_dynamicstack_p);
7533 ASM_OUTPUT_LABEL (asm_out_file, fname);
7534 if (hw_after > 0)
7535 asm_fprintf (asm_out_file,
7536 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7537 hw_after);
7540 /* Output machine-dependent UNSPECs occurring in address constant X
7541 in assembler syntax to stdio stream FILE. Returns true if the
7542 constant X could be recognized, false otherwise. */
7544 static bool
7545 s390_output_addr_const_extra (FILE *file, rtx x)
7547 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7548 switch (XINT (x, 1))
7550 case UNSPEC_GOTENT:
7551 output_addr_const (file, XVECEXP (x, 0, 0));
7552 fprintf (file, "@GOTENT");
7553 return true;
7554 case UNSPEC_GOT:
7555 output_addr_const (file, XVECEXP (x, 0, 0));
7556 fprintf (file, "@GOT");
7557 return true;
7558 case UNSPEC_GOTOFF:
7559 output_addr_const (file, XVECEXP (x, 0, 0));
7560 fprintf (file, "@GOTOFF");
7561 return true;
7562 case UNSPEC_PLT:
7563 output_addr_const (file, XVECEXP (x, 0, 0));
7564 fprintf (file, "@PLT");
7565 return true;
7566 case UNSPEC_PLTOFF:
7567 output_addr_const (file, XVECEXP (x, 0, 0));
7568 fprintf (file, "@PLTOFF");
7569 return true;
7570 case UNSPEC_TLSGD:
7571 output_addr_const (file, XVECEXP (x, 0, 0));
7572 fprintf (file, "@TLSGD");
7573 return true;
7574 case UNSPEC_TLSLDM:
7575 assemble_name (file, get_some_local_dynamic_name ());
7576 fprintf (file, "@TLSLDM");
7577 return true;
7578 case UNSPEC_DTPOFF:
7579 output_addr_const (file, XVECEXP (x, 0, 0));
7580 fprintf (file, "@DTPOFF");
7581 return true;
7582 case UNSPEC_NTPOFF:
7583 output_addr_const (file, XVECEXP (x, 0, 0));
7584 fprintf (file, "@NTPOFF");
7585 return true;
7586 case UNSPEC_GOTNTPOFF:
7587 output_addr_const (file, XVECEXP (x, 0, 0));
7588 fprintf (file, "@GOTNTPOFF");
7589 return true;
7590 case UNSPEC_INDNTPOFF:
7591 output_addr_const (file, XVECEXP (x, 0, 0));
7592 fprintf (file, "@INDNTPOFF");
7593 return true;
7596 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7597 switch (XINT (x, 1))
7599 case UNSPEC_POOL_OFFSET:
7600 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7601 output_addr_const (file, x);
7602 return true;
7604 return false;
7607 /* Output address operand ADDR in assembler syntax to
7608 stdio stream FILE. */
7610 void
7611 print_operand_address (FILE *file, rtx addr)
7613 struct s390_address ad;
7614 memset (&ad, 0, sizeof (s390_address));
7616 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7618 if (!TARGET_Z10)
7620 output_operand_lossage ("symbolic memory references are "
7621 "only supported on z10 or later");
7622 return;
7624 output_addr_const (file, addr);
7625 return;
7628 if (!s390_decompose_address (addr, &ad)
7629 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7630 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7631 output_operand_lossage ("cannot decompose address");
7633 if (ad.disp)
7634 output_addr_const (file, ad.disp);
7635 else
7636 fprintf (file, "0");
7638 if (ad.base && ad.indx)
7639 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7640 reg_names[REGNO (ad.base)]);
7641 else if (ad.base)
7642 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7645 /* Output operand X in assembler syntax to stdio stream FILE.
7646 CODE specified the format flag. The following format flags
7647 are recognized:
7649 'C': print opcode suffix for branch condition.
7650 'D': print opcode suffix for inverse branch condition.
7651 'E': print opcode suffix for branch on index instruction.
7652 'G': print the size of the operand in bytes.
7653 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7654 'M': print the second word of a TImode operand.
7655 'N': print the second word of a DImode operand.
7656 'O': print only the displacement of a memory reference or address.
7657 'R': print only the base register of a memory reference or address.
7658 'S': print S-type memory reference (base+displacement).
7659 'Y': print address style operand without index (e.g. shift count or setmem
7660 operand).
7662 'b': print integer X as if it's an unsigned byte.
7663 'c': print integer X as if it's an signed byte.
7664 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7665 'f': "end" contiguous bitmask X in SImode.
7666 'h': print integer X as if it's a signed halfword.
7667 'i': print the first nonzero HImode part of X.
7668 'j': print the first HImode part unequal to -1 of X.
7669 'k': print the first nonzero SImode part of X.
7670 'm': print the first SImode part unequal to -1 of X.
7671 'o': print integer X as if it's an unsigned 32bit word.
7672 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7673 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7674 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7675 'x': print integer X as if it's an unsigned halfword.
7676 'v': print register number as vector register (v1 instead of f1).
7679 void
7680 print_operand (FILE *file, rtx x, int code)
7682 HOST_WIDE_INT ival;
7684 switch (code)
7686 case 'C':
7687 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7688 return;
7690 case 'D':
7691 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7692 return;
7694 case 'E':
7695 if (GET_CODE (x) == LE)
7696 fprintf (file, "l");
7697 else if (GET_CODE (x) == GT)
7698 fprintf (file, "h");
7699 else
7700 output_operand_lossage ("invalid comparison operator "
7701 "for 'E' output modifier");
7702 return;
7704 case 'J':
7705 if (GET_CODE (x) == SYMBOL_REF)
7707 fprintf (file, "%s", ":tls_load:");
7708 output_addr_const (file, x);
7710 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7712 fprintf (file, "%s", ":tls_gdcall:");
7713 output_addr_const (file, XVECEXP (x, 0, 0));
7715 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7717 fprintf (file, "%s", ":tls_ldcall:");
7718 const char *name = get_some_local_dynamic_name ();
7719 gcc_assert (name);
7720 assemble_name (file, name);
7722 else
7723 output_operand_lossage ("invalid reference for 'J' output modifier");
7724 return;
7726 case 'G':
7727 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7728 return;
7730 case 'O':
7732 struct s390_address ad;
7733 int ret;
7735 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7737 if (!ret
7738 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7739 || ad.indx)
7741 output_operand_lossage ("invalid address for 'O' output modifier");
7742 return;
7745 if (ad.disp)
7746 output_addr_const (file, ad.disp);
7747 else
7748 fprintf (file, "0");
7750 return;
7752 case 'R':
7754 struct s390_address ad;
7755 int ret;
7757 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7759 if (!ret
7760 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7761 || ad.indx)
7763 output_operand_lossage ("invalid address for 'R' output modifier");
7764 return;
7767 if (ad.base)
7768 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7769 else
7770 fprintf (file, "0");
7772 return;
7774 case 'S':
7776 struct s390_address ad;
7777 int ret;
7779 if (!MEM_P (x))
7781 output_operand_lossage ("memory reference expected for "
7782 "'S' output modifier");
7783 return;
7785 ret = s390_decompose_address (XEXP (x, 0), &ad);
7787 if (!ret
7788 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7789 || ad.indx)
7791 output_operand_lossage ("invalid address for 'S' output modifier");
7792 return;
7795 if (ad.disp)
7796 output_addr_const (file, ad.disp);
7797 else
7798 fprintf (file, "0");
7800 if (ad.base)
7801 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7803 return;
7805 case 'N':
7806 if (GET_CODE (x) == REG)
7807 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7808 else if (GET_CODE (x) == MEM)
7809 x = change_address (x, VOIDmode,
7810 plus_constant (Pmode, XEXP (x, 0), 4));
7811 else
7812 output_operand_lossage ("register or memory expression expected "
7813 "for 'N' output modifier");
7814 break;
7816 case 'M':
7817 if (GET_CODE (x) == REG)
7818 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7819 else if (GET_CODE (x) == MEM)
7820 x = change_address (x, VOIDmode,
7821 plus_constant (Pmode, XEXP (x, 0), 8));
7822 else
7823 output_operand_lossage ("register or memory expression expected "
7824 "for 'M' output modifier");
7825 break;
7827 case 'Y':
7828 print_addrstyle_operand (file, x);
7829 return;
7832 switch (GET_CODE (x))
7834 case REG:
7835 /* Print FP regs as fx instead of vx when they are accessed
7836 through non-vector mode. */
7837 if (code == 'v'
7838 || VECTOR_NOFP_REG_P (x)
7839 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7840 || (VECTOR_REG_P (x)
7841 && (GET_MODE_SIZE (GET_MODE (x)) /
7842 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7843 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7844 else
7845 fprintf (file, "%s", reg_names[REGNO (x)]);
7846 break;
7848 case MEM:
7849 output_address (GET_MODE (x), XEXP (x, 0));
7850 break;
7852 case CONST:
7853 case CODE_LABEL:
7854 case LABEL_REF:
7855 case SYMBOL_REF:
7856 output_addr_const (file, x);
7857 break;
7859 case CONST_INT:
7860 ival = INTVAL (x);
7861 switch (code)
7863 case 0:
7864 break;
7865 case 'b':
7866 ival &= 0xff;
7867 break;
7868 case 'c':
7869 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7870 break;
7871 case 'x':
7872 ival &= 0xffff;
7873 break;
7874 case 'h':
7875 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7876 break;
7877 case 'i':
7878 ival = s390_extract_part (x, HImode, 0);
7879 break;
7880 case 'j':
7881 ival = s390_extract_part (x, HImode, -1);
7882 break;
7883 case 'k':
7884 ival = s390_extract_part (x, SImode, 0);
7885 break;
7886 case 'm':
7887 ival = s390_extract_part (x, SImode, -1);
7888 break;
7889 case 'o':
7890 ival &= 0xffffffff;
7891 break;
7892 case 'e': case 'f':
7893 case 's': case 't':
7895 int start, end;
7896 int len;
7897 bool ok;
7899 len = (code == 's' || code == 'e' ? 64 : 32);
7900 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7901 gcc_assert (ok);
7902 if (code == 's' || code == 't')
7903 ival = start;
7904 else
7905 ival = end;
7907 break;
7908 default:
7909 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7911 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7912 break;
7914 case CONST_WIDE_INT:
7915 if (code == 'b')
7916 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7917 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7918 else if (code == 'x')
7919 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7920 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7921 else if (code == 'h')
7922 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7923 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7924 else
7926 if (code == 0)
7927 output_operand_lossage ("invalid constant - try using "
7928 "an output modifier");
7929 else
7930 output_operand_lossage ("invalid constant for output modifier '%c'",
7931 code);
7933 break;
7934 case CONST_VECTOR:
7935 switch (code)
7937 case 'h':
7938 gcc_assert (const_vec_duplicate_p (x));
7939 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7940 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7941 break;
7942 case 'e':
7943 case 's':
7945 int start, end;
7946 bool ok;
7948 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7949 gcc_assert (ok);
7950 ival = (code == 's') ? start : end;
7951 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7953 break;
7954 case 't':
7956 unsigned mask;
7957 bool ok = s390_bytemask_vector_p (x, &mask);
7958 gcc_assert (ok);
7959 fprintf (file, "%u", mask);
7961 break;
7963 default:
7964 output_operand_lossage ("invalid constant vector for output "
7965 "modifier '%c'", code);
7967 break;
7969 default:
7970 if (code == 0)
7971 output_operand_lossage ("invalid expression - try using "
7972 "an output modifier");
7973 else
7974 output_operand_lossage ("invalid expression for output "
7975 "modifier '%c'", code);
7976 break;
7980 /* Target hook for assembling integer objects. We need to define it
7981 here to work a round a bug in some versions of GAS, which couldn't
7982 handle values smaller than INT_MIN when printed in decimal. */
7984 static bool
7985 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7987 if (size == 8 && aligned_p
7988 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7990 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7991 INTVAL (x));
7992 return true;
7994 return default_assemble_integer (x, size, aligned_p);
7997 /* Returns true if register REGNO is used for forming
7998 a memory address in expression X. */
8000 static bool
8001 reg_used_in_mem_p (int regno, rtx x)
8003 enum rtx_code code = GET_CODE (x);
8004 int i, j;
8005 const char *fmt;
8007 if (code == MEM)
8009 if (refers_to_regno_p (regno, XEXP (x, 0)))
8010 return true;
8012 else if (code == SET
8013 && GET_CODE (SET_DEST (x)) == PC)
8015 if (refers_to_regno_p (regno, SET_SRC (x)))
8016 return true;
8019 fmt = GET_RTX_FORMAT (code);
8020 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8022 if (fmt[i] == 'e'
8023 && reg_used_in_mem_p (regno, XEXP (x, i)))
8024 return true;
8026 else if (fmt[i] == 'E')
8027 for (j = 0; j < XVECLEN (x, i); j++)
8028 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8029 return true;
8031 return false;
8034 /* Returns true if expression DEP_RTX sets an address register
8035 used by instruction INSN to address memory. */
8037 static bool
8038 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8040 rtx target, pat;
8042 if (NONJUMP_INSN_P (dep_rtx))
8043 dep_rtx = PATTERN (dep_rtx);
8045 if (GET_CODE (dep_rtx) == SET)
8047 target = SET_DEST (dep_rtx);
8048 if (GET_CODE (target) == STRICT_LOW_PART)
8049 target = XEXP (target, 0);
8050 while (GET_CODE (target) == SUBREG)
8051 target = SUBREG_REG (target);
8053 if (GET_CODE (target) == REG)
8055 int regno = REGNO (target);
8057 if (s390_safe_attr_type (insn) == TYPE_LA)
8059 pat = PATTERN (insn);
8060 if (GET_CODE (pat) == PARALLEL)
8062 gcc_assert (XVECLEN (pat, 0) == 2);
8063 pat = XVECEXP (pat, 0, 0);
8065 gcc_assert (GET_CODE (pat) == SET);
8066 return refers_to_regno_p (regno, SET_SRC (pat));
8068 else if (get_attr_atype (insn) == ATYPE_AGEN)
8069 return reg_used_in_mem_p (regno, PATTERN (insn));
8072 return false;
8075 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8078 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8080 rtx dep_rtx = PATTERN (dep_insn);
8081 int i;
8083 if (GET_CODE (dep_rtx) == SET
8084 && addr_generation_dependency_p (dep_rtx, insn))
8085 return 1;
8086 else if (GET_CODE (dep_rtx) == PARALLEL)
8088 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8090 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8091 return 1;
8094 return 0;
8098 /* A C statement (sans semicolon) to update the integer scheduling priority
8099 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8100 reduce the priority to execute INSN later. Do not define this macro if
8101 you do not need to adjust the scheduling priorities of insns.
8103 A STD instruction should be scheduled earlier,
8104 in order to use the bypass. */
8105 static int
8106 s390_adjust_priority (rtx_insn *insn, int priority)
8108 if (! INSN_P (insn))
8109 return priority;
8111 if (s390_tune <= PROCESSOR_2064_Z900)
8112 return priority;
8114 switch (s390_safe_attr_type (insn))
8116 case TYPE_FSTOREDF:
8117 case TYPE_FSTORESF:
8118 priority = priority << 3;
8119 break;
8120 case TYPE_STORE:
8121 case TYPE_STM:
8122 priority = priority << 1;
8123 break;
8124 default:
8125 break;
8127 return priority;
8131 /* The number of instructions that can be issued per cycle. */
8133 static int
8134 s390_issue_rate (void)
8136 switch (s390_tune)
8138 case PROCESSOR_2084_Z990:
8139 case PROCESSOR_2094_Z9_109:
8140 case PROCESSOR_2094_Z9_EC:
8141 case PROCESSOR_2817_Z196:
8142 return 3;
8143 case PROCESSOR_2097_Z10:
8144 return 2;
8145 case PROCESSOR_9672_G5:
8146 case PROCESSOR_9672_G6:
8147 case PROCESSOR_2064_Z900:
8148 /* Starting with EC12 we use the sched_reorder hook to take care
8149 of instruction dispatch constraints. The algorithm only
8150 picks the best instruction and assumes only a single
8151 instruction gets issued per cycle. */
8152 case PROCESSOR_2827_ZEC12:
8153 case PROCESSOR_2964_Z13:
8154 case PROCESSOR_3906_Z14:
8155 default:
8156 return 1;
8160 static int
8161 s390_first_cycle_multipass_dfa_lookahead (void)
8163 return 4;
8166 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8167 Fix up MEMs as required. */
8169 static void
8170 annotate_constant_pool_refs (rtx *x)
8172 int i, j;
8173 const char *fmt;
8175 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8176 || !CONSTANT_POOL_ADDRESS_P (*x));
8178 /* Literal pool references can only occur inside a MEM ... */
8179 if (GET_CODE (*x) == MEM)
8181 rtx memref = XEXP (*x, 0);
8183 if (GET_CODE (memref) == SYMBOL_REF
8184 && CONSTANT_POOL_ADDRESS_P (memref))
8186 rtx base = cfun->machine->base_reg;
8187 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8188 UNSPEC_LTREF);
8190 *x = replace_equiv_address (*x, addr);
8191 return;
8194 if (GET_CODE (memref) == CONST
8195 && GET_CODE (XEXP (memref, 0)) == PLUS
8196 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8197 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8198 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8200 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8201 rtx sym = XEXP (XEXP (memref, 0), 0);
8202 rtx base = cfun->machine->base_reg;
8203 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8204 UNSPEC_LTREF);
8206 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8207 return;
8211 /* ... or a load-address type pattern. */
8212 if (GET_CODE (*x) == SET)
8214 rtx addrref = SET_SRC (*x);
8216 if (GET_CODE (addrref) == SYMBOL_REF
8217 && CONSTANT_POOL_ADDRESS_P (addrref))
8219 rtx base = cfun->machine->base_reg;
8220 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8221 UNSPEC_LTREF);
8223 SET_SRC (*x) = addr;
8224 return;
8227 if (GET_CODE (addrref) == CONST
8228 && GET_CODE (XEXP (addrref, 0)) == PLUS
8229 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8230 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8231 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8233 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8234 rtx sym = XEXP (XEXP (addrref, 0), 0);
8235 rtx base = cfun->machine->base_reg;
8236 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8237 UNSPEC_LTREF);
8239 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8240 return;
8244 /* Annotate LTREL_BASE as well. */
8245 if (GET_CODE (*x) == UNSPEC
8246 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8248 rtx base = cfun->machine->base_reg;
8249 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8250 UNSPEC_LTREL_BASE);
8251 return;
8254 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8255 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8257 if (fmt[i] == 'e')
8259 annotate_constant_pool_refs (&XEXP (*x, i));
8261 else if (fmt[i] == 'E')
8263 for (j = 0; j < XVECLEN (*x, i); j++)
8264 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8269 /* Split all branches that exceed the maximum distance.
8270 Returns true if this created a new literal pool entry. */
8272 static int
8273 s390_split_branches (void)
8275 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8276 int new_literal = 0, ret;
8277 rtx_insn *insn;
8278 rtx pat, target;
8279 rtx *label;
8281 /* We need correct insn addresses. */
8283 shorten_branches (get_insns ());
8285 /* Find all branches that exceed 64KB, and split them. */
8287 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8289 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8290 continue;
8292 pat = PATTERN (insn);
8293 if (GET_CODE (pat) == PARALLEL)
8294 pat = XVECEXP (pat, 0, 0);
8295 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8296 continue;
8298 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8300 label = &SET_SRC (pat);
8302 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8304 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8305 label = &XEXP (SET_SRC (pat), 1);
8306 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8307 label = &XEXP (SET_SRC (pat), 2);
8308 else
8309 continue;
8311 else
8312 continue;
8314 if (get_attr_length (insn) <= 4)
8315 continue;
8317 /* We are going to use the return register as scratch register,
8318 make sure it will be saved/restored by the prologue/epilogue. */
8319 cfun_frame_layout.save_return_addr_p = 1;
8321 if (!flag_pic)
8323 new_literal = 1;
8324 rtx mem = force_const_mem (Pmode, *label);
8325 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8326 insn);
8327 INSN_ADDRESSES_NEW (set_insn, -1);
8328 annotate_constant_pool_refs (&PATTERN (set_insn));
8330 target = temp_reg;
8332 else
8334 new_literal = 1;
8335 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8336 UNSPEC_LTREL_OFFSET);
8337 target = gen_rtx_CONST (Pmode, target);
8338 target = force_const_mem (Pmode, target);
8339 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8340 insn);
8341 INSN_ADDRESSES_NEW (set_insn, -1);
8342 annotate_constant_pool_refs (&PATTERN (set_insn));
8344 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8345 cfun->machine->base_reg),
8346 UNSPEC_LTREL_BASE);
8347 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8350 ret = validate_change (insn, label, target, 0);
8351 gcc_assert (ret);
8354 return new_literal;
8358 /* Find an annotated literal pool symbol referenced in RTX X,
8359 and store it at REF. Will abort if X contains references to
8360 more than one such pool symbol; multiple references to the same
8361 symbol are allowed, however.
8363 The rtx pointed to by REF must be initialized to NULL_RTX
8364 by the caller before calling this routine. */
8366 static void
8367 find_constant_pool_ref (rtx x, rtx *ref)
8369 int i, j;
8370 const char *fmt;
8372 /* Ignore LTREL_BASE references. */
8373 if (GET_CODE (x) == UNSPEC
8374 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8375 return;
8376 /* Likewise POOL_ENTRY insns. */
8377 if (GET_CODE (x) == UNSPEC_VOLATILE
8378 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8379 return;
8381 gcc_assert (GET_CODE (x) != SYMBOL_REF
8382 || !CONSTANT_POOL_ADDRESS_P (x));
8384 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8386 rtx sym = XVECEXP (x, 0, 0);
8387 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8388 && CONSTANT_POOL_ADDRESS_P (sym));
8390 if (*ref == NULL_RTX)
8391 *ref = sym;
8392 else
8393 gcc_assert (*ref == sym);
8395 return;
8398 fmt = GET_RTX_FORMAT (GET_CODE (x));
8399 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8401 if (fmt[i] == 'e')
8403 find_constant_pool_ref (XEXP (x, i), ref);
8405 else if (fmt[i] == 'E')
8407 for (j = 0; j < XVECLEN (x, i); j++)
8408 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8413 /* Replace every reference to the annotated literal pool
8414 symbol REF in X by its base plus OFFSET. */
8416 static void
8417 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8419 int i, j;
8420 const char *fmt;
8422 gcc_assert (*x != ref);
8424 if (GET_CODE (*x) == UNSPEC
8425 && XINT (*x, 1) == UNSPEC_LTREF
8426 && XVECEXP (*x, 0, 0) == ref)
8428 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8429 return;
8432 if (GET_CODE (*x) == PLUS
8433 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8434 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8435 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8436 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8438 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8439 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8440 return;
8443 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8444 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8446 if (fmt[i] == 'e')
8448 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8450 else if (fmt[i] == 'E')
8452 for (j = 0; j < XVECLEN (*x, i); j++)
8453 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8458 /* Check whether X contains an UNSPEC_LTREL_BASE.
8459 Return its constant pool symbol if found, NULL_RTX otherwise. */
8461 static rtx
8462 find_ltrel_base (rtx x)
8464 int i, j;
8465 const char *fmt;
8467 if (GET_CODE (x) == UNSPEC
8468 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8469 return XVECEXP (x, 0, 0);
8471 fmt = GET_RTX_FORMAT (GET_CODE (x));
8472 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8474 if (fmt[i] == 'e')
8476 rtx fnd = find_ltrel_base (XEXP (x, i));
8477 if (fnd)
8478 return fnd;
8480 else if (fmt[i] == 'E')
8482 for (j = 0; j < XVECLEN (x, i); j++)
8484 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8485 if (fnd)
8486 return fnd;
8491 return NULL_RTX;
8494 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8496 static void
8497 replace_ltrel_base (rtx *x)
8499 int i, j;
8500 const char *fmt;
8502 if (GET_CODE (*x) == UNSPEC
8503 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8505 *x = XVECEXP (*x, 0, 1);
8506 return;
8509 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8510 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8512 if (fmt[i] == 'e')
8514 replace_ltrel_base (&XEXP (*x, i));
8516 else if (fmt[i] == 'E')
8518 for (j = 0; j < XVECLEN (*x, i); j++)
8519 replace_ltrel_base (&XVECEXP (*x, i, j));
8525 /* We keep a list of constants which we have to add to internal
8526 constant tables in the middle of large functions. */
8528 #define NR_C_MODES 32
8529 machine_mode constant_modes[NR_C_MODES] =
8531 TFmode, TImode, TDmode,
8532 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8533 V4SFmode, V2DFmode, V1TFmode,
8534 DFmode, DImode, DDmode,
8535 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8536 SFmode, SImode, SDmode,
8537 V4QImode, V2HImode, V1SImode, V1SFmode,
8538 HImode,
8539 V2QImode, V1HImode,
8540 QImode,
8541 V1QImode
8544 struct constant
8546 struct constant *next;
8547 rtx value;
8548 rtx_code_label *label;
8551 struct constant_pool
8553 struct constant_pool *next;
8554 rtx_insn *first_insn;
8555 rtx_insn *pool_insn;
8556 bitmap insns;
8557 rtx_insn *emit_pool_after;
8559 struct constant *constants[NR_C_MODES];
8560 struct constant *execute;
8561 rtx_code_label *label;
8562 int size;
8565 /* Allocate new constant_pool structure. */
8567 static struct constant_pool *
8568 s390_alloc_pool (void)
8570 struct constant_pool *pool;
8571 int i;
8573 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8574 pool->next = NULL;
8575 for (i = 0; i < NR_C_MODES; i++)
8576 pool->constants[i] = NULL;
8578 pool->execute = NULL;
8579 pool->label = gen_label_rtx ();
8580 pool->first_insn = NULL;
8581 pool->pool_insn = NULL;
8582 pool->insns = BITMAP_ALLOC (NULL);
8583 pool->size = 0;
8584 pool->emit_pool_after = NULL;
8586 return pool;
8589 /* Create new constant pool covering instructions starting at INSN
8590 and chain it to the end of POOL_LIST. */
8592 static struct constant_pool *
8593 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8595 struct constant_pool *pool, **prev;
8597 pool = s390_alloc_pool ();
8598 pool->first_insn = insn;
8600 for (prev = pool_list; *prev; prev = &(*prev)->next)
8602 *prev = pool;
8604 return pool;
8607 /* End range of instructions covered by POOL at INSN and emit
8608 placeholder insn representing the pool. */
8610 static void
8611 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8613 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8615 if (!insn)
8616 insn = get_last_insn ();
8618 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8619 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8622 /* Add INSN to the list of insns covered by POOL. */
8624 static void
8625 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8627 bitmap_set_bit (pool->insns, INSN_UID (insn));
8630 /* Return pool out of POOL_LIST that covers INSN. */
8632 static struct constant_pool *
8633 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8635 struct constant_pool *pool;
8637 for (pool = pool_list; pool; pool = pool->next)
8638 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8639 break;
8641 return pool;
8644 /* Add constant VAL of mode MODE to the constant pool POOL. */
8646 static void
8647 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8649 struct constant *c;
8650 int i;
8652 for (i = 0; i < NR_C_MODES; i++)
8653 if (constant_modes[i] == mode)
8654 break;
8655 gcc_assert (i != NR_C_MODES);
8657 for (c = pool->constants[i]; c != NULL; c = c->next)
8658 if (rtx_equal_p (val, c->value))
8659 break;
8661 if (c == NULL)
8663 c = (struct constant *) xmalloc (sizeof *c);
8664 c->value = val;
8665 c->label = gen_label_rtx ();
8666 c->next = pool->constants[i];
8667 pool->constants[i] = c;
8668 pool->size += GET_MODE_SIZE (mode);
8672 /* Return an rtx that represents the offset of X from the start of
8673 pool POOL. */
8675 static rtx
8676 s390_pool_offset (struct constant_pool *pool, rtx x)
8678 rtx label;
8680 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8681 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8682 UNSPEC_POOL_OFFSET);
8683 return gen_rtx_CONST (GET_MODE (x), x);
8686 /* Find constant VAL of mode MODE in the constant pool POOL.
8687 Return an RTX describing the distance from the start of
8688 the pool to the location of the new constant. */
8690 static rtx
8691 s390_find_constant (struct constant_pool *pool, rtx val,
8692 machine_mode mode)
8694 struct constant *c;
8695 int i;
8697 for (i = 0; i < NR_C_MODES; i++)
8698 if (constant_modes[i] == mode)
8699 break;
8700 gcc_assert (i != NR_C_MODES);
8702 for (c = pool->constants[i]; c != NULL; c = c->next)
8703 if (rtx_equal_p (val, c->value))
8704 break;
8706 gcc_assert (c);
8708 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8711 /* Check whether INSN is an execute. Return the label_ref to its
8712 execute target template if so, NULL_RTX otherwise. */
8714 static rtx
8715 s390_execute_label (rtx insn)
8717 if (NONJUMP_INSN_P (insn)
8718 && GET_CODE (PATTERN (insn)) == PARALLEL
8719 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8720 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8721 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8723 return NULL_RTX;
8726 /* Add execute target for INSN to the constant pool POOL. */
8728 static void
8729 s390_add_execute (struct constant_pool *pool, rtx insn)
8731 struct constant *c;
8733 for (c = pool->execute; c != NULL; c = c->next)
8734 if (INSN_UID (insn) == INSN_UID (c->value))
8735 break;
8737 if (c == NULL)
8739 c = (struct constant *) xmalloc (sizeof *c);
8740 c->value = insn;
8741 c->label = gen_label_rtx ();
8742 c->next = pool->execute;
8743 pool->execute = c;
8744 pool->size += 6;
8748 /* Find execute target for INSN in the constant pool POOL.
8749 Return an RTX describing the distance from the start of
8750 the pool to the location of the execute target. */
8752 static rtx
8753 s390_find_execute (struct constant_pool *pool, rtx insn)
8755 struct constant *c;
8757 for (c = pool->execute; c != NULL; c = c->next)
8758 if (INSN_UID (insn) == INSN_UID (c->value))
8759 break;
8761 gcc_assert (c);
8763 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8766 /* For an execute INSN, extract the execute target template. */
8768 static rtx
8769 s390_execute_target (rtx insn)
8771 rtx pattern = PATTERN (insn);
8772 gcc_assert (s390_execute_label (insn));
8774 if (XVECLEN (pattern, 0) == 2)
8776 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8778 else
8780 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8781 int i;
8783 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8784 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8786 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8789 return pattern;
8792 /* Indicate that INSN cannot be duplicated. This is the case for
8793 execute insns that carry a unique label. */
8795 static bool
8796 s390_cannot_copy_insn_p (rtx_insn *insn)
8798 rtx label = s390_execute_label (insn);
8799 return label && label != const0_rtx;
8802 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8803 do not emit the pool base label. */
8805 static void
8806 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8808 struct constant *c;
8809 rtx_insn *insn = pool->pool_insn;
8810 int i;
8812 /* Switch to rodata section. */
8813 if (TARGET_CPU_ZARCH)
8815 insn = emit_insn_after (gen_pool_section_start (), insn);
8816 INSN_ADDRESSES_NEW (insn, -1);
8819 /* Ensure minimum pool alignment. */
8820 if (TARGET_CPU_ZARCH)
8821 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8822 else
8823 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8824 INSN_ADDRESSES_NEW (insn, -1);
8826 /* Emit pool base label. */
8827 if (!remote_label)
8829 insn = emit_label_after (pool->label, insn);
8830 INSN_ADDRESSES_NEW (insn, -1);
8833 /* Dump constants in descending alignment requirement order,
8834 ensuring proper alignment for every constant. */
8835 for (i = 0; i < NR_C_MODES; i++)
8836 for (c = pool->constants[i]; c; c = c->next)
8838 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8839 rtx value = copy_rtx (c->value);
8840 if (GET_CODE (value) == CONST
8841 && GET_CODE (XEXP (value, 0)) == UNSPEC
8842 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8843 && XVECLEN (XEXP (value, 0), 0) == 1)
8844 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8846 insn = emit_label_after (c->label, insn);
8847 INSN_ADDRESSES_NEW (insn, -1);
8849 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8850 gen_rtvec (1, value),
8851 UNSPECV_POOL_ENTRY);
8852 insn = emit_insn_after (value, insn);
8853 INSN_ADDRESSES_NEW (insn, -1);
8856 /* Ensure minimum alignment for instructions. */
8857 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8858 INSN_ADDRESSES_NEW (insn, -1);
8860 /* Output in-pool execute template insns. */
8861 for (c = pool->execute; c; c = c->next)
8863 insn = emit_label_after (c->label, insn);
8864 INSN_ADDRESSES_NEW (insn, -1);
8866 insn = emit_insn_after (s390_execute_target (c->value), insn);
8867 INSN_ADDRESSES_NEW (insn, -1);
8870 /* Switch back to previous section. */
8871 if (TARGET_CPU_ZARCH)
8873 insn = emit_insn_after (gen_pool_section_end (), insn);
8874 INSN_ADDRESSES_NEW (insn, -1);
8877 insn = emit_barrier_after (insn);
8878 INSN_ADDRESSES_NEW (insn, -1);
8880 /* Remove placeholder insn. */
8881 remove_insn (pool->pool_insn);
8884 /* Free all memory used by POOL. */
8886 static void
8887 s390_free_pool (struct constant_pool *pool)
8889 struct constant *c, *next;
8890 int i;
8892 for (i = 0; i < NR_C_MODES; i++)
8893 for (c = pool->constants[i]; c; c = next)
8895 next = c->next;
8896 free (c);
8899 for (c = pool->execute; c; c = next)
8901 next = c->next;
8902 free (c);
8905 BITMAP_FREE (pool->insns);
8906 free (pool);
8910 /* Collect main literal pool. Return NULL on overflow. */
8912 static struct constant_pool *
8913 s390_mainpool_start (void)
8915 struct constant_pool *pool;
8916 rtx_insn *insn;
8918 pool = s390_alloc_pool ();
8920 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8922 if (NONJUMP_INSN_P (insn)
8923 && GET_CODE (PATTERN (insn)) == SET
8924 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8925 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8927 /* There might be two main_pool instructions if base_reg
8928 is call-clobbered; one for shrink-wrapped code and one
8929 for the rest. We want to keep the first. */
8930 if (pool->pool_insn)
8932 insn = PREV_INSN (insn);
8933 delete_insn (NEXT_INSN (insn));
8934 continue;
8936 pool->pool_insn = insn;
8939 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8941 s390_add_execute (pool, insn);
8943 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8945 rtx pool_ref = NULL_RTX;
8946 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8947 if (pool_ref)
8949 rtx constant = get_pool_constant (pool_ref);
8950 machine_mode mode = get_pool_mode (pool_ref);
8951 s390_add_constant (pool, constant, mode);
8955 /* If hot/cold partitioning is enabled we have to make sure that
8956 the literal pool is emitted in the same section where the
8957 initialization of the literal pool base pointer takes place.
8958 emit_pool_after is only used in the non-overflow case on non
8959 Z cpus where we can emit the literal pool at the end of the
8960 function body within the text section. */
8961 if (NOTE_P (insn)
8962 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8963 && !pool->emit_pool_after)
8964 pool->emit_pool_after = PREV_INSN (insn);
8967 gcc_assert (pool->pool_insn || pool->size == 0);
8969 if (pool->size >= 4096)
8971 /* We're going to chunkify the pool, so remove the main
8972 pool placeholder insn. */
8973 remove_insn (pool->pool_insn);
8975 s390_free_pool (pool);
8976 pool = NULL;
8979 /* If the functions ends with the section where the literal pool
8980 should be emitted set the marker to its end. */
8981 if (pool && !pool->emit_pool_after)
8982 pool->emit_pool_after = get_last_insn ();
8984 return pool;
8987 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8988 Modify the current function to output the pool constants as well as
8989 the pool register setup instruction. */
8991 static void
8992 s390_mainpool_finish (struct constant_pool *pool)
8994 rtx base_reg = cfun->machine->base_reg;
8996 /* If the pool is empty, we're done. */
8997 if (pool->size == 0)
8999 /* We don't actually need a base register after all. */
9000 cfun->machine->base_reg = NULL_RTX;
9002 if (pool->pool_insn)
9003 remove_insn (pool->pool_insn);
9004 s390_free_pool (pool);
9005 return;
9008 /* We need correct insn addresses. */
9009 shorten_branches (get_insns ());
9011 /* On zSeries, we use a LARL to load the pool register. The pool is
9012 located in the .rodata section, so we emit it after the function. */
9013 if (TARGET_CPU_ZARCH)
9015 rtx set = gen_main_base_64 (base_reg, pool->label);
9016 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9017 INSN_ADDRESSES_NEW (insn, -1);
9018 remove_insn (pool->pool_insn);
9020 insn = get_last_insn ();
9021 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9022 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9024 s390_dump_pool (pool, 0);
9027 /* On S/390, if the total size of the function's code plus literal pool
9028 does not exceed 4096 bytes, we use BASR to set up a function base
9029 pointer, and emit the literal pool at the end of the function. */
9030 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
9031 + pool->size + 8 /* alignment slop */ < 4096)
9033 rtx set = gen_main_base_31_small (base_reg, pool->label);
9034 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
9035 INSN_ADDRESSES_NEW (insn, -1);
9036 remove_insn (pool->pool_insn);
9038 insn = emit_label_after (pool->label, insn);
9039 INSN_ADDRESSES_NEW (insn, -1);
9041 /* emit_pool_after will be set by s390_mainpool_start to the
9042 last insn of the section where the literal pool should be
9043 emitted. */
9044 insn = pool->emit_pool_after;
9046 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9047 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9049 s390_dump_pool (pool, 1);
9052 /* Otherwise, we emit an inline literal pool and use BASR to branch
9053 over it, setting up the pool register at the same time. */
9054 else
9056 rtx_code_label *pool_end = gen_label_rtx ();
9058 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9059 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9060 JUMP_LABEL (insn) = pool_end;
9061 INSN_ADDRESSES_NEW (insn, -1);
9062 remove_insn (pool->pool_insn);
9064 insn = emit_label_after (pool->label, insn);
9065 INSN_ADDRESSES_NEW (insn, -1);
9067 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9068 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9070 insn = emit_label_after (pool_end, pool->pool_insn);
9071 INSN_ADDRESSES_NEW (insn, -1);
9073 s390_dump_pool (pool, 1);
9077 /* Replace all literal pool references. */
9079 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9081 if (INSN_P (insn))
9082 replace_ltrel_base (&PATTERN (insn));
9084 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9086 rtx addr, pool_ref = NULL_RTX;
9087 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9088 if (pool_ref)
9090 if (s390_execute_label (insn))
9091 addr = s390_find_execute (pool, insn);
9092 else
9093 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9094 get_pool_mode (pool_ref));
9096 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9097 INSN_CODE (insn) = -1;
9103 /* Free the pool. */
9104 s390_free_pool (pool);
9107 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9108 We have decided we cannot use this pool, so revert all changes
9109 to the current function that were done by s390_mainpool_start. */
9110 static void
9111 s390_mainpool_cancel (struct constant_pool *pool)
9113 /* We didn't actually change the instruction stream, so simply
9114 free the pool memory. */
9115 s390_free_pool (pool);
9119 /* Chunkify the literal pool. */
9121 #define S390_POOL_CHUNK_MIN 0xc00
9122 #define S390_POOL_CHUNK_MAX 0xe00
9124 static struct constant_pool *
9125 s390_chunkify_start (void)
9127 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9128 int extra_size = 0;
9129 bitmap far_labels;
9130 rtx pending_ltrel = NULL_RTX;
9131 rtx_insn *insn;
9133 rtx (*gen_reload_base) (rtx, rtx) =
9134 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
9137 /* We need correct insn addresses. */
9139 shorten_branches (get_insns ());
9141 /* Scan all insns and move literals to pool chunks. */
9143 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9145 bool section_switch_p = false;
9147 /* Check for pending LTREL_BASE. */
9148 if (INSN_P (insn))
9150 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9151 if (ltrel_base)
9153 gcc_assert (ltrel_base == pending_ltrel);
9154 pending_ltrel = NULL_RTX;
9158 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9160 if (!curr_pool)
9161 curr_pool = s390_start_pool (&pool_list, insn);
9163 s390_add_execute (curr_pool, insn);
9164 s390_add_pool_insn (curr_pool, insn);
9166 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9168 rtx pool_ref = NULL_RTX;
9169 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9170 if (pool_ref)
9172 rtx constant = get_pool_constant (pool_ref);
9173 machine_mode mode = get_pool_mode (pool_ref);
9175 if (!curr_pool)
9176 curr_pool = s390_start_pool (&pool_list, insn);
9178 s390_add_constant (curr_pool, constant, mode);
9179 s390_add_pool_insn (curr_pool, insn);
9181 /* Don't split the pool chunk between a LTREL_OFFSET load
9182 and the corresponding LTREL_BASE. */
9183 if (GET_CODE (constant) == CONST
9184 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9185 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9187 gcc_assert (!pending_ltrel);
9188 pending_ltrel = pool_ref;
9193 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9195 if (curr_pool)
9196 s390_add_pool_insn (curr_pool, insn);
9197 /* An LTREL_BASE must follow within the same basic block. */
9198 gcc_assert (!pending_ltrel);
9201 if (NOTE_P (insn))
9202 switch (NOTE_KIND (insn))
9204 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9205 section_switch_p = true;
9206 break;
9207 case NOTE_INSN_VAR_LOCATION:
9208 case NOTE_INSN_CALL_ARG_LOCATION:
9209 continue;
9210 default:
9211 break;
9214 if (!curr_pool
9215 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9216 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9217 continue;
9219 if (TARGET_CPU_ZARCH)
9221 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9222 continue;
9224 s390_end_pool (curr_pool, NULL);
9225 curr_pool = NULL;
9227 else
9229 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9230 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9231 + extra_size;
9233 /* We will later have to insert base register reload insns.
9234 Those will have an effect on code size, which we need to
9235 consider here. This calculation makes rather pessimistic
9236 worst-case assumptions. */
9237 if (LABEL_P (insn))
9238 extra_size += 6;
9240 if (chunk_size < S390_POOL_CHUNK_MIN
9241 && curr_pool->size < S390_POOL_CHUNK_MIN
9242 && !section_switch_p)
9243 continue;
9245 /* Pool chunks can only be inserted after BARRIERs ... */
9246 if (BARRIER_P (insn))
9248 s390_end_pool (curr_pool, insn);
9249 curr_pool = NULL;
9250 extra_size = 0;
9253 /* ... so if we don't find one in time, create one. */
9254 else if (chunk_size > S390_POOL_CHUNK_MAX
9255 || curr_pool->size > S390_POOL_CHUNK_MAX
9256 || section_switch_p)
9258 rtx_insn *label, *jump, *barrier, *next, *prev;
9260 if (!section_switch_p)
9262 /* We can insert the barrier only after a 'real' insn. */
9263 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9264 continue;
9265 if (get_attr_length (insn) == 0)
9266 continue;
9267 /* Don't separate LTREL_BASE from the corresponding
9268 LTREL_OFFSET load. */
9269 if (pending_ltrel)
9270 continue;
9271 next = insn;
9274 insn = next;
9275 next = NEXT_INSN (insn);
9277 while (next
9278 && NOTE_P (next)
9279 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
9280 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
9282 else
9284 gcc_assert (!pending_ltrel);
9286 /* The old pool has to end before the section switch
9287 note in order to make it part of the current
9288 section. */
9289 insn = PREV_INSN (insn);
9292 label = gen_label_rtx ();
9293 prev = insn;
9294 if (prev && NOTE_P (prev))
9295 prev = prev_nonnote_insn (prev);
9296 if (prev)
9297 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9298 INSN_LOCATION (prev));
9299 else
9300 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9301 barrier = emit_barrier_after (jump);
9302 insn = emit_label_after (label, barrier);
9303 JUMP_LABEL (jump) = label;
9304 LABEL_NUSES (label) = 1;
9306 INSN_ADDRESSES_NEW (jump, -1);
9307 INSN_ADDRESSES_NEW (barrier, -1);
9308 INSN_ADDRESSES_NEW (insn, -1);
9310 s390_end_pool (curr_pool, barrier);
9311 curr_pool = NULL;
9312 extra_size = 0;
9317 if (curr_pool)
9318 s390_end_pool (curr_pool, NULL);
9319 gcc_assert (!pending_ltrel);
9321 /* Find all labels that are branched into
9322 from an insn belonging to a different chunk. */
9324 far_labels = BITMAP_ALLOC (NULL);
9326 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9328 rtx_jump_table_data *table;
9330 /* Labels marked with LABEL_PRESERVE_P can be target
9331 of non-local jumps, so we have to mark them.
9332 The same holds for named labels.
9334 Don't do that, however, if it is the label before
9335 a jump table. */
9337 if (LABEL_P (insn)
9338 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9340 rtx_insn *vec_insn = NEXT_INSN (insn);
9341 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9342 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9344 /* Check potential targets in a table jump (casesi_jump). */
9345 else if (tablejump_p (insn, NULL, &table))
9347 rtx vec_pat = PATTERN (table);
9348 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9350 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9352 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9354 if (s390_find_pool (pool_list, label)
9355 != s390_find_pool (pool_list, insn))
9356 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9359 /* If we have a direct jump (conditional or unconditional),
9360 check all potential targets. */
9361 else if (JUMP_P (insn))
9363 rtx pat = PATTERN (insn);
9365 if (GET_CODE (pat) == PARALLEL)
9366 pat = XVECEXP (pat, 0, 0);
9368 if (GET_CODE (pat) == SET)
9370 rtx label = JUMP_LABEL (insn);
9371 if (label && !ANY_RETURN_P (label))
9373 if (s390_find_pool (pool_list, label)
9374 != s390_find_pool (pool_list, insn))
9375 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9381 /* Insert base register reload insns before every pool. */
9383 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9385 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9386 curr_pool->label);
9387 rtx_insn *insn = curr_pool->first_insn;
9388 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9391 /* Insert base register reload insns at every far label. */
9393 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9394 if (LABEL_P (insn)
9395 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9397 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9398 if (pool)
9400 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9401 pool->label);
9402 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9407 BITMAP_FREE (far_labels);
9410 /* Recompute insn addresses. */
9412 init_insn_lengths ();
9413 shorten_branches (get_insns ());
9415 return pool_list;
9418 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9419 After we have decided to use this list, finish implementing
9420 all changes to the current function as required. */
9422 static void
9423 s390_chunkify_finish (struct constant_pool *pool_list)
9425 struct constant_pool *curr_pool = NULL;
9426 rtx_insn *insn;
9429 /* Replace all literal pool references. */
9431 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9433 if (INSN_P (insn))
9434 replace_ltrel_base (&PATTERN (insn));
9436 curr_pool = s390_find_pool (pool_list, insn);
9437 if (!curr_pool)
9438 continue;
9440 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9442 rtx addr, pool_ref = NULL_RTX;
9443 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9444 if (pool_ref)
9446 if (s390_execute_label (insn))
9447 addr = s390_find_execute (curr_pool, insn);
9448 else
9449 addr = s390_find_constant (curr_pool,
9450 get_pool_constant (pool_ref),
9451 get_pool_mode (pool_ref));
9453 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9454 INSN_CODE (insn) = -1;
9459 /* Dump out all literal pools. */
9461 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9462 s390_dump_pool (curr_pool, 0);
9464 /* Free pool list. */
9466 while (pool_list)
9468 struct constant_pool *next = pool_list->next;
9469 s390_free_pool (pool_list);
9470 pool_list = next;
9474 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9475 We have decided we cannot use this list, so revert all changes
9476 to the current function that were done by s390_chunkify_start. */
9478 static void
9479 s390_chunkify_cancel (struct constant_pool *pool_list)
9481 struct constant_pool *curr_pool = NULL;
9482 rtx_insn *insn;
9484 /* Remove all pool placeholder insns. */
9486 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9488 /* Did we insert an extra barrier? Remove it. */
9489 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9490 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9491 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9493 if (jump && JUMP_P (jump)
9494 && barrier && BARRIER_P (barrier)
9495 && label && LABEL_P (label)
9496 && GET_CODE (PATTERN (jump)) == SET
9497 && SET_DEST (PATTERN (jump)) == pc_rtx
9498 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9499 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9501 remove_insn (jump);
9502 remove_insn (barrier);
9503 remove_insn (label);
9506 remove_insn (curr_pool->pool_insn);
9509 /* Remove all base register reload insns. */
9511 for (insn = get_insns (); insn; )
9513 rtx_insn *next_insn = NEXT_INSN (insn);
9515 if (NONJUMP_INSN_P (insn)
9516 && GET_CODE (PATTERN (insn)) == SET
9517 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9518 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9519 remove_insn (insn);
9521 insn = next_insn;
9524 /* Free pool list. */
9526 while (pool_list)
9528 struct constant_pool *next = pool_list->next;
9529 s390_free_pool (pool_list);
9530 pool_list = next;
9534 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9536 void
9537 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9539 switch (GET_MODE_CLASS (mode))
9541 case MODE_FLOAT:
9542 case MODE_DECIMAL_FLOAT:
9543 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9545 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9546 as_a <scalar_float_mode> (mode), align);
9547 break;
9549 case MODE_INT:
9550 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9551 mark_symbol_refs_as_used (exp);
9552 break;
9554 case MODE_VECTOR_INT:
9555 case MODE_VECTOR_FLOAT:
9557 int i;
9558 machine_mode inner_mode;
9559 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9561 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9562 for (i = 0; i < XVECLEN (exp, 0); i++)
9563 s390_output_pool_entry (XVECEXP (exp, 0, i),
9564 inner_mode,
9565 i == 0
9566 ? align
9567 : GET_MODE_BITSIZE (inner_mode));
9569 break;
9571 default:
9572 gcc_unreachable ();
9577 /* Return an RTL expression representing the value of the return address
9578 for the frame COUNT steps up from the current frame. FRAME is the
9579 frame pointer of that frame. */
9582 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9584 int offset;
9585 rtx addr;
9587 /* Without backchain, we fail for all but the current frame. */
9589 if (!TARGET_BACKCHAIN && count > 0)
9590 return NULL_RTX;
9592 /* For the current frame, we need to make sure the initial
9593 value of RETURN_REGNUM is actually saved. */
9595 if (count == 0)
9597 /* On non-z architectures branch splitting could overwrite r14. */
9598 if (TARGET_CPU_ZARCH)
9599 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9600 else
9602 cfun_frame_layout.save_return_addr_p = true;
9603 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9607 if (TARGET_PACKED_STACK)
9608 offset = -2 * UNITS_PER_LONG;
9609 else
9610 offset = RETURN_REGNUM * UNITS_PER_LONG;
9612 addr = plus_constant (Pmode, frame, offset);
9613 addr = memory_address (Pmode, addr);
9614 return gen_rtx_MEM (Pmode, addr);
9617 /* Return an RTL expression representing the back chain stored in
9618 the current stack frame. */
9621 s390_back_chain_rtx (void)
9623 rtx chain;
9625 gcc_assert (TARGET_BACKCHAIN);
9627 if (TARGET_PACKED_STACK)
9628 chain = plus_constant (Pmode, stack_pointer_rtx,
9629 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9630 else
9631 chain = stack_pointer_rtx;
9633 chain = gen_rtx_MEM (Pmode, chain);
9634 return chain;
9637 /* Find first call clobbered register unused in a function.
9638 This could be used as base register in a leaf function
9639 or for holding the return address before epilogue. */
9641 static int
9642 find_unused_clobbered_reg (void)
9644 int i;
9645 for (i = 0; i < 6; i++)
9646 if (!df_regs_ever_live_p (i))
9647 return i;
9648 return 0;
9652 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9653 clobbered hard regs in SETREG. */
9655 static void
9656 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9658 char *regs_ever_clobbered = (char *)data;
9659 unsigned int i, regno;
9660 machine_mode mode = GET_MODE (setreg);
9662 if (GET_CODE (setreg) == SUBREG)
9664 rtx inner = SUBREG_REG (setreg);
9665 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9666 return;
9667 regno = subreg_regno (setreg);
9669 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9670 regno = REGNO (setreg);
9671 else
9672 return;
9674 for (i = regno;
9675 i < end_hard_regno (mode, regno);
9676 i++)
9677 regs_ever_clobbered[i] = 1;
9680 /* Walks through all basic blocks of the current function looking
9681 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9682 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9683 each of those regs. */
9685 static void
9686 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9688 basic_block cur_bb;
9689 rtx_insn *cur_insn;
9690 unsigned int i;
9692 memset (regs_ever_clobbered, 0, 32);
9694 /* For non-leaf functions we have to consider all call clobbered regs to be
9695 clobbered. */
9696 if (!crtl->is_leaf)
9698 for (i = 0; i < 32; i++)
9699 regs_ever_clobbered[i] = call_really_used_regs[i];
9702 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9703 this work is done by liveness analysis (mark_regs_live_at_end).
9704 Special care is needed for functions containing landing pads. Landing pads
9705 may use the eh registers, but the code which sets these registers is not
9706 contained in that function. Hence s390_regs_ever_clobbered is not able to
9707 deal with this automatically. */
9708 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9709 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9710 if (crtl->calls_eh_return
9711 || (cfun->machine->has_landing_pad_p
9712 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9713 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9715 /* For nonlocal gotos all call-saved registers have to be saved.
9716 This flag is also set for the unwinding code in libgcc.
9717 See expand_builtin_unwind_init. For regs_ever_live this is done by
9718 reload. */
9719 if (crtl->saves_all_registers)
9720 for (i = 0; i < 32; i++)
9721 if (!call_really_used_regs[i])
9722 regs_ever_clobbered[i] = 1;
9724 FOR_EACH_BB_FN (cur_bb, cfun)
9726 FOR_BB_INSNS (cur_bb, cur_insn)
9728 rtx pat;
9730 if (!INSN_P (cur_insn))
9731 continue;
9733 pat = PATTERN (cur_insn);
9735 /* Ignore GPR restore insns. */
9736 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9738 if (GET_CODE (pat) == SET
9739 && GENERAL_REG_P (SET_DEST (pat)))
9741 /* lgdr */
9742 if (GET_MODE (SET_SRC (pat)) == DImode
9743 && FP_REG_P (SET_SRC (pat)))
9744 continue;
9746 /* l / lg */
9747 if (GET_CODE (SET_SRC (pat)) == MEM)
9748 continue;
9751 /* lm / lmg */
9752 if (GET_CODE (pat) == PARALLEL
9753 && load_multiple_operation (pat, VOIDmode))
9754 continue;
9757 note_stores (pat,
9758 s390_reg_clobbered_rtx,
9759 regs_ever_clobbered);
9764 /* Determine the frame area which actually has to be accessed
9765 in the function epilogue. The values are stored at the
9766 given pointers AREA_BOTTOM (address of the lowest used stack
9767 address) and AREA_TOP (address of the first item which does
9768 not belong to the stack frame). */
9770 static void
9771 s390_frame_area (int *area_bottom, int *area_top)
9773 int b, t;
9775 b = INT_MAX;
9776 t = INT_MIN;
9778 if (cfun_frame_layout.first_restore_gpr != -1)
9780 b = (cfun_frame_layout.gprs_offset
9781 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9782 t = b + (cfun_frame_layout.last_restore_gpr
9783 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9786 if (TARGET_64BIT && cfun_save_high_fprs_p)
9788 b = MIN (b, cfun_frame_layout.f8_offset);
9789 t = MAX (t, (cfun_frame_layout.f8_offset
9790 + cfun_frame_layout.high_fprs * 8));
9793 if (!TARGET_64BIT)
9795 if (cfun_fpr_save_p (FPR4_REGNUM))
9797 b = MIN (b, cfun_frame_layout.f4_offset);
9798 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9800 if (cfun_fpr_save_p (FPR6_REGNUM))
9802 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9803 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9806 *area_bottom = b;
9807 *area_top = t;
9809 /* Update gpr_save_slots in the frame layout trying to make use of
9810 FPRs as GPR save slots.
9811 This is a helper routine of s390_register_info. */
9813 static void
9814 s390_register_info_gprtofpr ()
9816 int save_reg_slot = FPR0_REGNUM;
9817 int i, j;
9819 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9820 return;
9822 /* builtin_eh_return needs to be able to modify the return address
9823 on the stack. It could also adjust the FPR save slot instead but
9824 is it worth the trouble?! */
9825 if (crtl->calls_eh_return)
9826 return;
9828 for (i = 15; i >= 6; i--)
9830 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9831 continue;
9833 /* Advance to the next FP register which can be used as a
9834 GPR save slot. */
9835 while ((!call_really_used_regs[save_reg_slot]
9836 || df_regs_ever_live_p (save_reg_slot)
9837 || cfun_fpr_save_p (save_reg_slot))
9838 && FP_REGNO_P (save_reg_slot))
9839 save_reg_slot++;
9840 if (!FP_REGNO_P (save_reg_slot))
9842 /* We only want to use ldgr/lgdr if we can get rid of
9843 stm/lm entirely. So undo the gpr slot allocation in
9844 case we ran out of FPR save slots. */
9845 for (j = 6; j <= 15; j++)
9846 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9847 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9848 break;
9850 cfun_gpr_save_slot (i) = save_reg_slot++;
9854 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9855 stdarg.
9856 This is a helper routine for s390_register_info. */
9858 static void
9859 s390_register_info_stdarg_fpr ()
9861 int i;
9862 int min_fpr;
9863 int max_fpr;
9865 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9866 f0-f4 for 64 bit. */
9867 if (!cfun->stdarg
9868 || !TARGET_HARD_FLOAT
9869 || !cfun->va_list_fpr_size
9870 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9871 return;
9873 min_fpr = crtl->args.info.fprs;
9874 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9875 if (max_fpr >= FP_ARG_NUM_REG)
9876 max_fpr = FP_ARG_NUM_REG - 1;
9878 /* FPR argument regs start at f0. */
9879 min_fpr += FPR0_REGNUM;
9880 max_fpr += FPR0_REGNUM;
9882 for (i = min_fpr; i <= max_fpr; i++)
9883 cfun_set_fpr_save (i);
9886 /* Reserve the GPR save slots for GPRs which need to be saved due to
9887 stdarg.
9888 This is a helper routine for s390_register_info. */
9890 static void
9891 s390_register_info_stdarg_gpr ()
9893 int i;
9894 int min_gpr;
9895 int max_gpr;
9897 if (!cfun->stdarg
9898 || !cfun->va_list_gpr_size
9899 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9900 return;
9902 min_gpr = crtl->args.info.gprs;
9903 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9904 if (max_gpr >= GP_ARG_NUM_REG)
9905 max_gpr = GP_ARG_NUM_REG - 1;
9907 /* GPR argument regs start at r2. */
9908 min_gpr += GPR2_REGNUM;
9909 max_gpr += GPR2_REGNUM;
9911 /* If r6 was supposed to be saved into an FPR and now needs to go to
9912 the stack for vararg we have to adjust the restore range to make
9913 sure that the restore is done from stack as well. */
9914 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9915 && min_gpr <= GPR6_REGNUM
9916 && max_gpr >= GPR6_REGNUM)
9918 if (cfun_frame_layout.first_restore_gpr == -1
9919 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9920 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9921 if (cfun_frame_layout.last_restore_gpr == -1
9922 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9923 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9926 if (cfun_frame_layout.first_save_gpr == -1
9927 || cfun_frame_layout.first_save_gpr > min_gpr)
9928 cfun_frame_layout.first_save_gpr = min_gpr;
9930 if (cfun_frame_layout.last_save_gpr == -1
9931 || cfun_frame_layout.last_save_gpr < max_gpr)
9932 cfun_frame_layout.last_save_gpr = max_gpr;
9934 for (i = min_gpr; i <= max_gpr; i++)
9935 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9938 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9939 prologue and epilogue. */
9941 static void
9942 s390_register_info_set_ranges ()
9944 int i, j;
9946 /* Find the first and the last save slot supposed to use the stack
9947 to set the restore range.
9948 Vararg regs might be marked as save to stack but only the
9949 call-saved regs really need restoring (i.e. r6). This code
9950 assumes that the vararg regs have not yet been recorded in
9951 cfun_gpr_save_slot. */
9952 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9953 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9954 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9955 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9956 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9957 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9960 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9961 for registers which need to be saved in function prologue.
9962 This function can be used until the insns emitted for save/restore
9963 of the regs are visible in the RTL stream. */
9965 static void
9966 s390_register_info ()
9968 int i;
9969 char clobbered_regs[32];
9971 gcc_assert (!epilogue_completed);
9973 if (reload_completed)
9974 /* After reload we rely on our own routine to determine which
9975 registers need saving. */
9976 s390_regs_ever_clobbered (clobbered_regs);
9977 else
9978 /* During reload we use regs_ever_live as a base since reload
9979 does changes in there which we otherwise would not be aware
9980 of. */
9981 for (i = 0; i < 32; i++)
9982 clobbered_regs[i] = df_regs_ever_live_p (i);
9984 for (i = 0; i < 32; i++)
9985 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9987 /* Mark the call-saved FPRs which need to be saved.
9988 This needs to be done before checking the special GPRs since the
9989 stack pointer usage depends on whether high FPRs have to be saved
9990 or not. */
9991 cfun_frame_layout.fpr_bitmap = 0;
9992 cfun_frame_layout.high_fprs = 0;
9993 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9994 if (clobbered_regs[i] && !call_really_used_regs[i])
9996 cfun_set_fpr_save (i);
9997 if (i >= FPR8_REGNUM)
9998 cfun_frame_layout.high_fprs++;
10001 /* Register 12 is used for GOT address, but also as temp in prologue
10002 for split-stack stdarg functions (unless r14 is available). */
10003 clobbered_regs[12]
10004 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10005 || (flag_split_stack && cfun->stdarg
10006 && (crtl->is_leaf || TARGET_TPF_PROFILING
10007 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10009 clobbered_regs[BASE_REGNUM]
10010 |= (cfun->machine->base_reg
10011 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10013 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10014 |= !!frame_pointer_needed;
10016 /* On pre z900 machines this might take until machine dependent
10017 reorg to decide.
10018 save_return_addr_p will only be set on non-zarch machines so
10019 there is no risk that r14 goes into an FPR instead of a stack
10020 slot. */
10021 clobbered_regs[RETURN_REGNUM]
10022 |= (!crtl->is_leaf
10023 || TARGET_TPF_PROFILING
10024 || cfun->machine->split_branches_pending_p
10025 || cfun_frame_layout.save_return_addr_p
10026 || crtl->calls_eh_return);
10028 clobbered_regs[STACK_POINTER_REGNUM]
10029 |= (!crtl->is_leaf
10030 || TARGET_TPF_PROFILING
10031 || cfun_save_high_fprs_p
10032 || get_frame_size () > 0
10033 || (reload_completed && cfun_frame_layout.frame_size > 0)
10034 || cfun->calls_alloca);
10036 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10038 for (i = 6; i < 16; i++)
10039 if (clobbered_regs[i])
10040 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10042 s390_register_info_stdarg_fpr ();
10043 s390_register_info_gprtofpr ();
10044 s390_register_info_set_ranges ();
10045 /* stdarg functions might need to save GPRs 2 to 6. This might
10046 override the GPR->FPR save decision made by
10047 s390_register_info_gprtofpr for r6 since vararg regs must go to
10048 the stack. */
10049 s390_register_info_stdarg_gpr ();
10052 /* This function is called by s390_optimize_prologue in order to get
10053 rid of unnecessary GPR save/restore instructions. The register info
10054 for the GPRs is re-computed and the ranges are re-calculated. */
10056 static void
10057 s390_optimize_register_info ()
10059 char clobbered_regs[32];
10060 int i;
10062 gcc_assert (epilogue_completed);
10063 gcc_assert (!cfun->machine->split_branches_pending_p);
10065 s390_regs_ever_clobbered (clobbered_regs);
10067 for (i = 0; i < 32; i++)
10068 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10070 /* There is still special treatment needed for cases invisible to
10071 s390_regs_ever_clobbered. */
10072 clobbered_regs[RETURN_REGNUM]
10073 |= (TARGET_TPF_PROFILING
10074 /* When expanding builtin_return_addr in ESA mode we do not
10075 know whether r14 will later be needed as scratch reg when
10076 doing branch splitting. So the builtin always accesses the
10077 r14 save slot and we need to stick to the save/restore
10078 decision for r14 even if it turns out that it didn't get
10079 clobbered. */
10080 || cfun_frame_layout.save_return_addr_p
10081 || crtl->calls_eh_return);
10083 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10085 for (i = 6; i < 16; i++)
10086 if (!clobbered_regs[i])
10087 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10089 s390_register_info_set_ranges ();
10090 s390_register_info_stdarg_gpr ();
10093 /* Fill cfun->machine with info about frame of current function. */
10095 static void
10096 s390_frame_info (void)
10098 HOST_WIDE_INT lowest_offset;
10100 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10101 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10103 /* The va_arg builtin uses a constant distance of 16 *
10104 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10105 pointer. So even if we are going to save the stack pointer in an
10106 FPR we need the stack space in order to keep the offsets
10107 correct. */
10108 if (cfun->stdarg && cfun_save_arg_fprs_p)
10110 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10112 if (cfun_frame_layout.first_save_gpr_slot == -1)
10113 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10116 cfun_frame_layout.frame_size = get_frame_size ();
10117 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10118 fatal_error (input_location,
10119 "total size of local variables exceeds architecture limit");
10121 if (!TARGET_PACKED_STACK)
10123 /* Fixed stack layout. */
10124 cfun_frame_layout.backchain_offset = 0;
10125 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10126 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10127 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10128 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10129 * UNITS_PER_LONG);
10131 else if (TARGET_BACKCHAIN)
10133 /* Kernel stack layout - packed stack, backchain, no float */
10134 gcc_assert (TARGET_SOFT_FLOAT);
10135 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10136 - UNITS_PER_LONG);
10138 /* The distance between the backchain and the return address
10139 save slot must not change. So we always need a slot for the
10140 stack pointer which resides in between. */
10141 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10143 cfun_frame_layout.gprs_offset
10144 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10146 /* FPRs will not be saved. Nevertheless pick sane values to
10147 keep area calculations valid. */
10148 cfun_frame_layout.f0_offset =
10149 cfun_frame_layout.f4_offset =
10150 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10152 else
10154 int num_fprs;
10156 /* Packed stack layout without backchain. */
10158 /* With stdarg FPRs need their dedicated slots. */
10159 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10160 : (cfun_fpr_save_p (FPR4_REGNUM) +
10161 cfun_fpr_save_p (FPR6_REGNUM)));
10162 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10164 num_fprs = (cfun->stdarg ? 2
10165 : (cfun_fpr_save_p (FPR0_REGNUM)
10166 + cfun_fpr_save_p (FPR2_REGNUM)));
10167 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10169 cfun_frame_layout.gprs_offset
10170 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10172 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10173 - cfun_frame_layout.high_fprs * 8);
10176 if (cfun_save_high_fprs_p)
10177 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10179 if (!crtl->is_leaf)
10180 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10182 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10183 sized area at the bottom of the stack. This is required also for
10184 leaf functions. When GCC generates a local stack reference it
10185 will always add STACK_POINTER_OFFSET to all these references. */
10186 if (crtl->is_leaf
10187 && !TARGET_TPF_PROFILING
10188 && cfun_frame_layout.frame_size == 0
10189 && !cfun->calls_alloca)
10190 return;
10192 /* Calculate the number of bytes we have used in our own register
10193 save area. With the packed stack layout we can re-use the
10194 remaining bytes for normal stack elements. */
10196 if (TARGET_PACKED_STACK)
10197 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10198 cfun_frame_layout.f4_offset),
10199 cfun_frame_layout.gprs_offset);
10200 else
10201 lowest_offset = 0;
10203 if (TARGET_BACKCHAIN)
10204 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10206 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10208 /* If under 31 bit an odd number of gprs has to be saved we have to
10209 adjust the frame size to sustain 8 byte alignment of stack
10210 frames. */
10211 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10212 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10213 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10216 /* Generate frame layout. Fills in register and frame data for the current
10217 function in cfun->machine. This routine can be called multiple times;
10218 it will re-do the complete frame layout every time. */
10220 static void
10221 s390_init_frame_layout (void)
10223 HOST_WIDE_INT frame_size;
10224 int base_used;
10226 /* After LRA the frame layout is supposed to be read-only and should
10227 not be re-computed. */
10228 if (reload_completed)
10229 return;
10231 /* On S/390 machines, we may need to perform branch splitting, which
10232 will require both base and return address register. We have no
10233 choice but to assume we're going to need them until right at the
10234 end of the machine dependent reorg phase. */
10235 if (!TARGET_CPU_ZARCH)
10236 cfun->machine->split_branches_pending_p = true;
10240 frame_size = cfun_frame_layout.frame_size;
10242 /* Try to predict whether we'll need the base register. */
10243 base_used = cfun->machine->split_branches_pending_p
10244 || crtl->uses_const_pool
10245 || (!DISP_IN_RANGE (frame_size)
10246 && !CONST_OK_FOR_K (frame_size));
10248 /* Decide which register to use as literal pool base. In small
10249 leaf functions, try to use an unused call-clobbered register
10250 as base register to avoid save/restore overhead. */
10251 if (!base_used)
10252 cfun->machine->base_reg = NULL_RTX;
10253 else
10255 int br = 0;
10257 if (crtl->is_leaf)
10258 /* Prefer r5 (most likely to be free). */
10259 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10261 cfun->machine->base_reg =
10262 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10265 s390_register_info ();
10266 s390_frame_info ();
10268 while (frame_size != cfun_frame_layout.frame_size);
10271 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10272 the TX is nonescaping. A transaction is considered escaping if
10273 there is at least one path from tbegin returning CC0 to the
10274 function exit block without an tend.
10276 The check so far has some limitations:
10277 - only single tbegin/tend BBs are supported
10278 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10279 - when CC is copied to a GPR and the CC0 check is done with the GPR
10280 this is not supported
10283 static void
10284 s390_optimize_nonescaping_tx (void)
10286 const unsigned int CC0 = 1 << 3;
10287 basic_block tbegin_bb = NULL;
10288 basic_block tend_bb = NULL;
10289 basic_block bb;
10290 rtx_insn *insn;
10291 bool result = true;
10292 int bb_index;
10293 rtx_insn *tbegin_insn = NULL;
10295 if (!cfun->machine->tbegin_p)
10296 return;
10298 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10300 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10302 if (!bb)
10303 continue;
10305 FOR_BB_INSNS (bb, insn)
10307 rtx ite, cc, pat, target;
10308 unsigned HOST_WIDE_INT mask;
10310 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10311 continue;
10313 pat = PATTERN (insn);
10315 if (GET_CODE (pat) == PARALLEL)
10316 pat = XVECEXP (pat, 0, 0);
10318 if (GET_CODE (pat) != SET
10319 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10320 continue;
10322 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10324 rtx_insn *tmp;
10326 tbegin_insn = insn;
10328 /* Just return if the tbegin doesn't have clobbers. */
10329 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10330 return;
10332 if (tbegin_bb != NULL)
10333 return;
10335 /* Find the next conditional jump. */
10336 for (tmp = NEXT_INSN (insn);
10337 tmp != NULL_RTX;
10338 tmp = NEXT_INSN (tmp))
10340 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10341 return;
10342 if (!JUMP_P (tmp))
10343 continue;
10345 ite = SET_SRC (PATTERN (tmp));
10346 if (GET_CODE (ite) != IF_THEN_ELSE)
10347 continue;
10349 cc = XEXP (XEXP (ite, 0), 0);
10350 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10351 || GET_MODE (cc) != CCRAWmode
10352 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10353 return;
10355 if (bb->succs->length () != 2)
10356 return;
10358 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10359 if (GET_CODE (XEXP (ite, 0)) == NE)
10360 mask ^= 0xf;
10362 if (mask == CC0)
10363 target = XEXP (ite, 1);
10364 else if (mask == (CC0 ^ 0xf))
10365 target = XEXP (ite, 2);
10366 else
10367 return;
10370 edge_iterator ei;
10371 edge e1, e2;
10373 ei = ei_start (bb->succs);
10374 e1 = ei_safe_edge (ei);
10375 ei_next (&ei);
10376 e2 = ei_safe_edge (ei);
10378 if (e2->flags & EDGE_FALLTHRU)
10380 e2 = e1;
10381 e1 = ei_safe_edge (ei);
10384 if (!(e1->flags & EDGE_FALLTHRU))
10385 return;
10387 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10389 if (tmp == BB_END (bb))
10390 break;
10394 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10396 if (tend_bb != NULL)
10397 return;
10398 tend_bb = bb;
10403 /* Either we successfully remove the FPR clobbers here or we are not
10404 able to do anything for this TX. Both cases don't qualify for
10405 another look. */
10406 cfun->machine->tbegin_p = false;
10408 if (tbegin_bb == NULL || tend_bb == NULL)
10409 return;
10411 calculate_dominance_info (CDI_POST_DOMINATORS);
10412 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10413 free_dominance_info (CDI_POST_DOMINATORS);
10415 if (!result)
10416 return;
10418 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10419 gen_rtvec (2,
10420 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10421 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10422 INSN_CODE (tbegin_insn) = -1;
10423 df_insn_rescan (tbegin_insn);
10425 return;
10428 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10429 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10431 static unsigned int
10432 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10434 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10437 /* Implement TARGET_HARD_REGNO_MODE_OK.
10439 Integer modes <= word size fit into any GPR.
10440 Integer modes > word size fit into successive GPRs, starting with
10441 an even-numbered register.
10442 SImode and DImode fit into FPRs as well.
10444 Floating point modes <= word size fit into any FPR or GPR.
10445 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10446 into any FPR, or an even-odd GPR pair.
10447 TFmode fits only into an even-odd FPR pair.
10449 Complex floating point modes fit either into two FPRs, or into
10450 successive GPRs (again starting with an even number).
10451 TCmode fits only into two successive even-odd FPR pairs.
10453 Condition code modes fit only into the CC register. */
10455 static bool
10456 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10458 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10459 return false;
10461 switch (REGNO_REG_CLASS (regno))
10463 case VEC_REGS:
10464 return ((GET_MODE_CLASS (mode) == MODE_INT
10465 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10466 || mode == DFmode
10467 || (TARGET_VXE && mode == SFmode)
10468 || s390_vector_mode_supported_p (mode));
10469 break;
10470 case FP_REGS:
10471 if (TARGET_VX
10472 && ((GET_MODE_CLASS (mode) == MODE_INT
10473 && s390_class_max_nregs (FP_REGS, mode) == 1)
10474 || mode == DFmode
10475 || s390_vector_mode_supported_p (mode)))
10476 return true;
10478 if (REGNO_PAIR_OK (regno, mode))
10480 if (mode == SImode || mode == DImode)
10481 return true;
10483 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10484 return true;
10486 break;
10487 case ADDR_REGS:
10488 if (FRAME_REGNO_P (regno) && mode == Pmode)
10489 return true;
10491 /* fallthrough */
10492 case GENERAL_REGS:
10493 if (REGNO_PAIR_OK (regno, mode))
10495 if (TARGET_ZARCH
10496 || (mode != TFmode && mode != TCmode && mode != TDmode))
10497 return true;
10499 break;
10500 case CC_REGS:
10501 if (GET_MODE_CLASS (mode) == MODE_CC)
10502 return true;
10503 break;
10504 case ACCESS_REGS:
10505 if (REGNO_PAIR_OK (regno, mode))
10507 if (mode == SImode || mode == Pmode)
10508 return true;
10510 break;
10511 default:
10512 return false;
10515 return false;
10518 /* Implement TARGET_MODES_TIEABLE_P. */
10520 static bool
10521 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10523 return ((mode1 == SFmode || mode1 == DFmode)
10524 == (mode2 == SFmode || mode2 == DFmode));
10527 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10529 bool
10530 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10532 /* Once we've decided upon a register to use as base register, it must
10533 no longer be used for any other purpose. */
10534 if (cfun->machine->base_reg)
10535 if (REGNO (cfun->machine->base_reg) == old_reg
10536 || REGNO (cfun->machine->base_reg) == new_reg)
10537 return false;
10539 /* Prevent regrename from using call-saved regs which haven't
10540 actually been saved. This is necessary since regrename assumes
10541 the backend save/restore decisions are based on
10542 df_regs_ever_live. Since we have our own routine we have to tell
10543 regrename manually about it. */
10544 if (GENERAL_REGNO_P (new_reg)
10545 && !call_really_used_regs[new_reg]
10546 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10547 return false;
10549 return true;
10552 /* Return nonzero if register REGNO can be used as a scratch register
10553 in peephole2. */
10555 static bool
10556 s390_hard_regno_scratch_ok (unsigned int regno)
10558 /* See s390_hard_regno_rename_ok. */
10559 if (GENERAL_REGNO_P (regno)
10560 && !call_really_used_regs[regno]
10561 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10562 return false;
10564 return true;
10567 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10568 code that runs in z/Architecture mode, but conforms to the 31-bit
10569 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10570 bytes are saved across calls, however. */
10572 static bool
10573 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10575 if (!TARGET_64BIT
10576 && TARGET_ZARCH
10577 && GET_MODE_SIZE (mode) > 4
10578 && ((regno >= 6 && regno <= 15) || regno == 32))
10579 return true;
10581 if (TARGET_VX
10582 && GET_MODE_SIZE (mode) > 8
10583 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10584 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10585 return true;
10587 return false;
10590 /* Maximum number of registers to represent a value of mode MODE
10591 in a register of class RCLASS. */
10594 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10596 int reg_size;
10597 bool reg_pair_required_p = false;
10599 switch (rclass)
10601 case FP_REGS:
10602 case VEC_REGS:
10603 reg_size = TARGET_VX ? 16 : 8;
10605 /* TF and TD modes would fit into a VR but we put them into a
10606 register pair since we do not have 128bit FP instructions on
10607 full VRs. */
10608 if (TARGET_VX
10609 && SCALAR_FLOAT_MODE_P (mode)
10610 && GET_MODE_SIZE (mode) >= 16)
10611 reg_pair_required_p = true;
10613 /* Even if complex types would fit into a single FPR/VR we force
10614 them into a register pair to deal with the parts more easily.
10615 (FIXME: What about complex ints?) */
10616 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10617 reg_pair_required_p = true;
10618 break;
10619 case ACCESS_REGS:
10620 reg_size = 4;
10621 break;
10622 default:
10623 reg_size = UNITS_PER_WORD;
10624 break;
10627 if (reg_pair_required_p)
10628 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10630 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10633 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10635 static bool
10636 s390_can_change_mode_class (machine_mode from_mode,
10637 machine_mode to_mode,
10638 reg_class_t rclass)
10640 machine_mode small_mode;
10641 machine_mode big_mode;
10643 /* V1TF and TF have different representations in vector
10644 registers. */
10645 if (reg_classes_intersect_p (VEC_REGS, rclass)
10646 && ((from_mode == V1TFmode && to_mode == TFmode)
10647 || (from_mode == TFmode && to_mode == V1TFmode)))
10648 return false;
10650 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10651 return true;
10653 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10655 small_mode = from_mode;
10656 big_mode = to_mode;
10658 else
10660 small_mode = to_mode;
10661 big_mode = from_mode;
10664 /* Values residing in VRs are little-endian style. All modes are
10665 placed left-aligned in an VR. This means that we cannot allow
10666 switching between modes with differing sizes. Also if the vector
10667 facility is available we still place TFmode values in VR register
10668 pairs, since the only instructions we have operating on TFmodes
10669 only deal with register pairs. Therefore we have to allow DFmode
10670 subregs of TFmodes to enable the TFmode splitters. */
10671 if (reg_classes_intersect_p (VEC_REGS, rclass)
10672 && (GET_MODE_SIZE (small_mode) < 8
10673 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10674 return false;
10676 /* Likewise for access registers, since they have only half the
10677 word size on 64-bit. */
10678 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10679 return false;
10681 return true;
10684 /* Return true if we use LRA instead of reload pass. */
10685 static bool
10686 s390_lra_p (void)
10688 return s390_lra_flag;
10691 /* Return true if register FROM can be eliminated via register TO. */
10693 static bool
10694 s390_can_eliminate (const int from, const int to)
10696 /* On zSeries machines, we have not marked the base register as fixed.
10697 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10698 If a function requires the base register, we say here that this
10699 elimination cannot be performed. This will cause reload to free
10700 up the base register (as if it were fixed). On the other hand,
10701 if the current function does *not* require the base register, we
10702 say here the elimination succeeds, which in turn allows reload
10703 to allocate the base register for any other purpose. */
10704 if (from == BASE_REGNUM && to == BASE_REGNUM)
10706 if (TARGET_CPU_ZARCH)
10708 s390_init_frame_layout ();
10709 return cfun->machine->base_reg == NULL_RTX;
10712 return false;
10715 /* Everything else must point into the stack frame. */
10716 gcc_assert (to == STACK_POINTER_REGNUM
10717 || to == HARD_FRAME_POINTER_REGNUM);
10719 gcc_assert (from == FRAME_POINTER_REGNUM
10720 || from == ARG_POINTER_REGNUM
10721 || from == RETURN_ADDRESS_POINTER_REGNUM);
10723 /* Make sure we actually saved the return address. */
10724 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10725 if (!crtl->calls_eh_return
10726 && !cfun->stdarg
10727 && !cfun_frame_layout.save_return_addr_p)
10728 return false;
10730 return true;
10733 /* Return offset between register FROM and TO initially after prolog. */
10735 HOST_WIDE_INT
10736 s390_initial_elimination_offset (int from, int to)
10738 HOST_WIDE_INT offset;
10740 /* ??? Why are we called for non-eliminable pairs? */
10741 if (!s390_can_eliminate (from, to))
10742 return 0;
10744 switch (from)
10746 case FRAME_POINTER_REGNUM:
10747 offset = (get_frame_size()
10748 + STACK_POINTER_OFFSET
10749 + crtl->outgoing_args_size);
10750 break;
10752 case ARG_POINTER_REGNUM:
10753 s390_init_frame_layout ();
10754 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10755 break;
10757 case RETURN_ADDRESS_POINTER_REGNUM:
10758 s390_init_frame_layout ();
10760 if (cfun_frame_layout.first_save_gpr_slot == -1)
10762 /* If it turns out that for stdarg nothing went into the reg
10763 save area we also do not need the return address
10764 pointer. */
10765 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10766 return 0;
10768 gcc_unreachable ();
10771 /* In order to make the following work it is not necessary for
10772 r14 to have a save slot. It is sufficient if one other GPR
10773 got one. Since the GPRs are always stored without gaps we
10774 are able to calculate where the r14 save slot would
10775 reside. */
10776 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10777 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10778 UNITS_PER_LONG);
10779 break;
10781 case BASE_REGNUM:
10782 offset = 0;
10783 break;
10785 default:
10786 gcc_unreachable ();
10789 return offset;
10792 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10793 to register BASE. Return generated insn. */
10795 static rtx
10796 save_fpr (rtx base, int offset, int regnum)
10798 rtx addr;
10799 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10801 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10802 set_mem_alias_set (addr, get_varargs_alias_set ());
10803 else
10804 set_mem_alias_set (addr, get_frame_alias_set ());
10806 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10809 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10810 to register BASE. Return generated insn. */
10812 static rtx
10813 restore_fpr (rtx base, int offset, int regnum)
10815 rtx addr;
10816 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10817 set_mem_alias_set (addr, get_frame_alias_set ());
10819 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10822 /* Return true if REGNO is a global register, but not one
10823 of the special ones that need to be saved/restored in anyway. */
10825 static inline bool
10826 global_not_special_regno_p (int regno)
10828 return (global_regs[regno]
10829 /* These registers are special and need to be
10830 restored in any case. */
10831 && !(regno == STACK_POINTER_REGNUM
10832 || regno == RETURN_REGNUM
10833 || regno == BASE_REGNUM
10834 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10837 /* Generate insn to save registers FIRST to LAST into
10838 the register save area located at offset OFFSET
10839 relative to register BASE. */
10841 static rtx
10842 save_gprs (rtx base, int offset, int first, int last)
10844 rtx addr, insn, note;
10845 int i;
10847 addr = plus_constant (Pmode, base, offset);
10848 addr = gen_rtx_MEM (Pmode, addr);
10850 set_mem_alias_set (addr, get_frame_alias_set ());
10852 /* Special-case single register. */
10853 if (first == last)
10855 if (TARGET_64BIT)
10856 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10857 else
10858 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10860 if (!global_not_special_regno_p (first))
10861 RTX_FRAME_RELATED_P (insn) = 1;
10862 return insn;
10866 insn = gen_store_multiple (addr,
10867 gen_rtx_REG (Pmode, first),
10868 GEN_INT (last - first + 1));
10870 if (first <= 6 && cfun->stdarg)
10871 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10873 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10875 if (first + i <= 6)
10876 set_mem_alias_set (mem, get_varargs_alias_set ());
10879 /* We need to set the FRAME_RELATED flag on all SETs
10880 inside the store-multiple pattern.
10882 However, we must not emit DWARF records for registers 2..5
10883 if they are stored for use by variable arguments ...
10885 ??? Unfortunately, it is not enough to simply not the
10886 FRAME_RELATED flags for those SETs, because the first SET
10887 of the PARALLEL is always treated as if it had the flag
10888 set, even if it does not. Therefore we emit a new pattern
10889 without those registers as REG_FRAME_RELATED_EXPR note. */
10891 if (first >= 6 && !global_not_special_regno_p (first))
10893 rtx pat = PATTERN (insn);
10895 for (i = 0; i < XVECLEN (pat, 0); i++)
10896 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10897 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10898 0, i)))))
10899 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10901 RTX_FRAME_RELATED_P (insn) = 1;
10903 else if (last >= 6)
10905 int start;
10907 for (start = first >= 6 ? first : 6; start <= last; start++)
10908 if (!global_not_special_regno_p (start))
10909 break;
10911 if (start > last)
10912 return insn;
10914 addr = plus_constant (Pmode, base,
10915 offset + (start - first) * UNITS_PER_LONG);
10917 if (start == last)
10919 if (TARGET_64BIT)
10920 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10921 gen_rtx_REG (Pmode, start));
10922 else
10923 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10924 gen_rtx_REG (Pmode, start));
10925 note = PATTERN (note);
10927 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10928 RTX_FRAME_RELATED_P (insn) = 1;
10930 return insn;
10933 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10934 gen_rtx_REG (Pmode, start),
10935 GEN_INT (last - start + 1));
10936 note = PATTERN (note);
10938 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10940 for (i = 0; i < XVECLEN (note, 0); i++)
10941 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10942 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10943 0, i)))))
10944 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10946 RTX_FRAME_RELATED_P (insn) = 1;
10949 return insn;
10952 /* Generate insn to restore registers FIRST to LAST from
10953 the register save area located at offset OFFSET
10954 relative to register BASE. */
10956 static rtx
10957 restore_gprs (rtx base, int offset, int first, int last)
10959 rtx addr, insn;
10961 addr = plus_constant (Pmode, base, offset);
10962 addr = gen_rtx_MEM (Pmode, addr);
10963 set_mem_alias_set (addr, get_frame_alias_set ());
10965 /* Special-case single register. */
10966 if (first == last)
10968 if (TARGET_64BIT)
10969 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10970 else
10971 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10973 RTX_FRAME_RELATED_P (insn) = 1;
10974 return insn;
10977 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10978 addr,
10979 GEN_INT (last - first + 1));
10980 RTX_FRAME_RELATED_P (insn) = 1;
10981 return insn;
10984 /* Return insn sequence to load the GOT register. */
10986 rtx_insn *
10987 s390_load_got (void)
10989 rtx_insn *insns;
10991 /* We cannot use pic_offset_table_rtx here since we use this
10992 function also for non-pic if __tls_get_offset is called and in
10993 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10994 aren't usable. */
10995 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10997 start_sequence ();
10999 if (TARGET_CPU_ZARCH)
11001 emit_move_insn (got_rtx, s390_got_symbol ());
11003 else
11005 rtx offset;
11007 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
11008 UNSPEC_LTREL_OFFSET);
11009 offset = gen_rtx_CONST (Pmode, offset);
11010 offset = force_const_mem (Pmode, offset);
11012 emit_move_insn (got_rtx, offset);
11014 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
11015 UNSPEC_LTREL_BASE);
11016 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
11018 emit_move_insn (got_rtx, offset);
11021 insns = get_insns ();
11022 end_sequence ();
11023 return insns;
11026 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11027 and the change to the stack pointer. */
11029 static void
11030 s390_emit_stack_tie (void)
11032 rtx mem = gen_frame_mem (BLKmode,
11033 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
11035 emit_insn (gen_stack_tie (mem));
11038 /* Copy GPRS into FPR save slots. */
11040 static void
11041 s390_save_gprs_to_fprs (void)
11043 int i;
11045 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11046 return;
11048 for (i = 6; i < 16; i++)
11050 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11052 rtx_insn *insn =
11053 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11054 gen_rtx_REG (DImode, i));
11055 RTX_FRAME_RELATED_P (insn) = 1;
11056 /* This prevents dwarf2cfi from interpreting the set. Doing
11057 so it might emit def_cfa_register infos setting an FPR as
11058 new CFA. */
11059 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11064 /* Restore GPRs from FPR save slots. */
11066 static void
11067 s390_restore_gprs_from_fprs (void)
11069 int i;
11071 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11072 return;
11074 for (i = 6; i < 16; i++)
11076 rtx_insn *insn;
11078 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11079 continue;
11081 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11083 if (i == STACK_POINTER_REGNUM)
11084 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11085 else
11086 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11088 df_set_regs_ever_live (i, true);
11089 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11090 if (i == STACK_POINTER_REGNUM)
11091 add_reg_note (insn, REG_CFA_DEF_CFA,
11092 plus_constant (Pmode, stack_pointer_rtx,
11093 STACK_POINTER_OFFSET));
11094 RTX_FRAME_RELATED_P (insn) = 1;
11099 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11100 generation. */
11102 namespace {
11104 const pass_data pass_data_s390_early_mach =
11106 RTL_PASS, /* type */
11107 "early_mach", /* name */
11108 OPTGROUP_NONE, /* optinfo_flags */
11109 TV_MACH_DEP, /* tv_id */
11110 0, /* properties_required */
11111 0, /* properties_provided */
11112 0, /* properties_destroyed */
11113 0, /* todo_flags_start */
11114 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11117 class pass_s390_early_mach : public rtl_opt_pass
11119 public:
11120 pass_s390_early_mach (gcc::context *ctxt)
11121 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11124 /* opt_pass methods: */
11125 virtual unsigned int execute (function *);
11127 }; // class pass_s390_early_mach
11129 unsigned int
11130 pass_s390_early_mach::execute (function *fun)
11132 rtx_insn *insn;
11134 /* Try to get rid of the FPR clobbers. */
11135 s390_optimize_nonescaping_tx ();
11137 /* Re-compute register info. */
11138 s390_register_info ();
11140 /* If we're using a base register, ensure that it is always valid for
11141 the first non-prologue instruction. */
11142 if (fun->machine->base_reg)
11143 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11145 /* Annotate all constant pool references to let the scheduler know
11146 they implicitly use the base register. */
11147 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11148 if (INSN_P (insn))
11150 annotate_constant_pool_refs (&PATTERN (insn));
11151 df_insn_rescan (insn);
11153 return 0;
11156 } // anon namespace
11158 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11159 - push too big immediates to the literal pool and annotate the refs
11160 - emit frame related notes for stack pointer changes. */
11162 static rtx
11163 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11165 rtx insn;
11166 rtx orig_offset = offset;
11168 gcc_assert (REG_P (target));
11169 gcc_assert (REG_P (reg));
11170 gcc_assert (CONST_INT_P (offset));
11172 if (offset == const0_rtx) /* lr/lgr */
11174 insn = emit_move_insn (target, reg);
11176 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
11178 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11179 offset));
11181 else
11183 if (!satisfies_constraint_K (offset) /* ahi/aghi */
11184 && (!TARGET_EXTIMM
11185 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
11186 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11187 offset = force_const_mem (Pmode, offset);
11189 if (target != reg)
11191 insn = emit_move_insn (target, reg);
11192 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11195 insn = emit_insn (gen_add2_insn (target, offset));
11197 if (!CONST_INT_P (offset))
11199 annotate_constant_pool_refs (&PATTERN (insn));
11201 if (frame_related_p)
11202 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11203 gen_rtx_SET (target,
11204 gen_rtx_PLUS (Pmode, target,
11205 orig_offset)));
11209 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11211 /* If this is a stack adjustment and we are generating a stack clash
11212 prologue, then add a REG_STACK_CHECK note to signal that this insn
11213 should be left alone. */
11214 if (flag_stack_clash_protection && target == stack_pointer_rtx)
11215 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11217 return insn;
11220 /* Emit a compare instruction with a volatile memory access as stack
11221 probe. It does not waste store tags and does not clobber any
11222 registers apart from the condition code. */
11223 static void
11224 s390_emit_stack_probe (rtx addr)
11226 rtx tmp = gen_rtx_MEM (Pmode, addr);
11227 MEM_VOLATILE_P (tmp) = 1;
11228 s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
11229 emit_insn (gen_blockage ());
11232 /* Use a runtime loop if we have to emit more probes than this. */
11233 #define MIN_UNROLL_PROBES 3
11235 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11236 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11237 probe relative to the stack pointer.
11239 Note that SIZE is negative.
11241 The return value is true if TEMP_REG has been clobbered. */
11242 static bool
11243 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11244 rtx temp_reg)
11246 bool temp_reg_clobbered_p = false;
11247 HOST_WIDE_INT probe_interval
11248 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11249 HOST_WIDE_INT guard_size
11250 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
11252 if (flag_stack_clash_protection)
11254 if (last_probe_offset + -INTVAL (size) < guard_size)
11255 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11256 else
11258 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11259 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11260 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11261 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11263 if (num_probes < MIN_UNROLL_PROBES)
11265 /* Emit unrolled probe statements. */
11267 for (unsigned int i = 0; i < num_probes; i++)
11269 s390_prologue_plus_offset (stack_pointer_rtx,
11270 stack_pointer_rtx,
11271 GEN_INT (-probe_interval), true);
11272 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11273 stack_pointer_rtx,
11274 offset));
11276 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11278 else
11280 /* Emit a loop probing the pages. */
11282 rtx_code_label *loop_start_label = gen_label_rtx ();
11284 /* From now on temp_reg will be the CFA register. */
11285 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11286 GEN_INT (-rounded_size), true);
11287 emit_label (loop_start_label);
11289 s390_prologue_plus_offset (stack_pointer_rtx,
11290 stack_pointer_rtx,
11291 GEN_INT (-probe_interval), false);
11292 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11293 stack_pointer_rtx,
11294 offset));
11295 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11296 GT, NULL_RTX,
11297 Pmode, 1, loop_start_label);
11299 /* Without this make_edges ICEes. */
11300 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11301 LABEL_NUSES (loop_start_label) = 1;
11303 /* That's going to be a NOP since stack pointer and
11304 temp_reg are supposed to be the same here. We just
11305 emit it to set the CFA reg back to r15. */
11306 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11307 const0_rtx, true);
11308 temp_reg_clobbered_p = true;
11309 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11312 /* Handle any residual allocation request. */
11313 s390_prologue_plus_offset (stack_pointer_rtx,
11314 stack_pointer_rtx,
11315 GEN_INT (-residual), true);
11316 last_probe_offset += residual;
11317 if (last_probe_offset >= probe_interval)
11318 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11319 stack_pointer_rtx,
11320 GEN_INT (residual
11321 - UNITS_PER_LONG)));
11323 return temp_reg_clobbered_p;
11327 /* Subtract frame size from stack pointer. */
11328 s390_prologue_plus_offset (stack_pointer_rtx,
11329 stack_pointer_rtx,
11330 size, true);
11332 return temp_reg_clobbered_p;
11335 /* Expand the prologue into a bunch of separate insns. */
11337 void
11338 s390_emit_prologue (void)
11340 rtx insn, addr;
11341 rtx temp_reg;
11342 int i;
11343 int offset;
11344 int next_fpr = 0;
11346 /* Choose best register to use for temp use within prologue.
11347 TPF with profiling must avoid the register 14 - the tracing function
11348 needs the original contents of r14 to be preserved. */
11350 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11351 && !crtl->is_leaf
11352 && !TARGET_TPF_PROFILING)
11353 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11354 else if (flag_split_stack && cfun->stdarg)
11355 temp_reg = gen_rtx_REG (Pmode, 12);
11356 else
11357 temp_reg = gen_rtx_REG (Pmode, 1);
11359 /* When probing for stack-clash mitigation, we have to track the distance
11360 between the stack pointer and closest known reference.
11362 Most of the time we have to make a worst cast assumption. The
11363 only exception is when TARGET_BACKCHAIN is active, in which case
11364 we know *sp (offset 0) was written. */
11365 HOST_WIDE_INT probe_interval
11366 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
11367 HOST_WIDE_INT last_probe_offset
11368 = (TARGET_BACKCHAIN
11369 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11370 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11372 s390_save_gprs_to_fprs ();
11374 /* Save call saved gprs. */
11375 if (cfun_frame_layout.first_save_gpr != -1)
11377 insn = save_gprs (stack_pointer_rtx,
11378 cfun_frame_layout.gprs_offset +
11379 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11380 - cfun_frame_layout.first_save_gpr_slot),
11381 cfun_frame_layout.first_save_gpr,
11382 cfun_frame_layout.last_save_gpr);
11384 /* This is not 100% correct. If we have more than one register saved,
11385 then LAST_PROBE_OFFSET can move even closer to sp. */
11386 last_probe_offset
11387 = (cfun_frame_layout.gprs_offset +
11388 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11389 - cfun_frame_layout.first_save_gpr_slot));
11391 emit_insn (insn);
11394 /* Dummy insn to mark literal pool slot. */
11396 if (cfun->machine->base_reg)
11397 emit_insn (gen_main_pool (cfun->machine->base_reg));
11399 offset = cfun_frame_layout.f0_offset;
11401 /* Save f0 and f2. */
11402 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11404 if (cfun_fpr_save_p (i))
11406 save_fpr (stack_pointer_rtx, offset, i);
11407 if (offset < last_probe_offset)
11408 last_probe_offset = offset;
11409 offset += 8;
11411 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11412 offset += 8;
11415 /* Save f4 and f6. */
11416 offset = cfun_frame_layout.f4_offset;
11417 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11419 if (cfun_fpr_save_p (i))
11421 insn = save_fpr (stack_pointer_rtx, offset, i);
11422 if (offset < last_probe_offset)
11423 last_probe_offset = offset;
11424 offset += 8;
11426 /* If f4 and f6 are call clobbered they are saved due to
11427 stdargs and therefore are not frame related. */
11428 if (!call_really_used_regs[i])
11429 RTX_FRAME_RELATED_P (insn) = 1;
11431 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11432 offset += 8;
11435 if (TARGET_PACKED_STACK
11436 && cfun_save_high_fprs_p
11437 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11439 offset = (cfun_frame_layout.f8_offset
11440 + (cfun_frame_layout.high_fprs - 1) * 8);
11442 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11443 if (cfun_fpr_save_p (i))
11445 insn = save_fpr (stack_pointer_rtx, offset, i);
11446 if (offset < last_probe_offset)
11447 last_probe_offset = offset;
11449 RTX_FRAME_RELATED_P (insn) = 1;
11450 offset -= 8;
11452 if (offset >= cfun_frame_layout.f8_offset)
11453 next_fpr = i;
11456 if (!TARGET_PACKED_STACK)
11457 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11459 if (flag_stack_usage_info)
11460 current_function_static_stack_size = cfun_frame_layout.frame_size;
11462 /* Decrement stack pointer. */
11464 if (cfun_frame_layout.frame_size > 0)
11466 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11467 rtx_insn *stack_pointer_backup_loc;
11468 bool temp_reg_clobbered_p;
11470 if (s390_stack_size)
11472 HOST_WIDE_INT stack_guard;
11474 if (s390_stack_guard)
11475 stack_guard = s390_stack_guard;
11476 else
11478 /* If no value for stack guard is provided the smallest power of 2
11479 larger than the current frame size is chosen. */
11480 stack_guard = 1;
11481 while (stack_guard < cfun_frame_layout.frame_size)
11482 stack_guard <<= 1;
11485 if (cfun_frame_layout.frame_size >= s390_stack_size)
11487 warning (0, "frame size of function %qs is %wd"
11488 " bytes exceeding user provided stack limit of "
11489 "%d bytes. "
11490 "An unconditional trap is added.",
11491 current_function_name(), cfun_frame_layout.frame_size,
11492 s390_stack_size);
11493 emit_insn (gen_trap ());
11494 emit_barrier ();
11496 else
11498 /* stack_guard has to be smaller than s390_stack_size.
11499 Otherwise we would emit an AND with zero which would
11500 not match the test under mask pattern. */
11501 if (stack_guard >= s390_stack_size)
11503 warning (0, "frame size of function %qs is %wd"
11504 " bytes which is more than half the stack size. "
11505 "The dynamic check would not be reliable. "
11506 "No check emitted for this function.",
11507 current_function_name(),
11508 cfun_frame_layout.frame_size);
11510 else
11512 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11513 & ~(stack_guard - 1));
11515 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11516 GEN_INT (stack_check_mask));
11517 if (TARGET_64BIT)
11518 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11519 t, const0_rtx),
11520 t, const0_rtx, const0_rtx));
11521 else
11522 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11523 t, const0_rtx),
11524 t, const0_rtx, const0_rtx));
11529 if (s390_warn_framesize > 0
11530 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11531 warning (0, "frame size of %qs is %wd bytes",
11532 current_function_name (), cfun_frame_layout.frame_size);
11534 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11535 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11537 /* Save the location where we could backup the incoming stack
11538 pointer. */
11539 stack_pointer_backup_loc = get_last_insn ();
11541 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11542 temp_reg);
11544 if (TARGET_BACKCHAIN || next_fpr)
11546 if (temp_reg_clobbered_p)
11548 /* allocate_stack_space had to make use of temp_reg and
11549 we need it to hold a backup of the incoming stack
11550 pointer. Calculate back that value from the current
11551 stack pointer. */
11552 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11553 GEN_INT (cfun_frame_layout.frame_size),
11554 false);
11556 else
11558 /* allocate_stack_space didn't actually required
11559 temp_reg. Insert the stack pointer backup insn
11560 before the stack pointer decrement code - knowing now
11561 that the value will survive. */
11562 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11563 stack_pointer_backup_loc);
11567 /* Set backchain. */
11569 if (TARGET_BACKCHAIN)
11571 if (cfun_frame_layout.backchain_offset)
11572 addr = gen_rtx_MEM (Pmode,
11573 plus_constant (Pmode, stack_pointer_rtx,
11574 cfun_frame_layout.backchain_offset));
11575 else
11576 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11577 set_mem_alias_set (addr, get_frame_alias_set ());
11578 insn = emit_insn (gen_move_insn (addr, temp_reg));
11581 /* If we support non-call exceptions (e.g. for Java),
11582 we need to make sure the backchain pointer is set up
11583 before any possibly trapping memory access. */
11584 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11586 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11587 emit_clobber (addr);
11590 else if (flag_stack_clash_protection)
11591 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11593 /* Save fprs 8 - 15 (64 bit ABI). */
11595 if (cfun_save_high_fprs_p && next_fpr)
11597 /* If the stack might be accessed through a different register
11598 we have to make sure that the stack pointer decrement is not
11599 moved below the use of the stack slots. */
11600 s390_emit_stack_tie ();
11602 insn = emit_insn (gen_add2_insn (temp_reg,
11603 GEN_INT (cfun_frame_layout.f8_offset)));
11605 offset = 0;
11607 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11608 if (cfun_fpr_save_p (i))
11610 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11611 cfun_frame_layout.frame_size
11612 + cfun_frame_layout.f8_offset
11613 + offset);
11615 insn = save_fpr (temp_reg, offset, i);
11616 offset += 8;
11617 RTX_FRAME_RELATED_P (insn) = 1;
11618 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11619 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11620 gen_rtx_REG (DFmode, i)));
11624 /* Set frame pointer, if needed. */
11626 if (frame_pointer_needed)
11628 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11629 RTX_FRAME_RELATED_P (insn) = 1;
11632 /* Set up got pointer, if needed. */
11634 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11636 rtx_insn *insns = s390_load_got ();
11638 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11639 annotate_constant_pool_refs (&PATTERN (insn));
11641 emit_insn (insns);
11644 if (TARGET_TPF_PROFILING)
11646 /* Generate a BAS instruction to serve as a function
11647 entry intercept to facilitate the use of tracing
11648 algorithms located at the branch target. */
11649 emit_insn (gen_prologue_tpf ());
11651 /* Emit a blockage here so that all code
11652 lies between the profiling mechanisms. */
11653 emit_insn (gen_blockage ());
11657 /* Expand the epilogue into a bunch of separate insns. */
11659 void
11660 s390_emit_epilogue (bool sibcall)
11662 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11663 int area_bottom, area_top, offset = 0;
11664 int next_offset;
11665 rtvec p;
11666 int i;
11668 if (TARGET_TPF_PROFILING)
11671 /* Generate a BAS instruction to serve as a function
11672 entry intercept to facilitate the use of tracing
11673 algorithms located at the branch target. */
11675 /* Emit a blockage here so that all code
11676 lies between the profiling mechanisms. */
11677 emit_insn (gen_blockage ());
11679 emit_insn (gen_epilogue_tpf ());
11682 /* Check whether to use frame or stack pointer for restore. */
11684 frame_pointer = (frame_pointer_needed
11685 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11687 s390_frame_area (&area_bottom, &area_top);
11689 /* Check whether we can access the register save area.
11690 If not, increment the frame pointer as required. */
11692 if (area_top <= area_bottom)
11694 /* Nothing to restore. */
11696 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11697 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11699 /* Area is in range. */
11700 offset = cfun_frame_layout.frame_size;
11702 else
11704 rtx insn, frame_off, cfa;
11706 offset = area_bottom < 0 ? -area_bottom : 0;
11707 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11709 cfa = gen_rtx_SET (frame_pointer,
11710 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11711 if (DISP_IN_RANGE (INTVAL (frame_off)))
11713 insn = gen_rtx_SET (frame_pointer,
11714 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11715 insn = emit_insn (insn);
11717 else
11719 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11720 frame_off = force_const_mem (Pmode, frame_off);
11722 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11723 annotate_constant_pool_refs (&PATTERN (insn));
11725 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11726 RTX_FRAME_RELATED_P (insn) = 1;
11729 /* Restore call saved fprs. */
11731 if (TARGET_64BIT)
11733 if (cfun_save_high_fprs_p)
11735 next_offset = cfun_frame_layout.f8_offset;
11736 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11738 if (cfun_fpr_save_p (i))
11740 restore_fpr (frame_pointer,
11741 offset + next_offset, i);
11742 cfa_restores
11743 = alloc_reg_note (REG_CFA_RESTORE,
11744 gen_rtx_REG (DFmode, i), cfa_restores);
11745 next_offset += 8;
11751 else
11753 next_offset = cfun_frame_layout.f4_offset;
11754 /* f4, f6 */
11755 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11757 if (cfun_fpr_save_p (i))
11759 restore_fpr (frame_pointer,
11760 offset + next_offset, i);
11761 cfa_restores
11762 = alloc_reg_note (REG_CFA_RESTORE,
11763 gen_rtx_REG (DFmode, i), cfa_restores);
11764 next_offset += 8;
11766 else if (!TARGET_PACKED_STACK)
11767 next_offset += 8;
11772 /* Return register. */
11774 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11776 /* Restore call saved gprs. */
11778 if (cfun_frame_layout.first_restore_gpr != -1)
11780 rtx insn, addr;
11781 int i;
11783 /* Check for global register and save them
11784 to stack location from where they get restored. */
11786 for (i = cfun_frame_layout.first_restore_gpr;
11787 i <= cfun_frame_layout.last_restore_gpr;
11788 i++)
11790 if (global_not_special_regno_p (i))
11792 addr = plus_constant (Pmode, frame_pointer,
11793 offset + cfun_frame_layout.gprs_offset
11794 + (i - cfun_frame_layout.first_save_gpr_slot)
11795 * UNITS_PER_LONG);
11796 addr = gen_rtx_MEM (Pmode, addr);
11797 set_mem_alias_set (addr, get_frame_alias_set ());
11798 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11800 else
11801 cfa_restores
11802 = alloc_reg_note (REG_CFA_RESTORE,
11803 gen_rtx_REG (Pmode, i), cfa_restores);
11806 /* Fetch return address from stack before load multiple,
11807 this will do good for scheduling.
11809 Only do this if we already decided that r14 needs to be
11810 saved to a stack slot. (And not just because r14 happens to
11811 be in between two GPRs which need saving.) Otherwise it
11812 would be difficult to take that decision back in
11813 s390_optimize_prologue.
11815 This optimization is only helpful on in-order machines. */
11816 if (! sibcall
11817 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11818 && s390_tune <= PROCESSOR_2097_Z10)
11820 int return_regnum = find_unused_clobbered_reg();
11821 if (!return_regnum)
11822 return_regnum = 4;
11823 return_reg = gen_rtx_REG (Pmode, return_regnum);
11825 addr = plus_constant (Pmode, frame_pointer,
11826 offset + cfun_frame_layout.gprs_offset
11827 + (RETURN_REGNUM
11828 - cfun_frame_layout.first_save_gpr_slot)
11829 * UNITS_PER_LONG);
11830 addr = gen_rtx_MEM (Pmode, addr);
11831 set_mem_alias_set (addr, get_frame_alias_set ());
11832 emit_move_insn (return_reg, addr);
11834 /* Once we did that optimization we have to make sure
11835 s390_optimize_prologue does not try to remove the store
11836 of r14 since we will not be able to find the load issued
11837 here. */
11838 cfun_frame_layout.save_return_addr_p = true;
11841 insn = restore_gprs (frame_pointer,
11842 offset + cfun_frame_layout.gprs_offset
11843 + (cfun_frame_layout.first_restore_gpr
11844 - cfun_frame_layout.first_save_gpr_slot)
11845 * UNITS_PER_LONG,
11846 cfun_frame_layout.first_restore_gpr,
11847 cfun_frame_layout.last_restore_gpr);
11848 insn = emit_insn (insn);
11849 REG_NOTES (insn) = cfa_restores;
11850 add_reg_note (insn, REG_CFA_DEF_CFA,
11851 plus_constant (Pmode, stack_pointer_rtx,
11852 STACK_POINTER_OFFSET));
11853 RTX_FRAME_RELATED_P (insn) = 1;
11856 s390_restore_gprs_from_fprs ();
11858 if (! sibcall)
11861 /* Return to caller. */
11863 p = rtvec_alloc (2);
11865 RTVEC_ELT (p, 0) = ret_rtx;
11866 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11867 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11871 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11873 static void
11874 s300_set_up_by_prologue (hard_reg_set_container *regs)
11876 if (cfun->machine->base_reg
11877 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11878 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11881 /* -fsplit-stack support. */
11883 /* A SYMBOL_REF for __morestack. */
11884 static GTY(()) rtx morestack_ref;
11886 /* When using -fsplit-stack, the allocation routines set a field in
11887 the TCB to the bottom of the stack plus this much space, measured
11888 in bytes. */
11890 #define SPLIT_STACK_AVAILABLE 1024
11892 /* Emit -fsplit-stack prologue, which goes before the regular function
11893 prologue. */
11895 void
11896 s390_expand_split_stack_prologue (void)
11898 rtx r1, guard, cc = NULL;
11899 rtx_insn *insn;
11900 /* Offset from thread pointer to __private_ss. */
11901 int psso = TARGET_64BIT ? 0x38 : 0x20;
11902 /* Pointer size in bytes. */
11903 /* Frame size and argument size - the two parameters to __morestack. */
11904 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11905 /* Align argument size to 8 bytes - simplifies __morestack code. */
11906 HOST_WIDE_INT args_size = crtl->args.size >= 0
11907 ? ((crtl->args.size + 7) & ~7)
11908 : 0;
11909 /* Label to be called by __morestack. */
11910 rtx_code_label *call_done = NULL;
11911 rtx_code_label *parm_base = NULL;
11912 rtx tmp;
11914 gcc_assert (flag_split_stack && reload_completed);
11915 if (!TARGET_CPU_ZARCH)
11917 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11918 return;
11921 r1 = gen_rtx_REG (Pmode, 1);
11923 /* If no stack frame will be allocated, don't do anything. */
11924 if (!frame_size)
11926 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11928 /* If va_start is used, just use r15. */
11929 emit_move_insn (r1,
11930 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11931 GEN_INT (STACK_POINTER_OFFSET)));
11934 return;
11937 if (morestack_ref == NULL_RTX)
11939 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11940 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11941 | SYMBOL_FLAG_FUNCTION);
11944 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11946 /* If frame_size will fit in an add instruction, do a stack space
11947 check, and only call __morestack if there's not enough space. */
11949 /* Get thread pointer. r1 is the only register we can always destroy - r0
11950 could contain a static chain (and cannot be used to address memory
11951 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11952 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11953 /* Aim at __private_ss. */
11954 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11956 /* If less that 1kiB used, skip addition and compare directly with
11957 __private_ss. */
11958 if (frame_size > SPLIT_STACK_AVAILABLE)
11960 emit_move_insn (r1, guard);
11961 if (TARGET_64BIT)
11962 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11963 else
11964 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11965 guard = r1;
11968 /* Compare the (maybe adjusted) guard with the stack pointer. */
11969 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11972 call_done = gen_label_rtx ();
11973 parm_base = gen_label_rtx ();
11975 /* Emit the parameter block. */
11976 tmp = gen_split_stack_data (parm_base, call_done,
11977 GEN_INT (frame_size),
11978 GEN_INT (args_size));
11979 insn = emit_insn (tmp);
11980 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11981 LABEL_NUSES (call_done)++;
11982 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11983 LABEL_NUSES (parm_base)++;
11985 /* %r1 = litbase. */
11986 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11987 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11988 LABEL_NUSES (parm_base)++;
11990 /* Now, we need to call __morestack. It has very special calling
11991 conventions: it preserves param/return/static chain registers for
11992 calling main function body, and looks for its own parameters at %r1. */
11994 if (cc != NULL)
11996 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11998 insn = emit_jump_insn (tmp);
11999 JUMP_LABEL (insn) = call_done;
12000 LABEL_NUSES (call_done)++;
12002 /* Mark the jump as very unlikely to be taken. */
12003 add_reg_br_prob_note (insn,
12004 profile_probability::very_unlikely ());
12006 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12008 /* If va_start is used, and __morestack was not called, just use
12009 r15. */
12010 emit_move_insn (r1,
12011 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12012 GEN_INT (STACK_POINTER_OFFSET)));
12015 else
12017 tmp = gen_split_stack_call (morestack_ref, call_done);
12018 insn = emit_jump_insn (tmp);
12019 JUMP_LABEL (insn) = call_done;
12020 LABEL_NUSES (call_done)++;
12021 emit_barrier ();
12024 /* __morestack will call us here. */
12026 emit_label (call_done);
12029 /* We may have to tell the dataflow pass that the split stack prologue
12030 is initializing a register. */
12032 static void
12033 s390_live_on_entry (bitmap regs)
12035 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12037 gcc_assert (flag_split_stack);
12038 bitmap_set_bit (regs, 1);
12042 /* Return true if the function can use simple_return to return outside
12043 of a shrink-wrapped region. At present shrink-wrapping is supported
12044 in all cases. */
12046 bool
12047 s390_can_use_simple_return_insn (void)
12049 return true;
12052 /* Return true if the epilogue is guaranteed to contain only a return
12053 instruction and if a direct return can therefore be used instead.
12054 One of the main advantages of using direct return instructions
12055 is that we can then use conditional returns. */
12057 bool
12058 s390_can_use_return_insn (void)
12060 int i;
12062 if (!reload_completed)
12063 return false;
12065 if (crtl->profile)
12066 return false;
12068 if (TARGET_TPF_PROFILING)
12069 return false;
12071 for (i = 0; i < 16; i++)
12072 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12073 return false;
12075 /* For 31 bit this is not covered by the frame_size check below
12076 since f4, f6 are saved in the register save area without needing
12077 additional stack space. */
12078 if (!TARGET_64BIT
12079 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12080 return false;
12082 if (cfun->machine->base_reg
12083 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
12084 return false;
12086 return cfun_frame_layout.frame_size == 0;
12089 /* The VX ABI differs for vararg functions. Therefore we need the
12090 prototype of the callee to be available when passing vector type
12091 values. */
12092 static const char *
12093 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12095 return ((TARGET_VX_ABI
12096 && typelist == 0
12097 && VECTOR_TYPE_P (TREE_TYPE (val))
12098 && (funcdecl == NULL_TREE
12099 || (TREE_CODE (funcdecl) == FUNCTION_DECL
12100 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12101 ? N_("vector argument passed to unprototyped function")
12102 : NULL);
12106 /* Return the size in bytes of a function argument of
12107 type TYPE and/or mode MODE. At least one of TYPE or
12108 MODE must be specified. */
12110 static int
12111 s390_function_arg_size (machine_mode mode, const_tree type)
12113 if (type)
12114 return int_size_in_bytes (type);
12116 /* No type info available for some library calls ... */
12117 if (mode != BLKmode)
12118 return GET_MODE_SIZE (mode);
12120 /* If we have neither type nor mode, abort */
12121 gcc_unreachable ();
12124 /* Return true if a function argument of type TYPE and mode MODE
12125 is to be passed in a vector register, if available. */
12127 bool
12128 s390_function_arg_vector (machine_mode mode, const_tree type)
12130 if (!TARGET_VX_ABI)
12131 return false;
12133 if (s390_function_arg_size (mode, type) > 16)
12134 return false;
12136 /* No type info available for some library calls ... */
12137 if (!type)
12138 return VECTOR_MODE_P (mode);
12140 /* The ABI says that record types with a single member are treated
12141 just like that member would be. */
12142 while (TREE_CODE (type) == RECORD_TYPE)
12144 tree field, single = NULL_TREE;
12146 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12148 if (TREE_CODE (field) != FIELD_DECL)
12149 continue;
12151 if (single == NULL_TREE)
12152 single = TREE_TYPE (field);
12153 else
12154 return false;
12157 if (single == NULL_TREE)
12158 return false;
12159 else
12161 /* If the field declaration adds extra byte due to
12162 e.g. padding this is not accepted as vector type. */
12163 if (int_size_in_bytes (single) <= 0
12164 || int_size_in_bytes (single) != int_size_in_bytes (type))
12165 return false;
12166 type = single;
12170 return VECTOR_TYPE_P (type);
12173 /* Return true if a function argument of type TYPE and mode MODE
12174 is to be passed in a floating-point register, if available. */
12176 static bool
12177 s390_function_arg_float (machine_mode mode, const_tree type)
12179 if (s390_function_arg_size (mode, type) > 8)
12180 return false;
12182 /* Soft-float changes the ABI: no floating-point registers are used. */
12183 if (TARGET_SOFT_FLOAT)
12184 return false;
12186 /* No type info available for some library calls ... */
12187 if (!type)
12188 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12190 /* The ABI says that record types with a single member are treated
12191 just like that member would be. */
12192 while (TREE_CODE (type) == RECORD_TYPE)
12194 tree field, single = NULL_TREE;
12196 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12198 if (TREE_CODE (field) != FIELD_DECL)
12199 continue;
12201 if (single == NULL_TREE)
12202 single = TREE_TYPE (field);
12203 else
12204 return false;
12207 if (single == NULL_TREE)
12208 return false;
12209 else
12210 type = single;
12213 return TREE_CODE (type) == REAL_TYPE;
12216 /* Return true if a function argument of type TYPE and mode MODE
12217 is to be passed in an integer register, or a pair of integer
12218 registers, if available. */
12220 static bool
12221 s390_function_arg_integer (machine_mode mode, const_tree type)
12223 int size = s390_function_arg_size (mode, type);
12224 if (size > 8)
12225 return false;
12227 /* No type info available for some library calls ... */
12228 if (!type)
12229 return GET_MODE_CLASS (mode) == MODE_INT
12230 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12232 /* We accept small integral (and similar) types. */
12233 if (INTEGRAL_TYPE_P (type)
12234 || POINTER_TYPE_P (type)
12235 || TREE_CODE (type) == NULLPTR_TYPE
12236 || TREE_CODE (type) == OFFSET_TYPE
12237 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12238 return true;
12240 /* We also accept structs of size 1, 2, 4, 8 that are not
12241 passed in floating-point registers. */
12242 if (AGGREGATE_TYPE_P (type)
12243 && exact_log2 (size) >= 0
12244 && !s390_function_arg_float (mode, type))
12245 return true;
12247 return false;
12250 /* Return 1 if a function argument of type TYPE and mode MODE
12251 is to be passed by reference. The ABI specifies that only
12252 structures of size 1, 2, 4, or 8 bytes are passed by value,
12253 all other structures (and complex numbers) are passed by
12254 reference. */
12256 static bool
12257 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
12258 machine_mode mode, const_tree type,
12259 bool named ATTRIBUTE_UNUSED)
12261 int size = s390_function_arg_size (mode, type);
12263 if (s390_function_arg_vector (mode, type))
12264 return false;
12266 if (size > 8)
12267 return true;
12269 if (type)
12271 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12272 return true;
12274 if (TREE_CODE (type) == COMPLEX_TYPE
12275 || TREE_CODE (type) == VECTOR_TYPE)
12276 return true;
12279 return false;
12282 /* Update the data in CUM to advance over an argument of mode MODE and
12283 data type TYPE. (TYPE is null for libcalls where that information
12284 may not be available.). The boolean NAMED specifies whether the
12285 argument is a named argument (as opposed to an unnamed argument
12286 matching an ellipsis). */
12288 static void
12289 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
12290 const_tree type, bool named)
12292 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12294 if (s390_function_arg_vector (mode, type))
12296 /* We are called for unnamed vector stdarg arguments which are
12297 passed on the stack. In this case this hook does not have to
12298 do anything since stack arguments are tracked by common
12299 code. */
12300 if (!named)
12301 return;
12302 cum->vrs += 1;
12304 else if (s390_function_arg_float (mode, type))
12306 cum->fprs += 1;
12308 else if (s390_function_arg_integer (mode, type))
12310 int size = s390_function_arg_size (mode, type);
12311 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12313 else
12314 gcc_unreachable ();
12317 /* Define where to put the arguments to a function.
12318 Value is zero to push the argument on the stack,
12319 or a hard register in which to store the argument.
12321 MODE is the argument's machine mode.
12322 TYPE is the data type of the argument (as a tree).
12323 This is null for libcalls where that information may
12324 not be available.
12325 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12326 the preceding args and about the function being called.
12327 NAMED is nonzero if this argument is a named parameter
12328 (otherwise it is an extra parameter matching an ellipsis).
12330 On S/390, we use general purpose registers 2 through 6 to
12331 pass integer, pointer, and certain structure arguments, and
12332 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12333 to pass floating point arguments. All remaining arguments
12334 are pushed to the stack. */
12336 static rtx
12337 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
12338 const_tree type, bool named)
12340 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12342 if (!named)
12343 s390_check_type_for_vector_abi (type, true, false);
12345 if (s390_function_arg_vector (mode, type))
12347 /* Vector arguments being part of the ellipsis are passed on the
12348 stack. */
12349 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12350 return NULL_RTX;
12352 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12354 else if (s390_function_arg_float (mode, type))
12356 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12357 return NULL_RTX;
12358 else
12359 return gen_rtx_REG (mode, cum->fprs + 16);
12361 else if (s390_function_arg_integer (mode, type))
12363 int size = s390_function_arg_size (mode, type);
12364 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12366 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12367 return NULL_RTX;
12368 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12369 return gen_rtx_REG (mode, cum->gprs + 2);
12370 else if (n_gprs == 2)
12372 rtvec p = rtvec_alloc (2);
12374 RTVEC_ELT (p, 0)
12375 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12376 const0_rtx);
12377 RTVEC_ELT (p, 1)
12378 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12379 GEN_INT (4));
12381 return gen_rtx_PARALLEL (mode, p);
12385 /* After the real arguments, expand_call calls us once again
12386 with a void_type_node type. Whatever we return here is
12387 passed as operand 2 to the call expanders.
12389 We don't need this feature ... */
12390 else if (type == void_type_node)
12391 return const0_rtx;
12393 gcc_unreachable ();
12396 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12397 left-justified when placed on the stack during parameter passing. */
12399 static pad_direction
12400 s390_function_arg_padding (machine_mode mode, const_tree type)
12402 if (s390_function_arg_vector (mode, type))
12403 return PAD_UPWARD;
12405 return default_function_arg_padding (mode, type);
12408 /* Return true if return values of type TYPE should be returned
12409 in a memory buffer whose address is passed by the caller as
12410 hidden first argument. */
12412 static bool
12413 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12415 /* We accept small integral (and similar) types. */
12416 if (INTEGRAL_TYPE_P (type)
12417 || POINTER_TYPE_P (type)
12418 || TREE_CODE (type) == OFFSET_TYPE
12419 || TREE_CODE (type) == REAL_TYPE)
12420 return int_size_in_bytes (type) > 8;
12422 /* vector types which fit into a VR. */
12423 if (TARGET_VX_ABI
12424 && VECTOR_TYPE_P (type)
12425 && int_size_in_bytes (type) <= 16)
12426 return false;
12428 /* Aggregates and similar constructs are always returned
12429 in memory. */
12430 if (AGGREGATE_TYPE_P (type)
12431 || TREE_CODE (type) == COMPLEX_TYPE
12432 || VECTOR_TYPE_P (type))
12433 return true;
12435 /* ??? We get called on all sorts of random stuff from
12436 aggregate_value_p. We can't abort, but it's not clear
12437 what's safe to return. Pretend it's a struct I guess. */
12438 return true;
12441 /* Function arguments and return values are promoted to word size. */
12443 static machine_mode
12444 s390_promote_function_mode (const_tree type, machine_mode mode,
12445 int *punsignedp,
12446 const_tree fntype ATTRIBUTE_UNUSED,
12447 int for_return ATTRIBUTE_UNUSED)
12449 if (INTEGRAL_MODE_P (mode)
12450 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12452 if (type != NULL_TREE && POINTER_TYPE_P (type))
12453 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12454 return Pmode;
12457 return mode;
12460 /* Define where to return a (scalar) value of type RET_TYPE.
12461 If RET_TYPE is null, define where to return a (scalar)
12462 value of mode MODE from a libcall. */
12464 static rtx
12465 s390_function_and_libcall_value (machine_mode mode,
12466 const_tree ret_type,
12467 const_tree fntype_or_decl,
12468 bool outgoing ATTRIBUTE_UNUSED)
12470 /* For vector return types it is important to use the RET_TYPE
12471 argument whenever available since the middle-end might have
12472 changed the mode to a scalar mode. */
12473 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12474 || (!ret_type && VECTOR_MODE_P (mode)));
12476 /* For normal functions perform the promotion as
12477 promote_function_mode would do. */
12478 if (ret_type)
12480 int unsignedp = TYPE_UNSIGNED (ret_type);
12481 mode = promote_function_mode (ret_type, mode, &unsignedp,
12482 fntype_or_decl, 1);
12485 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12486 || SCALAR_FLOAT_MODE_P (mode)
12487 || (TARGET_VX_ABI && vector_ret_type_p));
12488 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12490 if (TARGET_VX_ABI && vector_ret_type_p)
12491 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12492 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12493 return gen_rtx_REG (mode, 16);
12494 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12495 || UNITS_PER_LONG == UNITS_PER_WORD)
12496 return gen_rtx_REG (mode, 2);
12497 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12499 /* This case is triggered when returning a 64 bit value with
12500 -m31 -mzarch. Although the value would fit into a single
12501 register it has to be forced into a 32 bit register pair in
12502 order to match the ABI. */
12503 rtvec p = rtvec_alloc (2);
12505 RTVEC_ELT (p, 0)
12506 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12507 RTVEC_ELT (p, 1)
12508 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12510 return gen_rtx_PARALLEL (mode, p);
12513 gcc_unreachable ();
12516 /* Define where to return a scalar return value of type RET_TYPE. */
12518 static rtx
12519 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12520 bool outgoing)
12522 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12523 fn_decl_or_type, outgoing);
12526 /* Define where to return a scalar libcall return value of mode
12527 MODE. */
12529 static rtx
12530 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12532 return s390_function_and_libcall_value (mode, NULL_TREE,
12533 NULL_TREE, true);
12537 /* Create and return the va_list datatype.
12539 On S/390, va_list is an array type equivalent to
12541 typedef struct __va_list_tag
12543 long __gpr;
12544 long __fpr;
12545 void *__overflow_arg_area;
12546 void *__reg_save_area;
12547 } va_list[1];
12549 where __gpr and __fpr hold the number of general purpose
12550 or floating point arguments used up to now, respectively,
12551 __overflow_arg_area points to the stack location of the
12552 next argument passed on the stack, and __reg_save_area
12553 always points to the start of the register area in the
12554 call frame of the current function. The function prologue
12555 saves all registers used for argument passing into this
12556 area if the function uses variable arguments. */
12558 static tree
12559 s390_build_builtin_va_list (void)
12561 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12563 record = lang_hooks.types.make_type (RECORD_TYPE);
12565 type_decl =
12566 build_decl (BUILTINS_LOCATION,
12567 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12569 f_gpr = build_decl (BUILTINS_LOCATION,
12570 FIELD_DECL, get_identifier ("__gpr"),
12571 long_integer_type_node);
12572 f_fpr = build_decl (BUILTINS_LOCATION,
12573 FIELD_DECL, get_identifier ("__fpr"),
12574 long_integer_type_node);
12575 f_ovf = build_decl (BUILTINS_LOCATION,
12576 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12577 ptr_type_node);
12578 f_sav = build_decl (BUILTINS_LOCATION,
12579 FIELD_DECL, get_identifier ("__reg_save_area"),
12580 ptr_type_node);
12582 va_list_gpr_counter_field = f_gpr;
12583 va_list_fpr_counter_field = f_fpr;
12585 DECL_FIELD_CONTEXT (f_gpr) = record;
12586 DECL_FIELD_CONTEXT (f_fpr) = record;
12587 DECL_FIELD_CONTEXT (f_ovf) = record;
12588 DECL_FIELD_CONTEXT (f_sav) = record;
12590 TYPE_STUB_DECL (record) = type_decl;
12591 TYPE_NAME (record) = type_decl;
12592 TYPE_FIELDS (record) = f_gpr;
12593 DECL_CHAIN (f_gpr) = f_fpr;
12594 DECL_CHAIN (f_fpr) = f_ovf;
12595 DECL_CHAIN (f_ovf) = f_sav;
12597 layout_type (record);
12599 /* The correct type is an array type of one element. */
12600 return build_array_type (record, build_index_type (size_zero_node));
12603 /* Implement va_start by filling the va_list structure VALIST.
12604 STDARG_P is always true, and ignored.
12605 NEXTARG points to the first anonymous stack argument.
12607 The following global variables are used to initialize
12608 the va_list structure:
12610 crtl->args.info:
12611 holds number of gprs and fprs used for named arguments.
12612 crtl->args.arg_offset_rtx:
12613 holds the offset of the first anonymous stack argument
12614 (relative to the virtual arg pointer). */
12616 static void
12617 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12619 HOST_WIDE_INT n_gpr, n_fpr;
12620 int off;
12621 tree f_gpr, f_fpr, f_ovf, f_sav;
12622 tree gpr, fpr, ovf, sav, t;
12624 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12625 f_fpr = DECL_CHAIN (f_gpr);
12626 f_ovf = DECL_CHAIN (f_fpr);
12627 f_sav = DECL_CHAIN (f_ovf);
12629 valist = build_simple_mem_ref (valist);
12630 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12631 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12632 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12633 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12635 /* Count number of gp and fp argument registers used. */
12637 n_gpr = crtl->args.info.gprs;
12638 n_fpr = crtl->args.info.fprs;
12640 if (cfun->va_list_gpr_size)
12642 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12643 build_int_cst (NULL_TREE, n_gpr));
12644 TREE_SIDE_EFFECTS (t) = 1;
12645 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12648 if (cfun->va_list_fpr_size)
12650 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12651 build_int_cst (NULL_TREE, n_fpr));
12652 TREE_SIDE_EFFECTS (t) = 1;
12653 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12656 if (flag_split_stack
12657 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12658 == NULL)
12659 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12661 rtx reg;
12662 rtx_insn *seq;
12664 reg = gen_reg_rtx (Pmode);
12665 cfun->machine->split_stack_varargs_pointer = reg;
12667 start_sequence ();
12668 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12669 seq = get_insns ();
12670 end_sequence ();
12672 push_topmost_sequence ();
12673 emit_insn_after (seq, entry_of_function ());
12674 pop_topmost_sequence ();
12677 /* Find the overflow area.
12678 FIXME: This currently is too pessimistic when the vector ABI is
12679 enabled. In that case we *always* set up the overflow area
12680 pointer. */
12681 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12682 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12683 || TARGET_VX_ABI)
12685 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12686 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12687 else
12688 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12690 off = INTVAL (crtl->args.arg_offset_rtx);
12691 off = off < 0 ? 0 : off;
12692 if (TARGET_DEBUG_ARG)
12693 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12694 (int)n_gpr, (int)n_fpr, off);
12696 t = fold_build_pointer_plus_hwi (t, off);
12698 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12699 TREE_SIDE_EFFECTS (t) = 1;
12700 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12703 /* Find the register save area. */
12704 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12705 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12707 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12708 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12710 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12711 TREE_SIDE_EFFECTS (t) = 1;
12712 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12716 /* Implement va_arg by updating the va_list structure
12717 VALIST as required to retrieve an argument of type
12718 TYPE, and returning that argument.
12720 Generates code equivalent to:
12722 if (integral value) {
12723 if (size <= 4 && args.gpr < 5 ||
12724 size > 4 && args.gpr < 4 )
12725 ret = args.reg_save_area[args.gpr+8]
12726 else
12727 ret = *args.overflow_arg_area++;
12728 } else if (vector value) {
12729 ret = *args.overflow_arg_area;
12730 args.overflow_arg_area += size / 8;
12731 } else if (float value) {
12732 if (args.fgpr < 2)
12733 ret = args.reg_save_area[args.fpr+64]
12734 else
12735 ret = *args.overflow_arg_area++;
12736 } else if (aggregate value) {
12737 if (args.gpr < 5)
12738 ret = *args.reg_save_area[args.gpr]
12739 else
12740 ret = **args.overflow_arg_area++;
12741 } */
12743 static tree
12744 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12745 gimple_seq *post_p ATTRIBUTE_UNUSED)
12747 tree f_gpr, f_fpr, f_ovf, f_sav;
12748 tree gpr, fpr, ovf, sav, reg, t, u;
12749 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12750 tree lab_false, lab_over = NULL_TREE;
12751 tree addr = create_tmp_var (ptr_type_node, "addr");
12752 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12753 a stack slot. */
12755 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12756 f_fpr = DECL_CHAIN (f_gpr);
12757 f_ovf = DECL_CHAIN (f_fpr);
12758 f_sav = DECL_CHAIN (f_ovf);
12760 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12761 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12762 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12764 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12765 both appear on a lhs. */
12766 valist = unshare_expr (valist);
12767 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12769 size = int_size_in_bytes (type);
12771 s390_check_type_for_vector_abi (type, true, false);
12773 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12775 if (TARGET_DEBUG_ARG)
12777 fprintf (stderr, "va_arg: aggregate type");
12778 debug_tree (type);
12781 /* Aggregates are passed by reference. */
12782 indirect_p = 1;
12783 reg = gpr;
12784 n_reg = 1;
12786 /* kernel stack layout on 31 bit: It is assumed here that no padding
12787 will be added by s390_frame_info because for va_args always an even
12788 number of gprs has to be saved r15-r2 = 14 regs. */
12789 sav_ofs = 2 * UNITS_PER_LONG;
12790 sav_scale = UNITS_PER_LONG;
12791 size = UNITS_PER_LONG;
12792 max_reg = GP_ARG_NUM_REG - n_reg;
12793 left_align_p = false;
12795 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12797 if (TARGET_DEBUG_ARG)
12799 fprintf (stderr, "va_arg: vector type");
12800 debug_tree (type);
12803 indirect_p = 0;
12804 reg = NULL_TREE;
12805 n_reg = 0;
12806 sav_ofs = 0;
12807 sav_scale = 8;
12808 max_reg = 0;
12809 left_align_p = true;
12811 else if (s390_function_arg_float (TYPE_MODE (type), type))
12813 if (TARGET_DEBUG_ARG)
12815 fprintf (stderr, "va_arg: float type");
12816 debug_tree (type);
12819 /* FP args go in FP registers, if present. */
12820 indirect_p = 0;
12821 reg = fpr;
12822 n_reg = 1;
12823 sav_ofs = 16 * UNITS_PER_LONG;
12824 sav_scale = 8;
12825 max_reg = FP_ARG_NUM_REG - n_reg;
12826 left_align_p = false;
12828 else
12830 if (TARGET_DEBUG_ARG)
12832 fprintf (stderr, "va_arg: other type");
12833 debug_tree (type);
12836 /* Otherwise into GP registers. */
12837 indirect_p = 0;
12838 reg = gpr;
12839 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12841 /* kernel stack layout on 31 bit: It is assumed here that no padding
12842 will be added by s390_frame_info because for va_args always an even
12843 number of gprs has to be saved r15-r2 = 14 regs. */
12844 sav_ofs = 2 * UNITS_PER_LONG;
12846 if (size < UNITS_PER_LONG)
12847 sav_ofs += UNITS_PER_LONG - size;
12849 sav_scale = UNITS_PER_LONG;
12850 max_reg = GP_ARG_NUM_REG - n_reg;
12851 left_align_p = false;
12854 /* Pull the value out of the saved registers ... */
12856 if (reg != NULL_TREE)
12859 if (reg > ((typeof (reg))max_reg))
12860 goto lab_false;
12862 addr = sav + sav_ofs + reg * save_scale;
12864 goto lab_over;
12866 lab_false:
12869 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12870 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12872 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12873 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12874 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12875 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12876 gimplify_and_add (t, pre_p);
12878 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12879 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12880 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12881 t = fold_build_pointer_plus (t, u);
12883 gimplify_assign (addr, t, pre_p);
12885 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12887 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12890 /* ... Otherwise out of the overflow area. */
12892 t = ovf;
12893 if (size < UNITS_PER_LONG && !left_align_p)
12894 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12896 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12898 gimplify_assign (addr, t, pre_p);
12900 if (size < UNITS_PER_LONG && left_align_p)
12901 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12902 else
12903 t = fold_build_pointer_plus_hwi (t, size);
12905 gimplify_assign (ovf, t, pre_p);
12907 if (reg != NULL_TREE)
12908 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12911 /* Increment register save count. */
12913 if (n_reg > 0)
12915 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12916 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12917 gimplify_and_add (u, pre_p);
12920 if (indirect_p)
12922 t = build_pointer_type_for_mode (build_pointer_type (type),
12923 ptr_mode, true);
12924 addr = fold_convert (t, addr);
12925 addr = build_va_arg_indirect_ref (addr);
12927 else
12929 t = build_pointer_type_for_mode (type, ptr_mode, true);
12930 addr = fold_convert (t, addr);
12933 return build_va_arg_indirect_ref (addr);
12936 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12937 expanders.
12938 DEST - Register location where CC will be stored.
12939 TDB - Pointer to a 256 byte area where to store the transaction.
12940 diagnostic block. NULL if TDB is not needed.
12941 RETRY - Retry count value. If non-NULL a retry loop for CC2
12942 is emitted
12943 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12944 of the tbegin instruction pattern. */
12946 void
12947 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12949 rtx retry_plus_two = gen_reg_rtx (SImode);
12950 rtx retry_reg = gen_reg_rtx (SImode);
12951 rtx_code_label *retry_label = NULL;
12953 if (retry != NULL_RTX)
12955 emit_move_insn (retry_reg, retry);
12956 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12957 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12958 retry_label = gen_label_rtx ();
12959 emit_label (retry_label);
12962 if (clobber_fprs_p)
12964 if (TARGET_VX)
12965 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12966 tdb));
12967 else
12968 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12969 tdb));
12971 else
12972 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12973 tdb));
12975 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12976 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12977 CC_REGNUM)),
12978 UNSPEC_CC_TO_INT));
12979 if (retry != NULL_RTX)
12981 const int CC0 = 1 << 3;
12982 const int CC1 = 1 << 2;
12983 const int CC3 = 1 << 0;
12984 rtx jump;
12985 rtx count = gen_reg_rtx (SImode);
12986 rtx_code_label *leave_label = gen_label_rtx ();
12988 /* Exit for success and permanent failures. */
12989 jump = s390_emit_jump (leave_label,
12990 gen_rtx_EQ (VOIDmode,
12991 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12992 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12993 LABEL_NUSES (leave_label) = 1;
12995 /* CC2 - transient failure. Perform retry with ppa. */
12996 emit_move_insn (count, retry_plus_two);
12997 emit_insn (gen_subsi3 (count, count, retry_reg));
12998 emit_insn (gen_tx_assist (count));
12999 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13000 retry_reg,
13001 retry_reg));
13002 JUMP_LABEL (jump) = retry_label;
13003 LABEL_NUSES (retry_label) = 1;
13004 emit_label (leave_label);
13009 /* Return the decl for the target specific builtin with the function
13010 code FCODE. */
13012 static tree
13013 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13015 if (fcode >= S390_BUILTIN_MAX)
13016 return error_mark_node;
13018 return s390_builtin_decls[fcode];
13021 /* We call mcount before the function prologue. So a profiled leaf
13022 function should stay a leaf function. */
13024 static bool
13025 s390_keep_leaf_when_profiled ()
13027 return true;
13030 /* Output assembly code for the trampoline template to
13031 stdio stream FILE.
13033 On S/390, we use gpr 1 internally in the trampoline code;
13034 gpr 0 is used to hold the static chain. */
13036 static void
13037 s390_asm_trampoline_template (FILE *file)
13039 rtx op[2];
13040 op[0] = gen_rtx_REG (Pmode, 0);
13041 op[1] = gen_rtx_REG (Pmode, 1);
13043 if (TARGET_64BIT)
13045 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13046 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
13047 output_asm_insn ("br\t%1", op); /* 2 byte */
13048 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13050 else
13052 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13053 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
13054 output_asm_insn ("br\t%1", op); /* 2 byte */
13055 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13059 /* Emit RTL insns to initialize the variable parts of a trampoline.
13060 FNADDR is an RTX for the address of the function's pure code.
13061 CXT is an RTX for the static chain value for the function. */
13063 static void
13064 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13066 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13067 rtx mem;
13069 emit_block_move (m_tramp, assemble_trampoline_template (),
13070 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13072 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13073 emit_move_insn (mem, cxt);
13074 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13075 emit_move_insn (mem, fnaddr);
13078 /* Output assembler code to FILE to increment profiler label # LABELNO
13079 for profiling a function entry. */
13081 void
13082 s390_function_profiler (FILE *file, int labelno)
13084 rtx op[7];
13086 char label[128];
13087 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
13089 fprintf (file, "# function profiler \n");
13091 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13092 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13093 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13095 op[2] = gen_rtx_REG (Pmode, 1);
13096 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
13097 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
13099 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
13100 if (flag_pic)
13102 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
13103 op[4] = gen_rtx_CONST (Pmode, op[4]);
13106 if (TARGET_64BIT)
13108 output_asm_insn ("stg\t%0,%1", op);
13109 output_asm_insn ("larl\t%2,%3", op);
13110 output_asm_insn ("brasl\t%0,%4", op);
13111 output_asm_insn ("lg\t%0,%1", op);
13113 else if (TARGET_CPU_ZARCH)
13115 output_asm_insn ("st\t%0,%1", op);
13116 output_asm_insn ("larl\t%2,%3", op);
13117 output_asm_insn ("brasl\t%0,%4", op);
13118 output_asm_insn ("l\t%0,%1", op);
13120 else if (!flag_pic)
13122 op[6] = gen_label_rtx ();
13124 output_asm_insn ("st\t%0,%1", op);
13125 output_asm_insn ("bras\t%2,%l6", op);
13126 output_asm_insn (".long\t%4", op);
13127 output_asm_insn (".long\t%3", op);
13128 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13129 output_asm_insn ("l\t%0,0(%2)", op);
13130 output_asm_insn ("l\t%2,4(%2)", op);
13131 output_asm_insn ("basr\t%0,%0", op);
13132 output_asm_insn ("l\t%0,%1", op);
13134 else
13136 op[5] = gen_label_rtx ();
13137 op[6] = gen_label_rtx ();
13139 output_asm_insn ("st\t%0,%1", op);
13140 output_asm_insn ("bras\t%2,%l6", op);
13141 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
13142 output_asm_insn (".long\t%4-%l5", op);
13143 output_asm_insn (".long\t%3-%l5", op);
13144 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
13145 output_asm_insn ("lr\t%0,%2", op);
13146 output_asm_insn ("a\t%0,0(%2)", op);
13147 output_asm_insn ("a\t%2,4(%2)", op);
13148 output_asm_insn ("basr\t%0,%0", op);
13149 output_asm_insn ("l\t%0,%1", op);
13153 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13154 into its SYMBOL_REF_FLAGS. */
13156 static void
13157 s390_encode_section_info (tree decl, rtx rtl, int first)
13159 default_encode_section_info (decl, rtl, first);
13161 if (TREE_CODE (decl) == VAR_DECL)
13163 /* Store the alignment to be able to check if we can use
13164 a larl/load-relative instruction. We only handle the cases
13165 that can go wrong (i.e. no FUNC_DECLs). */
13166 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13167 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13168 else if (DECL_ALIGN (decl) % 32)
13169 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13170 else if (DECL_ALIGN (decl) % 64)
13171 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13174 /* Literal pool references don't have a decl so they are handled
13175 differently here. We rely on the information in the MEM_ALIGN
13176 entry to decide upon the alignment. */
13177 if (MEM_P (rtl)
13178 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13179 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13181 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13182 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13183 else if (MEM_ALIGN (rtl) % 32)
13184 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13185 else if (MEM_ALIGN (rtl) % 64)
13186 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13190 /* Output thunk to FILE that implements a C++ virtual function call (with
13191 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13192 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13193 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13194 relative to the resulting this pointer. */
13196 static void
13197 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13198 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13199 tree function)
13201 rtx op[10];
13202 int nonlocal = 0;
13204 /* Make sure unwind info is emitted for the thunk if needed. */
13205 final_start_function (emit_barrier (), file, 1);
13207 /* Operand 0 is the target function. */
13208 op[0] = XEXP (DECL_RTL (function), 0);
13209 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13211 nonlocal = 1;
13212 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13213 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13214 op[0] = gen_rtx_CONST (Pmode, op[0]);
13217 /* Operand 1 is the 'this' pointer. */
13218 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13219 op[1] = gen_rtx_REG (Pmode, 3);
13220 else
13221 op[1] = gen_rtx_REG (Pmode, 2);
13223 /* Operand 2 is the delta. */
13224 op[2] = GEN_INT (delta);
13226 /* Operand 3 is the vcall_offset. */
13227 op[3] = GEN_INT (vcall_offset);
13229 /* Operand 4 is the temporary register. */
13230 op[4] = gen_rtx_REG (Pmode, 1);
13232 /* Operands 5 to 8 can be used as labels. */
13233 op[5] = NULL_RTX;
13234 op[6] = NULL_RTX;
13235 op[7] = NULL_RTX;
13236 op[8] = NULL_RTX;
13238 /* Operand 9 can be used for temporary register. */
13239 op[9] = NULL_RTX;
13241 /* Generate code. */
13242 if (TARGET_64BIT)
13244 /* Setup literal pool pointer if required. */
13245 if ((!DISP_IN_RANGE (delta)
13246 && !CONST_OK_FOR_K (delta)
13247 && !CONST_OK_FOR_Os (delta))
13248 || (!DISP_IN_RANGE (vcall_offset)
13249 && !CONST_OK_FOR_K (vcall_offset)
13250 && !CONST_OK_FOR_Os (vcall_offset)))
13252 op[5] = gen_label_rtx ();
13253 output_asm_insn ("larl\t%4,%5", op);
13256 /* Add DELTA to this pointer. */
13257 if (delta)
13259 if (CONST_OK_FOR_J (delta))
13260 output_asm_insn ("la\t%1,%2(%1)", op);
13261 else if (DISP_IN_RANGE (delta))
13262 output_asm_insn ("lay\t%1,%2(%1)", op);
13263 else if (CONST_OK_FOR_K (delta))
13264 output_asm_insn ("aghi\t%1,%2", op);
13265 else if (CONST_OK_FOR_Os (delta))
13266 output_asm_insn ("agfi\t%1,%2", op);
13267 else
13269 op[6] = gen_label_rtx ();
13270 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13274 /* Perform vcall adjustment. */
13275 if (vcall_offset)
13277 if (DISP_IN_RANGE (vcall_offset))
13279 output_asm_insn ("lg\t%4,0(%1)", op);
13280 output_asm_insn ("ag\t%1,%3(%4)", op);
13282 else if (CONST_OK_FOR_K (vcall_offset))
13284 output_asm_insn ("lghi\t%4,%3", op);
13285 output_asm_insn ("ag\t%4,0(%1)", op);
13286 output_asm_insn ("ag\t%1,0(%4)", op);
13288 else if (CONST_OK_FOR_Os (vcall_offset))
13290 output_asm_insn ("lgfi\t%4,%3", op);
13291 output_asm_insn ("ag\t%4,0(%1)", op);
13292 output_asm_insn ("ag\t%1,0(%4)", op);
13294 else
13296 op[7] = gen_label_rtx ();
13297 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13298 output_asm_insn ("ag\t%4,0(%1)", op);
13299 output_asm_insn ("ag\t%1,0(%4)", op);
13303 /* Jump to target. */
13304 output_asm_insn ("jg\t%0", op);
13306 /* Output literal pool if required. */
13307 if (op[5])
13309 output_asm_insn (".align\t4", op);
13310 targetm.asm_out.internal_label (file, "L",
13311 CODE_LABEL_NUMBER (op[5]));
13313 if (op[6])
13315 targetm.asm_out.internal_label (file, "L",
13316 CODE_LABEL_NUMBER (op[6]));
13317 output_asm_insn (".long\t%2", op);
13319 if (op[7])
13321 targetm.asm_out.internal_label (file, "L",
13322 CODE_LABEL_NUMBER (op[7]));
13323 output_asm_insn (".long\t%3", op);
13326 else
13328 /* Setup base pointer if required. */
13329 if (!vcall_offset
13330 || (!DISP_IN_RANGE (delta)
13331 && !CONST_OK_FOR_K (delta)
13332 && !CONST_OK_FOR_Os (delta))
13333 || (!DISP_IN_RANGE (delta)
13334 && !CONST_OK_FOR_K (vcall_offset)
13335 && !CONST_OK_FOR_Os (vcall_offset)))
13337 op[5] = gen_label_rtx ();
13338 output_asm_insn ("basr\t%4,0", op);
13339 targetm.asm_out.internal_label (file, "L",
13340 CODE_LABEL_NUMBER (op[5]));
13343 /* Add DELTA to this pointer. */
13344 if (delta)
13346 if (CONST_OK_FOR_J (delta))
13347 output_asm_insn ("la\t%1,%2(%1)", op);
13348 else if (DISP_IN_RANGE (delta))
13349 output_asm_insn ("lay\t%1,%2(%1)", op);
13350 else if (CONST_OK_FOR_K (delta))
13351 output_asm_insn ("ahi\t%1,%2", op);
13352 else if (CONST_OK_FOR_Os (delta))
13353 output_asm_insn ("afi\t%1,%2", op);
13354 else
13356 op[6] = gen_label_rtx ();
13357 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13361 /* Perform vcall adjustment. */
13362 if (vcall_offset)
13364 if (CONST_OK_FOR_J (vcall_offset))
13366 output_asm_insn ("l\t%4,0(%1)", op);
13367 output_asm_insn ("a\t%1,%3(%4)", op);
13369 else if (DISP_IN_RANGE (vcall_offset))
13371 output_asm_insn ("l\t%4,0(%1)", op);
13372 output_asm_insn ("ay\t%1,%3(%4)", op);
13374 else if (CONST_OK_FOR_K (vcall_offset))
13376 output_asm_insn ("lhi\t%4,%3", op);
13377 output_asm_insn ("a\t%4,0(%1)", op);
13378 output_asm_insn ("a\t%1,0(%4)", op);
13380 else if (CONST_OK_FOR_Os (vcall_offset))
13382 output_asm_insn ("iilf\t%4,%3", op);
13383 output_asm_insn ("a\t%4,0(%1)", op);
13384 output_asm_insn ("a\t%1,0(%4)", op);
13386 else
13388 op[7] = gen_label_rtx ();
13389 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13390 output_asm_insn ("a\t%4,0(%1)", op);
13391 output_asm_insn ("a\t%1,0(%4)", op);
13394 /* We had to clobber the base pointer register.
13395 Re-setup the base pointer (with a different base). */
13396 op[5] = gen_label_rtx ();
13397 output_asm_insn ("basr\t%4,0", op);
13398 targetm.asm_out.internal_label (file, "L",
13399 CODE_LABEL_NUMBER (op[5]));
13402 /* Jump to target. */
13403 op[8] = gen_label_rtx ();
13405 if (!flag_pic)
13406 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13407 else if (!nonlocal)
13408 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13409 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13410 else if (flag_pic == 1)
13412 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13413 output_asm_insn ("l\t%4,%0(%4)", op);
13415 else if (flag_pic == 2)
13417 op[9] = gen_rtx_REG (Pmode, 0);
13418 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13419 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13420 output_asm_insn ("ar\t%4,%9", op);
13421 output_asm_insn ("l\t%4,0(%4)", op);
13424 output_asm_insn ("br\t%4", op);
13426 /* Output literal pool. */
13427 output_asm_insn (".align\t4", op);
13429 if (nonlocal && flag_pic == 2)
13430 output_asm_insn (".long\t%0", op);
13431 if (nonlocal)
13433 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13434 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13437 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13438 if (!flag_pic)
13439 output_asm_insn (".long\t%0", op);
13440 else
13441 output_asm_insn (".long\t%0-%5", op);
13443 if (op[6])
13445 targetm.asm_out.internal_label (file, "L",
13446 CODE_LABEL_NUMBER (op[6]));
13447 output_asm_insn (".long\t%2", op);
13449 if (op[7])
13451 targetm.asm_out.internal_label (file, "L",
13452 CODE_LABEL_NUMBER (op[7]));
13453 output_asm_insn (".long\t%3", op);
13456 final_end_function ();
13459 static bool
13460 s390_valid_pointer_mode (scalar_int_mode mode)
13462 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13465 /* Checks whether the given CALL_EXPR would use a caller
13466 saved register. This is used to decide whether sibling call
13467 optimization could be performed on the respective function
13468 call. */
13470 static bool
13471 s390_call_saved_register_used (tree call_expr)
13473 CUMULATIVE_ARGS cum_v;
13474 cumulative_args_t cum;
13475 tree parameter;
13476 machine_mode mode;
13477 tree type;
13478 rtx parm_rtx;
13479 int reg, i;
13481 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13482 cum = pack_cumulative_args (&cum_v);
13484 for (i = 0; i < call_expr_nargs (call_expr); i++)
13486 parameter = CALL_EXPR_ARG (call_expr, i);
13487 gcc_assert (parameter);
13489 /* For an undeclared variable passed as parameter we will get
13490 an ERROR_MARK node here. */
13491 if (TREE_CODE (parameter) == ERROR_MARK)
13492 return true;
13494 type = TREE_TYPE (parameter);
13495 gcc_assert (type);
13497 mode = TYPE_MODE (type);
13498 gcc_assert (mode);
13500 /* We assume that in the target function all parameters are
13501 named. This only has an impact on vector argument register
13502 usage none of which is call-saved. */
13503 if (pass_by_reference (&cum_v, mode, type, true))
13505 mode = Pmode;
13506 type = build_pointer_type (type);
13509 parm_rtx = s390_function_arg (cum, mode, type, true);
13511 s390_function_arg_advance (cum, mode, type, true);
13513 if (!parm_rtx)
13514 continue;
13516 if (REG_P (parm_rtx))
13518 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13519 if (!call_used_regs[reg + REGNO (parm_rtx)])
13520 return true;
13523 if (GET_CODE (parm_rtx) == PARALLEL)
13525 int i;
13527 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13529 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13531 gcc_assert (REG_P (r));
13533 for (reg = 0; reg < REG_NREGS (r); reg++)
13534 if (!call_used_regs[reg + REGNO (r)])
13535 return true;
13540 return false;
13543 /* Return true if the given call expression can be
13544 turned into a sibling call.
13545 DECL holds the declaration of the function to be called whereas
13546 EXP is the call expression itself. */
13548 static bool
13549 s390_function_ok_for_sibcall (tree decl, tree exp)
13551 /* The TPF epilogue uses register 1. */
13552 if (TARGET_TPF_PROFILING)
13553 return false;
13555 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13556 which would have to be restored before the sibcall. */
13557 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13558 return false;
13560 /* Register 6 on s390 is available as an argument register but unfortunately
13561 "caller saved". This makes functions needing this register for arguments
13562 not suitable for sibcalls. */
13563 return !s390_call_saved_register_used (exp);
13566 /* Return the fixed registers used for condition codes. */
13568 static bool
13569 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13571 *p1 = CC_REGNUM;
13572 *p2 = INVALID_REGNUM;
13574 return true;
13577 /* This function is used by the call expanders of the machine description.
13578 It emits the call insn itself together with the necessary operations
13579 to adjust the target address and returns the emitted insn.
13580 ADDR_LOCATION is the target address rtx
13581 TLS_CALL the location of the thread-local symbol
13582 RESULT_REG the register where the result of the call should be stored
13583 RETADDR_REG the register where the return address should be stored
13584 If this parameter is NULL_RTX the call is considered
13585 to be a sibling call. */
13587 rtx_insn *
13588 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13589 rtx retaddr_reg)
13591 bool plt_call = false;
13592 rtx_insn *insn;
13593 rtx call;
13594 rtx clobber;
13595 rtvec vec;
13597 /* Direct function calls need special treatment. */
13598 if (GET_CODE (addr_location) == SYMBOL_REF)
13600 /* When calling a global routine in PIC mode, we must
13601 replace the symbol itself with the PLT stub. */
13602 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13604 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13606 addr_location = gen_rtx_UNSPEC (Pmode,
13607 gen_rtvec (1, addr_location),
13608 UNSPEC_PLT);
13609 addr_location = gen_rtx_CONST (Pmode, addr_location);
13610 plt_call = true;
13612 else
13613 /* For -fpic code the PLT entries might use r12 which is
13614 call-saved. Therefore we cannot do a sibcall when
13615 calling directly using a symbol ref. When reaching
13616 this point we decided (in s390_function_ok_for_sibcall)
13617 to do a sibcall for a function pointer but one of the
13618 optimizers was able to get rid of the function pointer
13619 by propagating the symbol ref into the call. This
13620 optimization is illegal for S/390 so we turn the direct
13621 call into a indirect call again. */
13622 addr_location = force_reg (Pmode, addr_location);
13625 /* Unless we can use the bras(l) insn, force the
13626 routine address into a register. */
13627 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13629 if (flag_pic)
13630 addr_location = legitimize_pic_address (addr_location, 0);
13631 else
13632 addr_location = force_reg (Pmode, addr_location);
13636 /* If it is already an indirect call or the code above moved the
13637 SYMBOL_REF to somewhere else make sure the address can be found in
13638 register 1. */
13639 if (retaddr_reg == NULL_RTX
13640 && GET_CODE (addr_location) != SYMBOL_REF
13641 && !plt_call)
13643 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13644 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13647 addr_location = gen_rtx_MEM (QImode, addr_location);
13648 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13650 if (result_reg != NULL_RTX)
13651 call = gen_rtx_SET (result_reg, call);
13653 if (retaddr_reg != NULL_RTX)
13655 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13657 if (tls_call != NULL_RTX)
13658 vec = gen_rtvec (3, call, clobber,
13659 gen_rtx_USE (VOIDmode, tls_call));
13660 else
13661 vec = gen_rtvec (2, call, clobber);
13663 call = gen_rtx_PARALLEL (VOIDmode, vec);
13666 insn = emit_call_insn (call);
13668 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13669 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13671 /* s390_function_ok_for_sibcall should
13672 have denied sibcalls in this case. */
13673 gcc_assert (retaddr_reg != NULL_RTX);
13674 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13676 return insn;
13679 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13681 static void
13682 s390_conditional_register_usage (void)
13684 int i;
13686 if (flag_pic)
13688 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13689 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13691 if (TARGET_CPU_ZARCH)
13693 fixed_regs[BASE_REGNUM] = 0;
13694 call_used_regs[BASE_REGNUM] = 0;
13695 fixed_regs[RETURN_REGNUM] = 0;
13696 call_used_regs[RETURN_REGNUM] = 0;
13698 if (TARGET_64BIT)
13700 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13701 call_used_regs[i] = call_really_used_regs[i] = 0;
13703 else
13705 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13706 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13709 if (TARGET_SOFT_FLOAT)
13711 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13712 call_used_regs[i] = fixed_regs[i] = 1;
13715 /* Disable v16 - v31 for non-vector target. */
13716 if (!TARGET_VX)
13718 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13719 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13723 /* Corresponding function to eh_return expander. */
13725 static GTY(()) rtx s390_tpf_eh_return_symbol;
13726 void
13727 s390_emit_tpf_eh_return (rtx target)
13729 rtx_insn *insn;
13730 rtx reg, orig_ra;
13732 if (!s390_tpf_eh_return_symbol)
13733 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13735 reg = gen_rtx_REG (Pmode, 2);
13736 orig_ra = gen_rtx_REG (Pmode, 3);
13738 emit_move_insn (reg, target);
13739 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13740 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13741 gen_rtx_REG (Pmode, RETURN_REGNUM));
13742 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13743 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13745 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13748 /* Rework the prologue/epilogue to avoid saving/restoring
13749 registers unnecessarily. */
13751 static void
13752 s390_optimize_prologue (void)
13754 rtx_insn *insn, *new_insn, *next_insn;
13756 /* Do a final recompute of the frame-related data. */
13757 s390_optimize_register_info ();
13759 /* If all special registers are in fact used, there's nothing we
13760 can do, so no point in walking the insn list. */
13762 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13763 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13764 && (TARGET_CPU_ZARCH
13765 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13766 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13767 return;
13769 /* Search for prologue/epilogue insns and replace them. */
13771 for (insn = get_insns (); insn; insn = next_insn)
13773 int first, last, off;
13774 rtx set, base, offset;
13775 rtx pat;
13777 next_insn = NEXT_INSN (insn);
13779 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13780 continue;
13782 pat = PATTERN (insn);
13784 /* Remove ldgr/lgdr instructions used for saving and restore
13785 GPRs if possible. */
13786 if (TARGET_Z10)
13788 rtx tmp_pat = pat;
13790 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13791 tmp_pat = XVECEXP (pat, 0, 0);
13793 if (GET_CODE (tmp_pat) == SET
13794 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13795 && REG_P (SET_SRC (tmp_pat))
13796 && REG_P (SET_DEST (tmp_pat)))
13798 int src_regno = REGNO (SET_SRC (tmp_pat));
13799 int dest_regno = REGNO (SET_DEST (tmp_pat));
13800 int gpr_regno;
13801 int fpr_regno;
13803 if (!((GENERAL_REGNO_P (src_regno)
13804 && FP_REGNO_P (dest_regno))
13805 || (FP_REGNO_P (src_regno)
13806 && GENERAL_REGNO_P (dest_regno))))
13807 continue;
13809 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13810 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13812 /* GPR must be call-saved, FPR must be call-clobbered. */
13813 if (!call_really_used_regs[fpr_regno]
13814 || call_really_used_regs[gpr_regno])
13815 continue;
13817 /* It must not happen that what we once saved in an FPR now
13818 needs a stack slot. */
13819 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13821 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13823 remove_insn (insn);
13824 continue;
13829 if (GET_CODE (pat) == PARALLEL
13830 && store_multiple_operation (pat, VOIDmode))
13832 set = XVECEXP (pat, 0, 0);
13833 first = REGNO (SET_SRC (set));
13834 last = first + XVECLEN (pat, 0) - 1;
13835 offset = const0_rtx;
13836 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13837 off = INTVAL (offset);
13839 if (GET_CODE (base) != REG || off < 0)
13840 continue;
13841 if (cfun_frame_layout.first_save_gpr != -1
13842 && (cfun_frame_layout.first_save_gpr < first
13843 || cfun_frame_layout.last_save_gpr > last))
13844 continue;
13845 if (REGNO (base) != STACK_POINTER_REGNUM
13846 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13847 continue;
13848 if (first > BASE_REGNUM || last < BASE_REGNUM)
13849 continue;
13851 if (cfun_frame_layout.first_save_gpr != -1)
13853 rtx s_pat = save_gprs (base,
13854 off + (cfun_frame_layout.first_save_gpr
13855 - first) * UNITS_PER_LONG,
13856 cfun_frame_layout.first_save_gpr,
13857 cfun_frame_layout.last_save_gpr);
13858 new_insn = emit_insn_before (s_pat, insn);
13859 INSN_ADDRESSES_NEW (new_insn, -1);
13862 remove_insn (insn);
13863 continue;
13866 if (cfun_frame_layout.first_save_gpr == -1
13867 && GET_CODE (pat) == SET
13868 && GENERAL_REG_P (SET_SRC (pat))
13869 && GET_CODE (SET_DEST (pat)) == MEM)
13871 set = pat;
13872 first = REGNO (SET_SRC (set));
13873 offset = const0_rtx;
13874 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13875 off = INTVAL (offset);
13877 if (GET_CODE (base) != REG || off < 0)
13878 continue;
13879 if (REGNO (base) != STACK_POINTER_REGNUM
13880 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13881 continue;
13883 remove_insn (insn);
13884 continue;
13887 if (GET_CODE (pat) == PARALLEL
13888 && load_multiple_operation (pat, VOIDmode))
13890 set = XVECEXP (pat, 0, 0);
13891 first = REGNO (SET_DEST (set));
13892 last = first + XVECLEN (pat, 0) - 1;
13893 offset = const0_rtx;
13894 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13895 off = INTVAL (offset);
13897 if (GET_CODE (base) != REG || off < 0)
13898 continue;
13900 if (cfun_frame_layout.first_restore_gpr != -1
13901 && (cfun_frame_layout.first_restore_gpr < first
13902 || cfun_frame_layout.last_restore_gpr > last))
13903 continue;
13904 if (REGNO (base) != STACK_POINTER_REGNUM
13905 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13906 continue;
13907 if (first > BASE_REGNUM || last < BASE_REGNUM)
13908 continue;
13910 if (cfun_frame_layout.first_restore_gpr != -1)
13912 rtx rpat = restore_gprs (base,
13913 off + (cfun_frame_layout.first_restore_gpr
13914 - first) * UNITS_PER_LONG,
13915 cfun_frame_layout.first_restore_gpr,
13916 cfun_frame_layout.last_restore_gpr);
13918 /* Remove REG_CFA_RESTOREs for registers that we no
13919 longer need to save. */
13920 REG_NOTES (rpat) = REG_NOTES (insn);
13921 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13922 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13923 && ((int) REGNO (XEXP (*ptr, 0))
13924 < cfun_frame_layout.first_restore_gpr))
13925 *ptr = XEXP (*ptr, 1);
13926 else
13927 ptr = &XEXP (*ptr, 1);
13928 new_insn = emit_insn_before (rpat, insn);
13929 RTX_FRAME_RELATED_P (new_insn) = 1;
13930 INSN_ADDRESSES_NEW (new_insn, -1);
13933 remove_insn (insn);
13934 continue;
13937 if (cfun_frame_layout.first_restore_gpr == -1
13938 && GET_CODE (pat) == SET
13939 && GENERAL_REG_P (SET_DEST (pat))
13940 && GET_CODE (SET_SRC (pat)) == MEM)
13942 set = pat;
13943 first = REGNO (SET_DEST (set));
13944 offset = const0_rtx;
13945 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13946 off = INTVAL (offset);
13948 if (GET_CODE (base) != REG || off < 0)
13949 continue;
13951 if (REGNO (base) != STACK_POINTER_REGNUM
13952 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13953 continue;
13955 remove_insn (insn);
13956 continue;
13961 /* On z10 and later the dynamic branch prediction must see the
13962 backward jump within a certain windows. If not it falls back to
13963 the static prediction. This function rearranges the loop backward
13964 branch in a way which makes the static prediction always correct.
13965 The function returns true if it added an instruction. */
13966 static bool
13967 s390_fix_long_loop_prediction (rtx_insn *insn)
13969 rtx set = single_set (insn);
13970 rtx code_label, label_ref;
13971 rtx_insn *uncond_jump;
13972 rtx_insn *cur_insn;
13973 rtx tmp;
13974 int distance;
13976 /* This will exclude branch on count and branch on index patterns
13977 since these are correctly statically predicted. */
13978 if (!set
13979 || SET_DEST (set) != pc_rtx
13980 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13981 return false;
13983 /* Skip conditional returns. */
13984 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13985 && XEXP (SET_SRC (set), 2) == pc_rtx)
13986 return false;
13988 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13989 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13991 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13993 code_label = XEXP (label_ref, 0);
13995 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13996 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13997 || (INSN_ADDRESSES (INSN_UID (insn))
13998 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13999 return false;
14001 for (distance = 0, cur_insn = PREV_INSN (insn);
14002 distance < PREDICT_DISTANCE - 6;
14003 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14004 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14005 return false;
14007 rtx_code_label *new_label = gen_label_rtx ();
14008 uncond_jump = emit_jump_insn_after (
14009 gen_rtx_SET (pc_rtx,
14010 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14011 insn);
14012 emit_label_after (new_label, uncond_jump);
14014 tmp = XEXP (SET_SRC (set), 1);
14015 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14016 XEXP (SET_SRC (set), 2) = tmp;
14017 INSN_CODE (insn) = -1;
14019 XEXP (label_ref, 0) = new_label;
14020 JUMP_LABEL (insn) = new_label;
14021 JUMP_LABEL (uncond_jump) = code_label;
14023 return true;
14026 /* Returns 1 if INSN reads the value of REG for purposes not related
14027 to addressing of memory, and 0 otherwise. */
14028 static int
14029 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14031 return reg_referenced_p (reg, PATTERN (insn))
14032 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14035 /* Starting from INSN find_cond_jump looks downwards in the insn
14036 stream for a single jump insn which is the last user of the
14037 condition code set in INSN. */
14038 static rtx_insn *
14039 find_cond_jump (rtx_insn *insn)
14041 for (; insn; insn = NEXT_INSN (insn))
14043 rtx ite, cc;
14045 if (LABEL_P (insn))
14046 break;
14048 if (!JUMP_P (insn))
14050 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14051 break;
14052 continue;
14055 /* This will be triggered by a return. */
14056 if (GET_CODE (PATTERN (insn)) != SET)
14057 break;
14059 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14060 ite = SET_SRC (PATTERN (insn));
14062 if (GET_CODE (ite) != IF_THEN_ELSE)
14063 break;
14065 cc = XEXP (XEXP (ite, 0), 0);
14066 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14067 break;
14069 if (find_reg_note (insn, REG_DEAD, cc))
14070 return insn;
14071 break;
14074 return NULL;
14077 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14078 the semantics does not change. If NULL_RTX is passed as COND the
14079 function tries to find the conditional jump starting with INSN. */
14080 static void
14081 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14083 rtx tmp = *op0;
14085 if (cond == NULL_RTX)
14087 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14088 rtx set = jump ? single_set (jump) : NULL_RTX;
14090 if (set == NULL_RTX)
14091 return;
14093 cond = XEXP (SET_SRC (set), 0);
14096 *op0 = *op1;
14097 *op1 = tmp;
14098 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14101 /* On z10, instructions of the compare-and-branch family have the
14102 property to access the register occurring as second operand with
14103 its bits complemented. If such a compare is grouped with a second
14104 instruction that accesses the same register non-complemented, and
14105 if that register's value is delivered via a bypass, then the
14106 pipeline recycles, thereby causing significant performance decline.
14107 This function locates such situations and exchanges the two
14108 operands of the compare. The function return true whenever it
14109 added an insn. */
14110 static bool
14111 s390_z10_optimize_cmp (rtx_insn *insn)
14113 rtx_insn *prev_insn, *next_insn;
14114 bool insn_added_p = false;
14115 rtx cond, *op0, *op1;
14117 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14119 /* Handle compare and branch and branch on count
14120 instructions. */
14121 rtx pattern = single_set (insn);
14123 if (!pattern
14124 || SET_DEST (pattern) != pc_rtx
14125 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14126 return false;
14128 cond = XEXP (SET_SRC (pattern), 0);
14129 op0 = &XEXP (cond, 0);
14130 op1 = &XEXP (cond, 1);
14132 else if (GET_CODE (PATTERN (insn)) == SET)
14134 rtx src, dest;
14136 /* Handle normal compare instructions. */
14137 src = SET_SRC (PATTERN (insn));
14138 dest = SET_DEST (PATTERN (insn));
14140 if (!REG_P (dest)
14141 || !CC_REGNO_P (REGNO (dest))
14142 || GET_CODE (src) != COMPARE)
14143 return false;
14145 /* s390_swap_cmp will try to find the conditional
14146 jump when passing NULL_RTX as condition. */
14147 cond = NULL_RTX;
14148 op0 = &XEXP (src, 0);
14149 op1 = &XEXP (src, 1);
14151 else
14152 return false;
14154 if (!REG_P (*op0) || !REG_P (*op1))
14155 return false;
14157 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14158 return false;
14160 /* Swap the COMPARE arguments and its mask if there is a
14161 conflicting access in the previous insn. */
14162 prev_insn = prev_active_insn (insn);
14163 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14164 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14165 s390_swap_cmp (cond, op0, op1, insn);
14167 /* Check if there is a conflict with the next insn. If there
14168 was no conflict with the previous insn, then swap the
14169 COMPARE arguments and its mask. If we already swapped
14170 the operands, or if swapping them would cause a conflict
14171 with the previous insn, issue a NOP after the COMPARE in
14172 order to separate the two instuctions. */
14173 next_insn = next_active_insn (insn);
14174 if (next_insn != NULL_RTX && INSN_P (next_insn)
14175 && s390_non_addr_reg_read_p (*op1, next_insn))
14177 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14178 && s390_non_addr_reg_read_p (*op0, prev_insn))
14180 if (REGNO (*op1) == 0)
14181 emit_insn_after (gen_nop1 (), insn);
14182 else
14183 emit_insn_after (gen_nop (), insn);
14184 insn_added_p = true;
14186 else
14187 s390_swap_cmp (cond, op0, op1, insn);
14189 return insn_added_p;
14192 /* Number of INSNs to be scanned backward in the last BB of the loop
14193 and forward in the first BB of the loop. This usually should be a
14194 bit more than the number of INSNs which could go into one
14195 group. */
14196 #define S390_OSC_SCAN_INSN_NUM 5
14198 /* Scan LOOP for static OSC collisions and return true if a osc_break
14199 should be issued for this loop. */
14200 static bool
14201 s390_adjust_loop_scan_osc (struct loop* loop)
14204 HARD_REG_SET modregs, newregs;
14205 rtx_insn *insn, *store_insn = NULL;
14206 rtx set;
14207 struct s390_address addr_store, addr_load;
14208 subrtx_iterator::array_type array;
14209 int insn_count;
14211 CLEAR_HARD_REG_SET (modregs);
14213 insn_count = 0;
14214 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14216 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14217 continue;
14219 insn_count++;
14220 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14221 return false;
14223 find_all_hard_reg_sets (insn, &newregs, true);
14224 IOR_HARD_REG_SET (modregs, newregs);
14226 set = single_set (insn);
14227 if (!set)
14228 continue;
14230 if (MEM_P (SET_DEST (set))
14231 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14233 store_insn = insn;
14234 break;
14238 if (store_insn == NULL_RTX)
14239 return false;
14241 insn_count = 0;
14242 FOR_BB_INSNS (loop->header, insn)
14244 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14245 continue;
14247 if (insn == store_insn)
14248 return false;
14250 insn_count++;
14251 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14252 return false;
14254 find_all_hard_reg_sets (insn, &newregs, true);
14255 IOR_HARD_REG_SET (modregs, newregs);
14257 set = single_set (insn);
14258 if (!set)
14259 continue;
14261 /* An intermediate store disrupts static OSC checking
14262 anyway. */
14263 if (MEM_P (SET_DEST (set))
14264 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14265 return false;
14267 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14268 if (MEM_P (*iter)
14269 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14270 && rtx_equal_p (addr_load.base, addr_store.base)
14271 && rtx_equal_p (addr_load.indx, addr_store.indx)
14272 && rtx_equal_p (addr_load.disp, addr_store.disp))
14274 if ((addr_load.base != NULL_RTX
14275 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14276 || (addr_load.indx != NULL_RTX
14277 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14278 return true;
14281 return false;
14284 /* Look for adjustments which can be done on simple innermost
14285 loops. */
14286 static void
14287 s390_adjust_loops ()
14289 struct loop *loop = NULL;
14291 df_analyze ();
14292 compute_bb_for_insn ();
14294 /* Find the loops. */
14295 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14297 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14299 if (dump_file)
14301 flow_loop_dump (loop, dump_file, NULL, 0);
14302 fprintf (dump_file, ";; OSC loop scan Loop: ");
14304 if (loop->latch == NULL
14305 || pc_set (BB_END (loop->latch)) == NULL_RTX
14306 || !s390_adjust_loop_scan_osc (loop))
14308 if (dump_file)
14310 if (loop->latch == NULL)
14311 fprintf (dump_file, " muliple backward jumps\n");
14312 else
14314 fprintf (dump_file, " header insn: %d latch insn: %d ",
14315 INSN_UID (BB_HEAD (loop->header)),
14316 INSN_UID (BB_END (loop->latch)));
14317 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14318 fprintf (dump_file, " loop does not end with jump\n");
14319 else
14320 fprintf (dump_file, " not instrumented\n");
14324 else
14326 rtx_insn *new_insn;
14328 if (dump_file)
14329 fprintf (dump_file, " adding OSC break insn: ");
14330 new_insn = emit_insn_before (gen_osc_break (),
14331 BB_END (loop->latch));
14332 INSN_ADDRESSES_NEW (new_insn, -1);
14336 loop_optimizer_finalize ();
14338 df_finish_pass (false);
14341 /* Perform machine-dependent processing. */
14343 static void
14344 s390_reorg (void)
14346 bool pool_overflow = false;
14347 int hw_before, hw_after;
14349 if (s390_tune == PROCESSOR_2964_Z13)
14350 s390_adjust_loops ();
14352 /* Make sure all splits have been performed; splits after
14353 machine_dependent_reorg might confuse insn length counts. */
14354 split_all_insns_noflow ();
14356 /* Install the main literal pool and the associated base
14357 register load insns.
14359 In addition, there are two problematic situations we need
14360 to correct:
14362 - the literal pool might be > 4096 bytes in size, so that
14363 some of its elements cannot be directly accessed
14365 - a branch target might be > 64K away from the branch, so that
14366 it is not possible to use a PC-relative instruction.
14368 To fix those, we split the single literal pool into multiple
14369 pool chunks, reloading the pool base register at various
14370 points throughout the function to ensure it always points to
14371 the pool chunk the following code expects, and / or replace
14372 PC-relative branches by absolute branches.
14374 However, the two problems are interdependent: splitting the
14375 literal pool can move a branch further away from its target,
14376 causing the 64K limit to overflow, and on the other hand,
14377 replacing a PC-relative branch by an absolute branch means
14378 we need to put the branch target address into the literal
14379 pool, possibly causing it to overflow.
14381 So, we loop trying to fix up both problems until we manage
14382 to satisfy both conditions at the same time. Note that the
14383 loop is guaranteed to terminate as every pass of the loop
14384 strictly decreases the total number of PC-relative branches
14385 in the function. (This is not completely true as there
14386 might be branch-over-pool insns introduced by chunkify_start.
14387 Those never need to be split however.) */
14389 for (;;)
14391 struct constant_pool *pool = NULL;
14393 /* Collect the literal pool. */
14394 if (!pool_overflow)
14396 pool = s390_mainpool_start ();
14397 if (!pool)
14398 pool_overflow = true;
14401 /* If literal pool overflowed, start to chunkify it. */
14402 if (pool_overflow)
14403 pool = s390_chunkify_start ();
14405 /* Split out-of-range branches. If this has created new
14406 literal pool entries, cancel current chunk list and
14407 recompute it. zSeries machines have large branch
14408 instructions, so we never need to split a branch. */
14409 if (!TARGET_CPU_ZARCH && s390_split_branches ())
14411 if (pool_overflow)
14412 s390_chunkify_cancel (pool);
14413 else
14414 s390_mainpool_cancel (pool);
14416 continue;
14419 /* If we made it up to here, both conditions are satisfied.
14420 Finish up literal pool related changes. */
14421 if (pool_overflow)
14422 s390_chunkify_finish (pool);
14423 else
14424 s390_mainpool_finish (pool);
14426 /* We're done splitting branches. */
14427 cfun->machine->split_branches_pending_p = false;
14428 break;
14431 /* Generate out-of-pool execute target insns. */
14432 if (TARGET_CPU_ZARCH)
14434 rtx_insn *insn, *target;
14435 rtx label;
14437 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14439 label = s390_execute_label (insn);
14440 if (!label)
14441 continue;
14443 gcc_assert (label != const0_rtx);
14445 target = emit_label (XEXP (label, 0));
14446 INSN_ADDRESSES_NEW (target, -1);
14448 target = emit_insn (s390_execute_target (insn));
14449 INSN_ADDRESSES_NEW (target, -1);
14453 /* Try to optimize prologue and epilogue further. */
14454 s390_optimize_prologue ();
14456 /* Walk over the insns and do some >=z10 specific changes. */
14457 if (s390_tune >= PROCESSOR_2097_Z10)
14459 rtx_insn *insn;
14460 bool insn_added_p = false;
14462 /* The insn lengths and addresses have to be up to date for the
14463 following manipulations. */
14464 shorten_branches (get_insns ());
14466 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14468 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14469 continue;
14471 if (JUMP_P (insn))
14472 insn_added_p |= s390_fix_long_loop_prediction (insn);
14474 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14475 || GET_CODE (PATTERN (insn)) == SET)
14476 && s390_tune == PROCESSOR_2097_Z10)
14477 insn_added_p |= s390_z10_optimize_cmp (insn);
14480 /* Adjust branches if we added new instructions. */
14481 if (insn_added_p)
14482 shorten_branches (get_insns ());
14485 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14486 if (hw_after > 0)
14488 rtx_insn *insn;
14490 /* Insert NOPs for hotpatching. */
14491 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14492 /* Emit NOPs
14493 1. inside the area covered by debug information to allow setting
14494 breakpoints at the NOPs,
14495 2. before any insn which results in an asm instruction,
14496 3. before in-function labels to avoid jumping to the NOPs, for
14497 example as part of a loop,
14498 4. before any barrier in case the function is completely empty
14499 (__builtin_unreachable ()) and has neither internal labels nor
14500 active insns.
14502 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14503 break;
14504 /* Output a series of NOPs before the first active insn. */
14505 while (insn && hw_after > 0)
14507 if (hw_after >= 3 && TARGET_CPU_ZARCH)
14509 emit_insn_before (gen_nop_6_byte (), insn);
14510 hw_after -= 3;
14512 else if (hw_after >= 2)
14514 emit_insn_before (gen_nop_4_byte (), insn);
14515 hw_after -= 2;
14517 else
14519 emit_insn_before (gen_nop_2_byte (), insn);
14520 hw_after -= 1;
14526 /* Return true if INSN is a fp load insn writing register REGNO. */
14527 static inline bool
14528 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14530 rtx set;
14531 enum attr_type flag = s390_safe_attr_type (insn);
14533 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14534 return false;
14536 set = single_set (insn);
14538 if (set == NULL_RTX)
14539 return false;
14541 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14542 return false;
14544 if (REGNO (SET_DEST (set)) != regno)
14545 return false;
14547 return true;
14550 /* This value describes the distance to be avoided between an
14551 arithmetic fp instruction and an fp load writing the same register.
14552 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14553 fine but the exact value has to be avoided. Otherwise the FP
14554 pipeline will throw an exception causing a major penalty. */
14555 #define Z10_EARLYLOAD_DISTANCE 7
14557 /* Rearrange the ready list in order to avoid the situation described
14558 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14559 moved to the very end of the ready list. */
14560 static void
14561 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14563 unsigned int regno;
14564 int nready = *nready_p;
14565 rtx_insn *tmp;
14566 int i;
14567 rtx_insn *insn;
14568 rtx set;
14569 enum attr_type flag;
14570 int distance;
14572 /* Skip DISTANCE - 1 active insns. */
14573 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14574 distance > 0 && insn != NULL_RTX;
14575 distance--, insn = prev_active_insn (insn))
14576 if (CALL_P (insn) || JUMP_P (insn))
14577 return;
14579 if (insn == NULL_RTX)
14580 return;
14582 set = single_set (insn);
14584 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14585 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14586 return;
14588 flag = s390_safe_attr_type (insn);
14590 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14591 return;
14593 regno = REGNO (SET_DEST (set));
14594 i = nready - 1;
14596 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14597 i--;
14599 if (!i)
14600 return;
14602 tmp = ready[i];
14603 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14604 ready[0] = tmp;
14608 /* The s390_sched_state variable tracks the state of the current or
14609 the last instruction group.
14611 0,1,2 number of instructions scheduled in the current group
14612 3 the last group is complete - normal insns
14613 4 the last group was a cracked/expanded insn */
14615 static int s390_sched_state;
14617 #define S390_SCHED_STATE_NORMAL 3
14618 #define S390_SCHED_STATE_CRACKED 4
14620 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14621 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14622 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14623 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14625 static unsigned int
14626 s390_get_sched_attrmask (rtx_insn *insn)
14628 unsigned int mask = 0;
14630 switch (s390_tune)
14632 case PROCESSOR_2827_ZEC12:
14633 if (get_attr_zEC12_cracked (insn))
14634 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14635 if (get_attr_zEC12_expanded (insn))
14636 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14637 if (get_attr_zEC12_endgroup (insn))
14638 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14639 if (get_attr_zEC12_groupalone (insn))
14640 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14641 break;
14642 case PROCESSOR_2964_Z13:
14643 case PROCESSOR_3906_Z14:
14644 if (get_attr_z13_cracked (insn))
14645 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14646 if (get_attr_z13_expanded (insn))
14647 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14648 if (get_attr_z13_endgroup (insn))
14649 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14650 if (get_attr_z13_groupalone (insn))
14651 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14652 break;
14653 default:
14654 gcc_unreachable ();
14656 return mask;
14659 static unsigned int
14660 s390_get_unit_mask (rtx_insn *insn, int *units)
14662 unsigned int mask = 0;
14664 switch (s390_tune)
14666 case PROCESSOR_2964_Z13:
14667 case PROCESSOR_3906_Z14:
14668 *units = 3;
14669 if (get_attr_z13_unit_lsu (insn))
14670 mask |= 1 << 0;
14671 if (get_attr_z13_unit_fxu (insn))
14672 mask |= 1 << 1;
14673 if (get_attr_z13_unit_vfu (insn))
14674 mask |= 1 << 2;
14675 break;
14676 default:
14677 gcc_unreachable ();
14679 return mask;
14682 /* Return the scheduling score for INSN. The higher the score the
14683 better. The score is calculated from the OOO scheduling attributes
14684 of INSN and the scheduling state s390_sched_state. */
14685 static int
14686 s390_sched_score (rtx_insn *insn)
14688 unsigned int mask = s390_get_sched_attrmask (insn);
14689 int score = 0;
14691 switch (s390_sched_state)
14693 case 0:
14694 /* Try to put insns into the first slot which would otherwise
14695 break a group. */
14696 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14697 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14698 score += 5;
14699 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14700 score += 10;
14701 /* fallthrough */
14702 case 1:
14703 /* Prefer not cracked insns while trying to put together a
14704 group. */
14705 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14706 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14707 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14708 score += 10;
14709 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14710 score += 5;
14711 break;
14712 case 2:
14713 /* Prefer not cracked insns while trying to put together a
14714 group. */
14715 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14716 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14717 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14718 score += 10;
14719 /* Prefer endgroup insns in the last slot. */
14720 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14721 score += 10;
14722 break;
14723 case S390_SCHED_STATE_NORMAL:
14724 /* Prefer not cracked insns if the last was not cracked. */
14725 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14726 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14727 score += 5;
14728 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14729 score += 10;
14730 break;
14731 case S390_SCHED_STATE_CRACKED:
14732 /* Try to keep cracked insns together to prevent them from
14733 interrupting groups. */
14734 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14735 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14736 score += 5;
14737 break;
14740 if (s390_tune >= PROCESSOR_2964_Z13)
14742 int units, i;
14743 unsigned unit_mask, m = 1;
14745 unit_mask = s390_get_unit_mask (insn, &units);
14746 gcc_assert (units <= MAX_SCHED_UNITS);
14748 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14749 ago the last insn of this unit type got scheduled. This is
14750 supposed to help providing a proper instruction mix to the
14751 CPU. */
14752 for (i = 0; i < units; i++, m <<= 1)
14753 if (m & unit_mask)
14754 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14755 MAX_SCHED_MIX_DISTANCE);
14757 return score;
14760 /* This function is called via hook TARGET_SCHED_REORDER before
14761 issuing one insn from list READY which contains *NREADYP entries.
14762 For target z10 it reorders load instructions to avoid early load
14763 conflicts in the floating point pipeline */
14764 static int
14765 s390_sched_reorder (FILE *file, int verbose,
14766 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14768 if (s390_tune == PROCESSOR_2097_Z10
14769 && reload_completed
14770 && *nreadyp > 1)
14771 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14773 if (s390_tune >= PROCESSOR_2827_ZEC12
14774 && reload_completed
14775 && *nreadyp > 1)
14777 int i;
14778 int last_index = *nreadyp - 1;
14779 int max_index = -1;
14780 int max_score = -1;
14781 rtx_insn *tmp;
14783 /* Just move the insn with the highest score to the top (the
14784 end) of the list. A full sort is not needed since a conflict
14785 in the hazard recognition cannot happen. So the top insn in
14786 the ready list will always be taken. */
14787 for (i = last_index; i >= 0; i--)
14789 int score;
14791 if (recog_memoized (ready[i]) < 0)
14792 continue;
14794 score = s390_sched_score (ready[i]);
14795 if (score > max_score)
14797 max_score = score;
14798 max_index = i;
14802 if (max_index != -1)
14804 if (max_index != last_index)
14806 tmp = ready[max_index];
14807 ready[max_index] = ready[last_index];
14808 ready[last_index] = tmp;
14810 if (verbose > 5)
14811 fprintf (file,
14812 ";;\t\tBACKEND: move insn %d to the top of list\n",
14813 INSN_UID (ready[last_index]));
14815 else if (verbose > 5)
14816 fprintf (file,
14817 ";;\t\tBACKEND: best insn %d already on top\n",
14818 INSN_UID (ready[last_index]));
14821 if (verbose > 5)
14823 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14824 s390_sched_state);
14826 for (i = last_index; i >= 0; i--)
14828 unsigned int sched_mask;
14829 rtx_insn *insn = ready[i];
14831 if (recog_memoized (insn) < 0)
14832 continue;
14834 sched_mask = s390_get_sched_attrmask (insn);
14835 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14836 INSN_UID (insn),
14837 s390_sched_score (insn));
14838 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14839 ((M) & sched_mask) ? #ATTR : "");
14840 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14841 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14842 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14843 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14844 #undef PRINT_SCHED_ATTR
14845 if (s390_tune >= PROCESSOR_2964_Z13)
14847 unsigned int unit_mask, m = 1;
14848 int units, j;
14850 unit_mask = s390_get_unit_mask (insn, &units);
14851 fprintf (file, "(units:");
14852 for (j = 0; j < units; j++, m <<= 1)
14853 if (m & unit_mask)
14854 fprintf (file, " u%d", j);
14855 fprintf (file, ")");
14857 fprintf (file, "\n");
14862 return s390_issue_rate ();
14866 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14867 the scheduler has issued INSN. It stores the last issued insn into
14868 last_scheduled_insn in order to make it available for
14869 s390_sched_reorder. */
14870 static int
14871 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14873 last_scheduled_insn = insn;
14875 if (s390_tune >= PROCESSOR_2827_ZEC12
14876 && reload_completed
14877 && recog_memoized (insn) >= 0)
14879 unsigned int mask = s390_get_sched_attrmask (insn);
14881 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14882 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14883 s390_sched_state = S390_SCHED_STATE_CRACKED;
14884 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14885 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14886 s390_sched_state = S390_SCHED_STATE_NORMAL;
14887 else
14889 /* Only normal insns are left (mask == 0). */
14890 switch (s390_sched_state)
14892 case 0:
14893 case 1:
14894 case 2:
14895 case S390_SCHED_STATE_NORMAL:
14896 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14897 s390_sched_state = 1;
14898 else
14899 s390_sched_state++;
14901 break;
14902 case S390_SCHED_STATE_CRACKED:
14903 s390_sched_state = S390_SCHED_STATE_NORMAL;
14904 break;
14908 if (s390_tune >= PROCESSOR_2964_Z13)
14910 int units, i;
14911 unsigned unit_mask, m = 1;
14913 unit_mask = s390_get_unit_mask (insn, &units);
14914 gcc_assert (units <= MAX_SCHED_UNITS);
14916 for (i = 0; i < units; i++, m <<= 1)
14917 if (m & unit_mask)
14918 last_scheduled_unit_distance[i] = 0;
14919 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14920 last_scheduled_unit_distance[i]++;
14923 if (verbose > 5)
14925 unsigned int sched_mask;
14927 sched_mask = s390_get_sched_attrmask (insn);
14929 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14930 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14931 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14932 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14933 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14934 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14935 #undef PRINT_SCHED_ATTR
14937 if (s390_tune >= PROCESSOR_2964_Z13)
14939 unsigned int unit_mask, m = 1;
14940 int units, j;
14942 unit_mask = s390_get_unit_mask (insn, &units);
14943 fprintf (file, "(units:");
14944 for (j = 0; j < units; j++, m <<= 1)
14945 if (m & unit_mask)
14946 fprintf (file, " %d", j);
14947 fprintf (file, ")");
14949 fprintf (file, " sched state: %d\n", s390_sched_state);
14951 if (s390_tune >= PROCESSOR_2964_Z13)
14953 int units, j;
14955 s390_get_unit_mask (insn, &units);
14957 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14958 for (j = 0; j < units; j++)
14959 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14960 fprintf (file, "\n");
14965 if (GET_CODE (PATTERN (insn)) != USE
14966 && GET_CODE (PATTERN (insn)) != CLOBBER)
14967 return more - 1;
14968 else
14969 return more;
14972 static void
14973 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14974 int verbose ATTRIBUTE_UNUSED,
14975 int max_ready ATTRIBUTE_UNUSED)
14977 last_scheduled_insn = NULL;
14978 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14979 s390_sched_state = 0;
14982 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14983 a new number struct loop *loop should be unrolled if tuned for cpus with
14984 a built-in stride prefetcher.
14985 The loop is analyzed for memory accesses by calling check_dpu for
14986 each rtx of the loop. Depending on the loop_depth and the amount of
14987 memory accesses a new number <=nunroll is returned to improve the
14988 behavior of the hardware prefetch unit. */
14989 static unsigned
14990 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14992 basic_block *bbs;
14993 rtx_insn *insn;
14994 unsigned i;
14995 unsigned mem_count = 0;
14997 if (s390_tune < PROCESSOR_2097_Z10)
14998 return nunroll;
15000 /* Count the number of memory references within the loop body. */
15001 bbs = get_loop_body (loop);
15002 subrtx_iterator::array_type array;
15003 for (i = 0; i < loop->num_nodes; i++)
15004 FOR_BB_INSNS (bbs[i], insn)
15005 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15006 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15007 if (MEM_P (*iter))
15008 mem_count += 1;
15009 free (bbs);
15011 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15012 if (mem_count == 0)
15013 return nunroll;
15015 switch (loop_depth(loop))
15017 case 1:
15018 return MIN (nunroll, 28 / mem_count);
15019 case 2:
15020 return MIN (nunroll, 22 / mem_count);
15021 default:
15022 return MIN (nunroll, 16 / mem_count);
15026 /* Restore the current options. This is a hook function and also called
15027 internally. */
15029 static void
15030 s390_function_specific_restore (struct gcc_options *opts,
15031 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15033 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15036 static void
15037 s390_option_override_internal (bool main_args_p,
15038 struct gcc_options *opts,
15039 const struct gcc_options *opts_set)
15041 const char *prefix;
15042 const char *suffix;
15044 /* Set up prefix/suffix so the error messages refer to either the command
15045 line argument, or the attribute(target). */
15046 if (main_args_p)
15048 prefix = "-m";
15049 suffix = "";
15051 else
15053 prefix = "option(\"";
15054 suffix = "\")";
15058 /* Architecture mode defaults according to ABI. */
15059 if (!(opts_set->x_target_flags & MASK_ZARCH))
15061 if (TARGET_64BIT)
15062 opts->x_target_flags |= MASK_ZARCH;
15063 else
15064 opts->x_target_flags &= ~MASK_ZARCH;
15067 /* Set the march default in case it hasn't been specified on cmdline. */
15068 if (!opts_set->x_s390_arch)
15069 opts->x_s390_arch = PROCESSOR_2064_Z900;
15070 else if (opts->x_s390_arch == PROCESSOR_9672_G5
15071 || opts->x_s390_arch == PROCESSOR_9672_G6)
15072 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
15073 "in future releases; use at least %sarch=z900%s",
15074 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
15075 suffix, prefix, suffix);
15077 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15079 /* Determine processor to tune for. */
15080 if (!opts_set->x_s390_tune)
15081 opts->x_s390_tune = opts->x_s390_arch;
15082 else if (opts->x_s390_tune == PROCESSOR_9672_G5
15083 || opts->x_s390_tune == PROCESSOR_9672_G6)
15084 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
15085 "in future releases; use at least %stune=z900%s",
15086 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
15087 suffix, prefix, suffix);
15089 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15091 /* Sanity checks. */
15092 if (opts->x_s390_arch == PROCESSOR_NATIVE
15093 || opts->x_s390_tune == PROCESSOR_NATIVE)
15094 gcc_unreachable ();
15095 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
15096 error ("z/Architecture mode not supported on %s",
15097 processor_table[(int)opts->x_s390_arch].name);
15098 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15099 error ("64-bit ABI not supported in ESA/390 mode");
15101 /* Enable hardware transactions if available and not explicitly
15102 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15103 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15105 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15106 opts->x_target_flags |= MASK_OPT_HTM;
15107 else
15108 opts->x_target_flags &= ~MASK_OPT_HTM;
15111 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15113 if (TARGET_OPT_VX_P (opts->x_target_flags))
15115 if (!TARGET_CPU_VX_P (opts))
15116 error ("hardware vector support not available on %s",
15117 processor_table[(int)opts->x_s390_arch].name);
15118 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15119 error ("hardware vector support not available with -msoft-float");
15122 else
15124 if (TARGET_CPU_VX_P (opts))
15125 /* Enable vector support if available and not explicitly disabled
15126 by user. E.g. with -m31 -march=z13 -mzarch */
15127 opts->x_target_flags |= MASK_OPT_VX;
15128 else
15129 opts->x_target_flags &= ~MASK_OPT_VX;
15132 /* Use hardware DFP if available and not explicitly disabled by
15133 user. E.g. with -m31 -march=z10 -mzarch */
15134 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15136 if (TARGET_DFP_P (opts))
15137 opts->x_target_flags |= MASK_HARD_DFP;
15138 else
15139 opts->x_target_flags &= ~MASK_HARD_DFP;
15142 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15144 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15146 if (!TARGET_CPU_DFP_P (opts))
15147 error ("hardware decimal floating point instructions"
15148 " not available on %s",
15149 processor_table[(int)opts->x_s390_arch].name);
15150 if (!TARGET_ZARCH_P (opts->x_target_flags))
15151 error ("hardware decimal floating point instructions"
15152 " not available in ESA/390 mode");
15154 else
15155 opts->x_target_flags &= ~MASK_HARD_DFP;
15158 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15159 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15161 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15162 && TARGET_HARD_DFP_P (opts->x_target_flags))
15163 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
15165 opts->x_target_flags &= ~MASK_HARD_DFP;
15168 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15169 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15170 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15171 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
15172 "in combination");
15174 if (opts->x_s390_stack_size)
15176 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15177 error ("stack size must be greater than the stack guard value");
15178 else if (opts->x_s390_stack_size > 1 << 16)
15179 error ("stack size must not be greater than 64k");
15181 else if (opts->x_s390_stack_guard)
15182 error ("-mstack-guard implies use of -mstack-size");
15184 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15185 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15186 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15187 #endif
15189 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15191 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15192 opts->x_param_values,
15193 opts_set->x_param_values);
15194 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15195 opts->x_param_values,
15196 opts_set->x_param_values);
15197 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15198 opts->x_param_values,
15199 opts_set->x_param_values);
15200 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15201 opts->x_param_values,
15202 opts_set->x_param_values);
15205 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15206 opts->x_param_values,
15207 opts_set->x_param_values);
15208 /* values for loop prefetching */
15209 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15210 opts->x_param_values,
15211 opts_set->x_param_values);
15212 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15213 opts->x_param_values,
15214 opts_set->x_param_values);
15215 /* s390 has more than 2 levels and the size is much larger. Since
15216 we are always running virtualized assume that we only get a small
15217 part of the caches above l1. */
15218 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15219 opts->x_param_values,
15220 opts_set->x_param_values);
15221 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15222 opts->x_param_values,
15223 opts_set->x_param_values);
15224 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15225 opts->x_param_values,
15226 opts_set->x_param_values);
15228 /* Use the alternative scheduling-pressure algorithm by default. */
15229 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15230 opts->x_param_values,
15231 opts_set->x_param_values);
15233 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15234 opts->x_param_values,
15235 opts_set->x_param_values);
15237 /* Call target specific restore function to do post-init work. At the moment,
15238 this just sets opts->x_s390_cost_pointer. */
15239 s390_function_specific_restore (opts, NULL);
15242 static void
15243 s390_option_override (void)
15245 unsigned int i;
15246 cl_deferred_option *opt;
15247 vec<cl_deferred_option> *v =
15248 (vec<cl_deferred_option> *) s390_deferred_options;
15250 if (v)
15251 FOR_EACH_VEC_ELT (*v, i, opt)
15253 switch (opt->opt_index)
15255 case OPT_mhotpatch_:
15257 int val1;
15258 int val2;
15259 char s[256];
15260 char *t;
15262 strncpy (s, opt->arg, 256);
15263 s[255] = 0;
15264 t = strchr (s, ',');
15265 if (t != NULL)
15267 *t = 0;
15268 t++;
15269 val1 = integral_argument (s);
15270 val2 = integral_argument (t);
15272 else
15274 val1 = -1;
15275 val2 = -1;
15277 if (val1 == -1 || val2 == -1)
15279 /* argument is not a plain number */
15280 error ("arguments to %qs should be non-negative integers",
15281 "-mhotpatch=n,m");
15282 break;
15284 else if (val1 > s390_hotpatch_hw_max
15285 || val2 > s390_hotpatch_hw_max)
15287 error ("argument to %qs is too large (max. %d)",
15288 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15289 break;
15291 s390_hotpatch_hw_before_label = val1;
15292 s390_hotpatch_hw_after_label = val2;
15293 break;
15295 default:
15296 gcc_unreachable ();
15300 /* Set up function hooks. */
15301 init_machine_status = s390_init_machine_status;
15303 s390_option_override_internal (true, &global_options, &global_options_set);
15305 /* Save the initial options in case the user does function specific
15306 options. */
15307 target_option_default_node = build_target_option_node (&global_options);
15308 target_option_current_node = target_option_default_node;
15310 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15311 requires the arch flags to be evaluated already. Since prefetching
15312 is beneficial on s390, we enable it if available. */
15313 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15314 flag_prefetch_loop_arrays = 1;
15316 if (!s390_pic_data_is_text_relative && !flag_pic)
15317 error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15319 if (TARGET_TPF)
15321 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15322 debuggers do not yet support DWARF 3/4. */
15323 if (!global_options_set.x_dwarf_strict)
15324 dwarf_strict = 1;
15325 if (!global_options_set.x_dwarf_version)
15326 dwarf_version = 2;
15329 /* Register a target-specific optimization-and-lowering pass
15330 to run immediately before prologue and epilogue generation.
15332 Registering the pass must be done at start up. It's
15333 convenient to do it here. */
15334 opt_pass *new_pass = new pass_s390_early_mach (g);
15335 struct register_pass_info insert_pass_s390_early_mach =
15337 new_pass, /* pass */
15338 "pro_and_epilogue", /* reference_pass_name */
15339 1, /* ref_pass_instance_number */
15340 PASS_POS_INSERT_BEFORE /* po_op */
15342 register_pass (&insert_pass_s390_early_mach);
15345 #if S390_USE_TARGET_ATTRIBUTE
15346 /* Inner function to process the attribute((target(...))), take an argument and
15347 set the current options from the argument. If we have a list, recursively go
15348 over the list. */
15350 static bool
15351 s390_valid_target_attribute_inner_p (tree args,
15352 struct gcc_options *opts,
15353 struct gcc_options *new_opts_set,
15354 bool force_pragma)
15356 char *next_optstr;
15357 bool ret = true;
15359 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15360 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15361 static const struct
15363 const char *string;
15364 size_t len;
15365 int opt;
15366 int has_arg;
15367 int only_as_pragma;
15368 } attrs[] = {
15369 /* enum options */
15370 S390_ATTRIB ("arch=", OPT_march_, 1),
15371 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15372 /* uinteger options */
15373 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15374 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15375 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15376 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15377 /* flag options */
15378 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15379 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15380 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15381 S390_ATTRIB ("htm", OPT_mhtm, 0),
15382 S390_ATTRIB ("vx", OPT_mvx, 0),
15383 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15384 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15385 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15386 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15387 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15388 /* boolean options */
15389 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15391 #undef S390_ATTRIB
15392 #undef S390_PRAGMA
15394 /* If this is a list, recurse to get the options. */
15395 if (TREE_CODE (args) == TREE_LIST)
15397 bool ret = true;
15398 int num_pragma_values;
15399 int i;
15401 /* Note: attribs.c:decl_attributes prepends the values from
15402 current_target_pragma to the list of target attributes. To determine
15403 whether we're looking at a value of the attribute or the pragma we
15404 assume that the first [list_length (current_target_pragma)] values in
15405 the list are the values from the pragma. */
15406 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15407 ? list_length (current_target_pragma) : 0;
15408 for (i = 0; args; args = TREE_CHAIN (args), i++)
15410 bool is_pragma;
15412 is_pragma = (force_pragma || i < num_pragma_values);
15413 if (TREE_VALUE (args)
15414 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15415 opts, new_opts_set,
15416 is_pragma))
15418 ret = false;
15421 return ret;
15424 else if (TREE_CODE (args) != STRING_CST)
15426 error ("attribute %<target%> argument not a string");
15427 return false;
15430 /* Handle multiple arguments separated by commas. */
15431 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15433 while (next_optstr && *next_optstr != '\0')
15435 char *p = next_optstr;
15436 char *orig_p = p;
15437 char *comma = strchr (next_optstr, ',');
15438 size_t len, opt_len;
15439 int opt;
15440 bool opt_set_p;
15441 char ch;
15442 unsigned i;
15443 int mask = 0;
15444 enum cl_var_type var_type;
15445 bool found;
15447 if (comma)
15449 *comma = '\0';
15450 len = comma - next_optstr;
15451 next_optstr = comma + 1;
15453 else
15455 len = strlen (p);
15456 next_optstr = NULL;
15459 /* Recognize no-xxx. */
15460 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15462 opt_set_p = false;
15463 p += 3;
15464 len -= 3;
15466 else
15467 opt_set_p = true;
15469 /* Find the option. */
15470 ch = *p;
15471 found = false;
15472 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15474 opt_len = attrs[i].len;
15475 if (ch == attrs[i].string[0]
15476 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15477 && memcmp (p, attrs[i].string, opt_len) == 0)
15479 opt = attrs[i].opt;
15480 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15481 continue;
15482 mask = cl_options[opt].var_value;
15483 var_type = cl_options[opt].var_type;
15484 found = true;
15485 break;
15489 /* Process the option. */
15490 if (!found)
15492 error ("attribute(target(\"%s\")) is unknown", orig_p);
15493 return false;
15495 else if (attrs[i].only_as_pragma && !force_pragma)
15497 /* Value is not allowed for the target attribute. */
15498 error ("value %qs is not supported by attribute %<target%>",
15499 attrs[i].string);
15500 return false;
15503 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15505 if (var_type == CLVC_BIT_CLEAR)
15506 opt_set_p = !opt_set_p;
15508 if (opt_set_p)
15509 opts->x_target_flags |= mask;
15510 else
15511 opts->x_target_flags &= ~mask;
15512 new_opts_set->x_target_flags |= mask;
15515 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15517 int value;
15519 if (cl_options[opt].cl_uinteger)
15521 /* Unsigned integer argument. Code based on the function
15522 decode_cmdline_option () in opts-common.c. */
15523 value = integral_argument (p + opt_len);
15525 else
15526 value = (opt_set_p) ? 1 : 0;
15528 if (value != -1)
15530 struct cl_decoded_option decoded;
15532 /* Value range check; only implemented for numeric and boolean
15533 options at the moment. */
15534 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15535 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15536 set_option (opts, new_opts_set, opt, value,
15537 p + opt_len, DK_UNSPECIFIED, input_location,
15538 global_dc);
15540 else
15542 error ("attribute(target(\"%s\")) is unknown", orig_p);
15543 ret = false;
15547 else if (cl_options[opt].var_type == CLVC_ENUM)
15549 bool arg_ok;
15550 int value;
15552 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15553 if (arg_ok)
15554 set_option (opts, new_opts_set, opt, value,
15555 p + opt_len, DK_UNSPECIFIED, input_location,
15556 global_dc);
15557 else
15559 error ("attribute(target(\"%s\")) is unknown", orig_p);
15560 ret = false;
15564 else
15565 gcc_unreachable ();
15567 return ret;
15570 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15572 tree
15573 s390_valid_target_attribute_tree (tree args,
15574 struct gcc_options *opts,
15575 const struct gcc_options *opts_set,
15576 bool force_pragma)
15578 tree t = NULL_TREE;
15579 struct gcc_options new_opts_set;
15581 memset (&new_opts_set, 0, sizeof (new_opts_set));
15583 /* Process each of the options on the chain. */
15584 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15585 force_pragma))
15586 return error_mark_node;
15588 /* If some option was set (even if it has not changed), rerun
15589 s390_option_override_internal, and then save the options away. */
15590 if (new_opts_set.x_target_flags
15591 || new_opts_set.x_s390_arch
15592 || new_opts_set.x_s390_tune
15593 || new_opts_set.x_s390_stack_guard
15594 || new_opts_set.x_s390_stack_size
15595 || new_opts_set.x_s390_branch_cost
15596 || new_opts_set.x_s390_warn_framesize
15597 || new_opts_set.x_s390_warn_dynamicstack_p)
15599 const unsigned char *src = (const unsigned char *)opts_set;
15600 unsigned char *dest = (unsigned char *)&new_opts_set;
15601 unsigned int i;
15603 /* Merge the original option flags into the new ones. */
15604 for (i = 0; i < sizeof(*opts_set); i++)
15605 dest[i] |= src[i];
15607 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15608 s390_option_override_internal (false, opts, &new_opts_set);
15609 /* Save the current options unless we are validating options for
15610 #pragma. */
15611 t = build_target_option_node (opts);
15613 return t;
15616 /* Hook to validate attribute((target("string"))). */
15618 static bool
15619 s390_valid_target_attribute_p (tree fndecl,
15620 tree ARG_UNUSED (name),
15621 tree args,
15622 int ARG_UNUSED (flags))
15624 struct gcc_options func_options;
15625 tree new_target, new_optimize;
15626 bool ret = true;
15628 /* attribute((target("default"))) does nothing, beyond
15629 affecting multi-versioning. */
15630 if (TREE_VALUE (args)
15631 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15632 && TREE_CHAIN (args) == NULL_TREE
15633 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15634 return true;
15636 tree old_optimize = build_optimization_node (&global_options);
15638 /* Get the optimization options of the current function. */
15639 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15641 if (!func_optimize)
15642 func_optimize = old_optimize;
15644 /* Init func_options. */
15645 memset (&func_options, 0, sizeof (func_options));
15646 init_options_struct (&func_options, NULL);
15647 lang_hooks.init_options_struct (&func_options);
15649 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15651 /* Initialize func_options to the default before its target options can
15652 be set. */
15653 cl_target_option_restore (&func_options,
15654 TREE_TARGET_OPTION (target_option_default_node));
15656 new_target = s390_valid_target_attribute_tree (args, &func_options,
15657 &global_options_set,
15658 (args ==
15659 current_target_pragma));
15660 new_optimize = build_optimization_node (&func_options);
15661 if (new_target == error_mark_node)
15662 ret = false;
15663 else if (fndecl && new_target)
15665 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15666 if (old_optimize != new_optimize)
15667 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15669 return ret;
15672 /* Hook to determine if one function can safely inline another. */
15674 static bool
15675 s390_can_inline_p (tree caller, tree callee)
15677 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15678 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15680 if (!callee_tree)
15681 callee_tree = target_option_default_node;
15682 if (!caller_tree)
15683 caller_tree = target_option_default_node;
15684 if (callee_tree == caller_tree)
15685 return true;
15687 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15688 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15689 bool ret = true;
15691 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15692 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15693 ret = false;
15695 /* Don't inline functions to be compiled for a more recent arch into a
15696 function for an older arch. */
15697 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15698 ret = false;
15700 /* Inlining a hard float function into a soft float function is only
15701 allowed if the hard float function doesn't actually make use of
15702 floating point.
15704 We are called from FEs for multi-versioning call optimization, so
15705 beware of ipa_fn_summaries not available. */
15706 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15707 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15708 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15709 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15710 && (! ipa_fn_summaries
15711 || ipa_fn_summaries->get
15712 (cgraph_node::get (callee))->fp_expressions))
15713 ret = false;
15715 return ret;
15718 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15719 cache. */
15721 void
15722 s390_activate_target_options (tree new_tree)
15724 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15725 if (TREE_TARGET_GLOBALS (new_tree))
15726 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15727 else if (new_tree == target_option_default_node)
15728 restore_target_globals (&default_target_globals);
15729 else
15730 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15731 s390_previous_fndecl = NULL_TREE;
15734 /* Establish appropriate back-end context for processing the function
15735 FNDECL. The argument might be NULL to indicate processing at top
15736 level, outside of any function scope. */
15737 static void
15738 s390_set_current_function (tree fndecl)
15740 /* Only change the context if the function changes. This hook is called
15741 several times in the course of compiling a function, and we don't want to
15742 slow things down too much or call target_reinit when it isn't safe. */
15743 if (fndecl == s390_previous_fndecl)
15744 return;
15746 tree old_tree;
15747 if (s390_previous_fndecl == NULL_TREE)
15748 old_tree = target_option_current_node;
15749 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15750 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15751 else
15752 old_tree = target_option_default_node;
15754 if (fndecl == NULL_TREE)
15756 if (old_tree != target_option_current_node)
15757 s390_activate_target_options (target_option_current_node);
15758 return;
15761 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15762 if (new_tree == NULL_TREE)
15763 new_tree = target_option_default_node;
15765 if (old_tree != new_tree)
15766 s390_activate_target_options (new_tree);
15767 s390_previous_fndecl = fndecl;
15769 #endif
15771 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15773 static bool
15774 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15775 unsigned int align ATTRIBUTE_UNUSED,
15776 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15777 bool speed_p ATTRIBUTE_UNUSED)
15779 return (size == 1 || size == 2
15780 || size == 4 || (TARGET_ZARCH && size == 8));
15783 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15785 static void
15786 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15788 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15789 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15790 tree call_efpc = build_call_expr (efpc, 0);
15791 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15793 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15794 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15795 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15796 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15797 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15798 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15800 /* Generates the equivalent of feholdexcept (&fenv_var)
15802 fenv_var = __builtin_s390_efpc ();
15803 __builtin_s390_sfpc (fenv_var & mask) */
15804 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15805 tree new_fpc =
15806 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15807 build_int_cst (unsigned_type_node,
15808 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15809 FPC_EXCEPTION_MASK)));
15810 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15811 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15813 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15815 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15816 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15817 build_int_cst (unsigned_type_node,
15818 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15819 *clear = build_call_expr (sfpc, 1, new_fpc);
15821 /* Generates the equivalent of feupdateenv (fenv_var)
15823 old_fpc = __builtin_s390_efpc ();
15824 __builtin_s390_sfpc (fenv_var);
15825 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15827 old_fpc = create_tmp_var_raw (unsigned_type_node);
15828 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15829 old_fpc, call_efpc);
15831 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15833 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15834 build_int_cst (unsigned_type_node,
15835 FPC_FLAGS_MASK));
15836 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15837 build_int_cst (unsigned_type_node,
15838 FPC_FLAGS_SHIFT));
15839 tree atomic_feraiseexcept
15840 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15841 raise_old_except = build_call_expr (atomic_feraiseexcept,
15842 1, raise_old_except);
15844 *update = build2 (COMPOUND_EXPR, void_type_node,
15845 build2 (COMPOUND_EXPR, void_type_node,
15846 store_old_fpc, set_new_fpc),
15847 raise_old_except);
15849 #undef FPC_EXCEPTION_MASK
15850 #undef FPC_FLAGS_MASK
15851 #undef FPC_DXC_MASK
15852 #undef FPC_EXCEPTION_MASK_SHIFT
15853 #undef FPC_FLAGS_SHIFT
15854 #undef FPC_DXC_SHIFT
15857 /* Return the vector mode to be used for inner mode MODE when doing
15858 vectorization. */
15859 static machine_mode
15860 s390_preferred_simd_mode (scalar_mode mode)
15862 if (TARGET_VXE)
15863 switch (mode)
15865 case E_SFmode:
15866 return V4SFmode;
15867 default:;
15870 if (TARGET_VX)
15871 switch (mode)
15873 case E_DFmode:
15874 return V2DFmode;
15875 case E_DImode:
15876 return V2DImode;
15877 case E_SImode:
15878 return V4SImode;
15879 case E_HImode:
15880 return V8HImode;
15881 case E_QImode:
15882 return V16QImode;
15883 default:;
15885 return word_mode;
15888 /* Our hardware does not require vectors to be strictly aligned. */
15889 static bool
15890 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15891 const_tree type ATTRIBUTE_UNUSED,
15892 int misalignment ATTRIBUTE_UNUSED,
15893 bool is_packed ATTRIBUTE_UNUSED)
15895 if (TARGET_VX)
15896 return true;
15898 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15899 is_packed);
15902 /* The vector ABI requires vector types to be aligned on an 8 byte
15903 boundary (our stack alignment). However, we allow this to be
15904 overriden by the user, while this definitely breaks the ABI. */
15905 static HOST_WIDE_INT
15906 s390_vector_alignment (const_tree type)
15908 if (!TARGET_VX_ABI)
15909 return default_vector_alignment (type);
15911 if (TYPE_USER_ALIGN (type))
15912 return TYPE_ALIGN (type);
15914 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
15917 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
15918 LARL instruction. */
15920 static HOST_WIDE_INT
15921 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
15923 return MAX (align, 16);
15926 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15927 /* Implement TARGET_ASM_FILE_START. */
15928 static void
15929 s390_asm_file_start (void)
15931 default_file_start ();
15932 s390_asm_output_machine_for_arch (asm_out_file);
15934 #endif
15936 /* Implement TARGET_ASM_FILE_END. */
15937 static void
15938 s390_asm_file_end (void)
15940 #ifdef HAVE_AS_GNU_ATTRIBUTE
15941 varpool_node *vnode;
15942 cgraph_node *cnode;
15944 FOR_EACH_VARIABLE (vnode)
15945 if (TREE_PUBLIC (vnode->decl))
15946 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15948 FOR_EACH_FUNCTION (cnode)
15949 if (TREE_PUBLIC (cnode->decl))
15950 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15953 if (s390_vector_abi != 0)
15954 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15955 s390_vector_abi);
15956 #endif
15957 file_end_indicate_exec_stack ();
15959 if (flag_split_stack)
15960 file_end_indicate_split_stack ();
15963 /* Return true if TYPE is a vector bool type. */
15964 static inline bool
15965 s390_vector_bool_type_p (const_tree type)
15967 return TYPE_VECTOR_OPAQUE (type);
15970 /* Return the diagnostic message string if the binary operation OP is
15971 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15972 static const char*
15973 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15975 bool bool1_p, bool2_p;
15976 bool plusminus_p;
15977 bool muldiv_p;
15978 bool compare_p;
15979 machine_mode mode1, mode2;
15981 if (!TARGET_ZVECTOR)
15982 return NULL;
15984 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15985 return NULL;
15987 bool1_p = s390_vector_bool_type_p (type1);
15988 bool2_p = s390_vector_bool_type_p (type2);
15990 /* Mixing signed and unsigned types is forbidden for all
15991 operators. */
15992 if (!bool1_p && !bool2_p
15993 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15994 return N_("types differ in signedness");
15996 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15997 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15998 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15999 || op == ROUND_DIV_EXPR);
16000 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16001 || op == EQ_EXPR || op == NE_EXPR);
16003 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16004 return N_("binary operator does not support two vector bool operands");
16006 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16007 return N_("binary operator does not support vector bool operand");
16009 mode1 = TYPE_MODE (type1);
16010 mode2 = TYPE_MODE (type2);
16012 if (bool1_p != bool2_p && plusminus_p
16013 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16014 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16015 return N_("binary operator does not support mixing vector "
16016 "bool with floating point vector operands");
16018 return NULL;
16021 /* Implement TARGET_C_EXCESS_PRECISION.
16023 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16024 double on s390, causing operations on float_t to operate in a higher
16025 precision than is necessary. However, it is not the case that SFmode
16026 operations have implicit excess precision, and we generate more optimal
16027 code if we let the compiler know no implicit extra precision is added.
16029 That means when we are compiling with -fexcess-precision=fast, the value
16030 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16031 float_t (though they would be correct for -fexcess-precision=standard).
16033 A complete fix would modify glibc to remove the unnecessary typedef
16034 of float_t to double. */
16036 static enum flt_eval_method
16037 s390_excess_precision (enum excess_precision_type type)
16039 switch (type)
16041 case EXCESS_PRECISION_TYPE_IMPLICIT:
16042 case EXCESS_PRECISION_TYPE_FAST:
16043 /* The fastest type to promote to will always be the native type,
16044 whether that occurs with implicit excess precision or
16045 otherwise. */
16046 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16047 case EXCESS_PRECISION_TYPE_STANDARD:
16048 /* Otherwise, when we are in a standards compliant mode, to
16049 ensure consistency with the implementation in glibc, report that
16050 float is evaluated to the range and precision of double. */
16051 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16052 default:
16053 gcc_unreachable ();
16055 return FLT_EVAL_METHOD_UNPREDICTABLE;
16058 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16060 static unsigned HOST_WIDE_INT
16061 s390_asan_shadow_offset (void)
16063 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16066 /* Initialize GCC target structure. */
16068 #undef TARGET_ASM_ALIGNED_HI_OP
16069 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16070 #undef TARGET_ASM_ALIGNED_DI_OP
16071 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16072 #undef TARGET_ASM_INTEGER
16073 #define TARGET_ASM_INTEGER s390_assemble_integer
16075 #undef TARGET_ASM_OPEN_PAREN
16076 #define TARGET_ASM_OPEN_PAREN ""
16078 #undef TARGET_ASM_CLOSE_PAREN
16079 #define TARGET_ASM_CLOSE_PAREN ""
16081 #undef TARGET_OPTION_OVERRIDE
16082 #define TARGET_OPTION_OVERRIDE s390_option_override
16084 #ifdef TARGET_THREAD_SSP_OFFSET
16085 #undef TARGET_STACK_PROTECT_GUARD
16086 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16087 #endif
16089 #undef TARGET_ENCODE_SECTION_INFO
16090 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16092 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16093 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16095 #ifdef HAVE_AS_TLS
16096 #undef TARGET_HAVE_TLS
16097 #define TARGET_HAVE_TLS true
16098 #endif
16099 #undef TARGET_CANNOT_FORCE_CONST_MEM
16100 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16102 #undef TARGET_DELEGITIMIZE_ADDRESS
16103 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16105 #undef TARGET_LEGITIMIZE_ADDRESS
16106 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16108 #undef TARGET_RETURN_IN_MEMORY
16109 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16111 #undef TARGET_INIT_BUILTINS
16112 #define TARGET_INIT_BUILTINS s390_init_builtins
16113 #undef TARGET_EXPAND_BUILTIN
16114 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16115 #undef TARGET_BUILTIN_DECL
16116 #define TARGET_BUILTIN_DECL s390_builtin_decl
16118 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16119 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16121 #undef TARGET_ASM_OUTPUT_MI_THUNK
16122 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16123 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16124 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16126 #undef TARGET_C_EXCESS_PRECISION
16127 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16129 #undef TARGET_SCHED_ADJUST_PRIORITY
16130 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16131 #undef TARGET_SCHED_ISSUE_RATE
16132 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16133 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16134 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16136 #undef TARGET_SCHED_VARIABLE_ISSUE
16137 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16138 #undef TARGET_SCHED_REORDER
16139 #define TARGET_SCHED_REORDER s390_sched_reorder
16140 #undef TARGET_SCHED_INIT
16141 #define TARGET_SCHED_INIT s390_sched_init
16143 #undef TARGET_CANNOT_COPY_INSN_P
16144 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16145 #undef TARGET_RTX_COSTS
16146 #define TARGET_RTX_COSTS s390_rtx_costs
16147 #undef TARGET_ADDRESS_COST
16148 #define TARGET_ADDRESS_COST s390_address_cost
16149 #undef TARGET_REGISTER_MOVE_COST
16150 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16151 #undef TARGET_MEMORY_MOVE_COST
16152 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16153 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16154 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16155 s390_builtin_vectorization_cost
16157 #undef TARGET_MACHINE_DEPENDENT_REORG
16158 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16160 #undef TARGET_VALID_POINTER_MODE
16161 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16163 #undef TARGET_BUILD_BUILTIN_VA_LIST
16164 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16165 #undef TARGET_EXPAND_BUILTIN_VA_START
16166 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16167 #undef TARGET_ASAN_SHADOW_OFFSET
16168 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16169 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16170 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16172 #undef TARGET_PROMOTE_FUNCTION_MODE
16173 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16174 #undef TARGET_PASS_BY_REFERENCE
16175 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16177 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16178 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16179 #undef TARGET_FUNCTION_ARG
16180 #define TARGET_FUNCTION_ARG s390_function_arg
16181 #undef TARGET_FUNCTION_ARG_ADVANCE
16182 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16183 #undef TARGET_FUNCTION_ARG_PADDING
16184 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16185 #undef TARGET_FUNCTION_VALUE
16186 #define TARGET_FUNCTION_VALUE s390_function_value
16187 #undef TARGET_LIBCALL_VALUE
16188 #define TARGET_LIBCALL_VALUE s390_libcall_value
16189 #undef TARGET_STRICT_ARGUMENT_NAMING
16190 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16192 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16193 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16195 #undef TARGET_FIXED_CONDITION_CODE_REGS
16196 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16198 #undef TARGET_CC_MODES_COMPATIBLE
16199 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16201 #undef TARGET_INVALID_WITHIN_DOLOOP
16202 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16204 #ifdef HAVE_AS_TLS
16205 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16206 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16207 #endif
16209 #undef TARGET_DWARF_FRAME_REG_MODE
16210 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16212 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16213 #undef TARGET_MANGLE_TYPE
16214 #define TARGET_MANGLE_TYPE s390_mangle_type
16215 #endif
16217 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16218 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16220 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16221 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16223 #undef TARGET_PREFERRED_RELOAD_CLASS
16224 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16226 #undef TARGET_SECONDARY_RELOAD
16227 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16228 #undef TARGET_SECONDARY_MEMORY_NEEDED
16229 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16230 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16231 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16233 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16234 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16236 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16237 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16239 #undef TARGET_LEGITIMATE_ADDRESS_P
16240 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16242 #undef TARGET_LEGITIMATE_CONSTANT_P
16243 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16245 #undef TARGET_LRA_P
16246 #define TARGET_LRA_P s390_lra_p
16248 #undef TARGET_CAN_ELIMINATE
16249 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16251 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16252 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16254 #undef TARGET_LOOP_UNROLL_ADJUST
16255 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16257 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16258 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16259 #undef TARGET_TRAMPOLINE_INIT
16260 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16262 /* PR 79421 */
16263 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16264 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16266 #undef TARGET_UNWIND_WORD_MODE
16267 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16269 #undef TARGET_CANONICALIZE_COMPARISON
16270 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16272 #undef TARGET_HARD_REGNO_SCRATCH_OK
16273 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16275 #undef TARGET_HARD_REGNO_NREGS
16276 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16277 #undef TARGET_HARD_REGNO_MODE_OK
16278 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16279 #undef TARGET_MODES_TIEABLE_P
16280 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16282 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16283 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16284 s390_hard_regno_call_part_clobbered
16286 #undef TARGET_ATTRIBUTE_TABLE
16287 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16289 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16290 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16292 #undef TARGET_SET_UP_BY_PROLOGUE
16293 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16295 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16296 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16298 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16299 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16300 s390_use_by_pieces_infrastructure_p
16302 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16303 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16305 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16306 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16308 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16309 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16311 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16312 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16314 #undef TARGET_VECTOR_ALIGNMENT
16315 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16317 #undef TARGET_INVALID_BINARY_OP
16318 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16320 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16321 #undef TARGET_ASM_FILE_START
16322 #define TARGET_ASM_FILE_START s390_asm_file_start
16323 #endif
16325 #undef TARGET_ASM_FILE_END
16326 #define TARGET_ASM_FILE_END s390_asm_file_end
16328 #if S390_USE_TARGET_ATTRIBUTE
16329 #undef TARGET_SET_CURRENT_FUNCTION
16330 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16332 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16333 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16335 #undef TARGET_CAN_INLINE_P
16336 #define TARGET_CAN_INLINE_P s390_can_inline_p
16337 #endif
16339 #undef TARGET_OPTION_RESTORE
16340 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16342 #undef TARGET_CAN_CHANGE_MODE_CLASS
16343 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16345 #undef TARGET_CONSTANT_ALIGNMENT
16346 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16348 struct gcc_target targetm = TARGET_INITIALIZER;
16350 #include "gt-s390.h"