Make more use of REG_NREGS
[official-gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "expmed.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "diagnostic.h"
47 #include "alias.h"
48 #include "fold-const.h"
49 #include "print-tree.h"
50 #include "stor-layout.h"
51 #include "varasm.h"
52 #include "calls.h"
53 #include "conditions.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "flags.h"
57 #include "except.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "stmt.h"
61 #include "expr.h"
62 #include "reload.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "debug.h"
69 #include "langhooks.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimplify.h"
74 #include "params.h"
75 #include "opts.h"
76 #include "tree-pass.h"
77 #include "context.h"
78 #include "builtins.h"
79 #include "rtl-iter.h"
80 #include "intl.h"
81 #include "tm-constrs.h"
82 #include "tree-vrp.h"
83 #include "symbol-summary.h"
84 #include "ipa-prop.h"
85 #include "ipa-fnsummary.h"
87 /* This file should be included last. */
88 #include "target-def.h"
90 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
92 /* Remember the last target of s390_set_current_function. */
93 static GTY(()) tree s390_previous_fndecl;
95 /* Define the specific costs for a given cpu. */
97 struct processor_costs
99 /* multiplication */
100 const int m; /* cost of an M instruction. */
101 const int mghi; /* cost of an MGHI instruction. */
102 const int mh; /* cost of an MH instruction. */
103 const int mhi; /* cost of an MHI instruction. */
104 const int ml; /* cost of an ML instruction. */
105 const int mr; /* cost of an MR instruction. */
106 const int ms; /* cost of an MS instruction. */
107 const int msg; /* cost of an MSG instruction. */
108 const int msgf; /* cost of an MSGF instruction. */
109 const int msgfr; /* cost of an MSGFR instruction. */
110 const int msgr; /* cost of an MSGR instruction. */
111 const int msr; /* cost of an MSR instruction. */
112 const int mult_df; /* cost of multiplication in DFmode. */
113 const int mxbr;
114 /* square root */
115 const int sqxbr; /* cost of square root in TFmode. */
116 const int sqdbr; /* cost of square root in DFmode. */
117 const int sqebr; /* cost of square root in SFmode. */
118 /* multiply and add */
119 const int madbr; /* cost of multiply and add in DFmode. */
120 const int maebr; /* cost of multiply and add in SFmode. */
121 /* division */
122 const int dxbr;
123 const int ddbr;
124 const int debr;
125 const int dlgr;
126 const int dlr;
127 const int dr;
128 const int dsgfr;
129 const int dsgr;
132 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
134 static const
135 struct processor_costs z900_cost =
137 COSTS_N_INSNS (5), /* M */
138 COSTS_N_INSNS (10), /* MGHI */
139 COSTS_N_INSNS (5), /* MH */
140 COSTS_N_INSNS (4), /* MHI */
141 COSTS_N_INSNS (5), /* ML */
142 COSTS_N_INSNS (5), /* MR */
143 COSTS_N_INSNS (4), /* MS */
144 COSTS_N_INSNS (15), /* MSG */
145 COSTS_N_INSNS (7), /* MSGF */
146 COSTS_N_INSNS (7), /* MSGFR */
147 COSTS_N_INSNS (10), /* MSGR */
148 COSTS_N_INSNS (4), /* MSR */
149 COSTS_N_INSNS (7), /* multiplication in DFmode */
150 COSTS_N_INSNS (13), /* MXBR */
151 COSTS_N_INSNS (136), /* SQXBR */
152 COSTS_N_INSNS (44), /* SQDBR */
153 COSTS_N_INSNS (35), /* SQEBR */
154 COSTS_N_INSNS (18), /* MADBR */
155 COSTS_N_INSNS (13), /* MAEBR */
156 COSTS_N_INSNS (134), /* DXBR */
157 COSTS_N_INSNS (30), /* DDBR */
158 COSTS_N_INSNS (27), /* DEBR */
159 COSTS_N_INSNS (220), /* DLGR */
160 COSTS_N_INSNS (34), /* DLR */
161 COSTS_N_INSNS (34), /* DR */
162 COSTS_N_INSNS (32), /* DSGFR */
163 COSTS_N_INSNS (32), /* DSGR */
166 static const
167 struct processor_costs z990_cost =
169 COSTS_N_INSNS (4), /* M */
170 COSTS_N_INSNS (2), /* MGHI */
171 COSTS_N_INSNS (2), /* MH */
172 COSTS_N_INSNS (2), /* MHI */
173 COSTS_N_INSNS (4), /* ML */
174 COSTS_N_INSNS (4), /* MR */
175 COSTS_N_INSNS (5), /* MS */
176 COSTS_N_INSNS (6), /* MSG */
177 COSTS_N_INSNS (4), /* MSGF */
178 COSTS_N_INSNS (4), /* MSGFR */
179 COSTS_N_INSNS (4), /* MSGR */
180 COSTS_N_INSNS (4), /* MSR */
181 COSTS_N_INSNS (1), /* multiplication in DFmode */
182 COSTS_N_INSNS (28), /* MXBR */
183 COSTS_N_INSNS (130), /* SQXBR */
184 COSTS_N_INSNS (66), /* SQDBR */
185 COSTS_N_INSNS (38), /* SQEBR */
186 COSTS_N_INSNS (1), /* MADBR */
187 COSTS_N_INSNS (1), /* MAEBR */
188 COSTS_N_INSNS (60), /* DXBR */
189 COSTS_N_INSNS (40), /* DDBR */
190 COSTS_N_INSNS (26), /* DEBR */
191 COSTS_N_INSNS (176), /* DLGR */
192 COSTS_N_INSNS (31), /* DLR */
193 COSTS_N_INSNS (31), /* DR */
194 COSTS_N_INSNS (31), /* DSGFR */
195 COSTS_N_INSNS (31), /* DSGR */
198 static const
199 struct processor_costs z9_109_cost =
201 COSTS_N_INSNS (4), /* M */
202 COSTS_N_INSNS (2), /* MGHI */
203 COSTS_N_INSNS (2), /* MH */
204 COSTS_N_INSNS (2), /* MHI */
205 COSTS_N_INSNS (4), /* ML */
206 COSTS_N_INSNS (4), /* MR */
207 COSTS_N_INSNS (5), /* MS */
208 COSTS_N_INSNS (6), /* MSG */
209 COSTS_N_INSNS (4), /* MSGF */
210 COSTS_N_INSNS (4), /* MSGFR */
211 COSTS_N_INSNS (4), /* MSGR */
212 COSTS_N_INSNS (4), /* MSR */
213 COSTS_N_INSNS (1), /* multiplication in DFmode */
214 COSTS_N_INSNS (28), /* MXBR */
215 COSTS_N_INSNS (130), /* SQXBR */
216 COSTS_N_INSNS (66), /* SQDBR */
217 COSTS_N_INSNS (38), /* SQEBR */
218 COSTS_N_INSNS (1), /* MADBR */
219 COSTS_N_INSNS (1), /* MAEBR */
220 COSTS_N_INSNS (60), /* DXBR */
221 COSTS_N_INSNS (40), /* DDBR */
222 COSTS_N_INSNS (26), /* DEBR */
223 COSTS_N_INSNS (30), /* DLGR */
224 COSTS_N_INSNS (23), /* DLR */
225 COSTS_N_INSNS (23), /* DR */
226 COSTS_N_INSNS (24), /* DSGFR */
227 COSTS_N_INSNS (24), /* DSGR */
230 static const
231 struct processor_costs z10_cost =
233 COSTS_N_INSNS (10), /* M */
234 COSTS_N_INSNS (10), /* MGHI */
235 COSTS_N_INSNS (10), /* MH */
236 COSTS_N_INSNS (10), /* MHI */
237 COSTS_N_INSNS (10), /* ML */
238 COSTS_N_INSNS (10), /* MR */
239 COSTS_N_INSNS (10), /* MS */
240 COSTS_N_INSNS (10), /* MSG */
241 COSTS_N_INSNS (10), /* MSGF */
242 COSTS_N_INSNS (10), /* MSGFR */
243 COSTS_N_INSNS (10), /* MSGR */
244 COSTS_N_INSNS (10), /* MSR */
245 COSTS_N_INSNS (1) , /* multiplication in DFmode */
246 COSTS_N_INSNS (50), /* MXBR */
247 COSTS_N_INSNS (120), /* SQXBR */
248 COSTS_N_INSNS (52), /* SQDBR */
249 COSTS_N_INSNS (38), /* SQEBR */
250 COSTS_N_INSNS (1), /* MADBR */
251 COSTS_N_INSNS (1), /* MAEBR */
252 COSTS_N_INSNS (111), /* DXBR */
253 COSTS_N_INSNS (39), /* DDBR */
254 COSTS_N_INSNS (32), /* DEBR */
255 COSTS_N_INSNS (160), /* DLGR */
256 COSTS_N_INSNS (71), /* DLR */
257 COSTS_N_INSNS (71), /* DR */
258 COSTS_N_INSNS (71), /* DSGFR */
259 COSTS_N_INSNS (71), /* DSGR */
262 static const
263 struct processor_costs z196_cost =
265 COSTS_N_INSNS (7), /* M */
266 COSTS_N_INSNS (5), /* MGHI */
267 COSTS_N_INSNS (5), /* MH */
268 COSTS_N_INSNS (5), /* MHI */
269 COSTS_N_INSNS (7), /* ML */
270 COSTS_N_INSNS (7), /* MR */
271 COSTS_N_INSNS (6), /* MS */
272 COSTS_N_INSNS (8), /* MSG */
273 COSTS_N_INSNS (6), /* MSGF */
274 COSTS_N_INSNS (6), /* MSGFR */
275 COSTS_N_INSNS (8), /* MSGR */
276 COSTS_N_INSNS (6), /* MSR */
277 COSTS_N_INSNS (1) , /* multiplication in DFmode */
278 COSTS_N_INSNS (40), /* MXBR B+40 */
279 COSTS_N_INSNS (100), /* SQXBR B+100 */
280 COSTS_N_INSNS (42), /* SQDBR B+42 */
281 COSTS_N_INSNS (28), /* SQEBR B+28 */
282 COSTS_N_INSNS (1), /* MADBR B */
283 COSTS_N_INSNS (1), /* MAEBR B */
284 COSTS_N_INSNS (101), /* DXBR B+101 */
285 COSTS_N_INSNS (29), /* DDBR */
286 COSTS_N_INSNS (22), /* DEBR */
287 COSTS_N_INSNS (160), /* DLGR cracked */
288 COSTS_N_INSNS (160), /* DLR cracked */
289 COSTS_N_INSNS (160), /* DR expanded */
290 COSTS_N_INSNS (160), /* DSGFR cracked */
291 COSTS_N_INSNS (160), /* DSGR cracked */
294 static const
295 struct processor_costs zEC12_cost =
297 COSTS_N_INSNS (7), /* M */
298 COSTS_N_INSNS (5), /* MGHI */
299 COSTS_N_INSNS (5), /* MH */
300 COSTS_N_INSNS (5), /* MHI */
301 COSTS_N_INSNS (7), /* ML */
302 COSTS_N_INSNS (7), /* MR */
303 COSTS_N_INSNS (6), /* MS */
304 COSTS_N_INSNS (8), /* MSG */
305 COSTS_N_INSNS (6), /* MSGF */
306 COSTS_N_INSNS (6), /* MSGFR */
307 COSTS_N_INSNS (8), /* MSGR */
308 COSTS_N_INSNS (6), /* MSR */
309 COSTS_N_INSNS (1) , /* multiplication in DFmode */
310 COSTS_N_INSNS (40), /* MXBR B+40 */
311 COSTS_N_INSNS (100), /* SQXBR B+100 */
312 COSTS_N_INSNS (42), /* SQDBR B+42 */
313 COSTS_N_INSNS (28), /* SQEBR B+28 */
314 COSTS_N_INSNS (1), /* MADBR B */
315 COSTS_N_INSNS (1), /* MAEBR B */
316 COSTS_N_INSNS (131), /* DXBR B+131 */
317 COSTS_N_INSNS (29), /* DDBR */
318 COSTS_N_INSNS (22), /* DEBR */
319 COSTS_N_INSNS (160), /* DLGR cracked */
320 COSTS_N_INSNS (160), /* DLR cracked */
321 COSTS_N_INSNS (160), /* DR expanded */
322 COSTS_N_INSNS (160), /* DSGFR cracked */
323 COSTS_N_INSNS (160), /* DSGR cracked */
326 static struct
328 /* The preferred name to be used in user visible output. */
329 const char *const name;
330 /* CPU name as it should be passed to Binutils via .machine */
331 const char *const binutils_name;
332 const enum processor_type processor;
333 const struct processor_costs *cost;
335 const processor_table[] =
337 { "g5", "g5", PROCESSOR_9672_G5, &z900_cost },
338 { "g6", "g6", PROCESSOR_9672_G6, &z900_cost },
339 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost },
340 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost },
341 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
342 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
343 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost },
344 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost },
345 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
346 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost },
347 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost },
348 { "native", "", PROCESSOR_NATIVE, NULL }
351 extern int reload_completed;
353 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
354 static rtx_insn *last_scheduled_insn;
355 #define MAX_SCHED_UNITS 3
356 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
358 /* The maximum score added for an instruction whose unit hasn't been
359 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
360 give instruction mix scheduling more priority over instruction
361 grouping. */
362 #define MAX_SCHED_MIX_SCORE 8
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
 366        Increase this with the OOO window size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 100
 369 /* Structure used to hold the components of an S/390 memory
370 address. A legitimate address on S/390 is of the general
371 form
372 base + index + displacement
373 where any of the components is optional.
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
378 struct s390_address
380 rtx base;
381 rtx indx;
382 rtx disp;
383 bool pointer;
384 bool literal_pool;
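/* Illustrative example (not part of the original file): a memory operand
   written in assembler syntax as 200(%r3,%r15), i.e. RTL of the form
     (mem (plus (plus (reg %r15) (reg %r3)) (const_int 200)))
   would decompose into
     base = (reg %r15), indx = (reg %r3), disp = (const_int 200),
   with 200 fitting the unsigned 12-bit displacement range 0..4095.  */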
387 /* The following structure is embedded in the machine
388 specific part of struct function. */
390 struct GTY (()) s390_frame_layout
392 /* Offset within stack frame. */
393 HOST_WIDE_INT gprs_offset;
394 HOST_WIDE_INT f0_offset;
395 HOST_WIDE_INT f4_offset;
396 HOST_WIDE_INT f8_offset;
397 HOST_WIDE_INT backchain_offset;
 399   /* Number of first and last gpr for which slots in the register
 400      save area are reserved.  */
401 int first_save_gpr_slot;
402 int last_save_gpr_slot;
404 /* Location (FP register number) where GPRs (r0-r15) should
405 be saved to.
406 0 - does not need to be saved at all
407 -1 - stack slot */
408 #define SAVE_SLOT_NONE 0
409 #define SAVE_SLOT_STACK -1
410 signed char gpr_save_slots[16];
412 /* Number of first and last gpr to be saved, restored. */
413 int first_save_gpr;
414 int first_restore_gpr;
415 int last_save_gpr;
416 int last_restore_gpr;
418 /* Bits standing for floating point registers. Set, if the
419 respective register has to be saved. Starting with reg 16 (f0)
420 at the rightmost bit.
421 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
422 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
423 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
424 unsigned int fpr_bitmap;
426 /* Number of floating point registers f8-f15 which must be saved. */
427 int high_fprs;
429 /* Set if return address needs to be saved.
430 This flag is set by s390_return_addr_rtx if it could not use
431 the initial value of r14 and therefore depends on r14 saved
432 to the stack. */
433 bool save_return_addr_p;
435 /* Size of stack frame. */
436 HOST_WIDE_INT frame_size;
439 /* Define the structure for the machine field in struct function. */
441 struct GTY(()) machine_function
443 struct s390_frame_layout frame_layout;
445 /* Literal pool base register. */
446 rtx base_reg;
448 /* True if we may need to perform branch splitting. */
449 bool split_branches_pending_p;
451 bool has_landing_pad_p;
453 /* True if the current function may contain a tbegin clobbering
454 FPRs. */
455 bool tbegin_p;
457 /* For -fsplit-stack support: A stack local which holds a pointer to
458 the stack arguments for a function with a variable number of
459 arguments. This is set at the start of the function and is used
460 to initialize the overflow_arg_area field of the va_list
461 structure. */
462 rtx split_stack_varargs_pointer;
 465 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
467 #define cfun_frame_layout (cfun->machine->frame_layout)
468 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
469 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
470 ? cfun_frame_layout.fpr_bitmap & 0x0f \
471 : cfun_frame_layout.fpr_bitmap & 0x03))
472 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
473 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
474 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
475 (1 << (REGNO - FPR0_REGNUM)))
476 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
477 (1 << (REGNO - FPR0_REGNUM))))
478 #define cfun_gpr_save_slot(REGNO) \
479 cfun->machine->frame_layout.gpr_save_slots[REGNO]
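/* Illustrative sketch (not from the original sources), assuming
   FPR0_REGNUM == 16 as implied by the fpr_bitmap table above:

     cfun_set_fpr_save (FPR0_REGNUM + 4);

   sets bit 4 of fpr_bitmap, which per that table corresponds to hard
   register 20 (%f1), and cfun_fpr_save_p (FPR0_REGNUM + 4) then
   evaluates to true.  */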
481 /* Number of GPRs and FPRs used for argument passing. */
482 #define GP_ARG_NUM_REG 5
483 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
484 #define VEC_ARG_NUM_REG 8
486 /* A couple of shortcuts. */
487 #define CONST_OK_FOR_J(x) \
488 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
489 #define CONST_OK_FOR_K(x) \
490 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
491 #define CONST_OK_FOR_Os(x) \
492 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
493 #define CONST_OK_FOR_Op(x) \
494 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
495 #define CONST_OK_FOR_On(x) \
496 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
498 #define REGNO_PAIR_OK(REGNO, MODE) \
499 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
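/* Illustrative example (an assumption, not from the original file):
   for a mode that needs two GPRs, e.g. TImode on a 64-bit target,
   HARD_REGNO_NREGS returns 2, so only even starting registers pass:

     REGNO_PAIR_OK (2, TImode)   -> true   (even regno)
     REGNO_PAIR_OK (3, TImode)   -> false  (odd regno)  */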
 501 /* The read-ahead distance of the dynamic branch prediction unit, in
 502    bytes, on a z10 (or higher) CPU.  */
503 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
506 /* Indicate which ABI has been used for passing vector args.
507 0 - no vector type arguments have been passed where the ABI is relevant
508 1 - the old ABI has been used
509 2 - a vector type argument has been passed either in a vector register
510 or on the stack by value */
511 static int s390_vector_abi = 0;
513 /* Set the vector ABI marker if TYPE is subject to the vector ABI
514 switch. The vector ABI affects only vector data types. There are
515 two aspects of the vector ABI relevant here:
517 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
518 ABI and natural alignment with the old.
 520    2. vectors <= 16 bytes are passed in VRs or by value on the stack
521 with the new ABI but by reference on the stack with the old.
523 If ARG_P is true TYPE is used for a function argument or return
524 value. The ABI marker then is set for all vector data types. If
525 ARG_P is false only type 1 vectors are being checked. */
527 static void
528 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
530 static hash_set<const_tree> visited_types_hash;
532 if (s390_vector_abi)
533 return;
535 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
536 return;
538 if (visited_types_hash.contains (type))
539 return;
541 visited_types_hash.add (type);
543 if (VECTOR_TYPE_P (type))
545 int type_size = int_size_in_bytes (type);
 547       /* Outside of arguments only the alignment changes, and this
 548 	 happens only for vector types >= 16 bytes.  */
549 if (!arg_p && type_size < 16)
550 return;
 552       /* In arguments vector types > 16 bytes are passed as before (GCC
553 never enforced the bigger alignment for arguments which was
554 required by the old vector ABI). However, it might still be
555 ABI relevant due to the changed alignment if it is a struct
556 member. */
557 if (arg_p && type_size > 16 && !in_struct_p)
558 return;
560 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
562 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
 564       /* ARRAY_TYPE: Since neither of the ABIs requires more than
 565 	 natural alignment, there will never be ABI-dependent padding
 566 	 in an array type.  That's why we do not set in_struct_p to
567 true here. */
568 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
570 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
572 tree arg_chain;
574 /* Check the return type. */
575 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
577 for (arg_chain = TYPE_ARG_TYPES (type);
578 arg_chain;
579 arg_chain = TREE_CHAIN (arg_chain))
580 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
582 else if (RECORD_OR_UNION_TYPE_P (type))
584 tree field;
586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
588 if (TREE_CODE (field) != FIELD_DECL)
589 continue;
591 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
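/* Illustrative examples (assumptions for illustration, not from the
   original file) of how the checks above classify types:

     vector double v;    16 bytes; as a function argument it is passed in
                         a VR (new ABI) vs. by reference (old ABI), so it
                         sets the marker when ARG_P is true.

     typedef double v4df __attribute__ ((vector_size (32)));
                         32 bytes; as a direct argument it is skipped by
                         the check above, but as plain data or a struct
                         member the alignment difference between the ABIs
                         makes it set the marker.  */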
597 /* System z builtins. */
599 #include "s390-builtins.h"
601 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
603 #undef B_DEF
604 #undef OB_DEF
605 #undef OB_DEF_VAR
606 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
607 #define OB_DEF(...)
608 #define OB_DEF_VAR(...)
609 #include "s390-builtins.def"
613 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
615 #undef B_DEF
616 #undef OB_DEF
617 #undef OB_DEF_VAR
618 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
619 #define OB_DEF(...)
620 #define OB_DEF_VAR(...)
621 #include "s390-builtins.def"
625 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
627 #undef B_DEF
628 #undef OB_DEF
629 #undef OB_DEF_VAR
630 #define B_DEF(...)
631 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
632 #define OB_DEF_VAR(...)
633 #include "s390-builtins.def"
637 const unsigned int
638 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
640 #undef B_DEF
641 #undef OB_DEF
642 #undef OB_DEF_VAR
643 #define B_DEF(...)
644 #define OB_DEF(...)
645 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
646 #include "s390-builtins.def"
650 const unsigned int
651 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
653 #undef B_DEF
654 #undef OB_DEF
655 #undef OB_DEF_VAR
656 #define B_DEF(...)
657 #define OB_DEF(...)
658 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
659 #include "s390-builtins.def"
663 tree s390_builtin_types[BT_MAX];
664 tree s390_builtin_fn_types[BT_FN_MAX];
665 tree s390_builtin_decls[S390_BUILTIN_MAX +
666 S390_OVERLOADED_BUILTIN_MAX +
667 S390_OVERLOADED_BUILTIN_VAR_MAX];
669 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
670 #undef B_DEF
671 #undef OB_DEF
672 #undef OB_DEF_VAR
673 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
674 #define OB_DEF(...)
675 #define OB_DEF_VAR(...)
677 #include "s390-builtins.def"
678 CODE_FOR_nothing
681 static void
682 s390_init_builtins (void)
684 /* These definitions are being used in s390-builtins.def. */
685 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
686 NULL, NULL);
687 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
688 tree c_uint64_type_node;
 690   /* The uint64_type_node from tree.c is not compatible with the C99
691 uint64_t data type. What we want is c_uint64_type_node from
692 c-common.c. But since backend code is not supposed to interface
693 with the frontend we recreate it here. */
694 if (TARGET_64BIT)
695 c_uint64_type_node = long_unsigned_type_node;
696 else
697 c_uint64_type_node = long_long_unsigned_type_node;
699 #undef DEF_TYPE
700 #define DEF_TYPE(INDEX, NODE, CONST_P) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = (!CONST_P) ? \
703 (NODE) : build_type_variant ((NODE), 1, 0);
705 #undef DEF_POINTER_TYPE
706 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
707 if (s390_builtin_types[INDEX] == NULL) \
708 s390_builtin_types[INDEX] = \
709 build_pointer_type (s390_builtin_types[INDEX_BASE]);
711 #undef DEF_DISTINCT_TYPE
712 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
713 if (s390_builtin_types[INDEX] == NULL) \
714 s390_builtin_types[INDEX] = \
715 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
717 #undef DEF_VECTOR_TYPE
718 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
719 if (s390_builtin_types[INDEX] == NULL) \
720 s390_builtin_types[INDEX] = \
721 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
723 #undef DEF_OPAQUE_VECTOR_TYPE
724 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
725 if (s390_builtin_types[INDEX] == NULL) \
726 s390_builtin_types[INDEX] = \
727 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
729 #undef DEF_FN_TYPE
730 #define DEF_FN_TYPE(INDEX, args...) \
731 if (s390_builtin_fn_types[INDEX] == NULL) \
732 s390_builtin_fn_types[INDEX] = \
733 build_function_type_list (args, NULL_TREE);
734 #undef DEF_OV_TYPE
735 #define DEF_OV_TYPE(...)
736 #include "s390-builtin-types.def"
738 #undef B_DEF
739 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
740 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
741 s390_builtin_decls[S390_BUILTIN_##NAME] = \
742 add_builtin_function ("__builtin_" #NAME, \
743 s390_builtin_fn_types[FNTYPE], \
744 S390_BUILTIN_##NAME, \
745 BUILT_IN_MD, \
746 NULL, \
747 ATTRS);
748 #undef OB_DEF
749 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
750 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
751 == NULL) \
752 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
753 add_builtin_function ("__builtin_" #NAME, \
754 s390_builtin_fn_types[FNTYPE], \
755 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
756 BUILT_IN_MD, \
757 NULL, \
759 #undef OB_DEF_VAR
760 #define OB_DEF_VAR(...)
761 #include "s390-builtins.def"
765 /* Return true if ARG is appropriate as argument number ARGNUM of
 766    builtin DECL.  The operand flags from s390-builtins.def have to be
767 passed as OP_FLAGS. */
768 bool
769 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
771 if (O_UIMM_P (op_flags))
773 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
774 int bitwidth = bitwidths[op_flags - O_U1];
776 if (!tree_fits_uhwi_p (arg)
777 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
779 error("constant argument %d for builtin %qF is out of range (0.."
780 HOST_WIDE_INT_PRINT_UNSIGNED ")",
781 argnum, decl,
782 (HOST_WIDE_INT_1U << bitwidth) - 1);
783 return false;
787 if (O_SIMM_P (op_flags))
789 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
790 int bitwidth = bitwidths[op_flags - O_S2];
792 if (!tree_fits_shwi_p (arg)
793 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
794 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
796 error("constant argument %d for builtin %qF is out of range ("
797 HOST_WIDE_INT_PRINT_DEC ".."
798 HOST_WIDE_INT_PRINT_DEC ")",
799 argnum, decl,
800 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
801 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
802 return false;
805 return true;
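/* Worked example (not part of the original file): for an operand flagged
   as a 4-bit unsigned immediate the bitwidths table above yields
   bitwidth == 4, so the accepted range is 0 .. (1 << 4) - 1 == 15 and
   passing 16 triggers the "out of range" error; for a 5-bit signed
   immediate the accepted range is -16 .. 15.  */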
808 /* Expand an expression EXP that calls a built-in function,
809 with result going to TARGET if that's convenient
810 (and in mode MODE if that's convenient).
811 SUBTARGET may be used as the target for computing one of EXP's operands.
812 IGNORE is nonzero if the value is to be ignored. */
814 static rtx
815 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
816 machine_mode mode ATTRIBUTE_UNUSED,
817 int ignore ATTRIBUTE_UNUSED)
819 #define MAX_ARGS 6
821 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
822 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
823 enum insn_code icode;
824 rtx op[MAX_ARGS], pat;
825 int arity;
826 bool nonvoid;
827 tree arg;
828 call_expr_arg_iterator iter;
829 unsigned int all_op_flags = opflags_for_builtin (fcode);
830 machine_mode last_vec_mode = VOIDmode;
832 if (TARGET_DEBUG_ARG)
834 fprintf (stderr,
835 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
836 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
837 bflags_for_builtin (fcode));
840 if (S390_USE_TARGET_ATTRIBUTE)
842 unsigned int bflags;
844 bflags = bflags_for_builtin (fcode);
845 if ((bflags & B_HTM) && !TARGET_HTM)
847 error ("builtin %qF is not supported without -mhtm "
848 "(default with -march=zEC12 and higher).", fndecl);
849 return const0_rtx;
851 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
853 error ("builtin %qF requires -mvx "
854 "(default with -march=z13 and higher).", fndecl);
855 return const0_rtx;
858 if ((bflags & B_VXE) && !TARGET_VXE)
860 error ("Builtin %qF requires z14 or higher.", fndecl);
861 return const0_rtx;
864 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
865 && fcode < S390_ALL_BUILTIN_MAX)
867 gcc_unreachable ();
869 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
871 icode = code_for_builtin[fcode];
872 /* Set a flag in the machine specific cfun part in order to support
873 saving/restoring of FPRs. */
874 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
875 cfun->machine->tbegin_p = true;
877 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
879 error ("unresolved overloaded builtin");
880 return const0_rtx;
882 else
883 internal_error ("bad builtin fcode");
885 if (icode == 0)
886 internal_error ("bad builtin icode");
888 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
890 if (nonvoid)
892 machine_mode tmode = insn_data[icode].operand[0].mode;
893 if (!target
894 || GET_MODE (target) != tmode
895 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
896 target = gen_reg_rtx (tmode);
898 /* There are builtins (e.g. vec_promote) with no vector
899 arguments but an element selector. So we have to also look
900 at the vector return type when emitting the modulo
901 operation. */
902 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
903 last_vec_mode = insn_data[icode].operand[0].mode;
906 arity = 0;
907 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
909 rtx tmp_rtx;
910 const struct insn_operand_data *insn_op;
911 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
913 all_op_flags = all_op_flags >> O_SHIFT;
915 if (arg == error_mark_node)
916 return NULL_RTX;
917 if (arity >= MAX_ARGS)
918 return NULL_RTX;
920 if (O_IMM_P (op_flags)
921 && TREE_CODE (arg) != INTEGER_CST)
923 error ("constant value required for builtin %qF argument %d",
924 fndecl, arity + 1);
925 return const0_rtx;
928 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
929 return const0_rtx;
931 insn_op = &insn_data[icode].operand[arity + nonvoid];
932 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
934 /* expand_expr truncates constants to the target mode only if it
935 is "convenient". However, our checks below rely on this
936 being done. */
937 if (CONST_INT_P (op[arity])
938 && SCALAR_INT_MODE_P (insn_op->mode)
939 && GET_MODE (op[arity]) != insn_op->mode)
940 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
941 insn_op->mode));
943 /* Wrap the expanded RTX for pointer types into a MEM expr with
944 the proper mode. This allows us to use e.g. (match_operand
945 "memory_operand"..) in the insn patterns instead of (mem
 946 	 (match_operand "address_operand")).  This is helpful for
947 patterns not just accepting MEMs. */
948 if (POINTER_TYPE_P (TREE_TYPE (arg))
949 && insn_op->predicate != address_operand)
950 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
 952       /* Expand the modulo operation required on element selectors.  */
953 if (op_flags == O_ELEM)
955 gcc_assert (last_vec_mode != VOIDmode);
956 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
957 op[arity],
958 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
959 NULL_RTX, 1, OPTAB_DIRECT);
962 /* Record the vector mode used for an element selector. This assumes:
963 1. There is no builtin with two different vector modes and an element selector
964 2. The element selector comes after the vector type it is referring to.
 965 	 This is currently true for all the builtins but FIXME we
966 should better check for that. */
967 if (VECTOR_MODE_P (insn_op->mode))
968 last_vec_mode = insn_op->mode;
970 if (insn_op->predicate (op[arity], insn_op->mode))
972 arity++;
973 continue;
976 if (MEM_P (op[arity])
977 && insn_op->predicate == memory_operand
978 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
979 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
981 op[arity] = replace_equiv_address (op[arity],
982 copy_to_mode_reg (Pmode,
983 XEXP (op[arity], 0)));
985 /* Some of the builtins require different modes/types than the
986 pattern in order to implement a specific API. Instead of
987 adding many expanders which do the mode change we do it here.
 988 	 E.g. s390_vec_add_u128, which is required to have vector unsigned char
 989 	 arguments, is mapped to addti3.  */
990 else if (insn_op->mode != VOIDmode
991 && GET_MODE (op[arity]) != VOIDmode
992 && GET_MODE (op[arity]) != insn_op->mode
993 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
994 GET_MODE (op[arity]), 0))
995 != NULL_RTX))
997 op[arity] = tmp_rtx;
999 else if (GET_MODE (op[arity]) == insn_op->mode
1000 || GET_MODE (op[arity]) == VOIDmode
1001 || (insn_op->predicate == address_operand
1002 && GET_MODE (op[arity]) == Pmode))
1004 /* An address_operand usually has VOIDmode in the expander
1005 so we cannot use this. */
1006 machine_mode target_mode =
1007 (insn_op->predicate == address_operand
1008 ? (machine_mode) Pmode : insn_op->mode);
1009 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1012 if (!insn_op->predicate (op[arity], insn_op->mode))
1014 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1015 return const0_rtx;
1017 arity++;
1020 switch (arity)
1022 case 0:
1023 pat = GEN_FCN (icode) (target);
1024 break;
1025 case 1:
1026 if (nonvoid)
1027 pat = GEN_FCN (icode) (target, op[0]);
1028 else
1029 pat = GEN_FCN (icode) (op[0]);
1030 break;
1031 case 2:
1032 if (nonvoid)
1033 pat = GEN_FCN (icode) (target, op[0], op[1]);
1034 else
1035 pat = GEN_FCN (icode) (op[0], op[1]);
1036 break;
1037 case 3:
1038 if (nonvoid)
1039 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1040 else
1041 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1042 break;
1043 case 4:
1044 if (nonvoid)
1045 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1046 else
1047 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1048 break;
1049 case 5:
1050 if (nonvoid)
1051 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1052 else
1053 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1054 break;
1055 case 6:
1056 if (nonvoid)
1057 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1058 else
1059 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1060 break;
1061 default:
1062 gcc_unreachable ();
1064 if (!pat)
1065 return NULL_RTX;
1066 emit_insn (pat);
1068 if (nonvoid)
1069 return target;
1070 else
1071 return const0_rtx;
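/* Illustrative example (an assumption, not from the original file): for a
   builtin whose last operand is an element selector (O_ELEM) and whose
   vector mode is V4SImode, the expander above wraps the selector in an
   AND with GET_MODE_NUNITS (V4SImode) - 1, so a source-level selector of
   5 is reduced to 5 & 3 == 1 before the insn is emitted.  */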
1075 static const int s390_hotpatch_hw_max = 1000000;
1076 static int s390_hotpatch_hw_before_label = 0;
1077 static int s390_hotpatch_hw_after_label = 0;
1079 /* Check whether the hotpatch attribute is applied to a function and, if it has
1080    an argument, whether the argument is valid. */
1082 static tree
1083 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1084 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1086 tree expr;
1087 tree expr2;
1088 int err;
1090 if (TREE_CODE (*node) != FUNCTION_DECL)
1092 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1093 name);
1094 *no_add_attrs = true;
1096 if (args != NULL && TREE_CHAIN (args) != NULL)
1098 expr = TREE_VALUE (args);
1099 expr2 = TREE_VALUE (TREE_CHAIN (args));
1101 if (args == NULL || TREE_CHAIN (args) == NULL)
1102 err = 1;
1103 else if (TREE_CODE (expr) != INTEGER_CST
1104 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1105 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1106 err = 1;
1107 else if (TREE_CODE (expr2) != INTEGER_CST
1108 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1109 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1110 err = 1;
1111 else
1112 err = 0;
1113 if (err)
1115 error ("requested %qE attribute is not a comma separated pair of"
1116 " non-negative integer constants or too large (max. %d)", name,
1117 s390_hotpatch_hw_max);
1118 *no_add_attrs = true;
1121 return NULL_TREE;
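/* Illustrative usage (not from the original file): the attribute takes
   the number of halfwords to reserve before and after the function
   label, e.g.

     void f (void) __attribute__ ((hotpatch (1, 2)));

   which the handler above validates against s390_hotpatch_hw_max.  */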
1124 /* Expand the s390_vector_bool type attribute. */
1126 static tree
1127 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1128 tree args ATTRIBUTE_UNUSED,
1129 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1131 tree type = *node, result = NULL_TREE;
1132 machine_mode mode;
1134 while (POINTER_TYPE_P (type)
1135 || TREE_CODE (type) == FUNCTION_TYPE
1136 || TREE_CODE (type) == METHOD_TYPE
1137 || TREE_CODE (type) == ARRAY_TYPE)
1138 type = TREE_TYPE (type);
1140 mode = TYPE_MODE (type);
1141 switch (mode)
1143 case E_DImode: case E_V2DImode:
1144 result = s390_builtin_types[BT_BV2DI];
1145 break;
1146 case E_SImode: case E_V4SImode:
1147 result = s390_builtin_types[BT_BV4SI];
1148 break;
1149 case E_HImode: case E_V8HImode:
1150 result = s390_builtin_types[BT_BV8HI];
1151 break;
1152 case E_QImode: case E_V16QImode:
1153 result = s390_builtin_types[BT_BV16QI];
1154 break;
1155 default:
1156 break;
1159 *no_add_attrs = true; /* No need to hang on to the attribute. */
1161 if (result)
1162 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1164 return NULL_TREE;
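/* Illustrative example (an assumption, not from the original file): when
   the attribute is applied to a type whose mode is V4SImode (or SImode),
   the handler above rebuilds the type from s390_builtin_types[BT_BV4SI],
   i.e. the boolean vector type used for "vector bool int".  */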
1167 static const struct attribute_spec s390_attribute_table[] = {
1168 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1169 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1170 /* End element. */
1171 { NULL, 0, 0, false, false, false, NULL, false }
1174 /* Return the alignment for LABEL. We default to the -falign-labels
1175 value except for the literal pool base label. */
1177 s390_label_align (rtx_insn *label)
1179 rtx_insn *prev_insn = prev_active_insn (label);
1180 rtx set, src;
1182 if (prev_insn == NULL_RTX)
1183 goto old;
1185 set = single_set (prev_insn);
1187 if (set == NULL_RTX)
1188 goto old;
1190 src = SET_SRC (set);
1192 /* Don't align literal pool base labels. */
1193 if (GET_CODE (src) == UNSPEC
1194 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1195 return 0;
1197 old:
1198 return align_labels_log;
1201 static GTY(()) rtx got_symbol;
1203 /* Return the GOT table symbol. The symbol will be created when the
1204 function is invoked for the first time. */
1206 static rtx
1207 s390_got_symbol (void)
1209 if (!got_symbol)
1211 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1212 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1215 return got_symbol;
1218 static scalar_int_mode
1219 s390_libgcc_cmp_return_mode (void)
1221 return TARGET_64BIT ? DImode : SImode;
1224 static scalar_int_mode
1225 s390_libgcc_shift_count_mode (void)
1227 return TARGET_64BIT ? DImode : SImode;
1230 static scalar_int_mode
1231 s390_unwind_word_mode (void)
1233 return TARGET_64BIT ? DImode : SImode;
1236 /* Return true if the back end supports mode MODE. */
1237 static bool
1238 s390_scalar_mode_supported_p (scalar_mode mode)
1240   /* In contrast to the default implementation, reject TImode constants on 31-bit
1241 TARGET_ZARCH for ABI compliance. */
1242 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1243 return false;
1245 if (DECIMAL_FLOAT_MODE_P (mode))
1246 return default_decimal_float_supported_p ();
1248 return default_scalar_mode_supported_p (mode);
1251 /* Return true if the back end supports vector mode MODE. */
1252 static bool
1253 s390_vector_mode_supported_p (machine_mode mode)
1255 machine_mode inner;
1257 if (!VECTOR_MODE_P (mode)
1258 || !TARGET_VX
1259 || GET_MODE_SIZE (mode) > 16)
1260 return false;
1262 inner = GET_MODE_INNER (mode);
1264 switch (inner)
1266 case E_QImode:
1267 case E_HImode:
1268 case E_SImode:
1269 case E_DImode:
1270 case E_TImode:
1271 case E_SFmode:
1272 case E_DFmode:
1273 case E_TFmode:
1274 return true;
1275 default:
1276 return false;
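/* Examples (derived from the checks above, not from the original file):
   with TARGET_VX enabled, V16QImode, V4SImode and V2DFmode are accepted
   (16 bytes, supported inner modes), while any vector mode wider than
   16 bytes, or any vector mode without TARGET_VX, is rejected.  */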
1280 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1282 void
1283 s390_set_has_landing_pad_p (bool value)
1285 cfun->machine->has_landing_pad_p = value;
1288 /* If two condition code modes are compatible, return a condition code
1289 mode which is compatible with both. Otherwise, return
1290 VOIDmode. */
1292 static machine_mode
1293 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1295 if (m1 == m2)
1296 return m1;
1298 switch (m1)
1300 case E_CCZmode:
1301 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1302 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1303 return m2;
1304 return VOIDmode;
1306 case E_CCSmode:
1307 case E_CCUmode:
1308 case E_CCTmode:
1309 case E_CCSRmode:
1310 case E_CCURmode:
1311 case E_CCZ1mode:
1312 if (m2 == CCZmode)
1313 return m1;
1315 return VOIDmode;
1317 default:
1318 return VOIDmode;
1320 return VOIDmode;
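/* Example (derived from the switch above): CCZmode is compatible with
   CCUmode, so s390_cc_modes_compatible (CCZmode, CCUmode) returns
   CCUmode, while two unrelated modes such as CCSmode and CCUmode yield
   VOIDmode.  */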
1323 /* Return true if SET either doesn't set the CC register, or else
1324 the source and destination have matching CC modes and that
1325 CC mode is at least as constrained as REQ_MODE. */
1327 static bool
1328 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1330 machine_mode set_mode;
1332 gcc_assert (GET_CODE (set) == SET);
1334 /* These modes are supposed to be used only in CC consumer
1335 patterns. */
1336 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1337 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1339 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1340 return 1;
1342 set_mode = GET_MODE (SET_DEST (set));
1343 switch (set_mode)
1345 case E_CCZ1mode:
1346 case E_CCSmode:
1347 case E_CCSRmode:
1348 case E_CCUmode:
1349 case E_CCURmode:
1350 case E_CCLmode:
1351 case E_CCL1mode:
1352 case E_CCL2mode:
1353 case E_CCL3mode:
1354 case E_CCT1mode:
1355 case E_CCT2mode:
1356 case E_CCT3mode:
1357 case E_CCVEQmode:
1358 case E_CCVIHmode:
1359 case E_CCVIHUmode:
1360 case E_CCVFHmode:
1361 case E_CCVFHEmode:
1362 if (req_mode != set_mode)
1363 return 0;
1364 break;
1366 case E_CCZmode:
1367 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1368 && req_mode != CCSRmode && req_mode != CCURmode
1369 && req_mode != CCZ1mode)
1370 return 0;
1371 break;
1373 case E_CCAPmode:
1374 case E_CCANmode:
1375 if (req_mode != CCAmode)
1376 return 0;
1377 break;
1379 default:
1380 gcc_unreachable ();
1383 return (GET_MODE (SET_SRC (set)) == set_mode);
1386 /* Return true if every SET in INSN that sets the CC register
1387 has source and destination with matching CC modes and that
1388 CC mode is at least as constrained as REQ_MODE.
1389 If REQ_MODE is VOIDmode, always return false. */
1391 bool
1392 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1394 int i;
1396 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1397 if (req_mode == VOIDmode)
1398 return false;
1400 if (GET_CODE (PATTERN (insn)) == SET)
1401 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1403 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1404 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1406 rtx set = XVECEXP (PATTERN (insn), 0, i);
1407 if (GET_CODE (set) == SET)
1408 if (!s390_match_ccmode_set (set, req_mode))
1409 return false;
1412 return true;
1415 /* If a test-under-mask instruction can be used to implement
1416 (compare (and ... OP1) OP2), return the CC mode required
1417 to do that. Otherwise, return VOIDmode.
1418 MIXED is true if the instruction can distinguish between
1419    CC1 and CC2 for mixed selected bits (TMxx); it is false
1420 if the instruction cannot (TM). */
1422 machine_mode
1423 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1425 int bit0, bit1;
1427 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1428 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1429 return VOIDmode;
1431 /* Selected bits all zero: CC0.
1432 e.g.: int a; if ((a & (16 + 128)) == 0) */
1433 if (INTVAL (op2) == 0)
1434 return CCTmode;
1436 /* Selected bits all one: CC3.
1437 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1438 if (INTVAL (op2) == INTVAL (op1))
1439 return CCT3mode;
1441 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1442 int a;
1443 if ((a & (16 + 128)) == 16) -> CCT1
1444 if ((a & (16 + 128)) == 128) -> CCT2 */
1445 if (mixed)
1447 bit1 = exact_log2 (INTVAL (op2));
1448 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1449 if (bit0 != -1 && bit1 != -1)
1450 return bit0 > bit1 ? CCT1mode : CCT2mode;
1453 return VOIDmode;
1456 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1457 OP0 and OP1 of a COMPARE, return the mode to be used for the
1458 comparison. */
1460 machine_mode
1461 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1463 switch (code)
1465 case EQ:
1466 case NE:
1467 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1468 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1469 return CCAPmode;
1470 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1471 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1472 return CCAPmode;
1473 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1474 || GET_CODE (op1) == NEG)
1475 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1476 return CCLmode;
1478 if (GET_CODE (op0) == AND)
1480 /* Check whether we can potentially do it via TM. */
1481 machine_mode ccmode;
1482 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1483 if (ccmode != VOIDmode)
1485 /* Relax CCTmode to CCZmode to allow fall-back to AND
1486 if that turns out to be beneficial. */
1487 return ccmode == CCTmode ? CCZmode : ccmode;
1491 if (register_operand (op0, HImode)
1492 && GET_CODE (op1) == CONST_INT
1493 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1494 return CCT3mode;
1495 if (register_operand (op0, QImode)
1496 && GET_CODE (op1) == CONST_INT
1497 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1498 return CCT3mode;
1500 return CCZmode;
1502 case LE:
1503 case LT:
1504 case GE:
1505 case GT:
1506 /* The only overflow condition of NEG and ABS happens when
1507 	 the most negative integer (INT_MIN) is used as parameter, whose result stays negative.  So
1508 we have an overflow from a positive value to a negative.
1509 Using CCAP mode the resulting cc can be used for comparisons. */
1510 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1511 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1512 return CCAPmode;
1514 /* If constants are involved in an add instruction it is possible to use
1515 the resulting cc for comparisons with zero. Knowing the sign of the
1516 constant the overflow behavior gets predictable. e.g.:
1517 int a, b; if ((b = a + c) > 0)
1518 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1519 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1520 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1521 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1522 /* Avoid INT32_MIN on 32 bit. */
1523 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1525 if (INTVAL (XEXP((op0), 1)) < 0)
1526 return CCANmode;
1527 else
1528 return CCAPmode;
1530 /* Fall through. */
1531 case UNORDERED:
1532 case ORDERED:
1533 case UNEQ:
1534 case UNLE:
1535 case UNLT:
1536 case UNGE:
1537 case UNGT:
1538 case LTGT:
1539 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1540 && GET_CODE (op1) != CONST_INT)
1541 return CCSRmode;
1542 return CCSmode;
1544 case LTU:
1545 case GEU:
1546 if (GET_CODE (op0) == PLUS
1547 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1548 return CCL1mode;
1550 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1551 && GET_CODE (op1) != CONST_INT)
1552 return CCURmode;
1553 return CCUmode;
1555 case LEU:
1556 case GTU:
1557 if (GET_CODE (op0) == MINUS
1558 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1559 return CCL2mode;
1561 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1562 && GET_CODE (op1) != CONST_INT)
1563 return CCURmode;
1564 return CCUmode;
1566 default:
1567 gcc_unreachable ();
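/* Worked examples (derived from the cases above, not from the original
   file): for a signed "greater than" test of (plus (reg) (const_int 42))
   against zero the constant is non-negative, so CCAPmode is chosen; for
   an LTU/GEU test of a PLUS expression CCL1mode is used, so that the
   carry out of the addition can be tested directly.  */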
1571 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1572 that we can implement more efficiently. */
1574 static void
1575 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1576 bool op0_preserve_value)
1578 if (op0_preserve_value)
1579 return;
1581 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1582 if ((*code == EQ || *code == NE)
1583 && *op1 == const0_rtx
1584 && GET_CODE (*op0) == ZERO_EXTRACT
1585 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1586 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1587 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1589 rtx inner = XEXP (*op0, 0);
1590 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1591 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1592 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1594 if (len > 0 && len < modesize
1595 && pos >= 0 && pos + len <= modesize
1596 && modesize <= HOST_BITS_PER_WIDE_INT)
1598 unsigned HOST_WIDE_INT block;
1599 block = (HOST_WIDE_INT_1U << len) - 1;
1600 block <<= modesize - pos - len;
1602 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1603 gen_int_mode (block, GET_MODE (inner)));
1607 /* Narrow AND of memory against immediate to enable TM. */
1608 if ((*code == EQ || *code == NE)
1609 && *op1 == const0_rtx
1610 && GET_CODE (*op0) == AND
1611 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1612 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1614 rtx inner = XEXP (*op0, 0);
1615 rtx mask = XEXP (*op0, 1);
1617 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1618 if (GET_CODE (inner) == SUBREG
1619 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1620 && (GET_MODE_SIZE (GET_MODE (inner))
1621 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1622 && ((INTVAL (mask)
1623 & GET_MODE_MASK (GET_MODE (inner))
1624 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1625 == 0))
1626 inner = SUBREG_REG (inner);
1628 /* Do not change volatile MEMs. */
1629 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1631 int part = s390_single_part (XEXP (*op0, 1),
1632 GET_MODE (inner), QImode, 0);
1633 if (part >= 0)
1635 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1636 inner = adjust_address_nv (inner, QImode, part);
1637 *op0 = gen_rtx_AND (QImode, inner, mask);
1642 /* Narrow comparisons against 0xffff to HImode if possible. */
1643 if ((*code == EQ || *code == NE)
1644 && GET_CODE (*op1) == CONST_INT
1645 && INTVAL (*op1) == 0xffff
1646 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1647 && (nonzero_bits (*op0, GET_MODE (*op0))
1648 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1650 *op0 = gen_lowpart (HImode, *op0);
1651 *op1 = constm1_rtx;
1654 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1655 if (GET_CODE (*op0) == UNSPEC
1656 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1657 && XVECLEN (*op0, 0) == 1
1658 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1659 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1660 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1661 && *op1 == const0_rtx)
1663 enum rtx_code new_code = UNKNOWN;
1664 switch (*code)
1666 case EQ: new_code = EQ; break;
1667 case NE: new_code = NE; break;
1668 case LT: new_code = GTU; break;
1669 case GT: new_code = LTU; break;
1670 case LE: new_code = GEU; break;
1671 case GE: new_code = LEU; break;
1672 default: break;
1675 if (new_code != UNKNOWN)
1677 *op0 = XVECEXP (*op0, 0, 0);
1678 *code = new_code;
1682 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1683 if (GET_CODE (*op0) == UNSPEC
1684 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1685 && XVECLEN (*op0, 0) == 1
1686 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1687 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1688 && CONST_INT_P (*op1))
1690 enum rtx_code new_code = UNKNOWN;
1691 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1693 case E_CCZmode:
1694 case E_CCRAWmode:
1695 switch (*code)
1697 case EQ: new_code = EQ; break;
1698 case NE: new_code = NE; break;
1699 default: break;
1701 break;
1702 default: break;
1705 if (new_code != UNKNOWN)
1707 /* For CCRAWmode put the required cc mask into the second
1708 operand. */
1709 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1710 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1711 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1712 *op0 = XVECEXP (*op0, 0, 0);
1713 *code = new_code;
1717 /* Simplify cascaded EQ, NE with const0_rtx. */
1718 if ((*code == NE || *code == EQ)
1719 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1720 && GET_MODE (*op0) == SImode
1721 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1722 && REG_P (XEXP (*op0, 0))
1723 && XEXP (*op0, 1) == const0_rtx
1724 && *op1 == const0_rtx)
1726 if ((*code == EQ && GET_CODE (*op0) == NE)
1727 || (*code == NE && GET_CODE (*op0) == EQ))
1728 *code = EQ;
1729 else
1730 *code = NE;
1731 *op0 = XEXP (*op0, 0);
1734 /* Prefer register over memory as first operand. */
1735 if (MEM_P (*op0) && REG_P (*op1))
1737 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1738 *code = (int)swap_condition ((enum rtx_code)*code);
1741 /* A comparison result is compared against zero. Replace it with
1742 the (perhaps inverted) original comparison.
1743 This probably should be done by simplify_relational_operation. */
1744 if ((*code == EQ || *code == NE)
1745 && *op1 == const0_rtx
1746 && COMPARISON_P (*op0)
1747 && CC_REG_P (XEXP (*op0, 0)))
1749 enum rtx_code new_code;
1751 if (*code == EQ)
1752 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1753 XEXP (*op0, 0),
1754 XEXP (*op1, 0), NULL);
1755 else
1756 new_code = GET_CODE (*op0);
1758 if (new_code != UNKNOWN)
1760 *code = new_code;
1761 *op1 = XEXP (*op0, 1);
1762 *op0 = XEXP (*op0, 0);
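/* Worked example (derived from the first transformation above, not from
   the original file): for SImode, the comparison
     (eq (zero_extract x (const_int 1) (const_int 7)) (const_int 0))
   has len == 1 and pos == 7, so block == 1 << (32 - 7 - 1) == 0x1000000
   and the test is rewritten as
     (eq (and x (const_int 0x1000000)) (const_int 0))
   which a TM pattern can match later.  */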
1768 /* Emit a compare instruction suitable to implement the comparison
1769 OP0 CODE OP1. Return the correct condition RTL to be placed in
1770 the IF_THEN_ELSE of the conditional branch testing the result. */
1773 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1775 machine_mode mode = s390_select_ccmode (code, op0, op1);
1776 rtx cc;
1778 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1780 /* Do not output a redundant compare instruction if a
1781 compare_and_swap pattern already computed the result and the
1782 machine modes are compatible. */
1783 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1784 == GET_MODE (op0));
1785 cc = op0;
1787 else
1789 cc = gen_rtx_REG (mode, CC_REGNUM);
1790 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1793 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1796 /* Emit a compare-and-swap instruction (in the mode of MEM) setting MEM to NEW_RTX if OLD
1797 matches CMP.
1798 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1799 conditional branch testing the result. */
1801 static rtx
1802 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1803 rtx cmp, rtx new_rtx, machine_mode ccmode)
1805 rtx cc;
1807 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1808 switch (GET_MODE (mem))
1810 case E_SImode:
1811 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1812 new_rtx, cc));
1813 break;
1814 case E_DImode:
1815 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1816 new_rtx, cc));
1817 break;
1818 case E_TImode:
1819 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1820 new_rtx, cc));
1821 break;
1822 case E_QImode:
1823 case E_HImode:
1824 default:
1825 gcc_unreachable ();
1827 return s390_emit_compare (code, cc, const0_rtx);
1830 /* Emit a jump instruction to TARGET and return it. If COND is
1831 NULL_RTX, emit an unconditional jump, else a conditional jump under
1832 condition COND. */
1834 rtx_insn *
1835 s390_emit_jump (rtx target, rtx cond)
1837 rtx insn;
1839 target = gen_rtx_LABEL_REF (VOIDmode, target);
1840 if (cond)
1841 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1843 insn = gen_rtx_SET (pc_rtx, target);
1844 return emit_jump_insn (insn);
1847 /* Return branch condition mask to implement a branch
1848 specified by CODE. Return -1 for invalid comparisons. */
1851 s390_branch_condition_mask (rtx code)
1853 const int CC0 = 1 << 3;
1854 const int CC1 = 1 << 2;
1855 const int CC2 = 1 << 1;
1856 const int CC3 = 1 << 0;
1858 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1859 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1860 gcc_assert (XEXP (code, 1) == const0_rtx
1861 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1862 && CONST_INT_P (XEXP (code, 1))));
1865 switch (GET_MODE (XEXP (code, 0)))
1867 case E_CCZmode:
1868 case E_CCZ1mode:
1869 switch (GET_CODE (code))
1871 case EQ: return CC0;
1872 case NE: return CC1 | CC2 | CC3;
1873 default: return -1;
1875 break;
1877 case E_CCT1mode:
1878 switch (GET_CODE (code))
1880 case EQ: return CC1;
1881 case NE: return CC0 | CC2 | CC3;
1882 default: return -1;
1884 break;
1886 case E_CCT2mode:
1887 switch (GET_CODE (code))
1889 case EQ: return CC2;
1890 case NE: return CC0 | CC1 | CC3;
1891 default: return -1;
1893 break;
1895 case E_CCT3mode:
1896 switch (GET_CODE (code))
1898 case EQ: return CC3;
1899 case NE: return CC0 | CC1 | CC2;
1900 default: return -1;
1902 break;
1904 case E_CCLmode:
1905 switch (GET_CODE (code))
1907 case EQ: return CC0 | CC2;
1908 case NE: return CC1 | CC3;
1909 default: return -1;
1911 break;
1913 case E_CCL1mode:
1914 switch (GET_CODE (code))
1916 case LTU: return CC2 | CC3; /* carry */
1917 case GEU: return CC0 | CC1; /* no carry */
1918 default: return -1;
1920 break;
1922 case E_CCL2mode:
1923 switch (GET_CODE (code))
1925 case GTU: return CC0 | CC1; /* borrow */
1926 case LEU: return CC2 | CC3; /* no borrow */
1927 default: return -1;
1929 break;
1931 case E_CCL3mode:
1932 switch (GET_CODE (code))
1934 case EQ: return CC0 | CC2;
1935 case NE: return CC1 | CC3;
1936 case LTU: return CC1;
1937 case GTU: return CC3;
1938 case LEU: return CC1 | CC2;
1939 case GEU: return CC2 | CC3;
1940 default: return -1;
1943 case E_CCUmode:
1944 switch (GET_CODE (code))
1946 case EQ: return CC0;
1947 case NE: return CC1 | CC2 | CC3;
1948 case LTU: return CC1;
1949 case GTU: return CC2;
1950 case LEU: return CC0 | CC1;
1951 case GEU: return CC0 | CC2;
1952 default: return -1;
1954 break;
1956 case E_CCURmode:
1957 switch (GET_CODE (code))
1959 case EQ: return CC0;
1960 case NE: return CC2 | CC1 | CC3;
1961 case LTU: return CC2;
1962 case GTU: return CC1;
1963 case LEU: return CC0 | CC2;
1964 case GEU: return CC0 | CC1;
1965 default: return -1;
1967 break;
1969 case E_CCAPmode:
1970 switch (GET_CODE (code))
1972 case EQ: return CC0;
1973 case NE: return CC1 | CC2 | CC3;
1974 case LT: return CC1 | CC3;
1975 case GT: return CC2;
1976 case LE: return CC0 | CC1 | CC3;
1977 case GE: return CC0 | CC2;
1978 default: return -1;
1980 break;
1982 case E_CCANmode:
1983 switch (GET_CODE (code))
1985 case EQ: return CC0;
1986 case NE: return CC1 | CC2 | CC3;
1987 case LT: return CC1;
1988 case GT: return CC2 | CC3;
1989 case LE: return CC0 | CC1;
1990 case GE: return CC0 | CC2 | CC3;
1991 default: return -1;
1993 break;
1995 case E_CCSmode:
1996 switch (GET_CODE (code))
1998 case EQ: return CC0;
1999 case NE: return CC1 | CC2 | CC3;
2000 case LT: return CC1;
2001 case GT: return CC2;
2002 case LE: return CC0 | CC1;
2003 case GE: return CC0 | CC2;
2004 case UNORDERED: return CC3;
2005 case ORDERED: return CC0 | CC1 | CC2;
2006 case UNEQ: return CC0 | CC3;
2007 case UNLT: return CC1 | CC3;
2008 case UNGT: return CC2 | CC3;
2009 case UNLE: return CC0 | CC1 | CC3;
2010 case UNGE: return CC0 | CC2 | CC3;
2011 case LTGT: return CC1 | CC2;
2012 default: return -1;
2014 break;
2016 case E_CCSRmode:
2017 switch (GET_CODE (code))
2019 case EQ: return CC0;
2020 case NE: return CC2 | CC1 | CC3;
2021 case LT: return CC2;
2022 case GT: return CC1;
2023 case LE: return CC0 | CC2;
2024 case GE: return CC0 | CC1;
2025 case UNORDERED: return CC3;
2026 case ORDERED: return CC0 | CC2 | CC1;
2027 case UNEQ: return CC0 | CC3;
2028 case UNLT: return CC2 | CC3;
2029 case UNGT: return CC1 | CC3;
2030 case UNLE: return CC0 | CC2 | CC3;
2031 case UNGE: return CC0 | CC1 | CC3;
2032 case LTGT: return CC2 | CC1;
2033 default: return -1;
2035 break;
2037 /* Vector comparison modes. */
2038 /* CC2 will never be set. It however is part of the negated
2039 masks. */
2040 case E_CCVIALLmode:
2041 switch (GET_CODE (code))
2043 case EQ:
2044 case GTU:
2045 case GT:
2046 case GE: return CC0;
2047 /* The inverted modes are in fact *any* modes. */
2048 case NE:
2049 case LEU:
2050 case LE:
2051 case LT: return CC3 | CC1 | CC2;
2052 default: return -1;
2055 case E_CCVIANYmode:
2056 switch (GET_CODE (code))
2058 case EQ:
2059 case GTU:
2060 case GT:
2061 case GE: return CC0 | CC1;
2062 /* The inverted modes are in fact *all* modes. */
2063 case NE:
2064 case LEU:
2065 case LE:
2066 case LT: return CC3 | CC2;
2067 default: return -1;
2069 case E_CCVFALLmode:
2070 switch (GET_CODE (code))
2072 case EQ:
2073 case GT:
2074 case GE: return CC0;
2075 /* The inverted modes are in fact *any* modes. */
2076 case NE:
2077 case UNLE:
2078 case UNLT: return CC3 | CC1 | CC2;
2079 default: return -1;
2082 case E_CCVFANYmode:
2083 switch (GET_CODE (code))
2085 case EQ:
2086 case GT:
2087 case GE: return CC0 | CC1;
2088 /* The inverted modes are in fact *all* modes. */
2089 case NE:
2090 case UNLE:
2091 case UNLT: return CC3 | CC2;
2092 default: return -1;
2095 case E_CCRAWmode:
2096 switch (GET_CODE (code))
2098 case EQ:
2099 return INTVAL (XEXP (code, 1));
2100 case NE:
2101 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2102 default:
2103 gcc_unreachable ();
2106 default:
2107 return -1;
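/* Added illustrative sketch (not part of the original sources): the mask
   uses big-endian bit numbering, CC0 being the most significant of the
   four bits.  The hypothetical helper below shows the expected masks for
   an equality test of the CC register in CCZmode.  */

static void
example_branch_condition_mask (void)
{
  rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
  rtx eq = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
  rtx ne = gen_rtx_NE (VOIDmode, cc, const0_rtx);

  /* EQ selects CC0 only: mask 8 (binary 1000).  */
  gcc_assert (s390_branch_condition_mask (eq) == 8);
  /* NE selects CC1 | CC2 | CC3: mask 7 (binary 0111).  */
  gcc_assert (s390_branch_condition_mask (ne) == 7);
}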
2112 /* Return branch condition mask to implement a compare and branch
2113 specified by CODE. Return -1 for invalid comparisons. */
2116 s390_compare_and_branch_condition_mask (rtx code)
2118 const int CC0 = 1 << 3;
2119 const int CC1 = 1 << 2;
2120 const int CC2 = 1 << 1;
2122 switch (GET_CODE (code))
2124 case EQ:
2125 return CC0;
2126 case NE:
2127 return CC1 | CC2;
2128 case LT:
2129 case LTU:
2130 return CC1;
2131 case GT:
2132 case GTU:
2133 return CC2;
2134 case LE:
2135 case LEU:
2136 return CC0 | CC1;
2137 case GE:
2138 case GEU:
2139 return CC0 | CC2;
2140 default:
2141 gcc_unreachable ();
2143 return -1;
2146 /* If INV is false, return assembler mnemonic string to implement
2147 a branch specified by CODE. If INV is true, return mnemonic
2148 for the corresponding inverted branch. */
2150 static const char *
2151 s390_branch_condition_mnemonic (rtx code, int inv)
2153 int mask;
2155 static const char *const mnemonic[16] =
2157 NULL, "o", "h", "nle",
2158 "l", "nhe", "lh", "ne",
2159 "e", "nlh", "he", "nl",
2160 "le", "nh", "no", NULL
2163 if (GET_CODE (XEXP (code, 0)) == REG
2164 && REGNO (XEXP (code, 0)) == CC_REGNUM
2165 && (XEXP (code, 1) == const0_rtx
2166 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2167 && CONST_INT_P (XEXP (code, 1)))))
2168 mask = s390_branch_condition_mask (code);
2169 else
2170 mask = s390_compare_and_branch_condition_mask (code);
2172 gcc_assert (mask >= 0);
2174 if (inv)
2175 mask ^= 15;
2177 gcc_assert (mask >= 1 && mask <= 14);
2179 return mnemonic[mask];
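/* Added illustrative sketch (hypothetical helper, not part of the original
   sources): for an EQ test in CCZmode the mask is 8, so the mnemonic is
   "e"; the inverted variant uses mask 8 ^ 15 == 7, i.e. "ne".  */

static void
example_branch_condition_mnemonic (void)
{
  rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
  rtx eq = gen_rtx_EQ (VOIDmode, cc, const0_rtx);

  gcc_assert (strcmp (s390_branch_condition_mnemonic (eq, 0), "e") == 0);
  gcc_assert (strcmp (s390_branch_condition_mnemonic (eq, 1), "ne") == 0);
}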
2182 /* Return the part of op which has a value different from def.
2183 The size of the part is determined by mode.
2184 Use this function only if you already know that op really
2185 contains such a part. */
2187 unsigned HOST_WIDE_INT
2188 s390_extract_part (rtx op, machine_mode mode, int def)
2190 unsigned HOST_WIDE_INT value = 0;
2191 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2192 int part_bits = GET_MODE_BITSIZE (mode);
2193 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2194 int i;
2196 for (i = 0; i < max_parts; i++)
2198 if (i == 0)
2199 value = UINTVAL (op);
2200 else
2201 value >>= part_bits;
2203 if ((value & part_mask) != (def & part_mask))
2204 return value & part_mask;
2207 gcc_unreachable ();
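/* Added illustrative sketch (not part of the original sources): for the
   value 0x12340000 split into HImode parts with DEF == 0, the only part
   that differs from DEF is 0x1234.  */

static void
example_extract_part (void)
{
  gcc_assert (s390_extract_part (GEN_INT (0x12340000), HImode, 0) == 0x1234);
}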
2210 /* If OP is an integer constant of mode MODE with exactly one
2211 part of mode PART_MODE unequal to DEF, return the number of that
2212 part. Otherwise, return -1. */
2215 s390_single_part (rtx op,
2216 machine_mode mode,
2217 machine_mode part_mode,
2218 int def)
2220 unsigned HOST_WIDE_INT value = 0;
2221 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2222 unsigned HOST_WIDE_INT part_mask
2223 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2224 int i, part = -1;
2226 if (GET_CODE (op) != CONST_INT)
2227 return -1;
2229 for (i = 0; i < n_parts; i++)
2231 if (i == 0)
2232 value = UINTVAL (op);
2233 else
2234 value >>= GET_MODE_BITSIZE (part_mode);
2236 if ((value & part_mask) != (def & part_mask))
2238 if (part != -1)
2239 return -1;
2240 else
2241 part = i;
2244 return part == -1 ? -1 : n_parts - 1 - part;
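/* Added illustrative sketch (not part of the original sources): parts are
   numbered starting from the most significant one.  For 0x12340000 viewed
   as a DImode value with HImode parts, only part 2 is non-zero; a value
   with two non-zero parts is rejected.  */

static void
example_single_part (void)
{
  gcc_assert (s390_single_part (GEN_INT (0x12340000), DImode, HImode, 0) == 2);
  gcc_assert (s390_single_part (GEN_INT (0x12340001), DImode, HImode, 0) == -1);
}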
2247 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2248 bits and no other bits are set in (the lower SIZE bits of) IN.
2250 PSTART and PEND can be used to obtain the start and end
2251 position (inclusive) of the bitfield relative to 64
2252 bits. *PSTART / *PEND gives the position of the first/last bit
2253 of the bitfield counting from the highest order bit starting
2254 with zero. */
2256 bool
2257 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2258 int *pstart, int *pend)
2260 int start;
2261 int end = -1;
2262 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2263 int highbit = HOST_BITS_PER_WIDE_INT - size;
2264 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2266 gcc_assert (!!pstart == !!pend);
2267 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2268 if (end == -1)
2270 /* Look for the rightmost bit of a contiguous range of ones. */
2271 if (bitmask & in)
2272 /* Found it. */
2273 end = start;
2275 else
2277 /* Look for the first zero bit after the range of ones. */
2278 if (! (bitmask & in))
2279 /* Found it. */
2280 break;
2282 /* We're one past the last one-bit. */
2283 start++;
2285 if (end == -1)
2286 /* No one bits found. */
2287 return false;
2289 if (start > highbit)
2291 unsigned HOST_WIDE_INT mask;
2293 /* Calculate a mask for all bits beyond the contiguous bits. */
2294 mask = ((~HOST_WIDE_INT_0U >> highbit)
2295 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2296 if (mask & in)
2297 /* There are more bits set beyond the first range of one bits. */
2298 return false;
2301 if (pstart)
2303 *pstart = start;
2304 *pend = end;
2307 return true;
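/* Added illustrative sketch (not part of the original sources):
   0x0000000000fff000 contains a single contiguous field; counted from the
   most significant bit its first set bit is bit 40 and its last is
   bit 51.  */

static void
example_contiguous_bitmask_nowrap (void)
{
  int start, end;

  gcc_assert (s390_contiguous_bitmask_nowrap_p (0xfff000, 64, &start, &end));
  gcc_assert (start == 40 && end == 51);
}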
2310 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2311 if ~IN contains a contiguous bitfield. In that case, *END is <
2312 *START.
2314 If WRAP_P is true, a bitmask that wraps around is also tested.
2315 When a wraparound occurs *START is greater than *END (in
2316 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2317 part of the range. If WRAP_P is false, no wraparound is
2318 tested. */
2320 bool
2321 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2322 int size, int *start, int *end)
2324 int bs = HOST_BITS_PER_WIDE_INT;
2325 bool b;
2327 gcc_assert (!!start == !!end);
2328 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2329 /* This cannot be expressed as a contiguous bitmask. Exit early because
2330 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2331 a valid bitmask. */
2332 return false;
2333 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2334 if (b)
2335 return true;
2336 if (! wrap_p)
2337 return false;
2338 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2339 if (b && start)
2341 int s = *start;
2342 int e = *end;
2344 gcc_assert (s >= 1);
2345 *start = ((e + 1) & (bs - 1));
2346 *end = ((s - 1 + bs) & (bs - 1));
2349 return b;
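/* Added illustrative sketch (not part of the original sources): a mask
   with only the four topmost and the four lowermost bits set is contiguous
   only when wrapping is allowed; the reported field then starts at bit 60
   and wraps around to end at bit 3 (counted from the MSB).  */

static void
example_contiguous_bitmask_wrap (void)
{
  int start, end;
  unsigned HOST_WIDE_INT in = HOST_WIDE_INT_UC (0xf00000000000000f);

  gcc_assert (!s390_contiguous_bitmask_p (in, false, 64, &start, &end));
  gcc_assert (s390_contiguous_bitmask_p (in, true, 64, &start, &end));
  gcc_assert (start == 60 && end == 3);
}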
2352 /* Return true if OP contains the same contiguous bitfield in *all*
2353 its elements. START and END can be used to obtain the start and
2354 end position of the bitfield.
2356 START/END give the position of the first/last bit of the bitfield
2357 counting from the lowest order bit starting with zero. In order to
2358 use these values for S/390 instructions this has to be converted to
2359 "bits big endian" style. */
2361 bool
2362 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2364 unsigned HOST_WIDE_INT mask;
2365 int size;
2366 rtx elt;
2367 bool b;
2369 gcc_assert (!!start == !!end);
2370 if (!const_vec_duplicate_p (op, &elt)
2371 || !CONST_INT_P (elt))
2372 return false;
2374 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2376 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2377 if (size > 64)
2378 return false;
2380 mask = UINTVAL (elt);
2382 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2383 if (b)
2385 if (start)
2387 *start -= (HOST_BITS_PER_WIDE_INT - size);
2388 *end -= (HOST_BITS_PER_WIDE_INT - size);
2390 return true;
2392 else
2393 return false;
2396 /* Return true if C consists only of byte chunks being either 0 or
2397 0xff. If MASK is !=NULL a byte mask is generated which is
2398 appropriate for the vector generate byte mask instruction. */
2400 bool
2401 s390_bytemask_vector_p (rtx op, unsigned *mask)
2403 int i;
2404 unsigned tmp_mask = 0;
2405 int nunit, unit_size;
2407 if (!VECTOR_MODE_P (GET_MODE (op))
2408 || GET_CODE (op) != CONST_VECTOR
2409 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2410 return false;
2412 nunit = GET_MODE_NUNITS (GET_MODE (op));
2413 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2415 for (i = 0; i < nunit; i++)
2417 unsigned HOST_WIDE_INT c;
2418 int j;
2420 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2421 return false;
2423 c = UINTVAL (XVECEXP (op, 0, i));
2424 for (j = 0; j < unit_size; j++)
2426 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2427 return false;
2428 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2429 c = c >> BITS_PER_UNIT;
2433 if (mask != NULL)
2434 *mask = tmp_mask;
2436 return true;
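/* Added illustration (derived from the loop above, not part of the
   original sources): for a V16QImode CONST_VECTOR whose bytes alternate
   0xff, 0x00, 0xff, 0x00, ... the generated mask is 0xaaaa; element 0 of
   the vector maps to the most significant mask bit.  */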
2439 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2440 equivalent to a shift followed by the AND. In particular, CONTIG
2441 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2442 for ROTL indicate a rotate to the right. */
2444 bool
2445 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2447 int start, end;
2448 bool ok;
2450 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2451 gcc_assert (ok);
2453 if (rotl >= 0)
2454 return (64 - end >= rotl);
2455 else
2457 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2458 DImode. */
2459 rotl = -rotl + (64 - bitsize);
2460 return (start >= rotl);
2464 /* Check whether we can (and want to) split a double-word
2465 move in mode MODE from SRC to DST into two single-word
2466 moves, moving the subword FIRST_SUBWORD first. */
2468 bool
2469 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2471 /* Floating point and vector registers cannot be split. */
2472 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2473 return false;
2475 /* Non-offsettable memory references cannot be split. */
2476 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2477 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2478 return false;
2480 /* Moving the first subword must not clobber a register
2481 needed to move the second subword. */
2482 if (register_operand (dst, mode))
2484 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2485 if (reg_overlap_mentioned_p (subreg, src))
2486 return false;
2489 return true;
2492 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2493 and [MEM2, MEM2 + SIZE] do overlap and false
2494 otherwise. */
2496 bool
2497 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2499 rtx addr1, addr2, addr_delta;
2500 HOST_WIDE_INT delta;
2502 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2503 return true;
2505 if (size == 0)
2506 return false;
2508 addr1 = XEXP (mem1, 0);
2509 addr2 = XEXP (mem2, 0);
2511 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2513 /* This overlapping check is used by peepholes merging memory block operations.
2514 Overlapping operations would otherwise be recognized by the S/390 hardware
2515 and would fall back to a slower implementation. Allowing overlapping
2516 operations would lead to slow code but not to wrong code. Therefore we are
2517 somewhat optimistic if we cannot prove that the memory blocks are
2518 overlapping.
2519 That's why we return false here although this may accept operations on
2520 overlapping memory areas. */
2521 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2522 return false;
2524 delta = INTVAL (addr_delta);
2526 if (delta == 0
2527 || (delta > 0 && delta < size)
2528 || (delta < 0 && -delta < size))
2529 return true;
2531 return false;
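/* Added illustrative sketch (not part of the original sources): two
   references through the same address trivially overlap for any non-zero
   size, while references through unrelated registers cannot be proven to
   overlap and are therefore optimistically reported as disjoint.  */

static void
example_overlap_p (void)
{
  rtx mem1 = gen_rtx_MEM (QImode, gen_rtx_REG (Pmode, 2));
  rtx mem2 = gen_rtx_MEM (QImode, gen_rtx_REG (Pmode, 3));

  gcc_assert (s390_overlap_p (mem1, mem1, 16));
  gcc_assert (!s390_overlap_p (mem1, mem1, 0));
  gcc_assert (!s390_overlap_p (mem1, mem2, 16));
}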
2534 /* Check whether the address of memory reference MEM2 equals exactly
2535 the address of memory reference MEM1 plus DELTA. Return true if
2536 we can prove this to be the case, false otherwise. */
2538 bool
2539 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2541 rtx addr1, addr2, addr_delta;
2543 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2544 return false;
2546 addr1 = XEXP (mem1, 0);
2547 addr2 = XEXP (mem2, 0);
2549 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2550 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2551 return false;
2553 return true;
2556 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2558 void
2559 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2560 rtx *operands)
2562 machine_mode wmode = mode;
2563 rtx dst = operands[0];
2564 rtx src1 = operands[1];
2565 rtx src2 = operands[2];
2566 rtx op, clob, tem;
2568 /* If we cannot handle the operation directly, use a temp register. */
2569 if (!s390_logical_operator_ok_p (operands))
2570 dst = gen_reg_rtx (mode);
2572 /* QImode and HImode patterns make sense only if we have a destination
2573 in memory. Otherwise perform the operation in SImode. */
2574 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2575 wmode = SImode;
2577 /* Widen operands if required. */
2578 if (mode != wmode)
2580 if (GET_CODE (dst) == SUBREG
2581 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2582 dst = tem;
2583 else if (REG_P (dst))
2584 dst = gen_rtx_SUBREG (wmode, dst, 0);
2585 else
2586 dst = gen_reg_rtx (wmode);
2588 if (GET_CODE (src1) == SUBREG
2589 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2590 src1 = tem;
2591 else if (GET_MODE (src1) != VOIDmode)
2592 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2594 if (GET_CODE (src2) == SUBREG
2595 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2596 src2 = tem;
2597 else if (GET_MODE (src2) != VOIDmode)
2598 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2601 /* Emit the instruction. */
2602 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2603 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2604 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2606 /* Fix up the destination if needed. */
2607 if (dst != operands[0])
2608 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2611 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2613 bool
2614 s390_logical_operator_ok_p (rtx *operands)
2616 /* If the destination operand is in memory, it needs to coincide
2617 with one of the source operands. After reload, it has to be
2618 the first source operand. */
2619 if (GET_CODE (operands[0]) == MEM)
2620 return rtx_equal_p (operands[0], operands[1])
2621 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2623 return true;
2626 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2627 operand IMMOP to switch from SS to SI type instructions. */
2629 void
2630 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2632 int def = code == AND ? -1 : 0;
2633 HOST_WIDE_INT mask;
2634 int part;
2636 gcc_assert (GET_CODE (*memop) == MEM);
2637 gcc_assert (!MEM_VOLATILE_P (*memop));
2639 mask = s390_extract_part (*immop, QImode, def);
2640 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2641 gcc_assert (part >= 0);
2643 *memop = adjust_address (*memop, QImode, part);
2644 *immop = gen_int_mode (mask, QImode);
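/* Added illustration (not part of the original sources): an AND of an
   SImode memory operand with 0xffffff00 only changes its last byte, so
   s390_single_part selects part 3 and the operation is rewritten as a
   single-byte AND (NI) with mask 0x00 on the byte at offset 3.  */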
2648 /* How to allocate a 'struct machine_function'. */
2650 static struct machine_function *
2651 s390_init_machine_status (void)
2653 return ggc_cleared_alloc<machine_function> ();
2656 /* Map for smallest class containing reg regno. */
2658 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2659 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2660 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2661 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2662 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2663 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2664 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2665 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2666 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2667 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2668 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2669 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2670 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2671 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2672 VEC_REGS, VEC_REGS /* 52 */
2675 /* Return attribute type of insn. */
2677 static enum attr_type
2678 s390_safe_attr_type (rtx_insn *insn)
2680 if (recog_memoized (insn) >= 0)
2681 return get_attr_type (insn);
2682 else
2683 return TYPE_NONE;
2686 /* Return true if DISP is a valid short displacement. */
2688 static bool
2689 s390_short_displacement (rtx disp)
2691 /* No displacement is OK. */
2692 if (!disp)
2693 return true;
2695 /* Without the long displacement facility we don't need to
2696 distinguish between long and short displacement. */
2697 if (!TARGET_LONG_DISPLACEMENT)
2698 return true;
2700 /* Integer displacement in range. */
2701 if (GET_CODE (disp) == CONST_INT)
2702 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2704 /* GOT offset is not OK, the GOT can be large. */
2705 if (GET_CODE (disp) == CONST
2706 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2707 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2708 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2709 return false;
2711 /* All other symbolic constants are literal pool references,
2712 which are OK as the literal pool must be small. */
2713 if (GET_CODE (disp) == CONST)
2714 return true;
2716 return false;
2719 /* Decompose a RTL expression ADDR for a memory address into
2720 its components, returned in OUT.
2722 Returns false if ADDR is not a valid memory address, true
2723 otherwise. If OUT is NULL, don't return the components,
2724 but check for validity only.
2726 Note: Only addresses in canonical form are recognized.
2727 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2728 canonical form so that they will be recognized. */
2730 static int
2731 s390_decompose_address (rtx addr, struct s390_address *out)
2733 HOST_WIDE_INT offset = 0;
2734 rtx base = NULL_RTX;
2735 rtx indx = NULL_RTX;
2736 rtx disp = NULL_RTX;
2737 rtx orig_disp;
2738 bool pointer = false;
2739 bool base_ptr = false;
2740 bool indx_ptr = false;
2741 bool literal_pool = false;
2743 /* We may need to substitute the literal pool base register into the address
2744 below. However, at this point we do not know which register is going to
2745 be used as base, so we substitute the arg pointer register. This is going
2746 to be treated as holding a pointer below -- it shouldn't be used for any
2747 other purpose. */
2748 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2750 /* Decompose address into base + index + displacement. */
2752 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2753 base = addr;
2755 else if (GET_CODE (addr) == PLUS)
2757 rtx op0 = XEXP (addr, 0);
2758 rtx op1 = XEXP (addr, 1);
2759 enum rtx_code code0 = GET_CODE (op0);
2760 enum rtx_code code1 = GET_CODE (op1);
2762 if (code0 == REG || code0 == UNSPEC)
2764 if (code1 == REG || code1 == UNSPEC)
2766 indx = op0; /* index + base */
2767 base = op1;
2770 else
2772 base = op0; /* base + displacement */
2773 disp = op1;
2777 else if (code0 == PLUS)
2779 indx = XEXP (op0, 0); /* index + base + disp */
2780 base = XEXP (op0, 1);
2781 disp = op1;
2784 else
2786 return false;
2790 else
2791 disp = addr; /* displacement */
2793 /* Extract integer part of displacement. */
2794 orig_disp = disp;
2795 if (disp)
2797 if (GET_CODE (disp) == CONST_INT)
2799 offset = INTVAL (disp);
2800 disp = NULL_RTX;
2802 else if (GET_CODE (disp) == CONST
2803 && GET_CODE (XEXP (disp, 0)) == PLUS
2804 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2806 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2807 disp = XEXP (XEXP (disp, 0), 0);
2811 /* Strip off CONST here to avoid special case tests later. */
2812 if (disp && GET_CODE (disp) == CONST)
2813 disp = XEXP (disp, 0);
2815 /* We can convert literal pool addresses to
2816 displacements by basing them off the base register. */
2817 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2819 if (base || indx)
2820 return false;
2822 base = fake_pool_base, literal_pool = true;
2824 /* Mark up the displacement. */
2825 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2826 UNSPEC_LTREL_OFFSET);
2829 /* Validate base register. */
2830 if (base)
2832 if (GET_CODE (base) == UNSPEC)
2833 switch (XINT (base, 1))
2835 case UNSPEC_LTREF:
2836 if (!disp)
2837 disp = gen_rtx_UNSPEC (Pmode,
2838 gen_rtvec (1, XVECEXP (base, 0, 0)),
2839 UNSPEC_LTREL_OFFSET);
2840 else
2841 return false;
2843 base = XVECEXP (base, 0, 1);
2844 break;
2846 case UNSPEC_LTREL_BASE:
2847 if (XVECLEN (base, 0) == 1)
2848 base = fake_pool_base, literal_pool = true;
2849 else
2850 base = XVECEXP (base, 0, 1);
2851 break;
2853 default:
2854 return false;
2857 if (!REG_P (base) || GET_MODE (base) != Pmode)
2858 return false;
2860 if (REGNO (base) == STACK_POINTER_REGNUM
2861 || REGNO (base) == FRAME_POINTER_REGNUM
2862 || ((reload_completed || reload_in_progress)
2863 && frame_pointer_needed
2864 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2865 || REGNO (base) == ARG_POINTER_REGNUM
2866 || (flag_pic
2867 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2868 pointer = base_ptr = true;
2870 if ((reload_completed || reload_in_progress)
2871 && base == cfun->machine->base_reg)
2872 pointer = base_ptr = literal_pool = true;
2875 /* Validate index register. */
2876 if (indx)
2878 if (GET_CODE (indx) == UNSPEC)
2879 switch (XINT (indx, 1))
2881 case UNSPEC_LTREF:
2882 if (!disp)
2883 disp = gen_rtx_UNSPEC (Pmode,
2884 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2885 UNSPEC_LTREL_OFFSET);
2886 else
2887 return false;
2889 indx = XVECEXP (indx, 0, 1);
2890 break;
2892 case UNSPEC_LTREL_BASE:
2893 if (XVECLEN (indx, 0) == 1)
2894 indx = fake_pool_base, literal_pool = true;
2895 else
2896 indx = XVECEXP (indx, 0, 1);
2897 break;
2899 default:
2900 return false;
2903 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2904 return false;
2906 if (REGNO (indx) == STACK_POINTER_REGNUM
2907 || REGNO (indx) == FRAME_POINTER_REGNUM
2908 || ((reload_completed || reload_in_progress)
2909 && frame_pointer_needed
2910 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2911 || REGNO (indx) == ARG_POINTER_REGNUM
2912 || (flag_pic
2913 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2914 pointer = indx_ptr = true;
2916 if ((reload_completed || reload_in_progress)
2917 && indx == cfun->machine->base_reg)
2918 pointer = indx_ptr = literal_pool = true;
2921 /* Prefer to use pointer as base, not index. */
2922 if (base && indx && !base_ptr
2923 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2925 rtx tmp = base;
2926 base = indx;
2927 indx = tmp;
2930 /* Validate displacement. */
2931 if (!disp)
2933 /* If virtual registers are involved, the displacement will change later
2934 anyway as the virtual registers get eliminated. This could make a
2935 valid displacement invalid, but it is more likely to make an invalid
2936 displacement valid, because we sometimes access the register save area
2937 via negative offsets to one of those registers.
2938 Thus we don't check the displacement for validity here. If after
2939 elimination the displacement turns out to be invalid after all,
2940 this is fixed up by reload in any case. */
2941 /* LRA always keeps displacements up to date and we need to
2942 know that the displacement is valid during all of LRA, not only
2943 at the final elimination. */
2944 if (lra_in_progress
2945 || (base != arg_pointer_rtx
2946 && indx != arg_pointer_rtx
2947 && base != return_address_pointer_rtx
2948 && indx != return_address_pointer_rtx
2949 && base != frame_pointer_rtx
2950 && indx != frame_pointer_rtx
2951 && base != virtual_stack_vars_rtx
2952 && indx != virtual_stack_vars_rtx))
2953 if (!DISP_IN_RANGE (offset))
2954 return false;
2956 else
2958 /* All the special cases are pointers. */
2959 pointer = true;
2961 /* In the small-PIC case, the linker converts @GOT
2962 and @GOTNTPOFF offsets to possible displacements. */
2963 if (GET_CODE (disp) == UNSPEC
2964 && (XINT (disp, 1) == UNSPEC_GOT
2965 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2966 && flag_pic == 1)
2971 /* Accept pool label offsets. */
2972 else if (GET_CODE (disp) == UNSPEC
2973 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2976 /* Accept literal pool references. */
2977 else if (GET_CODE (disp) == UNSPEC
2978 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2980 /* In case CSE pulled a non literal pool reference out of
2981 the pool we have to reject the address. This is
2982 especially important when loading the GOT pointer on non
2983 zarch CPUs. In this case the literal pool contains an lt
2984 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2985 will most likely exceed the displacement. */
2986 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2987 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2988 return false;
2990 orig_disp = gen_rtx_CONST (Pmode, disp);
2991 if (offset)
2993 /* If we have an offset, make sure it does not
2994 exceed the size of the constant pool entry. */
2995 rtx sym = XVECEXP (disp, 0, 0);
2996 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2997 return false;
2999 orig_disp = plus_constant (Pmode, orig_disp, offset);
3003 else
3004 return false;
3007 if (!base && !indx)
3008 pointer = true;
3010 if (out)
3012 out->base = base;
3013 out->indx = indx;
3014 out->disp = orig_disp;
3015 out->pointer = pointer;
3016 out->literal_pool = literal_pool;
3019 return true;
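/* Added illustrative sketch (hypothetical helper, not part of the original
   sources): a base-plus-displacement address decomposes into its register
   and its integer offset; there is no index register in this form.  */

static void
example_decompose_address (void)
{
  struct s390_address ad;
  rtx addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 2), GEN_INT (4092));

  gcc_assert (s390_decompose_address (addr, &ad));
  gcc_assert (REG_P (ad.base) && REGNO (ad.base) == 2);
  gcc_assert (ad.indx == NULL_RTX);
  gcc_assert (CONST_INT_P (ad.disp) && INTVAL (ad.disp) == 4092);
}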
3022 /* Decompose a RTL expression OP for an address style operand into its
3023 components, and return the base register in BASE and the offset in
3024 OFFSET. While OP looks like an address it is never supposed to be
3025 used as such.
3027 Return true if OP is a valid address operand, false if not. */
3029 bool
3030 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3031 HOST_WIDE_INT *offset)
3033 rtx off = NULL_RTX;
3035 /* We can have an integer constant, an address register,
3036 or a sum of the two. */
3037 if (CONST_SCALAR_INT_P (op))
3039 off = op;
3040 op = NULL_RTX;
3042 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3044 off = XEXP (op, 1);
3045 op = XEXP (op, 0);
3047 while (op && GET_CODE (op) == SUBREG)
3048 op = SUBREG_REG (op);
3050 if (op && GET_CODE (op) != REG)
3051 return false;
3053 if (offset)
3055 if (off == NULL_RTX)
3056 *offset = 0;
3057 else if (CONST_INT_P (off))
3058 *offset = INTVAL (off);
3059 else if (CONST_WIDE_INT_P (off))
3060 /* The offset will be cut down to 12 bits anyway, so just take
3061 the lowest order chunk of the wide int. */
3062 *offset = CONST_WIDE_INT_ELT (off, 0);
3063 else
3064 gcc_unreachable ();
3066 if (base)
3067 *base = op;
3069 return true;
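/* Added illustrative sketch (not part of the original sources): a shift
   count such as (plus (reg 1) (const_int 7)) splits into the register and
   the constant part.  */

static void
example_decompose_addrstyle (void)
{
  rtx base;
  HOST_WIDE_INT offset;
  rtx op = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 1), GEN_INT (7));

  gcc_assert (s390_decompose_addrstyle_without_index (op, &base, &offset));
  gcc_assert (REG_P (base) && REGNO (base) == 1 && offset == 7);
}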
3073 /* Return true if OP is a valid address without index. */
3075 bool
3076 s390_legitimate_address_without_index_p (rtx op)
3078 struct s390_address addr;
3080 if (!s390_decompose_address (XEXP (op, 0), &addr))
3081 return false;
3082 if (addr.indx)
3083 return false;
3085 return true;
3089 /* Return TRUE if ADDR is an operand valid for a load/store relative
3090 instruction. Be aware that the alignment of the operand needs to
3091 be checked separately.
3092 Valid addresses are single references or a sum of a reference and a
3093 constant integer. Return these parts in SYMREF and ADDEND. You can
3094 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3095 values. Literal pool references are *not* considered symbol
3096 references. */
3098 static bool
3099 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3101 HOST_WIDE_INT tmpaddend = 0;
3103 if (GET_CODE (addr) == CONST)
3104 addr = XEXP (addr, 0);
3106 if (GET_CODE (addr) == PLUS)
3108 if (!CONST_INT_P (XEXP (addr, 1)))
3109 return false;
3111 tmpaddend = INTVAL (XEXP (addr, 1));
3112 addr = XEXP (addr, 0);
3115 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3116 || (GET_CODE (addr) == UNSPEC
3117 && (XINT (addr, 1) == UNSPEC_GOTENT
3118 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3120 if (symref)
3121 *symref = addr;
3122 if (addend)
3123 *addend = tmpaddend;
3125 return true;
3127 return false;
3130 /* Return true if the address in OP is valid for constraint letter C
3131 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3132 pool MEMs should be accepted. Only the Q, R, S, T constraint
3133 letters are allowed for C. */
3135 static int
3136 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3138 struct s390_address addr;
3139 bool decomposed = false;
3141 if (!address_operand (op, GET_MODE (op)))
3142 return 0;
3144 /* This check makes sure that no symbolic addresses (except literal
3145 pool references) are accepted by the R or T constraints. */
3146 if (s390_loadrelative_operand_p (op, NULL, NULL))
3147 return 0;
3149 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3150 if (!lit_pool_ok)
3152 if (!s390_decompose_address (op, &addr))
3153 return 0;
3154 if (addr.literal_pool)
3155 return 0;
3156 decomposed = true;
3159 /* With reload, we sometimes get intermediate address forms that are
3160 actually invalid as-is, but we need to accept them in the most
3161 generic cases below ('R' or 'T'), since reload will in fact fix
3162 them up. LRA behaves differently here; we never see such forms,
3163 but on the other hand, we need to strictly reject every invalid
3164 address form. Perform this check right up front. */
3165 if (lra_in_progress)
3167 if (!decomposed && !s390_decompose_address (op, &addr))
3168 return 0;
3169 decomposed = true;
3172 switch (c)
3174 case 'Q': /* no index short displacement */
3175 if (!decomposed && !s390_decompose_address (op, &addr))
3176 return 0;
3177 if (addr.indx)
3178 return 0;
3179 if (!s390_short_displacement (addr.disp))
3180 return 0;
3181 break;
3183 case 'R': /* with index short displacement */
3184 if (TARGET_LONG_DISPLACEMENT)
3186 if (!decomposed && !s390_decompose_address (op, &addr))
3187 return 0;
3188 if (!s390_short_displacement (addr.disp))
3189 return 0;
3191 /* Any invalid address here will be fixed up by reload,
3192 so accept it for the most generic constraint. */
3193 break;
3195 case 'S': /* no index long displacement */
3196 if (!decomposed && !s390_decompose_address (op, &addr))
3197 return 0;
3198 if (addr.indx)
3199 return 0;
3200 break;
3202 case 'T': /* with index long displacement */
3203 /* Any invalid address here will be fixed up by reload,
3204 so accept it for the most generic constraint. */
3205 break;
3207 default:
3208 return 0;
3210 return 1;
3214 /* Evaluates constraint strings described by the regular expression
3215 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3216 the constraint given in STR, or 0 else. */
3219 s390_mem_constraint (const char *str, rtx op)
3221 char c = str[0];
3223 switch (c)
3225 case 'A':
3226 /* Check for offsettable variants of memory constraints. */
3227 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3228 return 0;
3229 if ((reload_completed || reload_in_progress)
3230 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3231 return 0;
3232 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3233 case 'B':
3234 /* Check for non-literal-pool variants of memory constraints. */
3235 if (!MEM_P (op))
3236 return 0;
3237 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3238 case 'Q':
3239 case 'R':
3240 case 'S':
3241 case 'T':
3242 if (GET_CODE (op) != MEM)
3243 return 0;
3244 return s390_check_qrst_address (c, XEXP (op, 0), true);
3245 case 'Y':
3246 /* Simply check for the basic form of a shift count. Reload will
3247 take care of making sure we have a proper base register. */
3248 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3249 return 0;
3250 break;
3251 case 'Z':
3252 return s390_check_qrst_address (str[1], op, true);
3253 default:
3254 return 0;
3256 return 1;
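/* Added illustration (not part of the original sources): with
   TARGET_LONG_DISPLACEMENT an operand like
   (mem (plus (reg) (const_int 5000))) satisfies 'S' and 'T' but neither
   'Q' nor 'R', because 5000 does not fit the unsigned 12-bit short
   displacement; with a displacement of 4000 all four constraints are
   satisfied.  */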
3260 /* Evaluates constraint strings starting with letter O. Input
3261 parameter C is the second letter following the "O" in the constraint
3262 string. Returns 1 if VALUE meets the respective constraint and 0
3263 otherwise. */
3266 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3268 if (!TARGET_EXTIMM)
3269 return 0;
3271 switch (c)
3273 case 's':
3274 return trunc_int_for_mode (value, SImode) == value;
3276 case 'p':
3277 return value == 0
3278 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3280 case 'n':
3281 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3283 default:
3284 gcc_unreachable ();
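/* Added illustration (not part of the original sources, and only
   meaningful with TARGET_EXTIMM): "Os" accepts anything representable as
   a signed 32-bit immediate, e.g. -1 or 0x7fffffff; "Op" accepts values
   whose upper 32 bits are zero, e.g. 0xfffffffe, which "Os" rejects.  */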
3289 /* Evaluates constraint strings starting with letter N. Parameter STR
3290 contains the letters following letter "N" in the constraint string.
3291 Returns true if VALUE matches the constraint. */
3294 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3296 machine_mode mode, part_mode;
3297 int def;
3298 int part, part_goal;
3301 if (str[0] == 'x')
3302 part_goal = -1;
3303 else
3304 part_goal = str[0] - '0';
3306 switch (str[1])
3308 case 'Q':
3309 part_mode = QImode;
3310 break;
3311 case 'H':
3312 part_mode = HImode;
3313 break;
3314 case 'S':
3315 part_mode = SImode;
3316 break;
3317 default:
3318 return 0;
3321 switch (str[2])
3323 case 'H':
3324 mode = HImode;
3325 break;
3326 case 'S':
3327 mode = SImode;
3328 break;
3329 case 'D':
3330 mode = DImode;
3331 break;
3332 default:
3333 return 0;
3336 switch (str[3])
3338 case '0':
3339 def = 0;
3340 break;
3341 case 'F':
3342 def = -1;
3343 break;
3344 default:
3345 return 0;
3348 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3349 return 0;
3351 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3352 if (part < 0)
3353 return 0;
3354 if (part_goal != -1 && part_goal != part)
3355 return 0;
3357 return 1;
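/* Added illustration (not part of the original sources): the string
   "3HD0" asks for a DImode value whose only HImode part different from 0
   is part 3, i.e. the least significant 16 bits, as in 0x1234; "xQH0"
   accepts an HImode value with a single non-zero QImode part in either
   position, such as 0x1200 or 0x0034.  */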
3361 /* Returns true if the input parameter VALUE is a float zero. */
3364 s390_float_const_zero_p (rtx value)
3366 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3367 && value == CONST0_RTX (GET_MODE (value)));
3370 /* Implement TARGET_REGISTER_MOVE_COST. */
3372 static int
3373 s390_register_move_cost (machine_mode mode,
3374 reg_class_t from, reg_class_t to)
3376 /* On s390, copy between fprs and gprs is expensive. */
3378 /* It becomes somewhat faster having ldgr/lgdr. */
3379 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3381 /* ldgr is single cycle. */
3382 if (reg_classes_intersect_p (from, GENERAL_REGS)
3383 && reg_classes_intersect_p (to, FP_REGS))
3384 return 1;
3385 /* lgdr needs 3 cycles. */
3386 if (reg_classes_intersect_p (to, GENERAL_REGS)
3387 && reg_classes_intersect_p (from, FP_REGS))
3388 return 3;
3391 /* Otherwise copying is done via memory. */
3392 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3393 && reg_classes_intersect_p (to, FP_REGS))
3394 || (reg_classes_intersect_p (from, FP_REGS)
3395 && reg_classes_intersect_p (to, GENERAL_REGS)))
3396 return 10;
3398 return 1;
3401 /* Implement TARGET_MEMORY_MOVE_COST. */
3403 static int
3404 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3405 reg_class_t rclass ATTRIBUTE_UNUSED,
3406 bool in ATTRIBUTE_UNUSED)
3408 return 2;
3411 /* Compute a (partial) cost for rtx X. Return true if the complete
3412 cost has been computed, and false if subexpressions should be
3413 scanned. In either case, *TOTAL contains the cost result. The
3414 initial value of *TOTAL is the default value computed by
3415 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3416 code of the superexpression of x. */
3418 static bool
3419 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3420 int opno ATTRIBUTE_UNUSED,
3421 int *total, bool speed ATTRIBUTE_UNUSED)
3423 int code = GET_CODE (x);
3424 switch (code)
3426 case CONST:
3427 case CONST_INT:
3428 case LABEL_REF:
3429 case SYMBOL_REF:
3430 case CONST_DOUBLE:
3431 case CONST_WIDE_INT:
3432 case MEM:
3433 *total = 0;
3434 return true;
3436 case SET:
3438 /* Without this a conditional move instruction would be
3439 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3440 comparison operator). That's a bit pessimistic. */
3442 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3443 return false;
3445 rtx cond = XEXP (SET_SRC (x), 0);
3447 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3448 return false;
3450 /* It is going to be a load/store on condition. Make it
3451 slightly more expensive than a normal load. */
3452 *total = COSTS_N_INSNS (1) + 1;
3454 rtx dst = SET_DEST (x);
3455 rtx then = XEXP (SET_SRC (x), 1);
3456 rtx els = XEXP (SET_SRC (x), 2);
3458 /* It is a real IF-THEN-ELSE. An additional move will be
3459 needed to implement that. */
3460 if (reload_completed
3461 && !rtx_equal_p (dst, then)
3462 && !rtx_equal_p (dst, els))
3463 *total += COSTS_N_INSNS (1) / 2;
3465 /* A minor penalty for constants we cannot directly handle. */
3466 if ((CONST_INT_P (then) || CONST_INT_P (els))
3467 && (!TARGET_Z13 || MEM_P (dst)
3468 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3469 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3470 *total += COSTS_N_INSNS (1) / 2;
3472 /* A store on condition can only handle register src operands. */
3473 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3474 *total += COSTS_N_INSNS (1) / 2;
3476 return true;
3478 case IOR:
3479 /* risbg */
3480 if (GET_CODE (XEXP (x, 0)) == AND
3481 && GET_CODE (XEXP (x, 1)) == ASHIFT
3482 && REG_P (XEXP (XEXP (x, 0), 0))
3483 && REG_P (XEXP (XEXP (x, 1), 0))
3484 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3485 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3486 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3487 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3489 *total = COSTS_N_INSNS (2);
3490 return true;
3493 /* ~AND on a 128 bit mode. This can be done using a vector
3494 instruction. */
3495 if (TARGET_VXE
3496 && GET_CODE (XEXP (x, 0)) == NOT
3497 && GET_CODE (XEXP (x, 1)) == NOT
3498 && REG_P (XEXP (XEXP (x, 0), 0))
3499 && REG_P (XEXP (XEXP (x, 1), 0))
3500 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3501 && s390_hard_regno_mode_ok (VR0_REGNUM,
3502 GET_MODE (XEXP (XEXP (x, 0), 0))))
3504 *total = COSTS_N_INSNS (1);
3505 return true;
3507 /* fallthrough */
3508 case ASHIFT:
3509 case ASHIFTRT:
3510 case LSHIFTRT:
3511 case ROTATE:
3512 case ROTATERT:
3513 case AND:
3514 case XOR:
3515 case NEG:
3516 case NOT:
3517 *total = COSTS_N_INSNS (1);
3518 return false;
3520 case PLUS:
3521 case MINUS:
3522 *total = COSTS_N_INSNS (1);
3523 return false;
3525 case MULT:
3526 switch (mode)
3528 case E_SImode:
3530 rtx left = XEXP (x, 0);
3531 rtx right = XEXP (x, 1);
3532 if (GET_CODE (right) == CONST_INT
3533 && CONST_OK_FOR_K (INTVAL (right)))
3534 *total = s390_cost->mhi;
3535 else if (GET_CODE (left) == SIGN_EXTEND)
3536 *total = s390_cost->mh;
3537 else
3538 *total = s390_cost->ms; /* msr, ms, msy */
3539 break;
3541 case E_DImode:
3543 rtx left = XEXP (x, 0);
3544 rtx right = XEXP (x, 1);
3545 if (TARGET_ZARCH)
3547 if (GET_CODE (right) == CONST_INT
3548 && CONST_OK_FOR_K (INTVAL (right)))
3549 *total = s390_cost->mghi;
3550 else if (GET_CODE (left) == SIGN_EXTEND)
3551 *total = s390_cost->msgf;
3552 else
3553 *total = s390_cost->msg; /* msgr, msg */
3555 else /* TARGET_31BIT */
3557 if (GET_CODE (left) == SIGN_EXTEND
3558 && GET_CODE (right) == SIGN_EXTEND)
3559 /* mulsidi case: mr, m */
3560 *total = s390_cost->m;
3561 else if (GET_CODE (left) == ZERO_EXTEND
3562 && GET_CODE (right) == ZERO_EXTEND
3563 && TARGET_CPU_ZARCH)
3564 /* umulsidi case: ml, mlr */
3565 *total = s390_cost->ml;
3566 else
3567 /* Complex calculation is required. */
3568 *total = COSTS_N_INSNS (40);
3570 break;
3572 case E_SFmode:
3573 case E_DFmode:
3574 *total = s390_cost->mult_df;
3575 break;
3576 case E_TFmode:
3577 *total = s390_cost->mxbr;
3578 break;
3579 default:
3580 return false;
3582 return false;
3584 case FMA:
3585 switch (mode)
3587 case E_DFmode:
3588 *total = s390_cost->madbr;
3589 break;
3590 case E_SFmode:
3591 *total = s390_cost->maebr;
3592 break;
3593 default:
3594 return false;
3596 /* Negate in the third argument is free: FMSUB. */
3597 if (GET_CODE (XEXP (x, 2)) == NEG)
3599 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3600 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3601 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3602 return true;
3604 return false;
3606 case UDIV:
3607 case UMOD:
3608 if (mode == TImode) /* 128 bit division */
3609 *total = s390_cost->dlgr;
3610 else if (mode == DImode)
3612 rtx right = XEXP (x, 1);
3613 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3614 *total = s390_cost->dlr;
3615 else /* 64 by 64 bit division */
3616 *total = s390_cost->dlgr;
3618 else if (mode == SImode) /* 32 bit division */
3619 *total = s390_cost->dlr;
3620 return false;
3622 case DIV:
3623 case MOD:
3624 if (mode == DImode)
3626 rtx right = XEXP (x, 1);
3627 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3628 if (TARGET_ZARCH)
3629 *total = s390_cost->dsgfr;
3630 else
3631 *total = s390_cost->dr;
3632 else /* 64 by 64 bit division */
3633 *total = s390_cost->dsgr;
3635 else if (mode == SImode) /* 32 bit division */
3636 *total = s390_cost->dlr;
3637 else if (mode == SFmode)
3639 *total = s390_cost->debr;
3641 else if (mode == DFmode)
3643 *total = s390_cost->ddbr;
3645 else if (mode == TFmode)
3647 *total = s390_cost->dxbr;
3649 return false;
3651 case SQRT:
3652 if (mode == SFmode)
3653 *total = s390_cost->sqebr;
3654 else if (mode == DFmode)
3655 *total = s390_cost->sqdbr;
3656 else /* TFmode */
3657 *total = s390_cost->sqxbr;
3658 return false;
3660 case SIGN_EXTEND:
3661 case ZERO_EXTEND:
3662 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3663 || outer_code == PLUS || outer_code == MINUS
3664 || outer_code == COMPARE)
3665 *total = 0;
3666 return false;
3668 case COMPARE:
3669 *total = COSTS_N_INSNS (1);
3670 if (GET_CODE (XEXP (x, 0)) == AND
3671 && GET_CODE (XEXP (x, 1)) == CONST_INT
3672 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3674 rtx op0 = XEXP (XEXP (x, 0), 0);
3675 rtx op1 = XEXP (XEXP (x, 0), 1);
3676 rtx op2 = XEXP (x, 1);
3678 if (memory_operand (op0, GET_MODE (op0))
3679 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3680 return true;
3681 if (register_operand (op0, GET_MODE (op0))
3682 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3683 return true;
3685 return false;
3687 default:
3688 return false;
3692 /* Return the cost of an address rtx ADDR. */
3694 static int
3695 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3696 addr_space_t as ATTRIBUTE_UNUSED,
3697 bool speed ATTRIBUTE_UNUSED)
3699 struct s390_address ad;
3700 if (!s390_decompose_address (addr, &ad))
3701 return 1000;
3703 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3706 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3707 static int
3708 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3709 tree vectype,
3710 int misalign ATTRIBUTE_UNUSED)
3712 switch (type_of_cost)
3714 case scalar_stmt:
3715 case scalar_load:
3716 case scalar_store:
3717 case vector_stmt:
3718 case vector_load:
3719 case vector_store:
3720 case vec_to_scalar:
3721 case scalar_to_vec:
3722 case cond_branch_not_taken:
3723 case vec_perm:
3724 case vec_promote_demote:
3725 case unaligned_load:
3726 case unaligned_store:
3727 return 1;
3729 case cond_branch_taken:
3730 return 3;
3732 case vec_construct:
3733 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3735 default:
3736 gcc_unreachable ();
3740 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3741 otherwise return 0. */
3744 tls_symbolic_operand (rtx op)
3746 if (GET_CODE (op) != SYMBOL_REF)
3747 return 0;
3748 return SYMBOL_REF_TLS_MODEL (op);
3751 /* Split DImode access register reference REG (on 64-bit) into its constituent
3752 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3753 gen_highpart cannot be used as they assume all registers are word-sized,
3754 while our access registers have only half that size. */
3756 void
3757 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3759 gcc_assert (TARGET_64BIT);
3760 gcc_assert (ACCESS_REG_P (reg));
3761 gcc_assert (GET_MODE (reg) == DImode);
3762 gcc_assert (!(REGNO (reg) & 1));
3764 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3765 *hi = gen_rtx_REG (SImode, REGNO (reg));
3768 /* Return true if OP contains a symbol reference */
3770 bool
3771 symbolic_reference_mentioned_p (rtx op)
3773 const char *fmt;
3774 int i;
3776 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3777 return 1;
3779 fmt = GET_RTX_FORMAT (GET_CODE (op));
3780 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3782 if (fmt[i] == 'E')
3784 int j;
3786 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3787 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3788 return 1;
3791 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3792 return 1;
3795 return 0;
3798 /* Return true if OP contains a reference to a thread-local symbol. */
3800 bool
3801 tls_symbolic_reference_mentioned_p (rtx op)
3803 const char *fmt;
3804 int i;
3806 if (GET_CODE (op) == SYMBOL_REF)
3807 return tls_symbolic_operand (op);
3809 fmt = GET_RTX_FORMAT (GET_CODE (op));
3810 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3812 if (fmt[i] == 'E')
3814 int j;
3816 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3817 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3818 return true;
3821 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3822 return true;
3825 return false;
3829 /* Return true if OP is a legitimate general operand when
3830 generating PIC code. It is given that flag_pic is on
3831 and that OP satisfies CONSTANT_P. */
3834 legitimate_pic_operand_p (rtx op)
3836 /* Accept all non-symbolic constants. */
3837 if (!SYMBOLIC_CONST (op))
3838 return 1;
3840 /* Reject everything else; must be handled
3841 via emit_symbolic_move. */
3842 return 0;
3845 /* Returns true if the constant value OP is a legitimate general operand.
3846 It is given that OP satisfies CONSTANT_P. */
3848 static bool
3849 s390_legitimate_constant_p (machine_mode mode, rtx op)
3851 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3853 if (GET_MODE_SIZE (mode) != 16)
3854 return 0;
3856 if (!satisfies_constraint_j00 (op)
3857 && !satisfies_constraint_jm1 (op)
3858 && !satisfies_constraint_jKK (op)
3859 && !satisfies_constraint_jxx (op)
3860 && !satisfies_constraint_jyy (op))
3861 return 0;
3864 /* Accept all non-symbolic constants. */
3865 if (!SYMBOLIC_CONST (op))
3866 return 1;
3868 /* Accept immediate LARL operands. */
3869 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3870 return 1;
3872 /* Thread-local symbols are never legal constants. This is
3873 so that emit_call knows that computing such addresses
3874 might require a function call. */
3875 if (TLS_SYMBOLIC_CONST (op))
3876 return 0;
3878 /* In the PIC case, symbolic constants must *not* be
3879 forced into the literal pool. We accept them here,
3880 so that they will be handled by emit_symbolic_move. */
3881 if (flag_pic)
3882 return 1;
3884 /* All remaining non-PIC symbolic constants are
3885 forced into the literal pool. */
3886 return 0;
3889 /* Determine if it's legal to put X into the constant pool. This
3890 is not possible if X contains the address of a symbol that is
3891 not constant (TLS) or not known at final link time (PIC). */
3893 static bool
3894 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3896 switch (GET_CODE (x))
3898 case CONST_INT:
3899 case CONST_DOUBLE:
3900 case CONST_WIDE_INT:
3901 case CONST_VECTOR:
3902 /* Accept all non-symbolic constants. */
3903 return false;
3905 case LABEL_REF:
3906 /* Labels are OK iff we are non-PIC. */
3907 return flag_pic != 0;
3909 case SYMBOL_REF:
3910 /* 'Naked' TLS symbol references are never OK,
3911 non-TLS symbols are OK iff we are non-PIC. */
3912 if (tls_symbolic_operand (x))
3913 return true;
3914 else
3915 return flag_pic != 0;
3917 case CONST:
3918 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3919 case PLUS:
3920 case MINUS:
3921 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3922 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3924 case UNSPEC:
3925 switch (XINT (x, 1))
3927 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3928 case UNSPEC_LTREL_OFFSET:
3929 case UNSPEC_GOT:
3930 case UNSPEC_GOTOFF:
3931 case UNSPEC_PLTOFF:
3932 case UNSPEC_TLSGD:
3933 case UNSPEC_TLSLDM:
3934 case UNSPEC_NTPOFF:
3935 case UNSPEC_DTPOFF:
3936 case UNSPEC_GOTNTPOFF:
3937 case UNSPEC_INDNTPOFF:
3938 return false;
3940 /* If the literal pool shares the code section, it is OK to put
3941 execute template placeholders into the pool as well. */
3942 case UNSPEC_INSN:
3943 return TARGET_CPU_ZARCH;
3945 default:
3946 return true;
3948 break;
3950 default:
3951 gcc_unreachable ();
3955 /* Returns true if the constant value OP is a legitimate general
3956 operand during and after reload. The difference to
3957 legitimate_constant_p is that this function will not accept
3958 a constant that would need to be forced to the literal pool
3959 before it can be used as operand.
3960 This function accepts all constants which can be loaded directly
3961 into a GPR. */
3963 bool
3964 legitimate_reload_constant_p (rtx op)
3966 /* Accept la(y) operands. */
3967 if (GET_CODE (op) == CONST_INT
3968 && DISP_IN_RANGE (INTVAL (op)))
3969 return true;
3971 /* Accept l(g)hi/l(g)fi operands. */
3972 if (GET_CODE (op) == CONST_INT
3973 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3974 return true;
3976 /* Accept lliXX operands. */
3977 if (TARGET_ZARCH
3978 && GET_CODE (op) == CONST_INT
3979 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3980 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3981 return true;
3983 if (TARGET_EXTIMM
3984 && GET_CODE (op) == CONST_INT
3985 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3986 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3987 return true;
3989 /* Accept larl operands. */
3990 if (TARGET_CPU_ZARCH
3991 && larl_operand (op, VOIDmode))
3992 return true;
3994 /* Accept floating-point zero operands that fit into a single GPR. */
3995 if (GET_CODE (op) == CONST_DOUBLE
3996 && s390_float_const_zero_p (op)
3997 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3998 return true;
4000 /* Accept double-word operands that can be split. */
4001 if (GET_CODE (op) == CONST_WIDE_INT
4002 || (GET_CODE (op) == CONST_INT
4003 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4005 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4006 rtx hi = operand_subword (op, 0, 0, dword_mode);
4007 rtx lo = operand_subword (op, 1, 0, dword_mode);
4008 return legitimate_reload_constant_p (hi)
4009 && legitimate_reload_constant_p (lo);
4012 /* Everything else cannot be handled without reload. */
4013 return false;
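/* Added illustration (not part of the original sources): examples of
   constants accepted here are 4095 (la/lay displacement), -32768
   (lhi/lghi immediate) and, given TARGET_EXTIMM, 0x7fffffff (lgfi).  */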
4016 /* Returns true if the constant value OP is a legitimate fp operand
4017 during and after reload.
4018 This function accepts all constants which can be loaded directly
4019 into an FPR. */
4021 static bool
4022 legitimate_reload_fp_constant_p (rtx op)
4024 /* Accept floating-point zero operands if the load zero instruction
4025 can be used. Prior to z196 the load fp zero instruction caused a
4026 performance penalty if the result is used as BFP number. */
4027 if (TARGET_Z196
4028 && GET_CODE (op) == CONST_DOUBLE
4029 && s390_float_const_zero_p (op))
4030 return true;
4032 return false;
4035 /* Returns true if the constant value OP is a legitimate vector operand
4036 during and after reload.
4037 This function accepts all constants which can be loaded directly
4038 into a VR. */
4040 static bool
4041 legitimate_reload_vector_constant_p (rtx op)
4043 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4044 && (satisfies_constraint_j00 (op)
4045 || satisfies_constraint_jm1 (op)
4046 || satisfies_constraint_jKK (op)
4047 || satisfies_constraint_jxx (op)
4048 || satisfies_constraint_jyy (op)))
4049 return true;
4051 return false;
4054 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4055 return the class of reg to actually use. */
4057 static reg_class_t
4058 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4060 switch (GET_CODE (op))
4062 /* Constants we cannot reload into general registers
4063 must be forced into the literal pool. */
4064 case CONST_VECTOR:
4065 case CONST_DOUBLE:
4066 case CONST_INT:
4067 case CONST_WIDE_INT:
4068 if (reg_class_subset_p (GENERAL_REGS, rclass)
4069 && legitimate_reload_constant_p (op))
4070 return GENERAL_REGS;
4071 else if (reg_class_subset_p (ADDR_REGS, rclass)
4072 && legitimate_reload_constant_p (op))
4073 return ADDR_REGS;
4074 else if (reg_class_subset_p (FP_REGS, rclass)
4075 && legitimate_reload_fp_constant_p (op))
4076 return FP_REGS;
4077 else if (reg_class_subset_p (VEC_REGS, rclass)
4078 && legitimate_reload_vector_constant_p (op))
4079 return VEC_REGS;
4081 return NO_REGS;
4083 /* If a symbolic constant or a PLUS is reloaded,
4084 it is most likely being used as an address, so
4085 prefer ADDR_REGS. If 'class' is not a superset
4086 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4087 case CONST:
4088 /* Symrefs cannot be pushed into the literal pool with -fPIC
4089 so we *MUST NOT* return NO_REGS for these cases
4090 (s390_cannot_force_const_mem will return true).
4092 On the other hand we MUST return NO_REGS for symrefs with
4093 invalid addend which might have been pushed to the literal
4094 pool (no -fPIC). Usually we would expect them to be
4095 handled via secondary reload but this does not happen if
4096 they are used as literal pool slot replacement in reload
4097 inheritance (see emit_input_reload_insns). */
4098 if (TARGET_CPU_ZARCH
4099 && GET_CODE (XEXP (op, 0)) == PLUS
4100 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4101 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4103 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4104 return ADDR_REGS;
4105 else
4106 return NO_REGS;
4108 /* fallthrough */
4109 case LABEL_REF:
4110 case SYMBOL_REF:
4111 if (!legitimate_reload_constant_p (op))
4112 return NO_REGS;
4113 /* fallthrough */
4114 case PLUS:
4115 /* load address will be used. */
4116 if (reg_class_subset_p (ADDR_REGS, rclass))
4117 return ADDR_REGS;
4118 else
4119 return NO_REGS;
4121 default:
4122 break;
4125 return rclass;
4128 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4129 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4130 aligned. */
4132 bool
4133 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4135 HOST_WIDE_INT addend;
4136 rtx symref;
4138 /* The "required alignment" might be 0 (e.g. for certain structs
4139 accessed via BLKmode). Early abort in this case, as well as when
4140 an alignment > 8 is required. */
4141 if (alignment < 2 || alignment > 8)
4142 return false;
4144 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4145 return false;
4147 if (addend & (alignment - 1))
4148 return false;
4150 if (GET_CODE (symref) == SYMBOL_REF)
4152 /* We have load-relative instructions for 2-byte, 4-byte, and
4153 8-byte alignment so allow only these. */
4154 switch (alignment)
4156 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4157 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4158 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4159 default: return false;
4163 if (GET_CODE (symref) == UNSPEC
4164 && alignment <= UNITS_PER_LONG)
4165 return true;
4167 return false;
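 /* For example, with ALIGNMENT == 4 an access to sym+8 passes the checks
    above as long as sym is not marked SYMBOL_FLAG_NOTALIGN4, while sym+2
    already fails the addend test. */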
4170 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4171 operand, SCRATCH is used to load the even part of the address, and
4172 one is then added using la. */
4174 void
4175 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4177 HOST_WIDE_INT addend;
4178 rtx symref;
4180 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4181 gcc_unreachable ();
4183 if (!(addend & 1))
4184 /* Easy case. The addend is even so larl will do fine. */
4185 emit_move_insn (reg, addr);
4186 else
4188 /* We can leave the scratch register untouched if the target
4189 register is a valid base register. */
4190 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4191 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4192 scratch = reg;
4194 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4195 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4197 if (addend != 1)
4198 emit_move_insn (scratch,
4199 gen_rtx_CONST (Pmode,
4200 gen_rtx_PLUS (Pmode, symref,
4201 GEN_INT (addend - 1))));
4202 else
4203 emit_move_insn (scratch, symref);
4205 /* Increment the address using la in order to avoid clobbering cc. */
4206 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
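 /* For illustration, reloading sym+5 this way emits roughly
      larl scratch, sym+4
      la reg, 1(scratch)
    i.e. the even part via larl and the final odd byte via la. */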
4210 /* Generate what is necessary to move between REG and MEM using
4211 SCRATCH. The direction is given by TOMEM. */
4213 void
4214 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4216 /* Reload might have pulled a constant out of the literal pool.
4217 Force it back in. */
4218 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4219 || GET_CODE (mem) == CONST_WIDE_INT
4220 || GET_CODE (mem) == CONST_VECTOR
4221 || GET_CODE (mem) == CONST)
4222 mem = force_const_mem (GET_MODE (reg), mem);
4224 gcc_assert (MEM_P (mem));
4226 /* For a load from memory we can leave the scratch register
4227 untouched if the target register is a valid base register. */
4228 if (!tomem
4229 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4230 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4231 && GET_MODE (reg) == GET_MODE (scratch))
4232 scratch = reg;
4234 /* Load address into scratch register. Since we can't have a
4235 secondary reload for a secondary reload we have to cover the case
4236 where larl would need a secondary reload here as well. */
4237 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4239 /* Now we can use a standard load/store to do the move. */
4240 if (tomem)
4241 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4242 else
4243 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4246 /* Inform reload about cases where moving X with a mode MODE to a register in
4247 RCLASS requires an extra scratch or immediate register. Return the class
4248 needed for the immediate register. */
4250 static reg_class_t
4251 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4252 machine_mode mode, secondary_reload_info *sri)
4254 enum reg_class rclass = (enum reg_class) rclass_i;
4256 /* Intermediate register needed. */
4257 if (reg_classes_intersect_p (CC_REGS, rclass))
4258 return GENERAL_REGS;
4260 if (TARGET_VX)
4262 /* The vst/vl vector move instructions allow only for short
4263 displacements. */
4264 if (MEM_P (x)
4265 && GET_CODE (XEXP (x, 0)) == PLUS
4266 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4267 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4268 && reg_class_subset_p (rclass, VEC_REGS)
4269 && (!reg_class_subset_p (rclass, FP_REGS)
4270 || (GET_MODE_SIZE (mode) > 8
4271 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4273 if (in_p)
4274 sri->icode = (TARGET_64BIT ?
4275 CODE_FOR_reloaddi_la_in :
4276 CODE_FOR_reloadsi_la_in);
4277 else
4278 sri->icode = (TARGET_64BIT ?
4279 CODE_FOR_reloaddi_la_out :
4280 CODE_FOR_reloadsi_la_out);
4284 if (TARGET_Z10)
4286 HOST_WIDE_INT offset;
4287 rtx symref;
4289 /* On z10 several optimizer steps may generate larl operands with
4290 an odd addend. */
4291 if (in_p
4292 && s390_loadrelative_operand_p (x, &symref, &offset)
4293 && mode == Pmode
4294 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4295 && (offset & 1) == 1)
4296 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4297 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4299 /* Handle all the (mem (symref)) accesses we cannot use the z10
4300 instructions for. */
4301 if (MEM_P (x)
4302 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4303 && (mode == QImode
4304 || !reg_class_subset_p (rclass, GENERAL_REGS)
4305 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4306 || !s390_check_symref_alignment (XEXP (x, 0),
4307 GET_MODE_SIZE (mode))))
4309 #define __SECONDARY_RELOAD_CASE(M,m) \
4310 case E_##M##mode: \
4311 if (TARGET_64BIT) \
4312 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4313 CODE_FOR_reload##m##di_tomem_z10; \
4314 else \
4315 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4316 CODE_FOR_reload##m##si_tomem_z10; \
4317 break;
4319 switch (GET_MODE (x))
4321 __SECONDARY_RELOAD_CASE (QI, qi);
4322 __SECONDARY_RELOAD_CASE (HI, hi);
4323 __SECONDARY_RELOAD_CASE (SI, si);
4324 __SECONDARY_RELOAD_CASE (DI, di);
4325 __SECONDARY_RELOAD_CASE (TI, ti);
4326 __SECONDARY_RELOAD_CASE (SF, sf);
4327 __SECONDARY_RELOAD_CASE (DF, df);
4328 __SECONDARY_RELOAD_CASE (TF, tf);
4329 __SECONDARY_RELOAD_CASE (SD, sd);
4330 __SECONDARY_RELOAD_CASE (DD, dd);
4331 __SECONDARY_RELOAD_CASE (TD, td);
4332 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4333 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4334 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4335 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4336 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4337 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4338 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4339 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4340 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4341 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4342 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4343 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4344 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4345 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4346 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4347 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4348 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4349 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4350 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4351 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4352 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4353 default:
4354 gcc_unreachable ();
4356 #undef __SECONDARY_RELOAD_CASE
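 /* As an example of the macro above, __SECONDARY_RELOAD_CASE (SI, si)
    expands to a case E_SImode selecting CODE_FOR_reloadsidi_toreg_z10 or
    CODE_FOR_reloadsidi_tomem_z10 on 64-bit targets (and the
    corresponding ...sisi_... variants otherwise). */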
4360 /* We need a scratch register when loading a PLUS expression which
4361 is not a legitimate operand of the LOAD ADDRESS instruction. */
4362 /* LRA can deal with the transformation of a plus op very well -- so we
4363 don't need to prompt LRA in this case. */
4364 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4365 sri->icode = (TARGET_64BIT ?
4366 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4368 /* Performing a multiword move from or to memory we have to make sure the
4369 second chunk in memory is addressable without causing a displacement
4370 overflow. If that would be the case we calculate the address in
4371 a scratch register. */
4372 if (MEM_P (x)
4373 && GET_CODE (XEXP (x, 0)) == PLUS
4374 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4375 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4376 + GET_MODE_SIZE (mode) - 1))
4378 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4379 in an s_operand address since we may fall back to lm/stm. So we only
4380 have to care about overflows in the b+i+d case. */
4381 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4382 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4383 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4384 /* For FP_REGS no lm/stm is available so this check is triggered
4385 for displacement overflows in b+i+d and b+d like addresses. */
4386 || (reg_classes_intersect_p (FP_REGS, rclass)
4387 && s390_class_max_nregs (FP_REGS, mode) > 1))
4389 if (in_p)
4390 sri->icode = (TARGET_64BIT ?
4391 CODE_FOR_reloaddi_la_in :
4392 CODE_FOR_reloadsi_la_in);
4393 else
4394 sri->icode = (TARGET_64BIT ?
4395 CODE_FOR_reloaddi_la_out :
4396 CODE_FOR_reloadsi_la_out);
4400 /* A scratch address register is needed when a symbolic constant is
4401 copied to r0 when compiling with -fPIC. In other cases the target
4402 register might be used as a temporary (see legitimize_pic_address).
4403 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4404 sri->icode = (TARGET_64BIT ?
4405 CODE_FOR_reloaddi_PIC_addr :
4406 CODE_FOR_reloadsi_PIC_addr);
4408 /* Either scratch or no register needed. */
4409 return NO_REGS;
4412 /* Generate code to load SRC, which is a PLUS that is not a
4413 legitimate operand for the LA instruction, into TARGET.
4414 SCRATCH may be used as scratch register. */
4416 void
4417 s390_expand_plus_operand (rtx target, rtx src,
4418 rtx scratch)
4420 rtx sum1, sum2;
4421 struct s390_address ad;
4423 /* src must be a PLUS; get its two operands. */
4424 gcc_assert (GET_CODE (src) == PLUS);
4425 gcc_assert (GET_MODE (src) == Pmode);
4427 /* Check if either of the two operands is already scheduled
4428 for replacement by reload. This can happen e.g. when
4429 float registers occur in an address. */
4430 sum1 = find_replacement (&XEXP (src, 0));
4431 sum2 = find_replacement (&XEXP (src, 1));
4432 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4434 /* If the address is already strictly valid, there's nothing to do. */
4435 if (!s390_decompose_address (src, &ad)
4436 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4437 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4439 /* Otherwise, one of the operands cannot be an address register;
4440 we reload its value into the scratch register. */
4441 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4443 emit_move_insn (scratch, sum1);
4444 sum1 = scratch;
4446 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4448 emit_move_insn (scratch, sum2);
4449 sum2 = scratch;
4452 /* According to the way these invalid addresses are generated
4453 in reload.c, it should never happen (at least on s390) that
4454 *neither* of the PLUS components, after find_replacements
4455 was applied, is an address register. */
4456 if (sum1 == scratch && sum2 == scratch)
4458 debug_rtx (src);
4459 gcc_unreachable ();
4462 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4465 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4466 is only ever performed on addresses, so we can mark the
4467 sum as legitimate for LA in any case. */
4468 s390_load_address (target, src);
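 /* For illustration: reloading an address like (plus (reg %f0) (reg %r2))
    first copies the %f0 value into the scratch address register and then
    emits a single load address for scratch + %r2, roughly
    la target,0(scratch,%r2). */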
4472 /* Return true if ADDR is a valid memory address.
4473 STRICT specifies whether strict register checking applies. */
4475 static bool
4476 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4478 struct s390_address ad;
4480 if (TARGET_Z10
4481 && larl_operand (addr, VOIDmode)
4482 && (mode == VOIDmode
4483 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4484 return true;
4486 if (!s390_decompose_address (addr, &ad))
4487 return false;
4489 if (strict)
4491 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4492 return false;
4494 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4495 return false;
4497 else
4499 if (ad.base
4500 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4501 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4502 return false;
4504 if (ad.indx
4505 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4506 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4507 return false;
4509 return true;
4512 /* Return true if OP is a valid operand for the LA instruction.
4513 In 31-bit, we need to prove that the result is used as an
4514 address, as LA performs only a 31-bit addition. */
4516 bool
4517 legitimate_la_operand_p (rtx op)
4519 struct s390_address addr;
4520 if (!s390_decompose_address (op, &addr))
4521 return false;
4523 return (TARGET_64BIT || addr.pointer);
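 /* In 31-bit mode la only produces a 31-bit result (the leftmost bit is
    cleared), so it may only be used when the value is known to be an
    address; in 64-bit mode la performs a full addition and is always
    acceptable. */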
4526 /* Return true if it is valid *and* preferable to use LA to
4527 compute the sum of OP1 and OP2. */
4529 bool
4530 preferred_la_operand_p (rtx op1, rtx op2)
4532 struct s390_address addr;
4534 if (op2 != const0_rtx)
4535 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4537 if (!s390_decompose_address (op1, &addr))
4538 return false;
4539 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4540 return false;
4541 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4542 return false;
4544 /* Avoid LA instructions with index register on z196; it is
4545 preferable to use regular add instructions when possible.
4546 Starting with zEC12 the la with index register is "uncracked"
4547 again. */
4548 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4549 return false;
4551 if (!TARGET_64BIT && !addr.pointer)
4552 return false;
4554 if (addr.pointer)
4555 return true;
4557 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4558 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4559 return true;
4561 return false;
4564 /* Emit a forced load-address operation to load SRC into DST.
4565 This will use the LOAD ADDRESS instruction even in situations
4566 where legitimate_la_operand_p (SRC) returns false. */
4568 void
4569 s390_load_address (rtx dst, rtx src)
4571 if (TARGET_64BIT)
4572 emit_move_insn (dst, src);
4573 else
4574 emit_insn (gen_force_la_31 (dst, src));
4577 /* Return true if it is ok to use SYMBOL_REF in a relative address. */
4579 bool
4580 s390_rel_address_ok_p (rtx symbol_ref)
4582 tree decl;
4584 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4585 return true;
4587 decl = SYMBOL_REF_DECL (symbol_ref);
4589 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4590 return (s390_pic_data_is_text_relative
4591 || (decl
4592 && TREE_CODE (decl) == FUNCTION_DECL));
4594 return false;
4597 /* Return a legitimate reference for ORIG (an address) using the
4598 register REG. If REG is 0, a new pseudo is generated.
4600 There are two types of references that must be handled:
4602 1. Global data references must load the address from the GOT, via
4603 the PIC reg. An insn is emitted to do this load, and the reg is
4604 returned.
4606 2. Static data references, constant pool addresses, and code labels
4607 compute the address as an offset from the GOT, whose base is in
4608 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4609 differentiate them from global data objects. The returned
4610 address is the PIC reg + an unspec constant.
4612 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4613 reg also appears in the address. */
4615 rtx
4616 legitimize_pic_address (rtx orig, rtx reg)
4618 rtx addr = orig;
4619 rtx addend = const0_rtx;
4620 rtx new_rtx = orig;
4622 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4624 if (GET_CODE (addr) == CONST)
4625 addr = XEXP (addr, 0);
4627 if (GET_CODE (addr) == PLUS)
4629 addend = XEXP (addr, 1);
4630 addr = XEXP (addr, 0);
4633 if ((GET_CODE (addr) == LABEL_REF
4634 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4635 || (GET_CODE (addr) == UNSPEC &&
4636 (XINT (addr, 1) == UNSPEC_GOTENT
4637 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4638 && GET_CODE (addend) == CONST_INT)
4640 /* This can be locally addressed. */
4642 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4643 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4644 gen_rtx_CONST (Pmode, addr) : addr);
4646 if (TARGET_CPU_ZARCH
4647 && larl_operand (const_addr, VOIDmode)
4648 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4649 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4651 if (INTVAL (addend) & 1)
4653 /* LARL can't handle odd offsets, so emit a pair of LARL
4654 and LA. */
4655 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4657 if (!DISP_IN_RANGE (INTVAL (addend)))
4659 HOST_WIDE_INT even = INTVAL (addend) - 1;
4660 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4661 addr = gen_rtx_CONST (Pmode, addr);
4662 addend = const1_rtx;
4665 emit_move_insn (temp, addr);
4666 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4668 if (reg != 0)
4670 s390_load_address (reg, new_rtx);
4671 new_rtx = reg;
4674 else
4676 /* If the offset is even, we can just use LARL. This
4677 will happen automatically. */
4680 else
4682 /* No larl - Access local symbols relative to the GOT. */
4684 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4686 if (reload_in_progress || reload_completed)
4687 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4689 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4690 if (addend != const0_rtx)
4691 addr = gen_rtx_PLUS (Pmode, addr, addend);
4692 addr = gen_rtx_CONST (Pmode, addr);
4693 addr = force_const_mem (Pmode, addr);
4694 emit_move_insn (temp, addr);
4696 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4697 if (reg != 0)
4699 s390_load_address (reg, new_rtx);
4700 new_rtx = reg;
4704 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4706 /* A non-local symbol reference without addend.
4708 The symbol ref is wrapped into an UNSPEC to make sure the
4709 proper operand modifier (@GOT or @GOTENT) will be emitted.
4710 This will tell the linker to put the symbol into the GOT.
4712 Additionally the code dereferencing the GOT slot is emitted here.
4714 An addend to the symref needs to be added afterwards.
4715 legitimize_pic_address calls itself recursively to handle
4716 that case. So no need to do it here. */
4718 if (reg == 0)
4719 reg = gen_reg_rtx (Pmode);
4721 if (TARGET_Z10)
4723 /* Use load relative if possible.
4724 lgrl <target>, sym@GOTENT */
4725 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4726 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4727 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4729 emit_move_insn (reg, new_rtx);
4730 new_rtx = reg;
4732 else if (flag_pic == 1)
4734 /* Assume GOT offset is a valid displacement operand (< 4k
4735 or < 512k with z990). This is handled the same way in
4736 both 31- and 64-bit code (@GOT).
4737 lg <target>, sym@GOT(r12) */
4739 if (reload_in_progress || reload_completed)
4740 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4742 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4744 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4745 new_rtx = gen_const_mem (Pmode, new_rtx);
4746 emit_move_insn (reg, new_rtx);
4747 new_rtx = reg;
4749 else if (TARGET_CPU_ZARCH)
4751 /* If the GOT offset might be >= 4k, we determine the position
4752 of the GOT entry via a PC-relative LARL (@GOTENT).
4753 larl temp, sym@GOTENT
4754 lg <target>, 0(temp) */
4756 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4758 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4759 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4761 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4762 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4763 emit_move_insn (temp, new_rtx);
4765 new_rtx = gen_const_mem (Pmode, temp);
4766 emit_move_insn (reg, new_rtx);
4768 new_rtx = reg;
4770 else
4772 /* If the GOT offset might be >= 4k, we have to load it
4773 from the literal pool (@GOT).
4775 lg temp, lit-litbase(r13)
4776 lg <target>, 0(temp)
4777 lit: .long sym@GOT */
4779 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4781 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4782 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4784 if (reload_in_progress || reload_completed)
4785 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4787 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4788 addr = gen_rtx_CONST (Pmode, addr);
4789 addr = force_const_mem (Pmode, addr);
4790 emit_move_insn (temp, addr);
4792 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4793 new_rtx = gen_const_mem (Pmode, new_rtx);
4794 emit_move_insn (reg, new_rtx);
4795 new_rtx = reg;
4798 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4800 gcc_assert (XVECLEN (addr, 0) == 1);
4801 switch (XINT (addr, 1))
4803 /* These are addresses of symbols (or PLT slots) relative to the GOT
4804 (not GOT slots!). In general this will exceed the
4805 displacement range so these values belong in the literal
4806 pool. */
4807 case UNSPEC_GOTOFF:
4808 case UNSPEC_PLTOFF:
4809 new_rtx = force_const_mem (Pmode, orig);
4810 break;
4812 /* For -fPIC the GOT size might exceed the displacement
4813 range so make sure the value is in the literal pool. */
4814 case UNSPEC_GOT:
4815 if (flag_pic == 2)
4816 new_rtx = force_const_mem (Pmode, orig);
4817 break;
4819 /* For @GOTENT larl is used. This is handled like local
4820 symbol refs. */
4821 case UNSPEC_GOTENT:
4822 gcc_unreachable ();
4823 break;
4825 /* @PLT is OK as is on 64-bit, but must be converted to
4826 GOT-relative @PLTOFF on 31-bit. */
4827 case UNSPEC_PLT:
4828 if (!TARGET_CPU_ZARCH)
4830 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4832 if (reload_in_progress || reload_completed)
4833 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4835 addr = XVECEXP (addr, 0, 0);
4836 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4837 UNSPEC_PLTOFF);
4838 if (addend != const0_rtx)
4839 addr = gen_rtx_PLUS (Pmode, addr, addend);
4840 addr = gen_rtx_CONST (Pmode, addr);
4841 addr = force_const_mem (Pmode, addr);
4842 emit_move_insn (temp, addr);
4844 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4845 if (reg != 0)
4847 s390_load_address (reg, new_rtx);
4848 new_rtx = reg;
4851 else
4852 /* On 64 bit larl can be used. This case is handled like
4853 local symbol refs. */
4854 gcc_unreachable ();
4855 break;
4857 /* Everything else cannot happen. */
4858 default:
4859 gcc_unreachable ();
4862 else if (addend != const0_rtx)
4864 /* Otherwise, compute the sum. */
4866 rtx base = legitimize_pic_address (addr, reg);
4867 new_rtx = legitimize_pic_address (addend,
4868 base == reg ? NULL_RTX : reg);
4869 if (GET_CODE (new_rtx) == CONST_INT)
4870 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4871 else
4873 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4875 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4876 new_rtx = XEXP (new_rtx, 1);
4878 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4881 if (GET_CODE (new_rtx) == CONST)
4882 new_rtx = XEXP (new_rtx, 0);
4883 new_rtx = force_operand (new_rtx, 0);
4886 return new_rtx;
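 /* To summarize the common -fPIC cases above: on z10 a global symbol is
    typically reached with a single GOT-slot load such as
      lgrl reg, sym@GOTENT
    whereas a local symbol with an even addend needs just one larl, and an
    odd addend is split into a larl/la pair. */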
4889 /* Load the thread pointer into a register. */
4891 static rtx
4892 s390_get_thread_pointer (void)
4894 rtx tp = gen_reg_rtx (Pmode);
4896 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4897 mark_reg_pointer (tp, BITS_PER_WORD);
4899 return tp;
4902 /* Emit a TLS call insn. The call target is the SYMBOL_REF stored
4903 in s390_tls_symbol which always refers to __tls_get_offset.
4904 The returned offset is written to RESULT_REG and a USE rtx is
4905 generated for TLS_CALL. */
4907 static GTY(()) rtx s390_tls_symbol;
4909 static void
4910 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4912 rtx insn;
4914 if (!flag_pic)
4915 emit_insn (s390_load_got ());
4917 if (!s390_tls_symbol)
4918 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4920 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4921 gen_rtx_REG (Pmode, RETURN_REGNUM));
4923 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4924 RTL_CONST_CALL_P (insn) = 1;
4927 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4928 this (thread-local) address. REG may be used as a temporary. */
4930 static rtx
4931 legitimize_tls_address (rtx addr, rtx reg)
4933 rtx new_rtx, tls_call, temp, base, r2;
4934 rtx_insn *insn;
4936 if (GET_CODE (addr) == SYMBOL_REF)
4937 switch (tls_symbolic_operand (addr))
4939 case TLS_MODEL_GLOBAL_DYNAMIC:
4940 start_sequence ();
4941 r2 = gen_rtx_REG (Pmode, 2);
4942 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4943 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4944 new_rtx = force_const_mem (Pmode, new_rtx);
4945 emit_move_insn (r2, new_rtx);
4946 s390_emit_tls_call_insn (r2, tls_call);
4947 insn = get_insns ();
4948 end_sequence ();
4950 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4951 temp = gen_reg_rtx (Pmode);
4952 emit_libcall_block (insn, temp, r2, new_rtx);
4954 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4955 if (reg != 0)
4957 s390_load_address (reg, new_rtx);
4958 new_rtx = reg;
4960 break;
4962 case TLS_MODEL_LOCAL_DYNAMIC:
4963 start_sequence ();
4964 r2 = gen_rtx_REG (Pmode, 2);
4965 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4966 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4967 new_rtx = force_const_mem (Pmode, new_rtx);
4968 emit_move_insn (r2, new_rtx);
4969 s390_emit_tls_call_insn (r2, tls_call);
4970 insn = get_insns ();
4971 end_sequence ();
4973 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4974 temp = gen_reg_rtx (Pmode);
4975 emit_libcall_block (insn, temp, r2, new_rtx);
4977 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4978 base = gen_reg_rtx (Pmode);
4979 s390_load_address (base, new_rtx);
4981 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4982 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4983 new_rtx = force_const_mem (Pmode, new_rtx);
4984 temp = gen_reg_rtx (Pmode);
4985 emit_move_insn (temp, new_rtx);
4987 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4988 if (reg != 0)
4990 s390_load_address (reg, new_rtx);
4991 new_rtx = reg;
4993 break;
4995 case TLS_MODEL_INITIAL_EXEC:
4996 if (flag_pic == 1)
4998 /* Assume GOT offset < 4k. This is handled the same way
4999 in both 31- and 64-bit code. */
5001 if (reload_in_progress || reload_completed)
5002 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5004 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5005 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5006 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5007 new_rtx = gen_const_mem (Pmode, new_rtx);
5008 temp = gen_reg_rtx (Pmode);
5009 emit_move_insn (temp, new_rtx);
5011 else if (TARGET_CPU_ZARCH)
5013 /* If the GOT offset might be >= 4k, we determine the position
5014 of the GOT entry via a PC-relative LARL. */
5016 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5018 temp = gen_reg_rtx (Pmode);
5019 emit_move_insn (temp, new_rtx);
5021 new_rtx = gen_const_mem (Pmode, temp);
5022 temp = gen_reg_rtx (Pmode);
5023 emit_move_insn (temp, new_rtx);
5025 else if (flag_pic)
5027 /* If the GOT offset might be >= 4k, we have to load it
5028 from the literal pool. */
5030 if (reload_in_progress || reload_completed)
5031 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5033 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5034 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5035 new_rtx = force_const_mem (Pmode, new_rtx);
5036 temp = gen_reg_rtx (Pmode);
5037 emit_move_insn (temp, new_rtx);
5039 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5040 new_rtx = gen_const_mem (Pmode, new_rtx);
5042 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5043 temp = gen_reg_rtx (Pmode);
5044 emit_insn (gen_rtx_SET (temp, new_rtx));
5046 else
5048 /* In position-dependent code, load the absolute address of
5049 the GOT entry from the literal pool. */
5051 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5052 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5053 new_rtx = force_const_mem (Pmode, new_rtx);
5054 temp = gen_reg_rtx (Pmode);
5055 emit_move_insn (temp, new_rtx);
5057 new_rtx = temp;
5058 new_rtx = gen_const_mem (Pmode, new_rtx);
5059 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5060 temp = gen_reg_rtx (Pmode);
5061 emit_insn (gen_rtx_SET (temp, new_rtx));
5064 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5065 if (reg != 0)
5067 s390_load_address (reg, new_rtx);
5068 new_rtx = reg;
5070 break;
5072 case TLS_MODEL_LOCAL_EXEC:
5073 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5074 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5075 new_rtx = force_const_mem (Pmode, new_rtx);
5076 temp = gen_reg_rtx (Pmode);
5077 emit_move_insn (temp, new_rtx);
5079 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5080 if (reg != 0)
5082 s390_load_address (reg, new_rtx);
5083 new_rtx = reg;
5085 break;
5087 default:
5088 gcc_unreachable ();
5091 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5093 switch (XINT (XEXP (addr, 0), 1))
5095 case UNSPEC_INDNTPOFF:
5096 gcc_assert (TARGET_CPU_ZARCH);
5097 new_rtx = addr;
5098 break;
5100 default:
5101 gcc_unreachable ();
5105 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5106 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5108 new_rtx = XEXP (XEXP (addr, 0), 0);
5109 if (GET_CODE (new_rtx) != SYMBOL_REF)
5110 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5112 new_rtx = legitimize_tls_address (new_rtx, reg);
5113 new_rtx = plus_constant (Pmode, new_rtx,
5114 INTVAL (XEXP (XEXP (addr, 0), 1)));
5115 new_rtx = force_operand (new_rtx, 0);
5118 else
5119 gcc_unreachable (); /* for now ... */
5121 return new_rtx;
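 /* For illustration: in the local-exec model the code above places
    addr@NTPOFF into the literal pool, loads it into a temporary and adds
    the thread pointer obtained via s390_get_thread_pointer; the
    initial-exec variants differ only in how that offset is fetched
    (GOT slot vs. literal pool). */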
5124 /* Emit insns making the address in operands[1] valid for a standard
5125 move to operands[0]. operands[1] is replaced by an address which
5126 should be used instead of the former RTX to emit the move
5127 pattern. */
5129 void
5130 emit_symbolic_move (rtx *operands)
5132 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5134 if (GET_CODE (operands[0]) == MEM)
5135 operands[1] = force_reg (Pmode, operands[1]);
5136 else if (TLS_SYMBOLIC_CONST (operands[1]))
5137 operands[1] = legitimize_tls_address (operands[1], temp);
5138 else if (flag_pic)
5139 operands[1] = legitimize_pic_address (operands[1], temp);
5142 /* Try machine-dependent ways of modifying an illegitimate address X
5143 to be legitimate. If we find one, return the new, valid address.
5145 OLDX is the address as it was before break_out_memory_refs was called.
5146 In some cases it is useful to look at this to decide what needs to be done.
5148 MODE is the mode of the operand pointed to by X.
5150 When -fpic is used, special handling is needed for symbolic references.
5151 See comments by legitimize_pic_address for details. */
5153 static rtx
5154 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5155 machine_mode mode ATTRIBUTE_UNUSED)
5157 rtx constant_term = const0_rtx;
5159 if (TLS_SYMBOLIC_CONST (x))
5161 x = legitimize_tls_address (x, 0);
5163 if (s390_legitimate_address_p (mode, x, FALSE))
5164 return x;
5166 else if (GET_CODE (x) == PLUS
5167 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5168 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5170 return x;
5172 else if (flag_pic)
5174 if (SYMBOLIC_CONST (x)
5175 || (GET_CODE (x) == PLUS
5176 && (SYMBOLIC_CONST (XEXP (x, 0))
5177 || SYMBOLIC_CONST (XEXP (x, 1)))))
5178 x = legitimize_pic_address (x, 0);
5180 if (s390_legitimate_address_p (mode, x, FALSE))
5181 return x;
5184 x = eliminate_constant_term (x, &constant_term);
5186 /* Optimize loading of large displacements by splitting them
5187 into the multiple of 4K and the rest; this allows the
5188 former to be CSE'd if possible.
5190 Don't do this if the displacement is added to a register
5191 pointing into the stack frame, as the offsets will
5192 change later anyway. */
5194 if (GET_CODE (constant_term) == CONST_INT
5195 && !TARGET_LONG_DISPLACEMENT
5196 && !DISP_IN_RANGE (INTVAL (constant_term))
5197 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5199 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5200 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5202 rtx temp = gen_reg_rtx (Pmode);
5203 rtx val = force_operand (GEN_INT (upper), temp);
5204 if (val != temp)
5205 emit_move_insn (temp, val);
5207 x = gen_rtx_PLUS (Pmode, x, temp);
5208 constant_term = GEN_INT (lower);
5211 if (GET_CODE (x) == PLUS)
5213 if (GET_CODE (XEXP (x, 0)) == REG)
5215 rtx temp = gen_reg_rtx (Pmode);
5216 rtx val = force_operand (XEXP (x, 1), temp);
5217 if (val != temp)
5218 emit_move_insn (temp, val);
5220 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5223 else if (GET_CODE (XEXP (x, 1)) == REG)
5225 rtx temp = gen_reg_rtx (Pmode);
5226 rtx val = force_operand (XEXP (x, 0), temp);
5227 if (val != temp)
5228 emit_move_insn (temp, val);
5230 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5234 if (constant_term != const0_rtx)
5235 x = gen_rtx_PLUS (Pmode, x, constant_term);
5237 return x;
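 /* As an example of the displacement splitting above: an offset of
    0x12345 is rewritten as 0x12000 (loaded into a register and thus a
    CSE candidate) plus a remaining 0x345, which fits the 12-bit
    displacement field. */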
5240 /* Try a machine-dependent way of reloading an illegitimate address AD
5241 operand. If we find one, push the reload and return the new address.
5243 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5244 and TYPE is the reload type of the current reload. */
5246 rtx
5247 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5248 int opnum, int type)
5250 if (!optimize || TARGET_LONG_DISPLACEMENT)
5251 return NULL_RTX;
5253 if (GET_CODE (ad) == PLUS)
5255 rtx tem = simplify_binary_operation (PLUS, Pmode,
5256 XEXP (ad, 0), XEXP (ad, 1));
5257 if (tem)
5258 ad = tem;
5261 if (GET_CODE (ad) == PLUS
5262 && GET_CODE (XEXP (ad, 0)) == REG
5263 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5264 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5266 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5267 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5268 rtx cst, tem, new_rtx;
5270 cst = GEN_INT (upper);
5271 if (!legitimate_reload_constant_p (cst))
5272 cst = force_const_mem (Pmode, cst);
5274 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5275 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5277 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5278 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5279 opnum, (enum reload_type) type);
5280 return new_rtx;
5283 return NULL_RTX;
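 /* Note that if the 4k-aligned upper part computed above is not itself a
    legitimate reload constant, it is forced into the literal pool first;
    only the low 12 bits remain as an immediate displacement. */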
5286 /* Emit code to copy LEN bytes from SRC to DST. */
5288 bool
5289 s390_expand_movmem (rtx dst, rtx src, rtx len)
5291 /* When tuning for z10 or higher we rely on the Glibc functions to
5292 do the right thing. Only for constant lengths below 64k do we
5293 generate inline code. */
5294 if (s390_tune >= PROCESSOR_2097_Z10
5295 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5296 return false;
5298 /* Expand memcpy for constant length operands without a loop if it
5299 is shorter that way.
5301 With a constant length argument a
5302 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5303 if (GET_CODE (len) == CONST_INT
5304 && INTVAL (len) >= 0
5305 && INTVAL (len) <= 256 * 6
5306 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5308 HOST_WIDE_INT o, l;
5310 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5312 rtx newdst = adjust_address (dst, BLKmode, o);
5313 rtx newsrc = adjust_address (src, BLKmode, o);
5314 emit_insn (gen_movmem_short (newdst, newsrc,
5315 GEN_INT (l > 256 ? 255 : l - 1)));
5319 else if (TARGET_MVCLE)
5321 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5324 else
5326 rtx dst_addr, src_addr, count, blocks, temp;
5327 rtx_code_label *loop_start_label = gen_label_rtx ();
5328 rtx_code_label *loop_end_label = gen_label_rtx ();
5329 rtx_code_label *end_label = gen_label_rtx ();
5330 machine_mode mode;
5332 mode = GET_MODE (len);
5333 if (mode == VOIDmode)
5334 mode = Pmode;
5336 dst_addr = gen_reg_rtx (Pmode);
5337 src_addr = gen_reg_rtx (Pmode);
5338 count = gen_reg_rtx (mode);
5339 blocks = gen_reg_rtx (mode);
5341 convert_move (count, len, 1);
5342 emit_cmp_and_jump_insns (count, const0_rtx,
5343 EQ, NULL_RTX, mode, 1, end_label);
5345 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5346 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5347 dst = change_address (dst, VOIDmode, dst_addr);
5348 src = change_address (src, VOIDmode, src_addr);
5350 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5351 OPTAB_DIRECT);
5352 if (temp != count)
5353 emit_move_insn (count, temp);
5355 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5356 OPTAB_DIRECT);
5357 if (temp != blocks)
5358 emit_move_insn (blocks, temp);
5360 emit_cmp_and_jump_insns (blocks, const0_rtx,
5361 EQ, NULL_RTX, mode, 1, loop_end_label);
5363 emit_label (loop_start_label);
5365 if (TARGET_Z10
5366 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5368 rtx prefetch;
5370 /* Issue a read prefetch for the +3 cache line. */
5371 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5372 const0_rtx, const0_rtx);
5373 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5374 emit_insn (prefetch);
5376 /* Issue a write prefetch for the +3 cache line. */
5377 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5378 const1_rtx, const0_rtx);
5379 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5380 emit_insn (prefetch);
5383 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5384 s390_load_address (dst_addr,
5385 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5386 s390_load_address (src_addr,
5387 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5389 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5390 OPTAB_DIRECT);
5391 if (temp != blocks)
5392 emit_move_insn (blocks, temp);
5394 emit_cmp_and_jump_insns (blocks, const0_rtx,
5395 EQ, NULL_RTX, mode, 1, loop_end_label);
5397 emit_jump (loop_start_label);
5398 emit_label (loop_end_label);
5400 emit_insn (gen_movmem_short (dst, src,
5401 convert_to_mode (Pmode, count, 1)));
5402 emit_label (end_label);
5404 return true;
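 /* For illustration: a constant 700-byte copy (with mvcle not preferred)
    is expanded by the first branch above into three mvc blocks of 256,
    256 and 188 bytes rather than a loop. */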
5407 /* Emit code to set LEN bytes at DST to VAL.
5408 Make use of clrmem if VAL is zero. */
5410 void
5411 s390_expand_setmem (rtx dst, rtx len, rtx val)
5413 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5414 return;
5416 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5418 /* Expand setmem/clrmem for a constant length operand without a
5419 loop if it will be shorter that way.
5420 With a constant length and without pfd argument a
5421 clrmem loop is 32 bytes -> 5.3 * xc
5422 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5423 if (GET_CODE (len) == CONST_INT
5424 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5425 || INTVAL (len) <= 257 * 3)
5426 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5428 HOST_WIDE_INT o, l;
5430 if (val == const0_rtx)
5431 /* clrmem: emit 256 byte blockwise XCs. */
5432 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5434 rtx newdst = adjust_address (dst, BLKmode, o);
5435 emit_insn (gen_clrmem_short (newdst,
5436 GEN_INT (l > 256 ? 255 : l - 1)));
5438 else
5439 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5440 setting first byte to val and using a 256 byte mvc with one
5441 byte overlap to propagate the byte. */
5442 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5444 rtx newdst = adjust_address (dst, BLKmode, o);
5445 emit_move_insn (adjust_address (dst, QImode, o), val);
5446 if (l > 1)
5448 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5449 emit_insn (gen_movmem_short (newdstp1, newdst,
5450 GEN_INT (l > 257 ? 255 : l - 2)));
5455 else if (TARGET_MVCLE)
5457 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5458 if (TARGET_64BIT)
5459 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5460 val));
5461 else
5462 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5463 val));
5466 else
5468 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5469 rtx_code_label *loop_start_label = gen_label_rtx ();
5470 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5471 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5472 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5473 machine_mode mode;
5475 mode = GET_MODE (len);
5476 if (mode == VOIDmode)
5477 mode = Pmode;
5479 dst_addr = gen_reg_rtx (Pmode);
5480 count = gen_reg_rtx (mode);
5481 blocks = gen_reg_rtx (mode);
5483 convert_move (count, len, 1);
5484 emit_cmp_and_jump_insns (count, const0_rtx,
5485 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5486 profile_probability::very_unlikely ());
5488 /* We need to make a copy of the target address since memset is
5489 supposed to return it unmodified. We have to make it here
5490 already since the new reg is used at onebyte_end_label. */
5491 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5492 dst = change_address (dst, VOIDmode, dst_addr);
5494 if (val != const0_rtx)
5496 /* When using the overlapping mvc the original target
5497 address is only accessed as a single byte entity (even by
5498 the mvc reading this value). */
5499 set_mem_size (dst, 1);
5500 dstp1 = adjust_address (dst, VOIDmode, 1);
5501 emit_cmp_and_jump_insns (count,
5502 const1_rtx, EQ, NULL_RTX, mode, 1,
5503 onebyte_end_label,
5504 profile_probability::very_unlikely ());
5507 /* There is one unconditional (mvi+mvc)/xc after the loop
5508 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5509 or one (xc) here leaves this number of bytes to be handled by
5510 it. */
5511 temp = expand_binop (mode, add_optab, count,
5512 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5513 count, 1, OPTAB_DIRECT);
5514 if (temp != count)
5515 emit_move_insn (count, temp);
5517 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5518 OPTAB_DIRECT);
5519 if (temp != blocks)
5520 emit_move_insn (blocks, temp);
5522 emit_cmp_and_jump_insns (blocks, const0_rtx,
5523 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5525 emit_jump (loop_start_label);
5527 if (val != const0_rtx)
5529 /* The 1 byte != 0 special case. Not handled efficiently
5530 since we require two jumps for that. However, this
5531 should be very rare. */
5532 emit_label (onebyte_end_label);
5533 emit_move_insn (adjust_address (dst, QImode, 0), val);
5534 emit_jump (zerobyte_end_label);
5537 emit_label (loop_start_label);
5539 if (TARGET_Z10
5540 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5542 /* Issue a write prefetch for the +4 cache line. */
5543 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5544 GEN_INT (1024)),
5545 const1_rtx, const0_rtx);
5546 emit_insn (prefetch);
5547 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5550 if (val == const0_rtx)
5551 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5552 else
5554 /* Set the first byte in the block to the value and use an
5555 overlapping mvc for the block. */
5556 emit_move_insn (adjust_address (dst, QImode, 0), val);
5557 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5559 s390_load_address (dst_addr,
5560 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5562 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5563 OPTAB_DIRECT);
5564 if (temp != blocks)
5565 emit_move_insn (blocks, temp);
5567 emit_cmp_and_jump_insns (blocks, const0_rtx,
5568 NE, NULL_RTX, mode, 1, loop_start_label);
5570 emit_label (restbyte_end_label);
5572 if (val == const0_rtx)
5573 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5574 else
5576 /* Set the first byte in the block to the value and use an
5577 overlapping mvc for the block. */
5578 emit_move_insn (adjust_address (dst, QImode, 0), val);
5579 /* execute only uses the lowest 8 bits of count, which is
5580 exactly what we need here. */
5581 emit_insn (gen_movmem_short (dstp1, dst,
5582 convert_to_mode (Pmode, count, 1)));
5585 emit_label (zerobyte_end_label);
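 /* The non-zero fill relies on mvc copying strictly left to right, byte
    by byte: after VAL has been stored at dst[0], an mvc from dst into
    dst+1 propagates that byte through the rest of the block. */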
5589 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5590 and return the result in TARGET. */
5592 bool
5593 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5595 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5596 rtx tmp;
5598 /* When tuning for z10 or higher we rely on the Glibc functions to
5599 do the right thing. Only for constant lengths below 64k do we
5600 generate inline code. */
5601 if (s390_tune >= PROCESSOR_2097_Z10
5602 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5603 return false;
5605 /* As the result of CMPINT is inverted compared to what we need,
5606 we have to swap the operands. */
5607 tmp = op0; op0 = op1; op1 = tmp;
5609 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5611 if (INTVAL (len) > 0)
5613 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5614 emit_insn (gen_cmpint (target, ccreg));
5616 else
5617 emit_move_insn (target, const0_rtx);
5619 else if (TARGET_MVCLE)
5621 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5622 emit_insn (gen_cmpint (target, ccreg));
5624 else
5626 rtx addr0, addr1, count, blocks, temp;
5627 rtx_code_label *loop_start_label = gen_label_rtx ();
5628 rtx_code_label *loop_end_label = gen_label_rtx ();
5629 rtx_code_label *end_label = gen_label_rtx ();
5630 machine_mode mode;
5632 mode = GET_MODE (len);
5633 if (mode == VOIDmode)
5634 mode = Pmode;
5636 addr0 = gen_reg_rtx (Pmode);
5637 addr1 = gen_reg_rtx (Pmode);
5638 count = gen_reg_rtx (mode);
5639 blocks = gen_reg_rtx (mode);
5641 convert_move (count, len, 1);
5642 emit_cmp_and_jump_insns (count, const0_rtx,
5643 EQ, NULL_RTX, mode, 1, end_label);
5645 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5646 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5647 op0 = change_address (op0, VOIDmode, addr0);
5648 op1 = change_address (op1, VOIDmode, addr1);
5650 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5651 OPTAB_DIRECT);
5652 if (temp != count)
5653 emit_move_insn (count, temp);
5655 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5656 OPTAB_DIRECT);
5657 if (temp != blocks)
5658 emit_move_insn (blocks, temp);
5660 emit_cmp_and_jump_insns (blocks, const0_rtx,
5661 EQ, NULL_RTX, mode, 1, loop_end_label);
5663 emit_label (loop_start_label);
5665 if (TARGET_Z10
5666 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5668 rtx prefetch;
5670 /* Issue a read prefetch for the +2 cache line of operand 1. */
5671 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5672 const0_rtx, const0_rtx);
5673 emit_insn (prefetch);
5674 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5676 /* Issue a read prefetch for the +2 cache line of operand 2. */
5677 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5678 const0_rtx, const0_rtx);
5679 emit_insn (prefetch);
5680 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5683 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5684 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5685 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5686 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5687 temp = gen_rtx_SET (pc_rtx, temp);
5688 emit_jump_insn (temp);
5690 s390_load_address (addr0,
5691 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5692 s390_load_address (addr1,
5693 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5695 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5696 OPTAB_DIRECT);
5697 if (temp != blocks)
5698 emit_move_insn (blocks, temp);
5700 emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 EQ, NULL_RTX, mode, 1, loop_end_label);
5703 emit_jump (loop_start_label);
5704 emit_label (loop_end_label);
5706 emit_insn (gen_cmpmem_short (op0, op1,
5707 convert_to_mode (Pmode, count, 1)));
5708 emit_label (end_label);
5710 emit_insn (gen_cmpint (target, ccreg));
5712 return true;
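 /* In the loop above each cmpmem_short (clc) compares one 256-byte block
    and the conditional jump on a non-zero CC leaves the loop as soon as a
    block differs; the final cmpint then converts the condition code into
    the signed integer result expected from memcmp. */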
5715 /* Emit a conditional jump to LABEL for condition code mask MASK using
5716 comparison operator COMPARISON. Return the emitted jump insn. */
5718 static rtx_insn *
5719 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5721 rtx temp;
5723 gcc_assert (comparison == EQ || comparison == NE);
5724 gcc_assert (mask > 0 && mask < 15);
5726 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5727 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5728 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5729 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5730 temp = gen_rtx_SET (pc_rtx, temp);
5731 return emit_jump_insn (temp);
5734 /* Emit the instructions to implement strlen of STRING and store the
5735 result in TARGET. The string has the known ALIGNMENT. This
5736 version uses vector instructions and is therefore not appropriate
5737 for targets prior to z13. */
5739 void
5740 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5742 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5743 rtx str_reg = gen_reg_rtx (V16QImode);
5744 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5745 rtx str_idx_reg = gen_reg_rtx (Pmode);
5746 rtx result_reg = gen_reg_rtx (V16QImode);
5747 rtx is_aligned_label = gen_label_rtx ();
5748 rtx into_loop_label = NULL_RTX;
5749 rtx loop_start_label = gen_label_rtx ();
5750 rtx temp;
5751 rtx len = gen_reg_rtx (QImode);
5752 rtx cond;
5754 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5755 emit_move_insn (str_idx_reg, const0_rtx);
5757 if (INTVAL (alignment) < 16)
5759 /* Check whether the address happens to be properly aligned and, if
5760 so, jump directly to the aligned loop. */
5761 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5762 str_addr_base_reg, GEN_INT (15)),
5763 const0_rtx, EQ, NULL_RTX,
5764 Pmode, 1, is_aligned_label);
5766 temp = gen_reg_rtx (Pmode);
5767 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5768 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5769 gcc_assert (REG_P (temp));
5770 highest_index_to_load_reg =
5771 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5772 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5773 gcc_assert (REG_P (highest_index_to_load_reg));
5774 emit_insn (gen_vllv16qi (str_reg,
5775 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5776 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5778 into_loop_label = gen_label_rtx ();
5779 s390_emit_jump (into_loop_label, NULL_RTX);
5780 emit_barrier ();
5783 emit_label (is_aligned_label);
5784 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5786 /* Reaching this point we are only performing 16-byte aligned
5787 loads. */
5788 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5790 emit_label (loop_start_label);
5791 LABEL_NUSES (loop_start_label) = 1;
5793 /* Load 16 bytes of the string into VR. */
5794 emit_move_insn (str_reg,
5795 gen_rtx_MEM (V16QImode,
5796 gen_rtx_PLUS (Pmode, str_idx_reg,
5797 str_addr_base_reg)));
5798 if (into_loop_label != NULL_RTX)
5800 emit_label (into_loop_label);
5801 LABEL_NUSES (into_loop_label) = 1;
5804 /* Increment string index by 16 bytes. */
5805 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5806 str_idx_reg, 1, OPTAB_DIRECT);
5808 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5809 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5811 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5812 REG_BR_PROB,
5813 profile_probability::very_likely ().to_reg_br_prob_note ());
5814 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5816 /* If the string pointer wasn't aligned we have loaded less than 16
5817 bytes and the remaining bytes got filled with zeros (by vll).
5818 Now we have to check whether the resulting index lies within the
5819 bytes actually part of the string. */
5821 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5822 highest_index_to_load_reg);
5823 s390_load_address (highest_index_to_load_reg,
5824 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5825 const1_rtx));
5826 if (TARGET_64BIT)
5827 emit_insn (gen_movdicc (str_idx_reg, cond,
5828 highest_index_to_load_reg, str_idx_reg));
5829 else
5830 emit_insn (gen_movsicc (str_idx_reg, cond,
5831 highest_index_to_load_reg, str_idx_reg));
5833 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5834 profile_probability::very_unlikely ());
5836 expand_binop (Pmode, add_optab, str_idx_reg,
5837 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5838 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5839 here. */
5840 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5841 convert_to_mode (Pmode, len, 1),
5842 target, 1, OPTAB_DIRECT);
5843 if (temp != target)
5844 emit_move_insn (target, temp);
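 /* The vfene performed with VSTRING_FLAG_ZS searches each 16-byte chunk
    for a zero byte; byte element 7 of result_reg holds the index of the
    first zero (or 16 if none was found), which is why only that element
    is extracted into LEN above. */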
5847 void
5848 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5850 rtx temp = gen_reg_rtx (Pmode);
5851 rtx src_addr = XEXP (src, 0);
5852 rtx dst_addr = XEXP (dst, 0);
5853 rtx src_addr_reg = gen_reg_rtx (Pmode);
5854 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5855 rtx offset = gen_reg_rtx (Pmode);
5856 rtx vsrc = gen_reg_rtx (V16QImode);
5857 rtx vpos = gen_reg_rtx (V16QImode);
5858 rtx loadlen = gen_reg_rtx (SImode);
5859 rtx gpos_qi = gen_reg_rtx(QImode);
5860 rtx gpos = gen_reg_rtx (SImode);
5861 rtx done_label = gen_label_rtx ();
5862 rtx loop_label = gen_label_rtx ();
5863 rtx exit_label = gen_label_rtx ();
5864 rtx full_label = gen_label_rtx ();
5866 /* Perform a quick check for a string ending within the first (up to)
5867 16 bytes and exit early if successful. */
5869 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5870 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5871 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5872 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5873 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5874 /* gpos is the byte index if a zero was found and 16 otherwise.
5875 So if it is lower than the loaded bytes we have a hit. */
5876 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5877 full_label);
5878 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5880 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5881 1, OPTAB_DIRECT);
5882 emit_jump (exit_label);
5883 emit_barrier ();
5885 emit_label (full_label);
5886 LABEL_NUSES (full_label) = 1;
5888 /* Calculate `offset' so that src + offset points to the last byte
5889 before 16 byte alignment. */
5891 /* temp = src_addr & 0xf */
5892 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5893 1, OPTAB_DIRECT);
5895 /* offset = 0xf - temp */
5896 emit_move_insn (offset, GEN_INT (15));
5897 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5898 1, OPTAB_DIRECT);
5900 /* Store `offset' bytes in the destination string. The quick check
5901 has loaded at least `offset' bytes into vsrc. */
5903 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5905 /* Advance to the next byte to be loaded. */
5906 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5907 1, OPTAB_DIRECT);
5909 /* Make sure the addresses are single regs which can be used as a
5910 base. */
5911 emit_move_insn (src_addr_reg, src_addr);
5912 emit_move_insn (dst_addr_reg, dst_addr);
5914 /* MAIN LOOP */
5916 emit_label (loop_label);
5917 LABEL_NUSES (loop_label) = 1;
5919 emit_move_insn (vsrc,
5920 gen_rtx_MEM (V16QImode,
5921 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5923 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5924 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5925 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5926 REG_BR_PROB, profile_probability::very_unlikely ()
5927 .to_reg_br_prob_note ());
5929 emit_move_insn (gen_rtx_MEM (V16QImode,
5930 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5931 vsrc);
5932 /* offset += 16 */
5933 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5934 offset, 1, OPTAB_DIRECT);
5936 emit_jump (loop_label);
5937 emit_barrier ();
5939 /* REGULAR EXIT */
5941 /* We are done. Add the offset of the zero character to the dst_addr
5942 pointer to get the result. */
5944 emit_label (done_label);
5945 LABEL_NUSES (done_label) = 1;
5947 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5948 1, OPTAB_DIRECT);
5950 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5951 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5953 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5955 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5956 1, OPTAB_DIRECT);
5958 /* EARLY EXIT */
5960 emit_label (exit_label);
5961 LABEL_NUSES (exit_label) = 1;
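/* Illustrative sketch (not part of the expander itself): ignoring the
   vectorization, the expansion above behaves roughly like stpcpy, i.e.

     char *movstr (char *dst, const char *src)
     {
       size_t i = 0;
       while ((dst[i] = src[i]) != 0)
         i++;
       return dst + i;   // RESULT points at the copied zero byte
     }

   except that it copies 16 bytes per iteration with vector loads/stores
   and uses vfenez to locate the terminating zero within each chunk.  */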
5965 /* Expand conditional increment or decrement using alc/slb instructions.
5966 Should generate code setting DST to either SRC or SRC + INCREMENT,
5967 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5968 Returns true if successful, false otherwise.
5970 That makes it possible to implement some if-constructs without jumps e.g.:
5971 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5972 unsigned int a, b, c;
5973 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5974 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5975 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5976 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5978 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5979 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5980 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5981 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5982 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5984 bool
5985 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5986 rtx dst, rtx src, rtx increment)
5988 machine_mode cmp_mode;
5989 machine_mode cc_mode;
5990 rtx op_res;
5991 rtx insn;
5992 rtvec p;
5993 int ret;
5995 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5996 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5997 cmp_mode = SImode;
5998 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5999 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6000 cmp_mode = DImode;
6001 else
6002 return false;
6004 /* Try ADD LOGICAL WITH CARRY. */
6005 if (increment == const1_rtx)
6007 /* Determine CC mode to use. */
6008 if (cmp_code == EQ || cmp_code == NE)
6010 if (cmp_op1 != const0_rtx)
6012 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6013 NULL_RTX, 0, OPTAB_WIDEN);
6014 cmp_op1 = const0_rtx;
6017 cmp_code = cmp_code == EQ ? LEU : GTU;
6020 if (cmp_code == LTU || cmp_code == LEU)
6022 rtx tem = cmp_op0;
6023 cmp_op0 = cmp_op1;
6024 cmp_op1 = tem;
6025 cmp_code = swap_condition (cmp_code);
6028 switch (cmp_code)
6030 case GTU:
6031 cc_mode = CCUmode;
6032 break;
6034 case GEU:
6035 cc_mode = CCL3mode;
6036 break;
6038 default:
6039 return false;
6042 /* Emit comparison instruction pattern. */
6043 if (!register_operand (cmp_op0, cmp_mode))
6044 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6046 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6047 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6048 /* We use insn_invalid_p here to add clobbers if required. */
6049 ret = insn_invalid_p (emit_insn (insn), false);
6050 gcc_assert (!ret);
6052 /* Emit ALC instruction pattern. */
6053 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6054 gen_rtx_REG (cc_mode, CC_REGNUM),
6055 const0_rtx);
6057 if (src != const0_rtx)
6059 if (!register_operand (src, GET_MODE (dst)))
6060 src = force_reg (GET_MODE (dst), src);
6062 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6063 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6066 p = rtvec_alloc (2);
6067 RTVEC_ELT (p, 0) =
6068 gen_rtx_SET (dst, op_res);
6069 RTVEC_ELT (p, 1) =
6070 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6071 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6073 return true;
6076 /* Try SUBTRACT LOGICAL WITH BORROW. */
6077 if (increment == constm1_rtx)
6079 /* Determine CC mode to use. */
6080 if (cmp_code == EQ || cmp_code == NE)
6082 if (cmp_op1 != const0_rtx)
6084 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6085 NULL_RTX, 0, OPTAB_WIDEN);
6086 cmp_op1 = const0_rtx;
6089 cmp_code = cmp_code == EQ ? LEU : GTU;
6092 if (cmp_code == GTU || cmp_code == GEU)
6094 rtx tem = cmp_op0;
6095 cmp_op0 = cmp_op1;
6096 cmp_op1 = tem;
6097 cmp_code = swap_condition (cmp_code);
6100 switch (cmp_code)
6102 case LEU:
6103 cc_mode = CCUmode;
6104 break;
6106 case LTU:
6107 cc_mode = CCL3mode;
6108 break;
6110 default:
6111 return false;
6114 /* Emit comparison instruction pattern. */
6115 if (!register_operand (cmp_op0, cmp_mode))
6116 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6118 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6119 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6120 /* We use insn_invalid_p here to add clobbers if required. */
6121 ret = insn_invalid_p (emit_insn (insn), false);
6122 gcc_assert (!ret);
6124 /* Emit SLB instruction pattern. */
6125 if (!register_operand (src, GET_MODE (dst)))
6126 src = force_reg (GET_MODE (dst), src);
6128 op_res = gen_rtx_MINUS (GET_MODE (dst),
6129 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6130 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6131 gen_rtx_REG (cc_mode, CC_REGNUM),
6132 const0_rtx));
6133 p = rtvec_alloc (2);
6134 RTVEC_ELT (p, 0) =
6135 gen_rtx_SET (dst, op_res);
6136 RTVEC_ELT (p, 1) =
6137 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6138 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6140 return true;
6143 return false;
6146 /* Expand code for the insv template. Return true if successful. */
6148 bool
6149 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6151 int bitsize = INTVAL (op1);
6152 int bitpos = INTVAL (op2);
6153 machine_mode mode = GET_MODE (dest);
6154 machine_mode smode;
6155 int smode_bsize, mode_bsize;
6156 rtx op, clobber;
6158 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6159 return false;
6161 /* Generate INSERT IMMEDIATE (IILL et al). */
6162 /* (set (ze (reg)) (const_int)). */
6163 if (TARGET_ZARCH
6164 && register_operand (dest, word_mode)
6165 && (bitpos % 16) == 0
6166 && (bitsize % 16) == 0
6167 && const_int_operand (src, VOIDmode))
6169 HOST_WIDE_INT val = INTVAL (src);
6170 int regpos = bitpos + bitsize;
6172 while (regpos > bitpos)
6174 machine_mode putmode;
6175 int putsize;
6177 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6178 putmode = SImode;
6179 else
6180 putmode = HImode;
6182 putsize = GET_MODE_BITSIZE (putmode);
6183 regpos -= putsize;
6184 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6185 GEN_INT (putsize),
6186 GEN_INT (regpos)),
6187 gen_int_mode (val, putmode));
6188 val >>= putsize;
6190 gcc_assert (regpos == bitpos);
6191 return true;
6194 smode = smallest_int_mode_for_size (bitsize);
6195 smode_bsize = GET_MODE_BITSIZE (smode);
6196 mode_bsize = GET_MODE_BITSIZE (mode);
6198 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6199 if (bitpos == 0
6200 && (bitsize % BITS_PER_UNIT) == 0
6201 && MEM_P (dest)
6202 && (register_operand (src, word_mode)
6203 || const_int_operand (src, VOIDmode)))
6205 /* Emit standard pattern if possible. */
6206 if (smode_bsize == bitsize)
6208 emit_move_insn (adjust_address (dest, smode, 0),
6209 gen_lowpart (smode, src));
6210 return true;
6213 /* (set (ze (mem)) (const_int)). */
6214 else if (const_int_operand (src, VOIDmode))
6216 int size = bitsize / BITS_PER_UNIT;
6217 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6218 BLKmode,
6219 UNITS_PER_WORD - size);
6221 dest = adjust_address (dest, BLKmode, 0);
6222 set_mem_size (dest, size);
6223 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6224 return true;
6227 /* (set (ze (mem)) (reg)). */
6228 else if (register_operand (src, word_mode))
6230 if (bitsize <= 32)
6231 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6232 const0_rtx), src);
6233 else
6235 /* Emit st,stcmh sequence. */
6236 int stcmh_width = bitsize - 32;
6237 int size = stcmh_width / BITS_PER_UNIT;
6239 emit_move_insn (adjust_address (dest, SImode, size),
6240 gen_lowpart (SImode, src));
6241 set_mem_size (dest, size);
6242 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6243 GEN_INT (stcmh_width),
6244 const0_rtx),
6245 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6247 return true;
6251 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6252 if ((bitpos % BITS_PER_UNIT) == 0
6253 && (bitsize % BITS_PER_UNIT) == 0
6254 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6255 && MEM_P (src)
6256 && (mode == DImode || mode == SImode)
6257 && register_operand (dest, mode))
6259 /* Emit a strict_low_part pattern if possible. */
6260 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6262 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6263 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6264 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6265 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6266 return true;
6269 /* ??? There are more powerful versions of ICM that are not
6270 completely represented in the md file. */
6273 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6274 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6276 machine_mode mode_s = GET_MODE (src);
6278 if (CONSTANT_P (src))
6280 /* For constant zero values the representation with AND
6281 appears to be folded in more situations than the (set
6282 (zero_extract) ...).
6283 We only do this when the start and end of the bitfield
6284 remain in the same SImode chunk. That way nihf or nilf
6285 can be used.
6286 The AND patterns might still generate a risbg for this. */
6287 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6288 return false;
6289 else
6290 src = force_reg (mode, src);
6292 else if (mode_s != mode)
6294 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6295 src = force_reg (mode_s, src);
6296 src = gen_lowpart (mode, src);
6299 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6300 op = gen_rtx_SET (op, src);
6302 if (!TARGET_ZEC12)
6304 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6305 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6307 emit_insn (op);
6309 return true;
6312 return false;
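/* Illustrative sketch (a hypothetical example, not taken from the
   testsuite): a source pattern that can reach this expander is a masked
   field update such as

     void set_field (unsigned long long *p, unsigned int v)
     {
       *p = (*p & ~0xffff000000000000ULL)
            | ((unsigned long long) v << 48);
     }

   which, depending on the operands and the architecture level, may end up
   as an insert-immediate, icm/stcm or risbg style instruction via the
   cases handled above.  */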
6315 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6316 register that holds VAL of mode MODE shifted by COUNT bits. */
6318 static inline rtx
6319 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6321 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6322 NULL_RTX, 1, OPTAB_DIRECT);
6323 return expand_simple_binop (SImode, ASHIFT, val, count,
6324 NULL_RTX, 1, OPTAB_DIRECT);
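/* For example (illustrative values): with MODE == QImode, VAL == 0x1ff and
   COUNT == 16 the helper above computes (0x1ff & 0xff) << 16 == 0x00ff0000
   in an SImode register.  */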
6327 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6328 the result in TARGET. */
6330 void
6331 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6332 rtx cmp_op1, rtx cmp_op2)
6334 machine_mode mode = GET_MODE (target);
6335 bool neg_p = false, swap_p = false;
6336 rtx tmp;
6338 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6340 switch (cond)
6342 /* NE a != b -> !(a == b) */
6343 case NE: cond = EQ; neg_p = true; break;
6344 /* UNGT a u> b -> !(b >= a) */
6345 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6346 /* UNGE a u>= b -> !(b > a) */
6347 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6348 /* LE: a <= b -> b >= a */
6349 case LE: cond = GE; swap_p = true; break;
6350 /* UNLE: a u<= b -> !(a > b) */
6351 case UNLE: cond = GT; neg_p = true; break;
6352 /* LT: a < b -> b > a */
6353 case LT: cond = GT; swap_p = true; break;
6354 /* UNLT: a u< b -> !(a >= b) */
6355 case UNLT: cond = GE; neg_p = true; break;
6356 case UNEQ:
6357 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6358 return;
6359 case LTGT:
6360 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6361 return;
6362 case ORDERED:
6363 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6364 return;
6365 case UNORDERED:
6366 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6367 return;
6368 default: break;
6371 else
6373 switch (cond)
6375 /* NE: a != b -> !(a == b) */
6376 case NE: cond = EQ; neg_p = true; break;
6377 /* GE: a >= b -> !(b > a) */
6378 case GE: cond = GT; neg_p = true; swap_p = true; break;
6379 /* GEU: a >= b -> !(b > a) */
6380 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6381 /* LE: a <= b -> !(a > b) */
6382 case LE: cond = GT; neg_p = true; break;
6383 /* LEU: a <= b -> !(a > b) */
6384 case LEU: cond = GTU; neg_p = true; break;
6385 /* LT: a < b -> b > a */
6386 case LT: cond = GT; swap_p = true; break;
6387 /* LTU: a < b -> b > a */
6388 case LTU: cond = GTU; swap_p = true; break;
6389 default: break;
6393 if (swap_p)
6395 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6398 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6399 mode,
6400 cmp_op1, cmp_op2)));
6401 if (neg_p)
6402 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
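/* For example (illustrative): an integer vector comparison a >= b is
   emitted by the code above as NOT (b > a), since the vector hardware only
   provides "equal" and "greater than" compares; this typically becomes a
   vch/vchl followed by a vector NOT.  */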
6405 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6406 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6407 elements in CMP1 and CMP2 fulfill the comparison.
6408 This function is only used to emit patterns for the vx builtins and
6409 therefore only handles comparison codes required by the
6410 builtins. */
6411 void
6412 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6413 rtx cmp1, rtx cmp2, bool all_p)
6415 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6416 rtx tmp_reg = gen_reg_rtx (SImode);
6417 bool swap_p = false;
6419 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6421 switch (code)
6423 case EQ:
6424 case NE:
6425 cc_producer_mode = CCVEQmode;
6426 break;
6427 case GE:
6428 case LT:
6429 code = swap_condition (code);
6430 swap_p = true;
6431 /* fallthrough */
6432 case GT:
6433 case LE:
6434 cc_producer_mode = CCVIHmode;
6435 break;
6436 case GEU:
6437 case LTU:
6438 code = swap_condition (code);
6439 swap_p = true;
6440 /* fallthrough */
6441 case GTU:
6442 case LEU:
6443 cc_producer_mode = CCVIHUmode;
6444 break;
6445 default:
6446 gcc_unreachable ();
6449 scratch_mode = GET_MODE (cmp1);
6450 /* These codes represent inverted CC interpretations. Inverting
6451 an ALL CC mode results in an ANY CC mode and the other way
6452 around. Invert the all_p flag here to compensate for
6453 that. */
6454 if (code == NE || code == LE || code == LEU)
6455 all_p = !all_p;
6457 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6459 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6461 bool inv_p = false;
6463 switch (code)
6465 case EQ: cc_producer_mode = CCVEQmode; break;
6466 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6467 case GT: cc_producer_mode = CCVFHmode; break;
6468 case GE: cc_producer_mode = CCVFHEmode; break;
6469 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6470 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6471 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6472 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6473 default: gcc_unreachable ();
6475 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6477 if (inv_p)
6478 all_p = !all_p;
6480 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6482 else
6483 gcc_unreachable ();
6485 if (swap_p)
6487 rtx tmp = cmp2;
6488 cmp2 = cmp1;
6489 cmp1 = tmp;
6492 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6493 gen_rtvec (2, gen_rtx_SET (
6494 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6495 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6496 gen_rtx_CLOBBER (VOIDmode,
6497 gen_rtx_SCRATCH (scratch_mode)))));
6498 emit_move_insn (target, const0_rtx);
6499 emit_move_insn (tmp_reg, const1_rtx);
6501 emit_move_insn (target,
6502 gen_rtx_IF_THEN_ELSE (SImode,
6503 gen_rtx_fmt_ee (code, VOIDmode,
6504 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6505 const0_rtx),
6506 tmp_reg, target));
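/* Illustrative sketch (assumed mapping): zvector predicates such as

     int all_eq = vec_all_eq (a, b);   // ALL_P == true
     int any_gt = vec_any_gt (a, b);   // ALL_P == false

   are the typical users of this function; the CC value produced by the
   vector compare above is then turned into a 0/1 result via the
   if_then_else move.  */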
6509 /* Invert the comparison CODE applied to a CC mode. This is only safe
7510 if we know whether the result was created by a floating point
6511 compare or not. For the CCV modes this is encoded as part of the
6512 mode. */
6513 enum rtx_code
6514 s390_reverse_condition (machine_mode mode, enum rtx_code code)
7516 /* Reversal of FP compares needs care -- an ordered compare
7517 becomes an unordered compare and vice versa. */
6518 if (mode == CCVFALLmode || mode == CCVFANYmode)
6519 return reverse_condition_maybe_unordered (code);
6520 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6521 return reverse_condition (code);
6522 else
6523 gcc_unreachable ();
6526 /* Generate a vector comparison expression loading either elements of
6527 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6528 and CMP_OP2. */
6530 void
6531 s390_expand_vcond (rtx target, rtx then, rtx els,
6532 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6534 rtx tmp;
6535 machine_mode result_mode;
6536 rtx result_target;
6538 machine_mode target_mode = GET_MODE (target);
6539 machine_mode cmp_mode = GET_MODE (cmp_op1);
6540 rtx op = (cond == LT) ? els : then;
6542 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6543 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6544 for short and byte (x >> 15 and x >> 7 respectively). */
6545 if ((cond == LT || cond == GE)
6546 && target_mode == cmp_mode
6547 && cmp_op2 == CONST0_RTX (cmp_mode)
6548 && op == CONST0_RTX (target_mode)
6549 && s390_vector_mode_supported_p (target_mode)
6550 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6552 rtx negop = (cond == LT) ? then : els;
6554 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6556 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6557 if (negop == CONST1_RTX (target_mode))
6559 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6560 GEN_INT (shift), target,
6561 1, OPTAB_DIRECT);
6562 if (res != target)
6563 emit_move_insn (target, res);
6564 return;
6567 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6568 else if (all_ones_operand (negop, target_mode))
6570 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6571 GEN_INT (shift), target,
6572 0, OPTAB_DIRECT);
6573 if (res != target)
6574 emit_move_insn (target, res);
6575 return;
6579 /* We always use an integral type vector to hold the comparison
6580 result. */
6581 result_mode = mode_for_int_vector (cmp_mode).require ();
6582 result_target = gen_reg_rtx (result_mode);
6584 /* We allow vector immediates as comparison operands that
6585 can be handled by the optimization above but not by the
6586 following code. Hence, force them into registers here. */
6587 if (!REG_P (cmp_op1))
6588 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6590 if (!REG_P (cmp_op2))
6591 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6593 s390_expand_vec_compare (result_target, cond,
6594 cmp_op1, cmp_op2);
6596 /* If the results are supposed to be either -1 or 0 we are done
6597 since this is what our compare instructions generate anyway. */
6598 if (all_ones_operand (then, GET_MODE (then))
6599 && const0_operand (els, GET_MODE (els)))
6601 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6602 result_target, 0));
6603 return;
6606 /* Otherwise we will do a vsel afterwards. */
6607 /* This gets triggered e.g.
6608 with gcc.c-torture/compile/pr53410-1.c */
6609 if (!REG_P (then))
6610 then = force_reg (target_mode, then);
6612 if (!REG_P (els))
6613 els = force_reg (target_mode, els);
6615 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6616 result_target,
6617 CONST0_RTX (result_mode));
6619 /* We compared the result against zero above so we have to swap then
6620 and els here. */
6621 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6623 gcc_assert (target_mode == GET_MODE (then));
6624 emit_insn (gen_rtx_SET (target, tmp));
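/* Illustrative sketch (hypothetical source): the shift optimization above
   turns an elementwise

     v4si mask_negative (v4si x)
     {
       return x < 0;   // -1 for negative lanes, 0 otherwise
     }

   into a single vector arithmetic shift right by 31 instead of a compare
   plus select, assuming v4si is a GNU C vector of four 32-bit ints.  */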
6627 /* Emit the RTX necessary to initialize the vector TARGET with values
6628 in VALS. */
6629 void
6630 s390_expand_vec_init (rtx target, rtx vals)
6632 machine_mode mode = GET_MODE (target);
6633 machine_mode inner_mode = GET_MODE_INNER (mode);
6634 int n_elts = GET_MODE_NUNITS (mode);
6635 bool all_same = true, all_regs = true, all_const_int = true;
6636 rtx x;
6637 int i;
6639 for (i = 0; i < n_elts; ++i)
6641 x = XVECEXP (vals, 0, i);
6643 if (!CONST_INT_P (x))
6644 all_const_int = false;
6646 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6647 all_same = false;
6649 if (!REG_P (x))
6650 all_regs = false;
6653 /* Use vector gen mask or vector gen byte mask if possible. */
6654 if (all_same && all_const_int
6655 && (XVECEXP (vals, 0, 0) == const0_rtx
6656 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6657 NULL, NULL)
6658 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6660 emit_insn (gen_rtx_SET (target,
6661 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6662 return;
6665 if (all_same)
6667 emit_insn (gen_rtx_SET (target,
6668 gen_rtx_VEC_DUPLICATE (mode,
6669 XVECEXP (vals, 0, 0))));
6670 return;
6673 if (all_regs
6674 && REG_P (target)
6675 && n_elts == 2
6676 && GET_MODE_SIZE (inner_mode) == 8)
6678 /* Use vector load pair. */
6679 emit_insn (gen_rtx_SET (target,
6680 gen_rtx_VEC_CONCAT (mode,
6681 XVECEXP (vals, 0, 0),
6682 XVECEXP (vals, 0, 1))));
6683 return;
6686 /* Use vector load logical element and zero. */
6687 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6689 bool found = true;
6691 x = XVECEXP (vals, 0, 0);
6692 if (memory_operand (x, inner_mode))
6694 for (i = 1; i < n_elts; ++i)
6695 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6697 if (found)
6699 machine_mode half_mode = (inner_mode == SFmode
6700 ? V2SFmode : V2SImode);
6701 emit_insn (gen_rtx_SET (target,
6702 gen_rtx_VEC_CONCAT (mode,
6703 gen_rtx_VEC_CONCAT (half_mode,
6705 const0_rtx),
6706 gen_rtx_VEC_CONCAT (half_mode,
6707 const0_rtx,
6708 const0_rtx))));
6709 return;
6714 /* We are about to set the vector elements one by one. Zero out the
6715 full register first in order to help the data flow framework to
6716 detect it as a full VR set. */
6717 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6719 /* Unfortunately the vec_init expander is not allowed to fail. So
6720 we have to implement the fallback ourselves. */
6721 for (i = 0; i < n_elts; i++)
6723 rtx elem = XVECEXP (vals, 0, i);
6724 if (!general_operand (elem, GET_MODE (elem)))
6725 elem = force_reg (inner_mode, elem);
6727 emit_insn (gen_rtx_SET (target,
6728 gen_rtx_UNSPEC (mode,
6729 gen_rtvec (3, elem,
6730 GEN_INT (i), target),
6731 UNSPEC_VEC_SET)));
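/* Illustrative sketch (assumed examples): an initializer like

     v2di a = (v2di) { x, y };   // two 64-bit regs -> vector load pair

   takes the VEC_CONCAT path above, a constant splat such as
   (v16qi) { 7, 7, 7, ... } the gen-mask/duplicate paths, and anything more
   irregular falls back to the element-by-element UNSPEC_VEC_SET loop,
   assuming v2di/v16qi are the usual 16-byte GNU C vector types.  */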
6735 /* Structure to hold the initial parameters for a compare_and_swap operation
6736 in HImode and QImode. */
6738 struct alignment_context
6740 rtx memsi; /* SI aligned memory location. */
6741 rtx shift; /* Bit offset with regard to lsb. */
6742 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6743 rtx modemaski; /* ~modemask */
6744 bool aligned; /* True if memory is aligned, false otherwise. */
6747 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6748 the structure AC for transparent simplification, if the memory alignment
6749 is known to be at least 32 bits. MEM is the memory location for the
6750 actual operation and MODE its mode. */
6752 static void
6753 init_alignment_context (struct alignment_context *ac, rtx mem,
6754 machine_mode mode)
6756 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6757 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6759 if (ac->aligned)
6760 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6761 else
6763 /* Alignment is unknown. */
6764 rtx byteoffset, addr, align;
6766 /* Force the address into a register. */
6767 addr = force_reg (Pmode, XEXP (mem, 0));
6769 /* Align it to SImode. */
6770 align = expand_simple_binop (Pmode, AND, addr,
6771 GEN_INT (-GET_MODE_SIZE (SImode)),
6772 NULL_RTX, 1, OPTAB_DIRECT);
6773 /* Generate MEM. */
6774 ac->memsi = gen_rtx_MEM (SImode, align);
6775 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6776 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6777 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6779 /* Calculate shiftcount. */
6780 byteoffset = expand_simple_binop (Pmode, AND, addr,
6781 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6782 NULL_RTX, 1, OPTAB_DIRECT);
6783 /* As we already have some offset, evaluate the remaining distance. */
6784 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6785 NULL_RTX, 1, OPTAB_DIRECT);
6788 /* Shift is the byte count, but we need the bitcount. */
6789 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6790 NULL_RTX, 1, OPTAB_DIRECT);
6792 /* Calculate masks. */
6793 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6794 GEN_INT (GET_MODE_MASK (mode)),
6795 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6796 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6797 NULL_RTX, 1);
6800 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6801 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6802 perform the merge in SEQ2. */
6804 static rtx
6805 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6806 machine_mode mode, rtx val, rtx ins)
6808 rtx tmp;
6810 if (ac->aligned)
6812 start_sequence ();
6813 tmp = copy_to_mode_reg (SImode, val);
6814 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6815 const0_rtx, ins))
6817 *seq1 = NULL;
6818 *seq2 = get_insns ();
6819 end_sequence ();
6820 return tmp;
6822 end_sequence ();
6825 /* Failed to use insv. Generate a two part shift and mask. */
6826 start_sequence ();
6827 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6828 *seq1 = get_insns ();
6829 end_sequence ();
6831 start_sequence ();
6832 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6833 *seq2 = get_insns ();
6834 end_sequence ();
6836 return tmp;
6839 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6840 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6841 value to set if CMP == MEM. */
6843 static void
6844 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6845 rtx cmp, rtx new_rtx, bool is_weak)
6847 struct alignment_context ac;
6848 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6849 rtx res = gen_reg_rtx (SImode);
6850 rtx_code_label *csloop = NULL, *csend = NULL;
6852 gcc_assert (MEM_P (mem));
6854 init_alignment_context (&ac, mem, mode);
6856 /* Load full word. Subsequent loads are performed by CS. */
6857 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6858 NULL_RTX, 1, OPTAB_DIRECT);
6860 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6861 possible, we try to use insv to make this happen efficiently. If
6862 that fails we'll generate code both inside and outside the loop. */
6863 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6864 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6866 if (seq0)
6867 emit_insn (seq0);
6868 if (seq1)
6869 emit_insn (seq1);
6871 /* Start CS loop. */
6872 if (!is_weak)
6874 /* Begin assuming success. */
6875 emit_move_insn (btarget, const1_rtx);
6877 csloop = gen_label_rtx ();
6878 csend = gen_label_rtx ();
6879 emit_label (csloop);
6882 /* val = "<mem>00..0<mem>"
6883 * cmp = "00..0<cmp>00..0"
6884 * new = "00..0<new>00..0"
6887 emit_insn (seq2);
6888 emit_insn (seq3);
6890 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6891 if (is_weak)
6892 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6893 else
6895 rtx tmp;
6897 /* Jump to end if we're done (likely?). */
6898 s390_emit_jump (csend, cc);
6900 /* Check for changes outside mode, and loop internal if so.
6901 Arrange the moves so that the compare is adjacent to the
6902 branch so that we can generate CRJ. */
6903 tmp = copy_to_reg (val);
6904 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6905 1, OPTAB_DIRECT);
6906 cc = s390_emit_compare (NE, val, tmp);
6907 s390_emit_jump (csloop, cc);
6909 /* Failed. */
6910 emit_move_insn (btarget, const0_rtx);
6911 emit_label (csend);
6914 /* Return the correct part of the bitfield. */
6915 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6916 NULL_RTX, 1, OPTAB_DIRECT), 1);
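/* Illustrative sketch (assumed example): a sub-word compare-and-swap such
   as

     _Bool cas_short (short *p, short *expected, short desired)
     {
       return __atomic_compare_exchange_n (p, expected, desired, 0,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }

   ends up here; the expansion widens the access to the containing aligned
   word and retries the CS as long as only bits outside the addressed
   halfword (or byte) have changed.  */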
6919 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6920 static void
6921 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6922 rtx cmp, rtx new_rtx, bool is_weak)
6924 rtx output = vtarget;
6925 rtx_code_label *skip_cs_label = NULL;
6926 bool do_const_opt = false;
6928 if (!register_operand (output, mode))
6929 output = gen_reg_rtx (mode);
6931 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6932 with the constant first and skip the compare_and_swap because it is very
6933 expensive and likely to fail anyway.
6934 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6935 cause spurious failures in that case.
6936 Note 2: It may be useful to do this also for non-constant INPUT.
6937 Note 3: Currently only targets with "load on condition" are supported
6938 (z196 and newer). */
6940 if (TARGET_Z196
6941 && (mode == SImode || mode == DImode))
6942 do_const_opt = (is_weak && CONST_INT_P (cmp));
6944 if (do_const_opt)
6946 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6948 skip_cs_label = gen_label_rtx ();
6949 emit_move_insn (btarget, const0_rtx);
6950 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
6952 rtvec lt = rtvec_alloc (2);
6954 /* Load-and-test + conditional jump. */
6955 RTVEC_ELT (lt, 0)
6956 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
6957 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
6958 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
6960 else
6962 emit_move_insn (output, mem);
6963 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
6965 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
6966 add_reg_br_prob_note (get_last_insn (),
6967 profile_probability::very_unlikely ());
6968 /* If the jump is not taken, OUTPUT is the expected value. */
6969 cmp = output;
6970 /* Reload newval to a register manually, *after* the compare and jump
6971 above. Otherwise Reload might place it before the jump. */
6973 else
6974 cmp = force_reg (mode, cmp);
6975 new_rtx = force_reg (mode, new_rtx);
6976 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
6977 (do_const_opt) ? CCZmode : CCZ1mode);
6978 if (skip_cs_label != NULL)
6979 emit_label (skip_cs_label);
6981 /* We deliberately accept non-register operands in the predicate
6982 to ensure the write back to the output operand happens *before*
6983 the store-flags code below. This makes it easier for combine
6984 to merge the store-flags code with a potential test-and-branch
6985 pattern following (immediately!) afterwards. */
6986 if (output != vtarget)
6987 emit_move_insn (vtarget, output);
6989 if (do_const_opt)
6991 rtx cc, cond, ite;
6993 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
6994 btarget has already been initialized with 0 above. */
6995 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6996 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
6997 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
6998 emit_insn (gen_rtx_SET (btarget, ite));
7000 else
7002 rtx cc, cond;
7004 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7005 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7006 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
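/* Illustrative sketch (assumed example): for a weak compare-and-swap with a
   constant expected value, e.g.

     _Bool try_lock (int *p)
     {
       int expected = 0;
       return __atomic_compare_exchange_n (p, &expected, 1, 1,
                                           __ATOMIC_ACQUIRE,
                                           __ATOMIC_RELAXED);
     }

   the code above first tests the memory word against zero (load-and-test
   plus branch) and only executes the expensive CS when that quick check
   succeeds.  */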
7010 /* Expand an atomic compare and swap operation. MEM is the memory location,
7011 CMP the old value to compare MEM with and NEW_RTX the value to set if
7012 CMP == MEM. */
7014 void
7015 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7016 rtx cmp, rtx new_rtx, bool is_weak)
7018 switch (mode)
7020 case E_TImode:
7021 case E_DImode:
7022 case E_SImode:
7023 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7024 break;
7025 case E_HImode:
7026 case E_QImode:
7027 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7028 break;
7029 default:
7030 gcc_unreachable ();
7034 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7035 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7036 of MEM. */
7038 void
7039 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7041 machine_mode mode = GET_MODE (mem);
7042 rtx_code_label *csloop;
7044 if (TARGET_Z196
7045 && (mode == DImode || mode == SImode)
7046 && CONST_INT_P (input) && INTVAL (input) == 0)
7048 emit_move_insn (output, const0_rtx);
7049 if (mode == DImode)
7050 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7051 else
7052 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7053 return;
7056 input = force_reg (mode, input);
7057 emit_move_insn (output, mem);
7058 csloop = gen_label_rtx ();
7059 emit_label (csloop);
7060 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7061 input, CCZ1mode));
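/* Illustrative sketch (assumed example): an exchange like

     long swap (long *p, long v)
     {
       return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);
     }

   is expanded by the function above either into an atomic AND with zero
   (for a constant zero INPUT on z196 and newer) or into the CS loop that
   retries until the compare value matches the memory contents.  */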
7064 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7065 and VAL the value to play with. If AFTER is true then store the value
7066 MEM holds after the operation, if AFTER is false then store the value MEM
7067 holds before the operation. If TARGET is zero then discard that value, else
7068 store it to TARGET. */
7070 void
7071 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7072 rtx target, rtx mem, rtx val, bool after)
7074 struct alignment_context ac;
7075 rtx cmp;
7076 rtx new_rtx = gen_reg_rtx (SImode);
7077 rtx orig = gen_reg_rtx (SImode);
7078 rtx_code_label *csloop = gen_label_rtx ();
7080 gcc_assert (!target || register_operand (target, VOIDmode));
7081 gcc_assert (MEM_P (mem));
7083 init_alignment_context (&ac, mem, mode);
7085 /* Shift val to the correct bit positions.
7086 Preserve "icm", but prevent "ex icm". */
7087 if (!(ac.aligned && code == SET && MEM_P (val)))
7088 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7090 /* Further preparation insns. */
7091 if (code == PLUS || code == MINUS)
7092 emit_move_insn (orig, val);
7093 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7094 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7095 NULL_RTX, 1, OPTAB_DIRECT);
7097 /* Load full word. Subsequent loads are performed by CS. */
7098 cmp = force_reg (SImode, ac.memsi);
7100 /* Start CS loop. */
7101 emit_label (csloop);
7102 emit_move_insn (new_rtx, cmp);
7104 /* Patch new with val at correct position. */
7105 switch (code)
7107 case PLUS:
7108 case MINUS:
7109 val = expand_simple_binop (SImode, code, new_rtx, orig,
7110 NULL_RTX, 1, OPTAB_DIRECT);
7111 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7112 NULL_RTX, 1, OPTAB_DIRECT);
7113 /* FALLTHRU */
7114 case SET:
7115 if (ac.aligned && MEM_P (val))
7116 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7117 0, 0, SImode, val, false);
7118 else
7120 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7121 NULL_RTX, 1, OPTAB_DIRECT);
7122 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7123 NULL_RTX, 1, OPTAB_DIRECT);
7125 break;
7126 case AND:
7127 case IOR:
7128 case XOR:
7129 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7130 NULL_RTX, 1, OPTAB_DIRECT);
7131 break;
7132 case MULT: /* NAND */
7133 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7134 NULL_RTX, 1, OPTAB_DIRECT);
7135 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7136 NULL_RTX, 1, OPTAB_DIRECT);
7137 break;
7138 default:
7139 gcc_unreachable ();
7142 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7143 ac.memsi, cmp, new_rtx,
7144 CCZ1mode));
7146 /* Return the correct part of the bitfield. */
7147 if (target)
7148 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7149 after ? new_rtx : cmp, ac.shift,
7150 NULL_RTX, 1, OPTAB_DIRECT), 1);
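/* Illustrative sketch (assumed example): a sub-word read-modify-write such
   as

     unsigned char fetch_add (unsigned char *p, unsigned char v)
     {
       return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
     }

   goes through the expander above: the byte is widened to the containing
   SImode word, the operation is applied under the mode mask, and the new
   value is installed with a CS loop.  */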
7153 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7154 We need to emit DTP-relative relocations. */
7156 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7158 static void
7159 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7161 switch (size)
7163 case 4:
7164 fputs ("\t.long\t", file);
7165 break;
7166 case 8:
7167 fputs ("\t.quad\t", file);
7168 break;
7169 default:
7170 gcc_unreachable ();
7172 output_addr_const (file, x);
7173 fputs ("@DTPOFF", file);
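/* For example (illustrative): for a thread-local symbol tls_var and
   SIZE == 8 the hook above emits
     .quad	tls_var@DTPOFF
   into the debug information.  */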
7176 /* Return the proper mode for REGNO being represented in the dwarf
7177 unwind table. */
7178 machine_mode
7179 s390_dwarf_frame_reg_mode (int regno)
7181 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7183 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7184 if (GENERAL_REGNO_P (regno))
7185 save_mode = Pmode;
7187 /* The rightmost 64 bits of vector registers are call-clobbered. */
7188 if (GET_MODE_SIZE (save_mode) > 8)
7189 save_mode = DImode;
7191 return save_mode;
7194 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7195 /* Implement TARGET_MANGLE_TYPE. */
7197 static const char *
7198 s390_mangle_type (const_tree type)
7200 type = TYPE_MAIN_VARIANT (type);
7202 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7203 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7204 return NULL;
7206 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7207 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7208 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7209 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7211 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7212 && TARGET_LONG_DOUBLE_128)
7213 return "g";
7215 /* For all other types, use normal C++ mangling. */
7216 return NULL;
7218 #endif
7220 /* In the name of slightly smaller debug output, and to cater to
7221 general assembler lossage, recognize various UNSPEC sequences
7222 and turn them back into a direct symbol reference. */
7224 static rtx
7225 s390_delegitimize_address (rtx orig_x)
7227 rtx x, y;
7229 orig_x = delegitimize_mem_from_attrs (orig_x);
7230 x = orig_x;
7232 /* Extract the symbol ref from:
7233 (plus:SI (reg:SI 12 %r12)
7234 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7235 UNSPEC_GOTOFF/PLTOFF)))
7237 (plus:SI (reg:SI 12 %r12)
7238 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7239 UNSPEC_GOTOFF/PLTOFF)
7240 (const_int 4 [0x4])))) */
7241 if (GET_CODE (x) == PLUS
7242 && REG_P (XEXP (x, 0))
7243 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7244 && GET_CODE (XEXP (x, 1)) == CONST)
7246 HOST_WIDE_INT offset = 0;
7248 /* The const operand. */
7249 y = XEXP (XEXP (x, 1), 0);
7251 if (GET_CODE (y) == PLUS
7252 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7254 offset = INTVAL (XEXP (y, 1));
7255 y = XEXP (y, 0);
7258 if (GET_CODE (y) == UNSPEC
7259 && (XINT (y, 1) == UNSPEC_GOTOFF
7260 || XINT (y, 1) == UNSPEC_PLTOFF))
7261 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7264 if (GET_CODE (x) != MEM)
7265 return orig_x;
7267 x = XEXP (x, 0);
7268 if (GET_CODE (x) == PLUS
7269 && GET_CODE (XEXP (x, 1)) == CONST
7270 && GET_CODE (XEXP (x, 0)) == REG
7271 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7273 y = XEXP (XEXP (x, 1), 0);
7274 if (GET_CODE (y) == UNSPEC
7275 && XINT (y, 1) == UNSPEC_GOT)
7276 y = XVECEXP (y, 0, 0);
7277 else
7278 return orig_x;
7280 else if (GET_CODE (x) == CONST)
7282 /* Extract the symbol ref from:
7283 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7284 UNSPEC_PLT/GOTENT))) */
7286 y = XEXP (x, 0);
7287 if (GET_CODE (y) == UNSPEC
7288 && (XINT (y, 1) == UNSPEC_GOTENT
7289 || XINT (y, 1) == UNSPEC_PLT))
7290 y = XVECEXP (y, 0, 0);
7291 else
7292 return orig_x;
7294 else
7295 return orig_x;
7297 if (GET_MODE (orig_x) != Pmode)
7299 if (GET_MODE (orig_x) == BLKmode)
7300 return orig_x;
7301 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7302 if (y == NULL_RTX)
7303 return orig_x;
7305 return y;
7308 /* Output operand OP to stdio stream FILE.
7309 OP is an address (register + offset) which is not used to address data;
7310 instead the rightmost bits are interpreted as the value. */
7312 static void
7313 print_addrstyle_operand (FILE *file, rtx op)
7315 HOST_WIDE_INT offset;
7316 rtx base;
7318 /* Extract base register and offset. */
7319 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7320 gcc_unreachable ();
7322 /* Sanity check. */
7323 if (base)
7325 gcc_assert (GET_CODE (base) == REG);
7326 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7327 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7330 /* Offsets are restricted to twelve bits. */
7331 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7332 if (base)
7333 fprintf (file, "(%s)", reg_names[REGNO (base)]);
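/* For example (illustrative): a shift count of the form
   (plus (reg %r3) (const_int 5)) is printed by the function above as
   "5(%r3)", while a plain (const_int 63) is printed simply as "63".  */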
7336 /* Assigns the number of NOP halfwords to be emitted before and after the
7337 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7338 If hotpatching is disabled for the function, the values are set to zero.
7341 static void
7342 s390_function_num_hotpatch_hw (tree decl,
7343 int *hw_before,
7344 int *hw_after)
7346 tree attr;
7348 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7350 /* Handle the arguments of the hotpatch attribute. The values
7351 specified via attribute might override the cmdline argument
7352 values. */
7353 if (attr)
7355 tree args = TREE_VALUE (attr);
7357 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7358 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7360 else
7362 /* Use the values specified by the cmdline arguments. */
7363 *hw_before = s390_hotpatch_hw_before_label;
7364 *hw_after = s390_hotpatch_hw_after_label;
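/* Illustrative sketch (assumed example): a declaration like

     void __attribute__ ((hotpatch (1, 2))) foo (void);

   makes the function above return hw_before == 1 and hw_after == 2,
   overriding any -mhotpatch= command line value for foo.  */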
7368 /* Write the current .machine and .machinemode specification to the assembler
7369 file. */
7371 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7372 static void
7373 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7375 fprintf (asm_out_file, "\t.machinemode %s\n",
7376 (TARGET_ZARCH) ? "zarch" : "esa");
7377 fprintf (asm_out_file, "\t.machine \"%s",
7378 processor_table[s390_arch].binutils_name);
7379 if (S390_USE_ARCHITECTURE_MODIFIERS)
7381 int cpu_flags;
7383 cpu_flags = processor_flags_table[(int) s390_arch];
7384 if (TARGET_HTM && !(cpu_flags & PF_TX))
7385 fprintf (asm_out_file, "+htm");
7386 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7387 fprintf (asm_out_file, "+nohtm");
7388 if (TARGET_VX && !(cpu_flags & PF_VX))
7389 fprintf (asm_out_file, "+vx");
7390 else if (!TARGET_VX && (cpu_flags & PF_VX))
7391 fprintf (asm_out_file, "+novx");
7393 fprintf (asm_out_file, "\"\n");
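/* Illustrative output (assumed; the exact string depends on the selected
   -march/-mzarch options and assembler support):

	.machinemode zarch
	.machine "z13+nohtm"

   is roughly what the function above prints for -march=z13 with
   transactional execution disabled.  */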
7396 /* Write an extra function header before the very start of the function. */
7398 void
7399 s390_asm_output_function_prefix (FILE *asm_out_file,
7400 const char *fnname ATTRIBUTE_UNUSED)
7402 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7403 return;
7404 /* Since only the function specific options are saved but not the indications
7405 which options are set, it's too much work here to figure out which options
7406 have actually changed. Thus, generate .machine and .machinemode whenever a
7407 function has the target attribute or pragma. */
7408 fprintf (asm_out_file, "\t.machinemode push\n");
7409 fprintf (asm_out_file, "\t.machine push\n");
7410 s390_asm_output_machine_for_arch (asm_out_file);
7413 /* Write an extra function footer after the very end of the function. */
7415 void
7416 s390_asm_declare_function_size (FILE *asm_out_file,
7417 const char *fnname, tree decl)
7419 if (!flag_inhibit_size_directive)
7420 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7421 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7422 return;
7423 fprintf (asm_out_file, "\t.machine pop\n");
7424 fprintf (asm_out_file, "\t.machinemode pop\n");
7426 #endif
7428 /* Write the extra assembler code needed to declare a function properly. */
7430 void
7431 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7432 tree decl)
7434 int hw_before, hw_after;
7436 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7437 if (hw_before > 0)
7439 unsigned int function_alignment;
7440 int i;
7442 /* Add a trampoline code area before the function label and initialize it
7443 with two-byte nop instructions. This area can be overwritten with code
7444 that jumps to a patched version of the function. */
7445 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7446 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7447 hw_before);
7448 for (i = 1; i < hw_before; i++)
7449 fputs ("\tnopr\t%r0\n", asm_out_file);
7451 /* Note: The function label must be aligned so that (a) the bytes of the
7452 following nop do not cross a cacheline boundary, and (b) a jump address
7453 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7454 stored directly before the label without crossing a cacheline
7455 boundary. All this is necessary to make sure the trampoline code can
7456 be changed atomically.
7457 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7458 if there are NOPs before the function label, the alignment is placed
7459 before them. So it is necessary to duplicate the alignment after the
7460 NOPs. */
7461 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7462 if (! DECL_USER_ALIGN (decl))
7463 function_alignment = MAX (function_alignment,
7464 (unsigned int) align_functions);
7465 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7466 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7469 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7471 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7472 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7473 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7474 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7475 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7476 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7477 s390_warn_framesize);
7478 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7479 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7480 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7481 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7482 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7483 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7484 TARGET_PACKED_STACK);
7485 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7486 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7487 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7488 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7489 s390_warn_dynamicstack_p);
7491 ASM_OUTPUT_LABEL (asm_out_file, fname);
7492 if (hw_after > 0)
7493 asm_fprintf (asm_out_file,
7494 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7495 hw_after);
7498 /* Output machine-dependent UNSPECs occurring in address constant X
7499 in assembler syntax to stdio stream FILE. Returns true if the
7500 constant X could be recognized, false otherwise. */
7502 static bool
7503 s390_output_addr_const_extra (FILE *file, rtx x)
7505 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7506 switch (XINT (x, 1))
7508 case UNSPEC_GOTENT:
7509 output_addr_const (file, XVECEXP (x, 0, 0));
7510 fprintf (file, "@GOTENT");
7511 return true;
7512 case UNSPEC_GOT:
7513 output_addr_const (file, XVECEXP (x, 0, 0));
7514 fprintf (file, "@GOT");
7515 return true;
7516 case UNSPEC_GOTOFF:
7517 output_addr_const (file, XVECEXP (x, 0, 0));
7518 fprintf (file, "@GOTOFF");
7519 return true;
7520 case UNSPEC_PLT:
7521 output_addr_const (file, XVECEXP (x, 0, 0));
7522 fprintf (file, "@PLT");
7523 return true;
7524 case UNSPEC_PLTOFF:
7525 output_addr_const (file, XVECEXP (x, 0, 0));
7526 fprintf (file, "@PLTOFF");
7527 return true;
7528 case UNSPEC_TLSGD:
7529 output_addr_const (file, XVECEXP (x, 0, 0));
7530 fprintf (file, "@TLSGD");
7531 return true;
7532 case UNSPEC_TLSLDM:
7533 assemble_name (file, get_some_local_dynamic_name ());
7534 fprintf (file, "@TLSLDM");
7535 return true;
7536 case UNSPEC_DTPOFF:
7537 output_addr_const (file, XVECEXP (x, 0, 0));
7538 fprintf (file, "@DTPOFF");
7539 return true;
7540 case UNSPEC_NTPOFF:
7541 output_addr_const (file, XVECEXP (x, 0, 0));
7542 fprintf (file, "@NTPOFF");
7543 return true;
7544 case UNSPEC_GOTNTPOFF:
7545 output_addr_const (file, XVECEXP (x, 0, 0));
7546 fprintf (file, "@GOTNTPOFF");
7547 return true;
7548 case UNSPEC_INDNTPOFF:
7549 output_addr_const (file, XVECEXP (x, 0, 0));
7550 fprintf (file, "@INDNTPOFF");
7551 return true;
7554 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7555 switch (XINT (x, 1))
7557 case UNSPEC_POOL_OFFSET:
7558 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7559 output_addr_const (file, x);
7560 return true;
7562 return false;
7565 /* Output address operand ADDR in assembler syntax to
7566 stdio stream FILE. */
7568 void
7569 print_operand_address (FILE *file, rtx addr)
7571 struct s390_address ad;
7572 memset (&ad, 0, sizeof (s390_address));
7574 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7576 if (!TARGET_Z10)
7578 output_operand_lossage ("symbolic memory references are "
7579 "only supported on z10 or later");
7580 return;
7582 output_addr_const (file, addr);
7583 return;
7586 if (!s390_decompose_address (addr, &ad)
7587 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7588 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7589 output_operand_lossage ("cannot decompose address");
7591 if (ad.disp)
7592 output_addr_const (file, ad.disp);
7593 else
7594 fprintf (file, "0");
7596 if (ad.base && ad.indx)
7597 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7598 reg_names[REGNO (ad.base)]);
7599 else if (ad.base)
7600 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7603 /* Output operand X in assembler syntax to stdio stream FILE.
7604 CODE specified the format flag. The following format flags
7605 are recognized:
7607 'C': print opcode suffix for branch condition.
7608 'D': print opcode suffix for inverse branch condition.
7609 'E': print opcode suffix for branch on index instruction.
7610 'G': print the size of the operand in bytes.
7611 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7612 'M': print the second word of a TImode operand.
7613 'N': print the second word of a DImode operand.
7614 'O': print only the displacement of a memory reference or address.
7615 'R': print only the base register of a memory reference or address.
7616 'S': print S-type memory reference (base+displacement).
7617 'Y': print address style operand without index (e.g. shift count or setmem
7618 operand).
7620 'b': print integer X as if it's an unsigned byte.
7621 'c': print integer X as if it's a signed byte.
7622 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7623 'f': "end" contiguous bitmask X in SImode.
7624 'h': print integer X as if it's a signed halfword.
7625 'i': print the first nonzero HImode part of X.
7626 'j': print the first HImode part unequal to -1 of X.
7627 'k': print the first nonzero SImode part of X.
7628 'm': print the first SImode part unequal to -1 of X.
7629 'o': print integer X as if it's an unsigned 32-bit word.
7630 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7631 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7632 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7633 'x': print integer X as if it's an unsigned halfword.
7634 'v': print register number as vector register (v1 instead of f1).
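/* For example (illustrative): with operand 1 being (const_int -2) the
   template "%x1" prints 65534 while "%h1" prints -2; for a register pair
   starting at %r2, "%N1" refers to %r3.  */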
7637 void
7638 print_operand (FILE *file, rtx x, int code)
7640 HOST_WIDE_INT ival;
7642 switch (code)
7644 case 'C':
7645 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7646 return;
7648 case 'D':
7649 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7650 return;
7652 case 'E':
7653 if (GET_CODE (x) == LE)
7654 fprintf (file, "l");
7655 else if (GET_CODE (x) == GT)
7656 fprintf (file, "h");
7657 else
7658 output_operand_lossage ("invalid comparison operator "
7659 "for 'E' output modifier");
7660 return;
7662 case 'J':
7663 if (GET_CODE (x) == SYMBOL_REF)
7665 fprintf (file, "%s", ":tls_load:");
7666 output_addr_const (file, x);
7668 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7670 fprintf (file, "%s", ":tls_gdcall:");
7671 output_addr_const (file, XVECEXP (x, 0, 0));
7673 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7675 fprintf (file, "%s", ":tls_ldcall:");
7676 const char *name = get_some_local_dynamic_name ();
7677 gcc_assert (name);
7678 assemble_name (file, name);
7680 else
7681 output_operand_lossage ("invalid reference for 'J' output modifier");
7682 return;
7684 case 'G':
7685 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7686 return;
7688 case 'O':
7690 struct s390_address ad;
7691 int ret;
7693 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7695 if (!ret
7696 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7697 || ad.indx)
7699 output_operand_lossage ("invalid address for 'O' output modifier");
7700 return;
7703 if (ad.disp)
7704 output_addr_const (file, ad.disp);
7705 else
7706 fprintf (file, "0");
7708 return;
7710 case 'R':
7712 struct s390_address ad;
7713 int ret;
7715 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7717 if (!ret
7718 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7719 || ad.indx)
7721 output_operand_lossage ("invalid address for 'R' output modifier");
7722 return;
7725 if (ad.base)
7726 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7727 else
7728 fprintf (file, "0");
7730 return;
7732 case 'S':
7734 struct s390_address ad;
7735 int ret;
7737 if (!MEM_P (x))
7739 output_operand_lossage ("memory reference expected for "
7740 "'S' output modifier");
7741 return;
7743 ret = s390_decompose_address (XEXP (x, 0), &ad);
7745 if (!ret
7746 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7747 || ad.indx)
7749 output_operand_lossage ("invalid address for 'S' output modifier");
7750 return;
7753 if (ad.disp)
7754 output_addr_const (file, ad.disp);
7755 else
7756 fprintf (file, "0");
7758 if (ad.base)
7759 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7761 return;
7763 case 'N':
7764 if (GET_CODE (x) == REG)
7765 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7766 else if (GET_CODE (x) == MEM)
7767 x = change_address (x, VOIDmode,
7768 plus_constant (Pmode, XEXP (x, 0), 4));
7769 else
7770 output_operand_lossage ("register or memory expression expected "
7771 "for 'N' output modifier");
7772 break;
7774 case 'M':
7775 if (GET_CODE (x) == REG)
7776 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7777 else if (GET_CODE (x) == MEM)
7778 x = change_address (x, VOIDmode,
7779 plus_constant (Pmode, XEXP (x, 0), 8));
7780 else
7781 output_operand_lossage ("register or memory expression expected "
7782 "for 'M' output modifier");
7783 break;
7785 case 'Y':
7786 print_addrstyle_operand (file, x);
7787 return;
7790 switch (GET_CODE (x))
7792 case REG:
7793 /* Print FP regs as fx instead of vx when they are accessed
7794 through non-vector mode. */
7795 if (code == 'v'
7796 || VECTOR_NOFP_REG_P (x)
7797 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7798 || (VECTOR_REG_P (x)
7799 && (GET_MODE_SIZE (GET_MODE (x)) /
7800 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7801 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7802 else
7803 fprintf (file, "%s", reg_names[REGNO (x)]);
7804 break;
7806 case MEM:
7807 output_address (GET_MODE (x), XEXP (x, 0));
7808 break;
7810 case CONST:
7811 case CODE_LABEL:
7812 case LABEL_REF:
7813 case SYMBOL_REF:
7814 output_addr_const (file, x);
7815 break;
7817 case CONST_INT:
7818 ival = INTVAL (x);
7819 switch (code)
7821 case 0:
7822 break;
7823 case 'b':
7824 ival &= 0xff;
7825 break;
7826 case 'c':
7827 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7828 break;
7829 case 'x':
7830 ival &= 0xffff;
7831 break;
7832 case 'h':
7833 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7834 break;
7835 case 'i':
7836 ival = s390_extract_part (x, HImode, 0);
7837 break;
7838 case 'j':
7839 ival = s390_extract_part (x, HImode, -1);
7840 break;
7841 case 'k':
7842 ival = s390_extract_part (x, SImode, 0);
7843 break;
7844 case 'm':
7845 ival = s390_extract_part (x, SImode, -1);
7846 break;
7847 case 'o':
7848 ival &= 0xffffffff;
7849 break;
7850 case 'e': case 'f':
7851 case 's': case 't':
7853 int start, end;
7854 int len;
7855 bool ok;
7857 len = (code == 's' || code == 'e' ? 64 : 32);
7858 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7859 gcc_assert (ok);
7860 if (code == 's' || code == 't')
7861 ival = start;
7862 else
7863 ival = end;
7865 break;
7866 default:
7867 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7869 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7870 break;
7872 case CONST_WIDE_INT:
7873 if (code == 'b')
7874 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7875 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7876 else if (code == 'x')
7877 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7878 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7879 else if (code == 'h')
7880 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7881 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7882 else
7884 if (code == 0)
7885 output_operand_lossage ("invalid constant - try using "
7886 "an output modifier");
7887 else
7888 output_operand_lossage ("invalid constant for output modifier '%c'",
7889 code);
7891 break;
7892 case CONST_VECTOR:
7893 switch (code)
7895 case 'h':
7896 gcc_assert (const_vec_duplicate_p (x));
7897 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7898 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7899 break;
7900 case 'e':
7901 case 's':
7903 int start, end;
7904 bool ok;
7906 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7907 gcc_assert (ok);
7908 ival = (code == 's') ? start : end;
7909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7911 break;
7912 case 't':
7914 unsigned mask;
7915 bool ok = s390_bytemask_vector_p (x, &mask);
7916 gcc_assert (ok);
7917 fprintf (file, "%u", mask);
7919 break;
7921 default:
7922 output_operand_lossage ("invalid constant vector for output "
7923 "modifier '%c'", code);
7925 break;
7927 default:
7928 if (code == 0)
7929 output_operand_lossage ("invalid expression - try using "
7930 "an output modifier");
7931 else
7932 output_operand_lossage ("invalid expression for output "
7933 "modifier '%c'", code);
7934 break;
7938 /* Target hook for assembling integer objects. We need to define it
7939 here to work around a bug in some versions of GAS, which couldn't
7940 handle values smaller than INT_MIN when printed in decimal. */
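/* Illustrative example (editorial addition, not in the original sources):
   for (const_int -4294967296), which is smaller than INT_MIN, the hook
   below emits the two's complement value in hexadecimal, roughly
	.quad	0xffffffff00000000
   instead of leaving GAS to parse the large negative decimal.  */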
7942 static bool
7943 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7945 if (size == 8 && aligned_p
7946 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7948 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7949 INTVAL (x));
7950 return true;
7952 return default_assemble_integer (x, size, aligned_p);
7955 /* Returns true if register REGNO is used for forming
7956 a memory address in expression X. */
7958 static bool
7959 reg_used_in_mem_p (int regno, rtx x)
7961 enum rtx_code code = GET_CODE (x);
7962 int i, j;
7963 const char *fmt;
7965 if (code == MEM)
7967 if (refers_to_regno_p (regno, XEXP (x, 0)))
7968 return true;
7970 else if (code == SET
7971 && GET_CODE (SET_DEST (x)) == PC)
7973 if (refers_to_regno_p (regno, SET_SRC (x)))
7974 return true;
7977 fmt = GET_RTX_FORMAT (code);
7978 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7980 if (fmt[i] == 'e'
7981 && reg_used_in_mem_p (regno, XEXP (x, i)))
7982 return true;
7984 else if (fmt[i] == 'E')
7985 for (j = 0; j < XVECLEN (x, i); j++)
7986 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7987 return true;
7989 return false;
7992 /* Returns true if expression DEP_RTX sets an address register
7993 used by instruction INSN to address memory. */
7995 static bool
7996 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7998 rtx target, pat;
8000 if (NONJUMP_INSN_P (dep_rtx))
8001 dep_rtx = PATTERN (dep_rtx);
8003 if (GET_CODE (dep_rtx) == SET)
8005 target = SET_DEST (dep_rtx);
8006 if (GET_CODE (target) == STRICT_LOW_PART)
8007 target = XEXP (target, 0);
8008 while (GET_CODE (target) == SUBREG)
8009 target = SUBREG_REG (target);
8011 if (GET_CODE (target) == REG)
8013 int regno = REGNO (target);
8015 if (s390_safe_attr_type (insn) == TYPE_LA)
8017 pat = PATTERN (insn);
8018 if (GET_CODE (pat) == PARALLEL)
8020 gcc_assert (XVECLEN (pat, 0) == 2);
8021 pat = XVECEXP (pat, 0, 0);
8023 gcc_assert (GET_CODE (pat) == SET);
8024 return refers_to_regno_p (regno, SET_SRC (pat));
8026 else if (get_attr_atype (insn) == ATYPE_AGEN)
8027 return reg_used_in_mem_p (regno, PATTERN (insn));
8030 return false;
8033 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
8036 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8038 rtx dep_rtx = PATTERN (dep_insn);
8039 int i;
8041 if (GET_CODE (dep_rtx) == SET
8042 && addr_generation_dependency_p (dep_rtx, insn))
8043 return 1;
8044 else if (GET_CODE (dep_rtx) == PARALLEL)
8046 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8048 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8049 return 1;
8052 return 0;
8056 /* A C statement (sans semicolon) to update the integer scheduling priority
8057 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
8058 reduce the priority to execute INSN later. Do not define this macro if
8059 you do not need to adjust the scheduling priorities of insns.
8061 A STD instruction should be scheduled earlier,
8062 in order to use the bypass. */
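/* Illustrative numbers (editorial addition): with the shifts used below,
   an FP store with priority 4 is boosted to 4 << 3 = 32 and a GPR
   store/store-multiple to 4 << 1 = 8, so the scheduler tends to issue
   such stores early enough to make use of the bypass.  */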
8063 static int
8064 s390_adjust_priority (rtx_insn *insn, int priority)
8066 if (! INSN_P (insn))
8067 return priority;
8069 if (s390_tune <= PROCESSOR_2064_Z900)
8070 return priority;
8072 switch (s390_safe_attr_type (insn))
8074 case TYPE_FSTOREDF:
8075 case TYPE_FSTORESF:
8076 priority = priority << 3;
8077 break;
8078 case TYPE_STORE:
8079 case TYPE_STM:
8080 priority = priority << 1;
8081 break;
8082 default:
8083 break;
8085 return priority;
8089 /* The number of instructions that can be issued per cycle. */
8091 static int
8092 s390_issue_rate (void)
8094 switch (s390_tune)
8096 case PROCESSOR_2084_Z990:
8097 case PROCESSOR_2094_Z9_109:
8098 case PROCESSOR_2094_Z9_EC:
8099 case PROCESSOR_2817_Z196:
8100 return 3;
8101 case PROCESSOR_2097_Z10:
8102 return 2;
8103 case PROCESSOR_9672_G5:
8104 case PROCESSOR_9672_G6:
8105 case PROCESSOR_2064_Z900:
8106 /* Starting with EC12 we use the sched_reorder hook to take care
8107 of instruction dispatch constraints. The algorithm only
8108 picks the best instruction and assumes only a single
8109 instruction gets issued per cycle. */
8110 case PROCESSOR_2827_ZEC12:
8111 case PROCESSOR_2964_Z13:
8112 case PROCESSOR_3906_Z14:
8113 default:
8114 return 1;
8118 static int
8119 s390_first_cycle_multipass_dfa_lookahead (void)
8121 return 4;
8124 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8125 Fix up MEMs as required. */
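/* Sketch of the rewrite performed below (editorial addition):
     (mem (symbol_ref ...))            ; CONSTANT_POOL_ADDRESS_P
   becomes
     (mem (unspec [(symbol_ref ...) (reg base)] UNSPEC_LTREF))
   and for a symbol plus constant offset the offset is kept outside the
   UNSPEC via plus_constant.  */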
8127 static void
8128 annotate_constant_pool_refs (rtx *x)
8130 int i, j;
8131 const char *fmt;
8133 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8134 || !CONSTANT_POOL_ADDRESS_P (*x));
8136 /* Literal pool references can only occur inside a MEM ... */
8137 if (GET_CODE (*x) == MEM)
8139 rtx memref = XEXP (*x, 0);
8141 if (GET_CODE (memref) == SYMBOL_REF
8142 && CONSTANT_POOL_ADDRESS_P (memref))
8144 rtx base = cfun->machine->base_reg;
8145 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8146 UNSPEC_LTREF);
8148 *x = replace_equiv_address (*x, addr);
8149 return;
8152 if (GET_CODE (memref) == CONST
8153 && GET_CODE (XEXP (memref, 0)) == PLUS
8154 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8155 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8156 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8158 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8159 rtx sym = XEXP (XEXP (memref, 0), 0);
8160 rtx base = cfun->machine->base_reg;
8161 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8162 UNSPEC_LTREF);
8164 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8165 return;
8169 /* ... or a load-address type pattern. */
8170 if (GET_CODE (*x) == SET)
8172 rtx addrref = SET_SRC (*x);
8174 if (GET_CODE (addrref) == SYMBOL_REF
8175 && CONSTANT_POOL_ADDRESS_P (addrref))
8177 rtx base = cfun->machine->base_reg;
8178 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8179 UNSPEC_LTREF);
8181 SET_SRC (*x) = addr;
8182 return;
8185 if (GET_CODE (addrref) == CONST
8186 && GET_CODE (XEXP (addrref, 0)) == PLUS
8187 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8188 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8189 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8191 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8192 rtx sym = XEXP (XEXP (addrref, 0), 0);
8193 rtx base = cfun->machine->base_reg;
8194 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8195 UNSPEC_LTREF);
8197 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8198 return;
8202 /* Annotate LTREL_BASE as well. */
8203 if (GET_CODE (*x) == UNSPEC
8204 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8206 rtx base = cfun->machine->base_reg;
8207 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8208 UNSPEC_LTREL_BASE);
8209 return;
8212 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8213 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8215 if (fmt[i] == 'e')
8217 annotate_constant_pool_refs (&XEXP (*x, i));
8219 else if (fmt[i] == 'E')
8221 for (j = 0; j < XVECLEN (*x, i); j++)
8222 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8227 /* Split all branches that exceed the maximum distance.
8228 Returns true if this created a new literal pool entry. */
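/* Background (editorial note): the short relative branches encode a
   signed 16-bit halfword offset, i.e. a range of roughly +-64 KB, which
   is the 64KB figure used below.  An out-of-range branch is rewritten to
   load its target address from the literal pool into the return
   register and to branch through that register instead.  */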
8230 static int
8231 s390_split_branches (void)
8233 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8234 int new_literal = 0, ret;
8235 rtx_insn *insn;
8236 rtx pat, target;
8237 rtx *label;
8239 /* We need correct insn addresses. */
8241 shorten_branches (get_insns ());
8243 /* Find all branches that exceed 64KB, and split them. */
8245 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8247 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8248 continue;
8250 pat = PATTERN (insn);
8251 if (GET_CODE (pat) == PARALLEL)
8252 pat = XVECEXP (pat, 0, 0);
8253 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8254 continue;
8256 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8258 label = &SET_SRC (pat);
8260 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8262 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8263 label = &XEXP (SET_SRC (pat), 1);
8264 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8265 label = &XEXP (SET_SRC (pat), 2);
8266 else
8267 continue;
8269 else
8270 continue;
8272 if (get_attr_length (insn) <= 4)
8273 continue;
8275 /* We are going to use the return register as a scratch register;
8276 make sure it will be saved/restored by the prologue/epilogue. */
8277 cfun_frame_layout.save_return_addr_p = 1;
8279 if (!flag_pic)
8281 new_literal = 1;
8282 rtx mem = force_const_mem (Pmode, *label);
8283 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8284 insn);
8285 INSN_ADDRESSES_NEW (set_insn, -1);
8286 annotate_constant_pool_refs (&PATTERN (set_insn));
8288 target = temp_reg;
8290 else
8292 new_literal = 1;
8293 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8294 UNSPEC_LTREL_OFFSET);
8295 target = gen_rtx_CONST (Pmode, target);
8296 target = force_const_mem (Pmode, target);
8297 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8298 insn);
8299 INSN_ADDRESSES_NEW (set_insn, -1);
8300 annotate_constant_pool_refs (&PATTERN (set_insn));
8302 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8303 cfun->machine->base_reg),
8304 UNSPEC_LTREL_BASE);
8305 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8308 ret = validate_change (insn, label, target, 0);
8309 gcc_assert (ret);
8312 return new_literal;
8316 /* Find an annotated literal pool symbol referenced in RTX X,
8317 and store it at REF. Will abort if X contains references to
8318 more than one such pool symbol; multiple references to the same
8319 symbol are allowed, however.
8321 The rtx pointed to by REF must be initialized to NULL_RTX
8322 by the caller before calling this routine. */
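/* Typical use, mirroring s390_mainpool_start further below (editorial
   sketch):

     rtx pool_ref = NULL_RTX;
     find_constant_pool_ref (PATTERN (insn), &pool_ref);
     if (pool_ref)
       ...  */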
8324 static void
8325 find_constant_pool_ref (rtx x, rtx *ref)
8327 int i, j;
8328 const char *fmt;
8330 /* Ignore LTREL_BASE references. */
8331 if (GET_CODE (x) == UNSPEC
8332 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8333 return;
8334 /* Likewise POOL_ENTRY insns. */
8335 if (GET_CODE (x) == UNSPEC_VOLATILE
8336 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8337 return;
8339 gcc_assert (GET_CODE (x) != SYMBOL_REF
8340 || !CONSTANT_POOL_ADDRESS_P (x));
8342 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8344 rtx sym = XVECEXP (x, 0, 0);
8345 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8346 && CONSTANT_POOL_ADDRESS_P (sym));
8348 if (*ref == NULL_RTX)
8349 *ref = sym;
8350 else
8351 gcc_assert (*ref == sym);
8353 return;
8356 fmt = GET_RTX_FORMAT (GET_CODE (x));
8357 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8359 if (fmt[i] == 'e')
8361 find_constant_pool_ref (XEXP (x, i), ref);
8363 else if (fmt[i] == 'E')
8365 for (j = 0; j < XVECLEN (x, i); j++)
8366 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8371 /* Replace every reference to the annotated literal pool
8372 symbol REF in X by its base plus OFFSET. */
8374 static void
8375 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8377 int i, j;
8378 const char *fmt;
8380 gcc_assert (*x != ref);
8382 if (GET_CODE (*x) == UNSPEC
8383 && XINT (*x, 1) == UNSPEC_LTREF
8384 && XVECEXP (*x, 0, 0) == ref)
8386 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8387 return;
8390 if (GET_CODE (*x) == PLUS
8391 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8392 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8393 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8394 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8396 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8397 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8398 return;
8401 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8402 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8404 if (fmt[i] == 'e')
8406 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8408 else if (fmt[i] == 'E')
8410 for (j = 0; j < XVECLEN (*x, i); j++)
8411 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8416 /* Check whether X contains an UNSPEC_LTREL_BASE.
8417 Return its constant pool symbol if found, NULL_RTX otherwise. */
8419 static rtx
8420 find_ltrel_base (rtx x)
8422 int i, j;
8423 const char *fmt;
8425 if (GET_CODE (x) == UNSPEC
8426 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8427 return XVECEXP (x, 0, 0);
8429 fmt = GET_RTX_FORMAT (GET_CODE (x));
8430 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8432 if (fmt[i] == 'e')
8434 rtx fnd = find_ltrel_base (XEXP (x, i));
8435 if (fnd)
8436 return fnd;
8438 else if (fmt[i] == 'E')
8440 for (j = 0; j < XVECLEN (x, i); j++)
8442 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8443 if (fnd)
8444 return fnd;
8449 return NULL_RTX;
8452 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8454 static void
8455 replace_ltrel_base (rtx *x)
8457 int i, j;
8458 const char *fmt;
8460 if (GET_CODE (*x) == UNSPEC
8461 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8463 *x = XVECEXP (*x, 0, 1);
8464 return;
8467 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8468 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8470 if (fmt[i] == 'e')
8472 replace_ltrel_base (&XEXP (*x, i));
8474 else if (fmt[i] == 'E')
8476 for (j = 0; j < XVECLEN (*x, i); j++)
8477 replace_ltrel_base (&XVECEXP (*x, i, j));
8483 /* We keep a list of constants which we have to add to internal
8484 constant tables in the middle of large functions. */
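/* Editorial note: the constant_modes table below is ordered from the
   largest (16-byte) down to the smallest (1-byte) modes, so that
   s390_dump_pool, which walks it front to back, emits the pool entries
   in descending alignment order.  */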
8486 #define NR_C_MODES 32
8487 machine_mode constant_modes[NR_C_MODES] =
8489 TFmode, TImode, TDmode,
8490 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8491 V4SFmode, V2DFmode, V1TFmode,
8492 DFmode, DImode, DDmode,
8493 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8494 SFmode, SImode, SDmode,
8495 V4QImode, V2HImode, V1SImode, V1SFmode,
8496 HImode,
8497 V2QImode, V1HImode,
8498 QImode,
8499 V1QImode
8502 struct constant
8504 struct constant *next;
8505 rtx value;
8506 rtx_code_label *label;
8509 struct constant_pool
8511 struct constant_pool *next;
8512 rtx_insn *first_insn;
8513 rtx_insn *pool_insn;
8514 bitmap insns;
8515 rtx_insn *emit_pool_after;
8517 struct constant *constants[NR_C_MODES];
8518 struct constant *execute;
8519 rtx_code_label *label;
8520 int size;
8523 /* Allocate new constant_pool structure. */
8525 static struct constant_pool *
8526 s390_alloc_pool (void)
8528 struct constant_pool *pool;
8529 int i;
8531 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8532 pool->next = NULL;
8533 for (i = 0; i < NR_C_MODES; i++)
8534 pool->constants[i] = NULL;
8536 pool->execute = NULL;
8537 pool->label = gen_label_rtx ();
8538 pool->first_insn = NULL;
8539 pool->pool_insn = NULL;
8540 pool->insns = BITMAP_ALLOC (NULL);
8541 pool->size = 0;
8542 pool->emit_pool_after = NULL;
8544 return pool;
8547 /* Create new constant pool covering instructions starting at INSN
8548 and chain it to the end of POOL_LIST. */
8550 static struct constant_pool *
8551 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8553 struct constant_pool *pool, **prev;
8555 pool = s390_alloc_pool ();
8556 pool->first_insn = insn;
8558 for (prev = pool_list; *prev; prev = &(*prev)->next)
8560 *prev = pool;
8562 return pool;
8565 /* End range of instructions covered by POOL at INSN and emit
8566 placeholder insn representing the pool. */
8568 static void
8569 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8571 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8573 if (!insn)
8574 insn = get_last_insn ();
8576 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8577 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8580 /* Add INSN to the list of insns covered by POOL. */
8582 static void
8583 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8585 bitmap_set_bit (pool->insns, INSN_UID (insn));
8588 /* Return pool out of POOL_LIST that covers INSN. */
8590 static struct constant_pool *
8591 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8593 struct constant_pool *pool;
8595 for (pool = pool_list; pool; pool = pool->next)
8596 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8597 break;
8599 return pool;
8602 /* Add constant VAL of mode MODE to the constant pool POOL. */
8604 static void
8605 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8607 struct constant *c;
8608 int i;
8610 for (i = 0; i < NR_C_MODES; i++)
8611 if (constant_modes[i] == mode)
8612 break;
8613 gcc_assert (i != NR_C_MODES);
8615 for (c = pool->constants[i]; c != NULL; c = c->next)
8616 if (rtx_equal_p (val, c->value))
8617 break;
8619 if (c == NULL)
8621 c = (struct constant *) xmalloc (sizeof *c);
8622 c->value = val;
8623 c->label = gen_label_rtx ();
8624 c->next = pool->constants[i];
8625 pool->constants[i] = c;
8626 pool->size += GET_MODE_SIZE (mode);
8630 /* Return an rtx that represents the offset of X from the start of
8631 pool POOL. */
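/* The returned rtx has the shape (editorial sketch)
     (const (unspec [X (label_ref POOL->label)] UNSPEC_POOL_OFFSET))
   which is presumably resolved to the difference between X and the pool
   base label when the entry is output.  */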
8633 static rtx
8634 s390_pool_offset (struct constant_pool *pool, rtx x)
8636 rtx label;
8638 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8639 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8640 UNSPEC_POOL_OFFSET);
8641 return gen_rtx_CONST (GET_MODE (x), x);
8644 /* Find constant VAL of mode MODE in the constant pool POOL.
8645 Return an RTX describing the distance from the start of
8646 the pool to the location of the new constant. */
8648 static rtx
8649 s390_find_constant (struct constant_pool *pool, rtx val,
8650 machine_mode mode)
8652 struct constant *c;
8653 int i;
8655 for (i = 0; i < NR_C_MODES; i++)
8656 if (constant_modes[i] == mode)
8657 break;
8658 gcc_assert (i != NR_C_MODES);
8660 for (c = pool->constants[i]; c != NULL; c = c->next)
8661 if (rtx_equal_p (val, c->value))
8662 break;
8664 gcc_assert (c);
8666 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8669 /* Check whether INSN is an execute. Return the label_ref to its
8670 execute target template if so, NULL_RTX otherwise. */
8672 static rtx
8673 s390_execute_label (rtx insn)
8675 if (NONJUMP_INSN_P (insn)
8676 && GET_CODE (PATTERN (insn)) == PARALLEL
8677 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8678 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8679 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8681 return NULL_RTX;
8684 /* Add execute target for INSN to the constant pool POOL. */
8686 static void
8687 s390_add_execute (struct constant_pool *pool, rtx insn)
8689 struct constant *c;
8691 for (c = pool->execute; c != NULL; c = c->next)
8692 if (INSN_UID (insn) == INSN_UID (c->value))
8693 break;
8695 if (c == NULL)
8697 c = (struct constant *) xmalloc (sizeof *c);
8698 c->value = insn;
8699 c->label = gen_label_rtx ();
8700 c->next = pool->execute;
8701 pool->execute = c;
8702 pool->size += 6;
8706 /* Find execute target for INSN in the constant pool POOL.
8707 Return an RTX describing the distance from the start of
8708 the pool to the location of the execute target. */
8710 static rtx
8711 s390_find_execute (struct constant_pool *pool, rtx insn)
8713 struct constant *c;
8715 for (c = pool->execute; c != NULL; c = c->next)
8716 if (INSN_UID (insn) == INSN_UID (c->value))
8717 break;
8719 gcc_assert (c);
8721 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8724 /* For an execute INSN, extract the execute target template. */
8726 static rtx
8727 s390_execute_target (rtx insn)
8729 rtx pattern = PATTERN (insn);
8730 gcc_assert (s390_execute_label (insn));
8732 if (XVECLEN (pattern, 0) == 2)
8734 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8736 else
8738 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8739 int i;
8741 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8742 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8744 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8747 return pattern;
8750 /* Indicate that INSN cannot be duplicated. This is the case for
8751 execute insns that carry a unique label. */
8753 static bool
8754 s390_cannot_copy_insn_p (rtx_insn *insn)
8756 rtx label = s390_execute_label (insn);
8757 return label && label != const0_rtx;
8760 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8761 do not emit the pool base label. */
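/* For reference, the material emitted below comes out in this order
   (editorial summary): optional switch to the rodata section, pool
   alignment, the pool base label (unless REMOTE_LABEL), the constants
   grouped by mode, a 2-byte re-alignment, the execute target templates,
   the switch back to the previous section and a barrier; finally the
   placeholder insn is removed.  */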
8763 static void
8764 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8766 struct constant *c;
8767 rtx_insn *insn = pool->pool_insn;
8768 int i;
8770 /* Switch to rodata section. */
8771 if (TARGET_CPU_ZARCH)
8773 insn = emit_insn_after (gen_pool_section_start (), insn);
8774 INSN_ADDRESSES_NEW (insn, -1);
8777 /* Ensure minimum pool alignment. */
8778 if (TARGET_CPU_ZARCH)
8779 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8780 else
8781 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8782 INSN_ADDRESSES_NEW (insn, -1);
8784 /* Emit pool base label. */
8785 if (!remote_label)
8787 insn = emit_label_after (pool->label, insn);
8788 INSN_ADDRESSES_NEW (insn, -1);
8791 /* Dump constants in descending alignment requirement order,
8792 ensuring proper alignment for every constant. */
8793 for (i = 0; i < NR_C_MODES; i++)
8794 for (c = pool->constants[i]; c; c = c->next)
8796 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8797 rtx value = copy_rtx (c->value);
8798 if (GET_CODE (value) == CONST
8799 && GET_CODE (XEXP (value, 0)) == UNSPEC
8800 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8801 && XVECLEN (XEXP (value, 0), 0) == 1)
8802 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8804 insn = emit_label_after (c->label, insn);
8805 INSN_ADDRESSES_NEW (insn, -1);
8807 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8808 gen_rtvec (1, value),
8809 UNSPECV_POOL_ENTRY);
8810 insn = emit_insn_after (value, insn);
8811 INSN_ADDRESSES_NEW (insn, -1);
8814 /* Ensure minimum alignment for instructions. */
8815 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8816 INSN_ADDRESSES_NEW (insn, -1);
8818 /* Output in-pool execute template insns. */
8819 for (c = pool->execute; c; c = c->next)
8821 insn = emit_label_after (c->label, insn);
8822 INSN_ADDRESSES_NEW (insn, -1);
8824 insn = emit_insn_after (s390_execute_target (c->value), insn);
8825 INSN_ADDRESSES_NEW (insn, -1);
8828 /* Switch back to previous section. */
8829 if (TARGET_CPU_ZARCH)
8831 insn = emit_insn_after (gen_pool_section_end (), insn);
8832 INSN_ADDRESSES_NEW (insn, -1);
8835 insn = emit_barrier_after (insn);
8836 INSN_ADDRESSES_NEW (insn, -1);
8838 /* Remove placeholder insn. */
8839 remove_insn (pool->pool_insn);
8842 /* Free all memory used by POOL. */
8844 static void
8845 s390_free_pool (struct constant_pool *pool)
8847 struct constant *c, *next;
8848 int i;
8850 for (i = 0; i < NR_C_MODES; i++)
8851 for (c = pool->constants[i]; c; c = next)
8853 next = c->next;
8854 free (c);
8857 for (c = pool->execute; c; c = next)
8859 next = c->next;
8860 free (c);
8863 BITMAP_FREE (pool->insns);
8864 free (pool);
8868 /* Collect main literal pool. Return NULL on overflow. */
8870 static struct constant_pool *
8871 s390_mainpool_start (void)
8873 struct constant_pool *pool;
8874 rtx_insn *insn;
8876 pool = s390_alloc_pool ();
8878 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8880 if (NONJUMP_INSN_P (insn)
8881 && GET_CODE (PATTERN (insn)) == SET
8882 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8883 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8885 /* There might be two main_pool instructions if base_reg
8886 is call-clobbered; one for shrink-wrapped code and one
8887 for the rest. We want to keep the first. */
8888 if (pool->pool_insn)
8890 insn = PREV_INSN (insn);
8891 delete_insn (NEXT_INSN (insn));
8892 continue;
8894 pool->pool_insn = insn;
8897 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8899 s390_add_execute (pool, insn);
8901 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8903 rtx pool_ref = NULL_RTX;
8904 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8905 if (pool_ref)
8907 rtx constant = get_pool_constant (pool_ref);
8908 machine_mode mode = get_pool_mode (pool_ref);
8909 s390_add_constant (pool, constant, mode);
8913 /* If hot/cold partitioning is enabled we have to make sure that
8914 the literal pool is emitted in the same section where the
8915 initialization of the literal pool base pointer takes place.
8916 emit_pool_after is only used in the non-overflow case on non
8917 Z cpus where we can emit the literal pool at the end of the
8918 function body within the text section. */
8919 if (NOTE_P (insn)
8920 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8921 && !pool->emit_pool_after)
8922 pool->emit_pool_after = PREV_INSN (insn);
8925 gcc_assert (pool->pool_insn || pool->size == 0);
8927 if (pool->size >= 4096)
8929 /* We're going to chunkify the pool, so remove the main
8930 pool placeholder insn. */
8931 remove_insn (pool->pool_insn);
8933 s390_free_pool (pool);
8934 pool = NULL;
8937 /* If the function ends with the section where the literal pool
8938 should be emitted set the marker to its end. */
8939 if (pool && !pool->emit_pool_after)
8940 pool->emit_pool_after = get_last_insn ();
8942 return pool;
8945 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8946 Modify the current function to output the pool constants as well as
8947 the pool register setup instruction. */
8949 static void
8950 s390_mainpool_finish (struct constant_pool *pool)
8952 rtx base_reg = cfun->machine->base_reg;
8954 /* If the pool is empty, we're done. */
8955 if (pool->size == 0)
8957 /* We don't actually need a base register after all. */
8958 cfun->machine->base_reg = NULL_RTX;
8960 if (pool->pool_insn)
8961 remove_insn (pool->pool_insn);
8962 s390_free_pool (pool);
8963 return;
8966 /* We need correct insn addresses. */
8967 shorten_branches (get_insns ());
8969 /* On zSeries, we use a LARL to load the pool register. The pool is
8970 located in the .rodata section, so we emit it after the function. */
8971 if (TARGET_CPU_ZARCH)
8973 rtx set = gen_main_base_64 (base_reg, pool->label);
8974 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8975 INSN_ADDRESSES_NEW (insn, -1);
8976 remove_insn (pool->pool_insn);
8978 insn = get_last_insn ();
8979 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8980 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8982 s390_dump_pool (pool, 0);
8985 /* On S/390, if the total size of the function's code plus literal pool
8986 does not exceed 4096 bytes, we use BASR to set up a function base
8987 pointer, and emit the literal pool at the end of the function. */
8988 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8989 + pool->size + 8 /* alignment slop */ < 4096)
8991 rtx set = gen_main_base_31_small (base_reg, pool->label);
8992 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8993 INSN_ADDRESSES_NEW (insn, -1);
8994 remove_insn (pool->pool_insn);
8996 insn = emit_label_after (pool->label, insn);
8997 INSN_ADDRESSES_NEW (insn, -1);
8999 /* emit_pool_after will be set by s390_mainpool_start to the
9000 last insn of the section where the literal pool should be
9001 emitted. */
9002 insn = pool->emit_pool_after;
9004 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9005 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9007 s390_dump_pool (pool, 1);
9010 /* Otherwise, we emit an inline literal pool and use BASR to branch
9011 over it, setting up the pool register at the same time. */
9012 else
9014 rtx_code_label *pool_end = gen_label_rtx ();
9016 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9017 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9018 JUMP_LABEL (insn) = pool_end;
9019 INSN_ADDRESSES_NEW (insn, -1);
9020 remove_insn (pool->pool_insn);
9022 insn = emit_label_after (pool->label, insn);
9023 INSN_ADDRESSES_NEW (insn, -1);
9025 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9026 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9028 insn = emit_label_after (pool_end, pool->pool_insn);
9029 INSN_ADDRESSES_NEW (insn, -1);
9031 s390_dump_pool (pool, 1);
9035 /* Replace all literal pool references. */
9037 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9039 if (INSN_P (insn))
9040 replace_ltrel_base (&PATTERN (insn));
9042 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9044 rtx addr, pool_ref = NULL_RTX;
9045 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9046 if (pool_ref)
9048 if (s390_execute_label (insn))
9049 addr = s390_find_execute (pool, insn);
9050 else
9051 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9052 get_pool_mode (pool_ref));
9054 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9055 INSN_CODE (insn) = -1;
9061 /* Free the pool. */
9062 s390_free_pool (pool);
9065 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9066 We have decided we cannot use this pool, so revert all changes
9067 to the current function that were done by s390_mainpool_start. */
9068 static void
9069 s390_mainpool_cancel (struct constant_pool *pool)
9071 /* We didn't actually change the instruction stream, so simply
9072 free the pool memory. */
9073 s390_free_pool (pool);
9077 /* Chunkify the literal pool. */
9079 #define S390_POOL_CHUNK_MIN 0xc00
9080 #define S390_POOL_CHUNK_MAX 0xe00
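/* Editorial note: 0xc00 and 0xe00 are 3072 and 3584 bytes.  Keeping
   each chunk well below 4096 bytes presumably leaves headroom within
   the 12-bit unsigned displacement used to address pool entries for
   alignment padding and for the base register reload insns inserted
   later.  */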
9082 static struct constant_pool *
9083 s390_chunkify_start (void)
9085 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9086 int extra_size = 0;
9087 bitmap far_labels;
9088 rtx pending_ltrel = NULL_RTX;
9089 rtx_insn *insn;
9091 rtx (*gen_reload_base) (rtx, rtx) =
9092 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
9095 /* We need correct insn addresses. */
9097 shorten_branches (get_insns ());
9099 /* Scan all insns and move literals to pool chunks. */
9101 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9103 bool section_switch_p = false;
9105 /* Check for pending LTREL_BASE. */
9106 if (INSN_P (insn))
9108 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9109 if (ltrel_base)
9111 gcc_assert (ltrel_base == pending_ltrel);
9112 pending_ltrel = NULL_RTX;
9116 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9118 if (!curr_pool)
9119 curr_pool = s390_start_pool (&pool_list, insn);
9121 s390_add_execute (curr_pool, insn);
9122 s390_add_pool_insn (curr_pool, insn);
9124 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9126 rtx pool_ref = NULL_RTX;
9127 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9128 if (pool_ref)
9130 rtx constant = get_pool_constant (pool_ref);
9131 machine_mode mode = get_pool_mode (pool_ref);
9133 if (!curr_pool)
9134 curr_pool = s390_start_pool (&pool_list, insn);
9136 s390_add_constant (curr_pool, constant, mode);
9137 s390_add_pool_insn (curr_pool, insn);
9139 /* Don't split the pool chunk between a LTREL_OFFSET load
9140 and the corresponding LTREL_BASE. */
9141 if (GET_CODE (constant) == CONST
9142 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9143 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9145 gcc_assert (!pending_ltrel);
9146 pending_ltrel = pool_ref;
9151 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9153 if (curr_pool)
9154 s390_add_pool_insn (curr_pool, insn);
9155 /* An LTREL_BASE must follow within the same basic block. */
9156 gcc_assert (!pending_ltrel);
9159 if (NOTE_P (insn))
9160 switch (NOTE_KIND (insn))
9162 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9163 section_switch_p = true;
9164 break;
9165 case NOTE_INSN_VAR_LOCATION:
9166 case NOTE_INSN_CALL_ARG_LOCATION:
9167 continue;
9168 default:
9169 break;
9172 if (!curr_pool
9173 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9174 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9175 continue;
9177 if (TARGET_CPU_ZARCH)
9179 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9180 continue;
9182 s390_end_pool (curr_pool, NULL);
9183 curr_pool = NULL;
9185 else
9187 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9188 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9189 + extra_size;
9191 /* We will later have to insert base register reload insns.
9192 Those will have an effect on code size, which we need to
9193 consider here. This calculation makes rather pessimistic
9194 worst-case assumptions. */
9195 if (LABEL_P (insn))
9196 extra_size += 6;
9198 if (chunk_size < S390_POOL_CHUNK_MIN
9199 && curr_pool->size < S390_POOL_CHUNK_MIN
9200 && !section_switch_p)
9201 continue;
9203 /* Pool chunks can only be inserted after BARRIERs ... */
9204 if (BARRIER_P (insn))
9206 s390_end_pool (curr_pool, insn);
9207 curr_pool = NULL;
9208 extra_size = 0;
9211 /* ... so if we don't find one in time, create one. */
9212 else if (chunk_size > S390_POOL_CHUNK_MAX
9213 || curr_pool->size > S390_POOL_CHUNK_MAX
9214 || section_switch_p)
9216 rtx_insn *label, *jump, *barrier, *next, *prev;
9218 if (!section_switch_p)
9220 /* We can insert the barrier only after a 'real' insn. */
9221 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9222 continue;
9223 if (get_attr_length (insn) == 0)
9224 continue;
9225 /* Don't separate LTREL_BASE from the corresponding
9226 LTREL_OFFSET load. */
9227 if (pending_ltrel)
9228 continue;
9229 next = insn;
9232 insn = next;
9233 next = NEXT_INSN (insn);
9235 while (next
9236 && NOTE_P (next)
9237 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
9238 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
9240 else
9242 gcc_assert (!pending_ltrel);
9244 /* The old pool has to end before the section switch
9245 note in order to make it part of the current
9246 section. */
9247 insn = PREV_INSN (insn);
9250 label = gen_label_rtx ();
9251 prev = insn;
9252 if (prev && NOTE_P (prev))
9253 prev = prev_nonnote_insn (prev);
9254 if (prev)
9255 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9256 INSN_LOCATION (prev));
9257 else
9258 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9259 barrier = emit_barrier_after (jump);
9260 insn = emit_label_after (label, barrier);
9261 JUMP_LABEL (jump) = label;
9262 LABEL_NUSES (label) = 1;
9264 INSN_ADDRESSES_NEW (jump, -1);
9265 INSN_ADDRESSES_NEW (barrier, -1);
9266 INSN_ADDRESSES_NEW (insn, -1);
9268 s390_end_pool (curr_pool, barrier);
9269 curr_pool = NULL;
9270 extra_size = 0;
9275 if (curr_pool)
9276 s390_end_pool (curr_pool, NULL);
9277 gcc_assert (!pending_ltrel);
9279 /* Find all labels that are branched into
9280 from an insn belonging to a different chunk. */
9282 far_labels = BITMAP_ALLOC (NULL);
9284 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9286 rtx_jump_table_data *table;
9288 /* Labels marked with LABEL_PRESERVE_P can be target
9289 of non-local jumps, so we have to mark them.
9290 The same holds for named labels.
9292 Don't do that, however, if it is the label before
9293 a jump table. */
9295 if (LABEL_P (insn)
9296 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9298 rtx_insn *vec_insn = NEXT_INSN (insn);
9299 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9300 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9302 /* Check potential targets in a table jump (casesi_jump). */
9303 else if (tablejump_p (insn, NULL, &table))
9305 rtx vec_pat = PATTERN (table);
9306 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9308 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9310 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9312 if (s390_find_pool (pool_list, label)
9313 != s390_find_pool (pool_list, insn))
9314 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9317 /* If we have a direct jump (conditional or unconditional),
9318 check all potential targets. */
9319 else if (JUMP_P (insn))
9321 rtx pat = PATTERN (insn);
9323 if (GET_CODE (pat) == PARALLEL)
9324 pat = XVECEXP (pat, 0, 0);
9326 if (GET_CODE (pat) == SET)
9328 rtx label = JUMP_LABEL (insn);
9329 if (label && !ANY_RETURN_P (label))
9331 if (s390_find_pool (pool_list, label)
9332 != s390_find_pool (pool_list, insn))
9333 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9339 /* Insert base register reload insns before every pool. */
9341 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9343 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9344 curr_pool->label);
9345 rtx_insn *insn = curr_pool->first_insn;
9346 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9349 /* Insert base register reload insns at every far label. */
9351 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9352 if (LABEL_P (insn)
9353 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9355 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9356 if (pool)
9358 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9359 pool->label);
9360 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9365 BITMAP_FREE (far_labels);
9368 /* Recompute insn addresses. */
9370 init_insn_lengths ();
9371 shorten_branches (get_insns ());
9373 return pool_list;
9376 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9377 After we have decided to use this list, finish implementing
9378 all changes to the current function as required. */
9380 static void
9381 s390_chunkify_finish (struct constant_pool *pool_list)
9383 struct constant_pool *curr_pool = NULL;
9384 rtx_insn *insn;
9387 /* Replace all literal pool references. */
9389 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9391 if (INSN_P (insn))
9392 replace_ltrel_base (&PATTERN (insn));
9394 curr_pool = s390_find_pool (pool_list, insn);
9395 if (!curr_pool)
9396 continue;
9398 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9400 rtx addr, pool_ref = NULL_RTX;
9401 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9402 if (pool_ref)
9404 if (s390_execute_label (insn))
9405 addr = s390_find_execute (curr_pool, insn);
9406 else
9407 addr = s390_find_constant (curr_pool,
9408 get_pool_constant (pool_ref),
9409 get_pool_mode (pool_ref));
9411 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9412 INSN_CODE (insn) = -1;
9417 /* Dump out all literal pools. */
9419 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9420 s390_dump_pool (curr_pool, 0);
9422 /* Free pool list. */
9424 while (pool_list)
9426 struct constant_pool *next = pool_list->next;
9427 s390_free_pool (pool_list);
9428 pool_list = next;
9432 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9433 We have decided we cannot use this list, so revert all changes
9434 to the current function that were done by s390_chunkify_start. */
9436 static void
9437 s390_chunkify_cancel (struct constant_pool *pool_list)
9439 struct constant_pool *curr_pool = NULL;
9440 rtx_insn *insn;
9442 /* Remove all pool placeholder insns. */
9444 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9446 /* Did we insert an extra barrier? Remove it. */
9447 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9448 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9449 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9451 if (jump && JUMP_P (jump)
9452 && barrier && BARRIER_P (barrier)
9453 && label && LABEL_P (label)
9454 && GET_CODE (PATTERN (jump)) == SET
9455 && SET_DEST (PATTERN (jump)) == pc_rtx
9456 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9457 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9459 remove_insn (jump);
9460 remove_insn (barrier);
9461 remove_insn (label);
9464 remove_insn (curr_pool->pool_insn);
9467 /* Remove all base register reload insns. */
9469 for (insn = get_insns (); insn; )
9471 rtx_insn *next_insn = NEXT_INSN (insn);
9473 if (NONJUMP_INSN_P (insn)
9474 && GET_CODE (PATTERN (insn)) == SET
9475 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9476 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9477 remove_insn (insn);
9479 insn = next_insn;
9482 /* Free pool list. */
9484 while (pool_list)
9486 struct constant_pool *next = pool_list->next;
9487 s390_free_pool (pool_list);
9488 pool_list = next;
9492 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9494 void
9495 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9497 switch (GET_MODE_CLASS (mode))
9499 case MODE_FLOAT:
9500 case MODE_DECIMAL_FLOAT:
9501 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9503 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9504 as_a <scalar_float_mode> (mode), align);
9505 break;
9507 case MODE_INT:
9508 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9509 mark_symbol_refs_as_used (exp);
9510 break;
9512 case MODE_VECTOR_INT:
9513 case MODE_VECTOR_FLOAT:
9515 int i;
9516 machine_mode inner_mode;
9517 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9519 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9520 for (i = 0; i < XVECLEN (exp, 0); i++)
9521 s390_output_pool_entry (XVECEXP (exp, 0, i),
9522 inner_mode,
9523 i == 0
9524 ? align
9525 : GET_MODE_BITSIZE (inner_mode));
9527 break;
9529 default:
9530 gcc_unreachable ();
9535 /* Return an RTL expression representing the value of the return address
9536 for the frame COUNT steps up from the current frame. FRAME is the
9537 frame pointer of that frame. */
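/* Worked example (editorial, assuming the usual register assignment):
   with the default non-packed layout on 64 bit, RETURN_REGNUM is 14 and
   UNITS_PER_LONG is 8, so for COUNT > 0 the return address is loaded
   from FRAME + 14 * 8 = FRAME + 112.  */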
9540 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9542 int offset;
9543 rtx addr;
9545 /* Without backchain, we fail for all but the current frame. */
9547 if (!TARGET_BACKCHAIN && count > 0)
9548 return NULL_RTX;
9550 /* For the current frame, we need to make sure the initial
9551 value of RETURN_REGNUM is actually saved. */
9553 if (count == 0)
9555 /* On non-z architectures branch splitting could overwrite r14. */
9556 if (TARGET_CPU_ZARCH)
9557 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9558 else
9560 cfun_frame_layout.save_return_addr_p = true;
9561 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9565 if (TARGET_PACKED_STACK)
9566 offset = -2 * UNITS_PER_LONG;
9567 else
9568 offset = RETURN_REGNUM * UNITS_PER_LONG;
9570 addr = plus_constant (Pmode, frame, offset);
9571 addr = memory_address (Pmode, addr);
9572 return gen_rtx_MEM (Pmode, addr);
9575 /* Return an RTL expression representing the back chain stored in
9576 the current stack frame. */
9579 s390_back_chain_rtx (void)
9581 rtx chain;
9583 gcc_assert (TARGET_BACKCHAIN);
9585 if (TARGET_PACKED_STACK)
9586 chain = plus_constant (Pmode, stack_pointer_rtx,
9587 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9588 else
9589 chain = stack_pointer_rtx;
9591 chain = gen_rtx_MEM (Pmode, chain);
9592 return chain;
9595 /* Find the first call-clobbered register unused in the current function.
9596 This could be used as a base register in a leaf function
9597 or for holding the return address before the epilogue. */
9599 static int
9600 find_unused_clobbered_reg (void)
9602 int i;
9603 for (i = 0; i < 6; i++)
9604 if (!df_regs_ever_live_p (i))
9605 return i;
9606 return 0;
9610 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9611 clobbered hard regs in SETREG. */
9613 static void
9614 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9616 char *regs_ever_clobbered = (char *)data;
9617 unsigned int i, regno;
9618 machine_mode mode = GET_MODE (setreg);
9620 if (GET_CODE (setreg) == SUBREG)
9622 rtx inner = SUBREG_REG (setreg);
9623 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9624 return;
9625 regno = subreg_regno (setreg);
9627 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9628 regno = REGNO (setreg);
9629 else
9630 return;
9632 for (i = regno;
9633 i < regno + HARD_REGNO_NREGS (regno, mode);
9634 i++)
9635 regs_ever_clobbered[i] = 1;
9638 /* Walks through all basic blocks of the current function looking
9639 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9640 of the passed char array REGS_EVER_CLOBBERED are set to one for
9641 each of those regs. */
9643 static void
9644 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9646 basic_block cur_bb;
9647 rtx_insn *cur_insn;
9648 unsigned int i;
9650 memset (regs_ever_clobbered, 0, 32);
9652 /* For non-leaf functions we have to consider all call clobbered regs to be
9653 clobbered. */
9654 if (!crtl->is_leaf)
9656 for (i = 0; i < 32; i++)
9657 regs_ever_clobbered[i] = call_really_used_regs[i];
9660 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9661 this work is done by liveness analysis (mark_regs_live_at_end).
9662 Special care is needed for functions containing landing pads. Landing pads
9663 may use the eh registers, but the code which sets these registers is not
9664 contained in that function. Hence s390_regs_ever_clobbered is not able to
9665 deal with this automatically. */
9666 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9667 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9668 if (crtl->calls_eh_return
9669 || (cfun->machine->has_landing_pad_p
9670 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9671 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9673 /* For nonlocal gotos all call-saved registers have to be saved.
9674 This flag is also set for the unwinding code in libgcc.
9675 See expand_builtin_unwind_init. For regs_ever_live this is done by
9676 reload. */
9677 if (crtl->saves_all_registers)
9678 for (i = 0; i < 32; i++)
9679 if (!call_really_used_regs[i])
9680 regs_ever_clobbered[i] = 1;
9682 FOR_EACH_BB_FN (cur_bb, cfun)
9684 FOR_BB_INSNS (cur_bb, cur_insn)
9686 rtx pat;
9688 if (!INSN_P (cur_insn))
9689 continue;
9691 pat = PATTERN (cur_insn);
9693 /* Ignore GPR restore insns. */
9694 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9696 if (GET_CODE (pat) == SET
9697 && GENERAL_REG_P (SET_DEST (pat)))
9699 /* lgdr */
9700 if (GET_MODE (SET_SRC (pat)) == DImode
9701 && FP_REG_P (SET_SRC (pat)))
9702 continue;
9704 /* l / lg */
9705 if (GET_CODE (SET_SRC (pat)) == MEM)
9706 continue;
9709 /* lm / lmg */
9710 if (GET_CODE (pat) == PARALLEL
9711 && load_multiple_operation (pat, VOIDmode))
9712 continue;
9715 note_stores (pat,
9716 s390_reg_clobbered_rtx,
9717 regs_ever_clobbered);
9722 /* Determine the frame area which actually has to be accessed
9723 in the function epilogue. The values are stored at the
9724 given pointers AREA_BOTTOM (address of the lowest used stack
9725 address) and AREA_TOP (address of the first item which does
9726 not belong to the stack frame). */
9728 static void
9729 s390_frame_area (int *area_bottom, int *area_top)
9731 int b, t;
9733 b = INT_MAX;
9734 t = INT_MIN;
9736 if (cfun_frame_layout.first_restore_gpr != -1)
9738 b = (cfun_frame_layout.gprs_offset
9739 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9740 t = b + (cfun_frame_layout.last_restore_gpr
9741 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9744 if (TARGET_64BIT && cfun_save_high_fprs_p)
9746 b = MIN (b, cfun_frame_layout.f8_offset);
9747 t = MAX (t, (cfun_frame_layout.f8_offset
9748 + cfun_frame_layout.high_fprs * 8));
9751 if (!TARGET_64BIT)
9753 if (cfun_fpr_save_p (FPR4_REGNUM))
9755 b = MIN (b, cfun_frame_layout.f4_offset);
9756 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9758 if (cfun_fpr_save_p (FPR6_REGNUM))
9760 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9761 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9764 *area_bottom = b;
9765 *area_top = t;
9767 /* Update gpr_save_slots in the frame layout trying to make use of
9768 FPRs as GPR save slots.
9769 This is a helper routine of s390_register_info. */
9771 static void
9772 s390_register_info_gprtofpr ()
9774 int save_reg_slot = FPR0_REGNUM;
9775 int i, j;
9777 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9778 return;
9780 /* builtin_eh_return needs to be able to modify the return address
9781 on the stack. It could also adjust the FPR save slot instead but
9782 is it worth the trouble?! */
9783 if (crtl->calls_eh_return)
9784 return;
9786 for (i = 15; i >= 6; i--)
9788 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9789 continue;
9791 /* Advance to the next FP register which can be used as a
9792 GPR save slot. */
9793 while ((!call_really_used_regs[save_reg_slot]
9794 || df_regs_ever_live_p (save_reg_slot)
9795 || cfun_fpr_save_p (save_reg_slot))
9796 && FP_REGNO_P (save_reg_slot))
9797 save_reg_slot++;
9798 if (!FP_REGNO_P (save_reg_slot))
9800 /* We only want to use ldgr/lgdr if we can get rid of
9801 stm/lm entirely. So undo the gpr slot allocation in
9802 case we ran out of FPR save slots. */
9803 for (j = 6; j <= 15; j++)
9804 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9805 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9806 break;
9808 cfun_gpr_save_slot (i) = save_reg_slot++;
9812 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9813 stdarg.
9814 This is a helper routine for s390_register_info. */
9816 static void
9817 s390_register_info_stdarg_fpr ()
9819 int i;
9820 int min_fpr;
9821 int max_fpr;
9823 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9824 f0-f4 for 64 bit. */
9825 if (!cfun->stdarg
9826 || !TARGET_HARD_FLOAT
9827 || !cfun->va_list_fpr_size
9828 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9829 return;
9831 min_fpr = crtl->args.info.fprs;
9832 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9833 if (max_fpr >= FP_ARG_NUM_REG)
9834 max_fpr = FP_ARG_NUM_REG - 1;
9836 /* FPR argument regs start at f0. */
9837 min_fpr += FPR0_REGNUM;
9838 max_fpr += FPR0_REGNUM;
9840 for (i = min_fpr; i <= max_fpr; i++)
9841 cfun_set_fpr_save (i);
9844 /* Reserve the GPR save slots for GPRs which need to be saved due to
9845 stdarg.
9846 This is a helper routine for s390_register_info. */
9848 static void
9849 s390_register_info_stdarg_gpr ()
9851 int i;
9852 int min_gpr;
9853 int max_gpr;
9855 if (!cfun->stdarg
9856 || !cfun->va_list_gpr_size
9857 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9858 return;
9860 min_gpr = crtl->args.info.gprs;
9861 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9862 if (max_gpr >= GP_ARG_NUM_REG)
9863 max_gpr = GP_ARG_NUM_REG - 1;
9865 /* GPR argument regs start at r2. */
9866 min_gpr += GPR2_REGNUM;
9867 max_gpr += GPR2_REGNUM;
9869 /* If r6 was supposed to be saved into an FPR and now needs to go to
9870 the stack for vararg we have to adjust the restore range to make
9871 sure that the restore is done from stack as well. */
9872 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9873 && min_gpr <= GPR6_REGNUM
9874 && max_gpr >= GPR6_REGNUM)
9876 if (cfun_frame_layout.first_restore_gpr == -1
9877 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9878 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9879 if (cfun_frame_layout.last_restore_gpr == -1
9880 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9881 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9884 if (cfun_frame_layout.first_save_gpr == -1
9885 || cfun_frame_layout.first_save_gpr > min_gpr)
9886 cfun_frame_layout.first_save_gpr = min_gpr;
9888 if (cfun_frame_layout.last_save_gpr == -1
9889 || cfun_frame_layout.last_save_gpr < max_gpr)
9890 cfun_frame_layout.last_save_gpr = max_gpr;
9892 for (i = min_gpr; i <= max_gpr; i++)
9893 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9896 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9897 prologue and epilogue. */
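/* For example (editorial): if r6, r7, r14 and r15 end up with stack
   save slots, the loops below yield the single contiguous range
   r6..r15, registers in between included, matching stm(g)/lm(g) which
   always operate on a consecutive register range.  */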
9899 static void
9900 s390_register_info_set_ranges ()
9902 int i, j;
9904 /* Find the first and the last save slot supposed to use the stack
9905 to set the restore range.
9906 Vararg regs might be marked to be saved to the stack, but only the
9907 call-saved regs really need restoring (i.e. r6). This code
9908 assumes that the vararg regs have not yet been recorded in
9909 cfun_gpr_save_slot. */
9910 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9911 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9912 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9913 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9914 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9915 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9918 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9919 for registers which need to be saved in function prologue.
9920 This function can be used until the insns emitted for save/restore
9921 of the regs are visible in the RTL stream. */
9923 static void
9924 s390_register_info ()
9926 int i;
9927 char clobbered_regs[32];
9929 gcc_assert (!epilogue_completed);
9931 if (reload_completed)
9932 /* After reload we rely on our own routine to determine which
9933 registers need saving. */
9934 s390_regs_ever_clobbered (clobbered_regs);
9935 else
9936 /* During reload we use regs_ever_live as a base since reload
9937 does changes in there which we otherwise would not be aware
9938 of. */
9939 for (i = 0; i < 32; i++)
9940 clobbered_regs[i] = df_regs_ever_live_p (i);
9942 for (i = 0; i < 32; i++)
9943 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9945 /* Mark the call-saved FPRs which need to be saved.
9946 This needs to be done before checking the special GPRs since the
9947 stack pointer usage depends on whether high FPRs have to be saved
9948 or not. */
9949 cfun_frame_layout.fpr_bitmap = 0;
9950 cfun_frame_layout.high_fprs = 0;
9951 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9952 if (clobbered_regs[i] && !call_really_used_regs[i])
9954 cfun_set_fpr_save (i);
9955 if (i >= FPR8_REGNUM)
9956 cfun_frame_layout.high_fprs++;
9959 /* Register 12 is used for GOT address, but also as temp in prologue
9960 for split-stack stdarg functions (unless r14 is available). */
9961 clobbered_regs[12]
9962 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9963 || (flag_split_stack && cfun->stdarg
9964 && (crtl->is_leaf || TARGET_TPF_PROFILING
9965 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9967 clobbered_regs[BASE_REGNUM]
9968 |= (cfun->machine->base_reg
9969 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9971 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9972 |= !!frame_pointer_needed;
9974 /* On pre-z900 machines this might take until machine-dependent
9975 reorg to decide.
9976 save_return_addr_p will only be set on non-zarch machines so
9977 there is no risk that r14 goes into an FPR instead of a stack
9978 slot. */
9979 clobbered_regs[RETURN_REGNUM]
9980 |= (!crtl->is_leaf
9981 || TARGET_TPF_PROFILING
9982 || cfun->machine->split_branches_pending_p
9983 || cfun_frame_layout.save_return_addr_p
9984 || crtl->calls_eh_return);
9986 clobbered_regs[STACK_POINTER_REGNUM]
9987 |= (!crtl->is_leaf
9988 || TARGET_TPF_PROFILING
9989 || cfun_save_high_fprs_p
9990 || get_frame_size () > 0
9991 || (reload_completed && cfun_frame_layout.frame_size > 0)
9992 || cfun->calls_alloca);
9994 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9996 for (i = 6; i < 16; i++)
9997 if (clobbered_regs[i])
9998 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10000 s390_register_info_stdarg_fpr ();
10001 s390_register_info_gprtofpr ();
10002 s390_register_info_set_ranges ();
10003 /* stdarg functions might need to save GPRs 2 to 6. This might
10004 override the GPR->FPR save decision made by
10005 s390_register_info_gprtofpr for r6 since vararg regs must go to
10006 the stack. */
10007 s390_register_info_stdarg_gpr ();
10010 /* This function is called by s390_optimize_prologue in order to get
10011 rid of unnecessary GPR save/restore instructions. The register info
10012 for the GPRs is re-computed and the ranges are re-calculated. */
10014 static void
10015 s390_optimize_register_info ()
10017 char clobbered_regs[32];
10018 int i;
10020 gcc_assert (epilogue_completed);
10021 gcc_assert (!cfun->machine->split_branches_pending_p);
10023 s390_regs_ever_clobbered (clobbered_regs);
10025 for (i = 0; i < 32; i++)
10026 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10028 /* There is still special treatment needed for cases invisible to
10029 s390_regs_ever_clobbered. */
10030 clobbered_regs[RETURN_REGNUM]
10031 |= (TARGET_TPF_PROFILING
10032 /* When expanding builtin_return_addr in ESA mode we do not
10033 know whether r14 will later be needed as scratch reg when
10034 doing branch splitting. So the builtin always accesses the
10035 r14 save slot and we need to stick to the save/restore
10036 decision for r14 even if it turns out that it didn't get
10037 clobbered. */
10038 || cfun_frame_layout.save_return_addr_p
10039 || crtl->calls_eh_return);
10041 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10043 for (i = 6; i < 16; i++)
10044 if (!clobbered_regs[i])
10045 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10047 s390_register_info_set_ranges ();
10048 s390_register_info_stdarg_gpr ();
10051 /* Fill cfun->machine with info about frame of current function. */
10053 static void
10054 s390_frame_info (void)
10056 HOST_WIDE_INT lowest_offset;
10058 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10059 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10061 /* The va_arg builtin uses a constant distance of 16 *
10062 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10063 pointer. So even if we are going to save the stack pointer in an
10064 FPR we need the stack space in order to keep the offsets
10065 correct. */
10066 if (cfun->stdarg && cfun_save_arg_fprs_p)
10068 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10070 if (cfun_frame_layout.first_save_gpr_slot == -1)
10071 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10074 cfun_frame_layout.frame_size = get_frame_size ();
10075 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10076 fatal_error (input_location,
10077 "total size of local variables exceeds architecture limit");
10079 if (!TARGET_PACKED_STACK)
10081 /* Fixed stack layout. */
10082 cfun_frame_layout.backchain_offset = 0;
10083 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10084 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10085 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10086 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10087 * UNITS_PER_LONG);
10089 else if (TARGET_BACKCHAIN)
10091 /* Kernel stack layout - packed stack, backchain, no float */
10092 gcc_assert (TARGET_SOFT_FLOAT);
10093 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10094 - UNITS_PER_LONG);
10096 /* The distance between the backchain and the return address
10097 save slot must not change. So we always need a slot for the
10098 stack pointer which resides in between. */
10099 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10101 cfun_frame_layout.gprs_offset
10102 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10104 /* FPRs will not be saved. Nevertheless pick sane values to
10105 keep area calculations valid. */
10106 cfun_frame_layout.f0_offset =
10107 cfun_frame_layout.f4_offset =
10108 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10110 else
10112 int num_fprs;
10114 /* Packed stack layout without backchain. */
10116 /* With stdarg FPRs need their dedicated slots. */
10117 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10118 : (cfun_fpr_save_p (FPR4_REGNUM) +
10119 cfun_fpr_save_p (FPR6_REGNUM)));
10120 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10122 num_fprs = (cfun->stdarg ? 2
10123 : (cfun_fpr_save_p (FPR0_REGNUM)
10124 + cfun_fpr_save_p (FPR2_REGNUM)));
10125 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10127 cfun_frame_layout.gprs_offset
10128 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10130 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10131 - cfun_frame_layout.high_fprs * 8);
10134 if (cfun_save_high_fprs_p)
10135 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10137 if (!crtl->is_leaf)
10138 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10140 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10141 sized area at the bottom of the stack. This is required also for
10142 leaf functions. When GCC generates a local stack reference it
10143 will always add STACK_POINTER_OFFSET to all these references. */
10144 if (crtl->is_leaf
10145 && !TARGET_TPF_PROFILING
10146 && cfun_frame_layout.frame_size == 0
10147 && !cfun->calls_alloca)
10148 return;
10150 /* Calculate the number of bytes we have used in our own register
10151 save area. With the packed stack layout we can re-use the
10152 remaining bytes for normal stack elements. */
10154 if (TARGET_PACKED_STACK)
10155 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10156 cfun_frame_layout.f4_offset),
10157 cfun_frame_layout.gprs_offset);
10158 else
10159 lowest_offset = 0;
10161 if (TARGET_BACKCHAIN)
10162 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10164 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10166 /* If, under 31 bit, an odd number of GPRs has to be saved, we have
10167 to adjust the frame size to sustain 8-byte alignment of stack
10168 frames. */
10169 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10170 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10171 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
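 /* As an illustration (assuming STACK_BOUNDARY is 64 bits, i.e. 8 bytes):
 a raw frame size of 180 bytes is rounded up above to
 (180 + 7) & ~7 = 184 bytes. */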
10174 /* Generate frame layout. Fills in register and frame data for the current
10175 function in cfun->machine. This routine can be called multiple times;
10176 it will re-do the complete frame layout every time. */
10178 static void
10179 s390_init_frame_layout (void)
10181 HOST_WIDE_INT frame_size;
10182 int base_used;
10184 /* After LRA the frame layout is supposed to be read-only and should
10185 not be re-computed. */
10186 if (reload_completed)
10187 return;
10189 /* On S/390 machines, we may need to perform branch splitting, which
10190 will require both base and return address register. We have no
10191 choice but to assume we're going to need them until right at the
10192 end of the machine dependent reorg phase. */
10193 if (!TARGET_CPU_ZARCH)
10194 cfun->machine->split_branches_pending_p = true;
10198 frame_size = cfun_frame_layout.frame_size;
10200 /* Try to predict whether we'll need the base register. */
10201 base_used = cfun->machine->split_branches_pending_p
10202 || crtl->uses_const_pool
10203 || (!DISP_IN_RANGE (frame_size)
10204 && !CONST_OK_FOR_K (frame_size));
10206 /* Decide which register to use as literal pool base. In small
10207 leaf functions, try to use an unused call-clobbered register
10208 as base register to avoid save/restore overhead. */
10209 if (!base_used)
10210 cfun->machine->base_reg = NULL_RTX;
10211 else
10213 int br = 0;
10215 if (crtl->is_leaf)
10216 /* Prefer r5 (most likely to be free). */
10217 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10219 cfun->machine->base_reg =
10220 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10223 s390_register_info ();
10224 s390_frame_info ();
10226 while (frame_size != cfun_frame_layout.frame_size);
10229 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10230 the TX is nonescaping. A transaction is considered escaping if
10231 there is at least one path from tbegin returning CC0 to the
10232 function exit block without a tend.
10234 The check so far has some limitations:
10235 - only single tbegin/tend BBs are supported
10236 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10237 - when CC is copied to a GPR and the CC0 check is done with the GPR
10238 this is not supported
10241 static void
10242 s390_optimize_nonescaping_tx (void)
10244 const unsigned int CC0 = 1 << 3;
10245 basic_block tbegin_bb = NULL;
10246 basic_block tend_bb = NULL;
10247 basic_block bb;
10248 rtx_insn *insn;
10249 bool result = true;
10250 int bb_index;
10251 rtx_insn *tbegin_insn = NULL;
10253 if (!cfun->machine->tbegin_p)
10254 return;
10256 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10258 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10260 if (!bb)
10261 continue;
10263 FOR_BB_INSNS (bb, insn)
10265 rtx ite, cc, pat, target;
10266 unsigned HOST_WIDE_INT mask;
10268 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10269 continue;
10271 pat = PATTERN (insn);
10273 if (GET_CODE (pat) == PARALLEL)
10274 pat = XVECEXP (pat, 0, 0);
10276 if (GET_CODE (pat) != SET
10277 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10278 continue;
10280 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10282 rtx_insn *tmp;
10284 tbegin_insn = insn;
10286 /* Just return if the tbegin doesn't have clobbers. */
10287 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10288 return;
10290 if (tbegin_bb != NULL)
10291 return;
10293 /* Find the next conditional jump. */
10294 for (tmp = NEXT_INSN (insn);
10295 tmp != NULL_RTX;
10296 tmp = NEXT_INSN (tmp))
10298 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10299 return;
10300 if (!JUMP_P (tmp))
10301 continue;
10303 ite = SET_SRC (PATTERN (tmp));
10304 if (GET_CODE (ite) != IF_THEN_ELSE)
10305 continue;
10307 cc = XEXP (XEXP (ite, 0), 0);
10308 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10309 || GET_MODE (cc) != CCRAWmode
10310 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10311 return;
10313 if (bb->succs->length () != 2)
10314 return;
10316 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10317 if (GET_CODE (XEXP (ite, 0)) == NE)
10318 mask ^= 0xf;
10320 if (mask == CC0)
10321 target = XEXP (ite, 1);
10322 else if (mask == (CC0 ^ 0xf))
10323 target = XEXP (ite, 2);
10324 else
10325 return;
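 /* Sketch of the mask handling above: CC0 is 1 << 3, so a mask of
 0b1000 selects the arm taken exactly when the tbegin produced
 condition code 0, while 0b0111 (CC0 ^ 0xf) means the CC0 case is
 the other arm. Either way `target' ends up denoting the CC0 path. */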
10328 edge_iterator ei;
10329 edge e1, e2;
10331 ei = ei_start (bb->succs);
10332 e1 = ei_safe_edge (ei);
10333 ei_next (&ei);
10334 e2 = ei_safe_edge (ei);
10336 if (e2->flags & EDGE_FALLTHRU)
10338 e2 = e1;
10339 e1 = ei_safe_edge (ei);
10342 if (!(e1->flags & EDGE_FALLTHRU))
10343 return;
10345 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10347 if (tmp == BB_END (bb))
10348 break;
10352 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10354 if (tend_bb != NULL)
10355 return;
10356 tend_bb = bb;
10361 /* Either we successfully remove the FPR clobbers here or we are not
10362 able to do anything for this TX. Neither case warrants
10363 another look. */
10364 cfun->machine->tbegin_p = false;
10366 if (tbegin_bb == NULL || tend_bb == NULL)
10367 return;
10369 calculate_dominance_info (CDI_POST_DOMINATORS);
10370 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10371 free_dominance_info (CDI_POST_DOMINATORS);
10373 if (!result)
10374 return;
10376 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10377 gen_rtvec (2,
10378 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10379 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10380 INSN_CODE (tbegin_insn) = -1;
10381 df_insn_rescan (tbegin_insn);
10383 return;
10386 /* Implement TARGET_HARD_REGNO_MODE_OK. */
10388 static bool
10389 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10391 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10392 return false;
10394 switch (REGNO_REG_CLASS (regno))
10396 case VEC_REGS:
10397 return ((GET_MODE_CLASS (mode) == MODE_INT
10398 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10399 || mode == DFmode
10400 || (TARGET_VXE && mode == SFmode)
10401 || s390_vector_mode_supported_p (mode));
10402 break;
10403 case FP_REGS:
10404 if (TARGET_VX
10405 && ((GET_MODE_CLASS (mode) == MODE_INT
10406 && s390_class_max_nregs (FP_REGS, mode) == 1)
10407 || mode == DFmode
10408 || s390_vector_mode_supported_p (mode)))
10409 return true;
10411 if (REGNO_PAIR_OK (regno, mode))
10413 if (mode == SImode || mode == DImode)
10414 return true;
10416 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10417 return true;
10419 break;
10420 case ADDR_REGS:
10421 if (FRAME_REGNO_P (regno) && mode == Pmode)
10422 return true;
10424 /* fallthrough */
10425 case GENERAL_REGS:
10426 if (REGNO_PAIR_OK (regno, mode))
10428 if (TARGET_ZARCH
10429 || (mode != TFmode && mode != TCmode && mode != TDmode))
10430 return true;
10432 break;
10433 case CC_REGS:
10434 if (GET_MODE_CLASS (mode) == MODE_CC)
10435 return true;
10436 break;
10437 case ACCESS_REGS:
10438 if (REGNO_PAIR_OK (regno, mode))
10440 if (mode == SImode || mode == Pmode)
10441 return true;
10443 break;
10444 default:
10445 return false;
10448 return false;
10451 /* Implement TARGET_MODES_TIEABLE_P. */
10453 static bool
10454 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10456 return ((mode1 == SFmode || mode1 == DFmode)
10457 == (mode2 == SFmode || mode2 == DFmode));
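 /* In other words: SFmode and DFmode tie with each other, any two
 non-SF/DF modes tie with each other, but e.g. SImode and DFmode do
 not, presumably because SF/DF values have their own layout in the
 FPRs. */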
10460 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10462 bool
10463 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10465 /* Once we've decided upon a register to use as base register, it must
10466 no longer be used for any other purpose. */
10467 if (cfun->machine->base_reg)
10468 if (REGNO (cfun->machine->base_reg) == old_reg
10469 || REGNO (cfun->machine->base_reg) == new_reg)
10470 return false;
10472 /* Prevent regrename from using call-saved regs which haven't
10473 actually been saved. This is necessary since regrename assumes
10474 the backend save/restore decisions are based on
10475 df_regs_ever_live. Since we have our own routine we have to tell
10476 regrename manually about it. */
10477 if (GENERAL_REGNO_P (new_reg)
10478 && !call_really_used_regs[new_reg]
10479 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10480 return false;
10482 return true;
10485 /* Return nonzero if register REGNO can be used as a scratch register
10486 in peephole2. */
10488 static bool
10489 s390_hard_regno_scratch_ok (unsigned int regno)
10491 /* See s390_hard_regno_rename_ok. */
10492 if (GENERAL_REGNO_P (regno)
10493 && !call_really_used_regs[regno]
10494 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10495 return false;
10497 return true;
10500 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10501 code that runs in z/Architecture mode, but conforms to the 31-bit
10502 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10503 bytes are saved across calls, however. */
10505 static bool
10506 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10508 if (!TARGET_64BIT
10509 && TARGET_ZARCH
10510 && GET_MODE_SIZE (mode) > 4
10511 && ((regno >= 6 && regno <= 15) || regno == 32))
10512 return true;
10514 if (TARGET_VX
10515 && GET_MODE_SIZE (mode) > 8
10516 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10517 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10518 return true;
10520 return false;
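 /* Example for the first case above: with -m31 -mzarch a GPR can hold
 a full 8-byte DImode value, but the 31-bit ABI only guarantees the
 low 4 bytes of the call-saved GPRs across calls, so such a value is
 partially clobbered. */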
10523 /* Maximum number of registers to represent a value of mode MODE
10524 in a register of class RCLASS. */
10527 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10529 int reg_size;
10530 bool reg_pair_required_p = false;
10532 switch (rclass)
10534 case FP_REGS:
10535 case VEC_REGS:
10536 reg_size = TARGET_VX ? 16 : 8;
10538 /* TF and TD modes would fit into a VR but we put them into a
10539 register pair since we do not have 128bit FP instructions on
10540 full VRs. */
10541 if (TARGET_VX
10542 && SCALAR_FLOAT_MODE_P (mode)
10543 && GET_MODE_SIZE (mode) >= 16)
10544 reg_pair_required_p = true;
10546 /* Even if complex types would fit into a single FPR/VR we force
10547 them into a register pair to deal with the parts more easily.
10548 (FIXME: What about complex ints?) */
10549 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10550 reg_pair_required_p = true;
10551 break;
10552 case ACCESS_REGS:
10553 reg_size = 4;
10554 break;
10555 default:
10556 reg_size = UNITS_PER_WORD;
10557 break;
10560 if (reg_pair_required_p)
10561 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10563 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
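 /* Worked example: TFmode (16 bytes) in FP_REGS needs two registers
 either way: (16 + 8 - 1) / 8 = 2 without the vector facility, and
 2 * ((16 / 2 + 16 - 1) / 16) = 2 with it, because 128-bit FP values
 stay in register pairs even when full VRs are available. */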
10566 /* Return TRUE if changing mode from FROM to TO should not be allowed
10567 for register class CLASS. */
10570 s390_cannot_change_mode_class (machine_mode from_mode,
10571 machine_mode to_mode,
10572 enum reg_class rclass)
10574 machine_mode small_mode;
10575 machine_mode big_mode;
10577 /* V1TF and TF have different representations in vector
10578 registers. */
10579 if (reg_classes_intersect_p (VEC_REGS, rclass)
10580 && ((from_mode == V1TFmode && to_mode == TFmode)
10581 || (from_mode == TFmode && to_mode == V1TFmode)))
10582 return 1;
10584 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10585 return 0;
10587 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10589 small_mode = from_mode;
10590 big_mode = to_mode;
10592 else
10594 small_mode = to_mode;
10595 big_mode = from_mode;
10598 /* Values residing in VRs are little-endian style. All modes are
10599 placed left-aligned in a VR. This means that we cannot allow
10600 switching between modes with differing sizes. Also if the vector
10601 facility is available we still place TFmode values in VR register
10602 pairs, since the only instructions we have operating on TFmodes
10603 only deal with register pairs. Therefore we have to allow DFmode
10604 subregs of TFmodes to enable the TFmode splitters. */
10605 if (reg_classes_intersect_p (VEC_REGS, rclass)
10606 && (GET_MODE_SIZE (small_mode) < 8
10607 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10608 return 1;
10610 /* Likewise for access registers, since they have only half the
10611 word size on 64-bit. */
10612 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10613 return 1;
10615 return 0;
10618 /* Return true if we use LRA instead of reload pass. */
10619 static bool
10620 s390_lra_p (void)
10622 return s390_lra_flag;
10625 /* Return true if register FROM can be eliminated via register TO. */
10627 static bool
10628 s390_can_eliminate (const int from, const int to)
10630 /* On zSeries machines, we have not marked the base register as fixed.
10631 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10632 If a function requires the base register, we say here that this
10633 elimination cannot be performed. This will cause reload to free
10634 up the base register (as if it were fixed). On the other hand,
10635 if the current function does *not* require the base register, we
10636 say here the elimination succeeds, which in turn allows reload
10637 to allocate the base register for any other purpose. */
10638 if (from == BASE_REGNUM && to == BASE_REGNUM)
10640 if (TARGET_CPU_ZARCH)
10642 s390_init_frame_layout ();
10643 return cfun->machine->base_reg == NULL_RTX;
10646 return false;
10649 /* Everything else must point into the stack frame. */
10650 gcc_assert (to == STACK_POINTER_REGNUM
10651 || to == HARD_FRAME_POINTER_REGNUM);
10653 gcc_assert (from == FRAME_POINTER_REGNUM
10654 || from == ARG_POINTER_REGNUM
10655 || from == RETURN_ADDRESS_POINTER_REGNUM);
10657 /* Make sure we actually saved the return address. */
10658 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10659 if (!crtl->calls_eh_return
10660 && !cfun->stdarg
10661 && !cfun_frame_layout.save_return_addr_p)
10662 return false;
10664 return true;
10667 /* Return the offset between registers FROM and TO initially after the prologue. */
10669 HOST_WIDE_INT
10670 s390_initial_elimination_offset (int from, int to)
10672 HOST_WIDE_INT offset;
10674 /* ??? Why are we called for non-eliminable pairs? */
10675 if (!s390_can_eliminate (from, to))
10676 return 0;
10678 switch (from)
10680 case FRAME_POINTER_REGNUM:
10681 offset = (get_frame_size()
10682 + STACK_POINTER_OFFSET
10683 + crtl->outgoing_args_size);
10684 break;
10686 case ARG_POINTER_REGNUM:
10687 s390_init_frame_layout ();
10688 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10689 break;
10691 case RETURN_ADDRESS_POINTER_REGNUM:
10692 s390_init_frame_layout ();
10694 if (cfun_frame_layout.first_save_gpr_slot == -1)
10696 /* If it turns out that for stdarg nothing went into the reg
10697 save area we also do not need the return address
10698 pointer. */
10699 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10700 return 0;
10702 gcc_unreachable ();
10705 /* In order to make the following work it is not necessary for
10706 r14 to have a save slot. It is sufficient if one other GPR
10707 got one. Since the GPRs are always stored without gaps we
10708 are able to calculate where the r14 save slot would
10709 reside. */
10710 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10711 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10712 UNITS_PER_LONG);
10713 break;
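 /* Illustrative example (assuming UNITS_PER_LONG is 8 and RETURN_REGNUM
 is r14): if the save area starts with r6, i.e. first_save_gpr_slot
 is 6, the r14 slot computed above lies at
 gprs_offset + (14 - 6) * 8 = gprs_offset + 64 within the frame. */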
10715 case BASE_REGNUM:
10716 offset = 0;
10717 break;
10719 default:
10720 gcc_unreachable ();
10723 return offset;
10726 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10727 to register BASE. Return generated insn. */
10729 static rtx
10730 save_fpr (rtx base, int offset, int regnum)
10732 rtx addr;
10733 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10735 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10736 set_mem_alias_set (addr, get_varargs_alias_set ());
10737 else
10738 set_mem_alias_set (addr, get_frame_alias_set ());
10740 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10743 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10744 to register BASE. Return generated insn. */
10746 static rtx
10747 restore_fpr (rtx base, int offset, int regnum)
10749 rtx addr;
10750 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10751 set_mem_alias_set (addr, get_frame_alias_set ());
10753 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10756 /* Return true if REGNO is a global register, but not one
10757 of the special ones that need to be saved/restored anyway. */
10759 static inline bool
10760 global_not_special_regno_p (int regno)
10762 return (global_regs[regno]
10763 /* These registers are special and need to be
10764 restored in any case. */
10765 && !(regno == STACK_POINTER_REGNUM
10766 || regno == RETURN_REGNUM
10767 || regno == BASE_REGNUM
10768 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10771 /* Generate insn to save registers FIRST to LAST into
10772 the register save area located at offset OFFSET
10773 relative to register BASE. */
10775 static rtx
10776 save_gprs (rtx base, int offset, int first, int last)
10778 rtx addr, insn, note;
10779 int i;
10781 addr = plus_constant (Pmode, base, offset);
10782 addr = gen_rtx_MEM (Pmode, addr);
10784 set_mem_alias_set (addr, get_frame_alias_set ());
10786 /* Special-case single register. */
10787 if (first == last)
10789 if (TARGET_64BIT)
10790 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10791 else
10792 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10794 if (!global_not_special_regno_p (first))
10795 RTX_FRAME_RELATED_P (insn) = 1;
10796 return insn;
10800 insn = gen_store_multiple (addr,
10801 gen_rtx_REG (Pmode, first),
10802 GEN_INT (last - first + 1));
10804 if (first <= 6 && cfun->stdarg)
10805 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10807 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10809 if (first + i <= 6)
10810 set_mem_alias_set (mem, get_varargs_alias_set ());
10813 /* We need to set the FRAME_RELATED flag on all SETs
10814 inside the store-multiple pattern.
10816 However, we must not emit DWARF records for registers 2..5
10817 if they are stored for use by variable arguments ...
10819 ??? Unfortunately, it is not enough to simply omit the
10820 FRAME_RELATED flags for those SETs, because the first SET
10821 of the PARALLEL is always treated as if it had the flag
10822 set, even if it does not. Therefore we emit a new pattern
10823 without those registers as REG_FRAME_RELATED_EXPR note. */
10825 if (first >= 6 && !global_not_special_regno_p (first))
10827 rtx pat = PATTERN (insn);
10829 for (i = 0; i < XVECLEN (pat, 0); i++)
10830 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10831 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10832 0, i)))))
10833 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10835 RTX_FRAME_RELATED_P (insn) = 1;
10837 else if (last >= 6)
10839 int start;
10841 for (start = first >= 6 ? first : 6; start <= last; start++)
10842 if (!global_not_special_regno_p (start))
10843 break;
10845 if (start > last)
10846 return insn;
10848 addr = plus_constant (Pmode, base,
10849 offset + (start - first) * UNITS_PER_LONG);
10851 if (start == last)
10853 if (TARGET_64BIT)
10854 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10855 gen_rtx_REG (Pmode, start));
10856 else
10857 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10858 gen_rtx_REG (Pmode, start));
10859 note = PATTERN (note);
10861 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10862 RTX_FRAME_RELATED_P (insn) = 1;
10864 return insn;
10867 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10868 gen_rtx_REG (Pmode, start),
10869 GEN_INT (last - start + 1));
10870 note = PATTERN (note);
10872 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10874 for (i = 0; i < XVECLEN (note, 0); i++)
10875 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10876 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10877 0, i)))))
10878 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10880 RTX_FRAME_RELATED_P (insn) = 1;
10883 return insn;
10886 /* Generate insn to restore registers FIRST to LAST from
10887 the register save area located at offset OFFSET
10888 relative to register BASE. */
10890 static rtx
10891 restore_gprs (rtx base, int offset, int first, int last)
10893 rtx addr, insn;
10895 addr = plus_constant (Pmode, base, offset);
10896 addr = gen_rtx_MEM (Pmode, addr);
10897 set_mem_alias_set (addr, get_frame_alias_set ());
10899 /* Special-case single register. */
10900 if (first == last)
10902 if (TARGET_64BIT)
10903 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10904 else
10905 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10907 RTX_FRAME_RELATED_P (insn) = 1;
10908 return insn;
10911 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10912 addr,
10913 GEN_INT (last - first + 1));
10914 RTX_FRAME_RELATED_P (insn) = 1;
10915 return insn;
10918 /* Return insn sequence to load the GOT register. */
10920 rtx_insn *
10921 s390_load_got (void)
10923 rtx_insn *insns;
10925 /* We cannot use pic_offset_table_rtx here since we use this
10926 function also for non-pic if __tls_get_offset is called and in
10927 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10928 aren't usable. */
10929 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10931 start_sequence ();
10933 if (TARGET_CPU_ZARCH)
10935 emit_move_insn (got_rtx, s390_got_symbol ());
10937 else
10939 rtx offset;
10941 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
10942 UNSPEC_LTREL_OFFSET);
10943 offset = gen_rtx_CONST (Pmode, offset);
10944 offset = force_const_mem (Pmode, offset);
10946 emit_move_insn (got_rtx, offset);
10948 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10949 UNSPEC_LTREL_BASE);
10950 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10952 emit_move_insn (got_rtx, offset);
10955 insns = get_insns ();
10956 end_sequence ();
10957 return insns;
10960 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10961 and the change to the stack pointer. */
10963 static void
10964 s390_emit_stack_tie (void)
10966 rtx mem = gen_frame_mem (BLKmode,
10967 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10969 emit_insn (gen_stack_tie (mem));
10972 /* Copy GPRS into FPR save slots. */
10974 static void
10975 s390_save_gprs_to_fprs (void)
10977 int i;
10979 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10980 return;
10982 for (i = 6; i < 16; i++)
10984 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10986 rtx_insn *insn =
10987 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10988 gen_rtx_REG (DImode, i));
10989 RTX_FRAME_RELATED_P (insn) = 1;
10990 /* This prevents dwarf2cfi from interpreting the set. Doing
10991 so it might emit def_cfa_register infos setting an FPR as
10992 new CFA. */
10993 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10998 /* Restore GPRs from FPR save slots. */
11000 static void
11001 s390_restore_gprs_from_fprs (void)
11003 int i;
11005 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11006 return;
11008 for (i = 6; i < 16; i++)
11010 rtx_insn *insn;
11012 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11013 continue;
11015 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11017 if (i == STACK_POINTER_REGNUM)
11018 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11019 else
11020 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11022 df_set_regs_ever_live (i, true);
11023 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11024 if (i == STACK_POINTER_REGNUM)
11025 add_reg_note (insn, REG_CFA_DEF_CFA,
11026 plus_constant (Pmode, stack_pointer_rtx,
11027 STACK_POINTER_OFFSET));
11028 RTX_FRAME_RELATED_P (insn) = 1;
11033 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11034 generation. */
11036 namespace {
11038 const pass_data pass_data_s390_early_mach =
11040 RTL_PASS, /* type */
11041 "early_mach", /* name */
11042 OPTGROUP_NONE, /* optinfo_flags */
11043 TV_MACH_DEP, /* tv_id */
11044 0, /* properties_required */
11045 0, /* properties_provided */
11046 0, /* properties_destroyed */
11047 0, /* todo_flags_start */
11048 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11051 class pass_s390_early_mach : public rtl_opt_pass
11053 public:
11054 pass_s390_early_mach (gcc::context *ctxt)
11055 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11058 /* opt_pass methods: */
11059 virtual unsigned int execute (function *);
11061 }; // class pass_s390_early_mach
11063 unsigned int
11064 pass_s390_early_mach::execute (function *fun)
11066 rtx_insn *insn;
11068 /* Try to get rid of the FPR clobbers. */
11069 s390_optimize_nonescaping_tx ();
11071 /* Re-compute register info. */
11072 s390_register_info ();
11074 /* If we're using a base register, ensure that it is always valid for
11075 the first non-prologue instruction. */
11076 if (fun->machine->base_reg)
11077 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11079 /* Annotate all constant pool references to let the scheduler know
11080 they implicitly use the base register. */
11081 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11082 if (INSN_P (insn))
11084 annotate_constant_pool_refs (&PATTERN (insn));
11085 df_insn_rescan (insn);
11087 return 0;
11090 } // anon namespace
11092 /* Expand the prologue into a bunch of separate insns. */
11094 void
11095 s390_emit_prologue (void)
11097 rtx insn, addr;
11098 rtx temp_reg;
11099 int i;
11100 int offset;
11101 int next_fpr = 0;
11103 /* Choose best register to use for temp use within prologue.
11104 TPF with profiling must avoid the register 14 - the tracing function
11105 needs the original contents of r14 to be preserved. */
11107 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11108 && !crtl->is_leaf
11109 && !TARGET_TPF_PROFILING)
11110 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11111 else if (flag_split_stack && cfun->stdarg)
11112 temp_reg = gen_rtx_REG (Pmode, 12);
11113 else
11114 temp_reg = gen_rtx_REG (Pmode, 1);
11116 s390_save_gprs_to_fprs ();
11118 /* Save call saved gprs. */
11119 if (cfun_frame_layout.first_save_gpr != -1)
11121 insn = save_gprs (stack_pointer_rtx,
11122 cfun_frame_layout.gprs_offset +
11123 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11124 - cfun_frame_layout.first_save_gpr_slot),
11125 cfun_frame_layout.first_save_gpr,
11126 cfun_frame_layout.last_save_gpr);
11127 emit_insn (insn);
11130 /* Dummy insn to mark literal pool slot. */
11132 if (cfun->machine->base_reg)
11133 emit_insn (gen_main_pool (cfun->machine->base_reg));
11135 offset = cfun_frame_layout.f0_offset;
11137 /* Save f0 and f2. */
11138 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11140 if (cfun_fpr_save_p (i))
11142 save_fpr (stack_pointer_rtx, offset, i);
11143 offset += 8;
11145 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11146 offset += 8;
11149 /* Save f4 and f6. */
11150 offset = cfun_frame_layout.f4_offset;
11151 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11153 if (cfun_fpr_save_p (i))
11155 insn = save_fpr (stack_pointer_rtx, offset, i);
11156 offset += 8;
11158 /* If f4 and f6 are call clobbered they are saved due to
11159 stdargs and therefore are not frame related. */
11160 if (!call_really_used_regs[i])
11161 RTX_FRAME_RELATED_P (insn) = 1;
11163 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11164 offset += 8;
11167 if (TARGET_PACKED_STACK
11168 && cfun_save_high_fprs_p
11169 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11171 offset = (cfun_frame_layout.f8_offset
11172 + (cfun_frame_layout.high_fprs - 1) * 8);
11174 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11175 if (cfun_fpr_save_p (i))
11177 insn = save_fpr (stack_pointer_rtx, offset, i);
11179 RTX_FRAME_RELATED_P (insn) = 1;
11180 offset -= 8;
11182 if (offset >= cfun_frame_layout.f8_offset)
11183 next_fpr = i;
11186 if (!TARGET_PACKED_STACK)
11187 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11189 if (flag_stack_usage_info)
11190 current_function_static_stack_size = cfun_frame_layout.frame_size;
11192 /* Decrement stack pointer. */
11194 if (cfun_frame_layout.frame_size > 0)
11196 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11197 rtx real_frame_off;
11199 if (s390_stack_size)
11201 HOST_WIDE_INT stack_guard;
11203 if (s390_stack_guard)
11204 stack_guard = s390_stack_guard;
11205 else
11207 /* If no value for stack guard is provided the smallest power of 2
11208 larger than the current frame size is chosen. */
11209 stack_guard = 1;
11210 while (stack_guard < cfun_frame_layout.frame_size)
11211 stack_guard <<= 1;
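 /* For instance, a frame size of 5000 bytes with no -mstack-guard
 value yields a stack_guard of 8192 here (1 doubled until it is
 >= the frame size). */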
11214 if (cfun_frame_layout.frame_size >= s390_stack_size)
11216 warning (0, "frame size of function %qs is %wd"
11217 " bytes exceeding user provided stack limit of "
11218 "%d bytes. "
11219 "An unconditional trap is added.",
11220 current_function_name(), cfun_frame_layout.frame_size,
11221 s390_stack_size);
11222 emit_insn (gen_trap ());
11223 emit_barrier ();
11225 else
11227 /* stack_guard has to be smaller than s390_stack_size.
11228 Otherwise we would emit an AND with zero which would
11229 not match the test under mask pattern. */
11230 if (stack_guard >= s390_stack_size)
11232 warning (0, "frame size of function %qs is %wd"
11233 " bytes which is more than half the stack size. "
11234 "The dynamic check would not be reliable. "
11235 "No check emitted for this function.",
11236 current_function_name(),
11237 cfun_frame_layout.frame_size);
11239 else
11241 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11242 & ~(stack_guard - 1));
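 /* Numeric sketch: with -mstack-size=65536 and -mstack-guard=4096 the
 mask above is 0xffff & ~0xfff = 0xf000; the conditional trap below
 fires when ANDing the stack pointer with that mask yields zero. */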
11244 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11245 GEN_INT (stack_check_mask));
11246 if (TARGET_64BIT)
11247 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11248 t, const0_rtx),
11249 t, const0_rtx, const0_rtx));
11250 else
11251 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11252 t, const0_rtx),
11253 t, const0_rtx, const0_rtx));
11258 if (s390_warn_framesize > 0
11259 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11260 warning (0, "frame size of %qs is %wd bytes",
11261 current_function_name (), cfun_frame_layout.frame_size);
11263 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11264 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11266 /* Save incoming stack pointer into temp reg. */
11267 if (TARGET_BACKCHAIN || next_fpr)
11268 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
11270 /* Subtract frame size from stack pointer. */
11272 if (DISP_IN_RANGE (INTVAL (frame_off)))
11274 insn = gen_rtx_SET (stack_pointer_rtx,
11275 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11276 frame_off));
11277 insn = emit_insn (insn);
11279 else
11281 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11282 frame_off = force_const_mem (Pmode, frame_off);
11284 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
11285 annotate_constant_pool_refs (&PATTERN (insn));
11288 RTX_FRAME_RELATED_P (insn) = 1;
11289 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11290 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11291 gen_rtx_SET (stack_pointer_rtx,
11292 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11293 real_frame_off)));
11295 /* Set backchain. */
11297 if (TARGET_BACKCHAIN)
11299 if (cfun_frame_layout.backchain_offset)
11300 addr = gen_rtx_MEM (Pmode,
11301 plus_constant (Pmode, stack_pointer_rtx,
11302 cfun_frame_layout.backchain_offset));
11303 else
11304 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11305 set_mem_alias_set (addr, get_frame_alias_set ());
11306 insn = emit_insn (gen_move_insn (addr, temp_reg));
11309 /* If we support non-call exceptions (e.g. for Java),
11310 we need to make sure the backchain pointer is set up
11311 before any possibly trapping memory access. */
11312 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11314 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11315 emit_clobber (addr);
11319 /* Save fprs 8 - 15 (64 bit ABI). */
11321 if (cfun_save_high_fprs_p && next_fpr)
11323 /* If the stack might be accessed through a different register
11324 we have to make sure that the stack pointer decrement is not
11325 moved below the use of the stack slots. */
11326 s390_emit_stack_tie ();
11328 insn = emit_insn (gen_add2_insn (temp_reg,
11329 GEN_INT (cfun_frame_layout.f8_offset)));
11331 offset = 0;
11333 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11334 if (cfun_fpr_save_p (i))
11336 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11337 cfun_frame_layout.frame_size
11338 + cfun_frame_layout.f8_offset
11339 + offset);
11341 insn = save_fpr (temp_reg, offset, i);
11342 offset += 8;
11343 RTX_FRAME_RELATED_P (insn) = 1;
11344 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11345 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11346 gen_rtx_REG (DFmode, i)));
11350 /* Set frame pointer, if needed. */
11352 if (frame_pointer_needed)
11354 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11355 RTX_FRAME_RELATED_P (insn) = 1;
11358 /* Set up got pointer, if needed. */
11360 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11362 rtx_insn *insns = s390_load_got ();
11364 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11365 annotate_constant_pool_refs (&PATTERN (insn));
11367 emit_insn (insns);
11370 if (TARGET_TPF_PROFILING)
11372 /* Generate a BAS instruction to serve as a function
11373 entry intercept to facilitate the use of tracing
11374 algorithms located at the branch target. */
11375 emit_insn (gen_prologue_tpf ());
11377 /* Emit a blockage here so that all code
11378 lies between the profiling mechanisms. */
11379 emit_insn (gen_blockage ());
11383 /* Expand the epilogue into a bunch of separate insns. */
11385 void
11386 s390_emit_epilogue (bool sibcall)
11388 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11389 int area_bottom, area_top, offset = 0;
11390 int next_offset;
11391 rtvec p;
11392 int i;
11394 if (TARGET_TPF_PROFILING)
11397 /* Generate a BAS instruction to serve as a function
11398 entry intercept to facilitate the use of tracing
11399 algorithms located at the branch target. */
11401 /* Emit a blockage here so that all code
11402 lies between the profiling mechanisms. */
11403 emit_insn (gen_blockage ());
11405 emit_insn (gen_epilogue_tpf ());
11408 /* Check whether to use frame or stack pointer for restore. */
11410 frame_pointer = (frame_pointer_needed
11411 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11413 s390_frame_area (&area_bottom, &area_top);
11415 /* Check whether we can access the register save area.
11416 If not, increment the frame pointer as required. */
11418 if (area_top <= area_bottom)
11420 /* Nothing to restore. */
11422 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11423 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11425 /* Area is in range. */
11426 offset = cfun_frame_layout.frame_size;
11428 else
11430 rtx insn, frame_off, cfa;
11432 offset = area_bottom < 0 ? -area_bottom : 0;
11433 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11435 cfa = gen_rtx_SET (frame_pointer,
11436 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11437 if (DISP_IN_RANGE (INTVAL (frame_off)))
11439 insn = gen_rtx_SET (frame_pointer,
11440 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11441 insn = emit_insn (insn);
11443 else
11445 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11446 frame_off = force_const_mem (Pmode, frame_off);
11448 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11449 annotate_constant_pool_refs (&PATTERN (insn));
11451 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11452 RTX_FRAME_RELATED_P (insn) = 1;
11455 /* Restore call saved fprs. */
11457 if (TARGET_64BIT)
11459 if (cfun_save_high_fprs_p)
11461 next_offset = cfun_frame_layout.f8_offset;
11462 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11464 if (cfun_fpr_save_p (i))
11466 restore_fpr (frame_pointer,
11467 offset + next_offset, i);
11468 cfa_restores
11469 = alloc_reg_note (REG_CFA_RESTORE,
11470 gen_rtx_REG (DFmode, i), cfa_restores);
11471 next_offset += 8;
11477 else
11479 next_offset = cfun_frame_layout.f4_offset;
11480 /* f4, f6 */
11481 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11483 if (cfun_fpr_save_p (i))
11485 restore_fpr (frame_pointer,
11486 offset + next_offset, i);
11487 cfa_restores
11488 = alloc_reg_note (REG_CFA_RESTORE,
11489 gen_rtx_REG (DFmode, i), cfa_restores);
11490 next_offset += 8;
11492 else if (!TARGET_PACKED_STACK)
11493 next_offset += 8;
11498 /* Return register. */
11500 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11502 /* Restore call saved gprs. */
11504 if (cfun_frame_layout.first_restore_gpr != -1)
11506 rtx insn, addr;
11507 int i;
11509 /* Check for global register and save them
11510 to stack location from where they get restored. */
11512 for (i = cfun_frame_layout.first_restore_gpr;
11513 i <= cfun_frame_layout.last_restore_gpr;
11514 i++)
11516 if (global_not_special_regno_p (i))
11518 addr = plus_constant (Pmode, frame_pointer,
11519 offset + cfun_frame_layout.gprs_offset
11520 + (i - cfun_frame_layout.first_save_gpr_slot)
11521 * UNITS_PER_LONG);
11522 addr = gen_rtx_MEM (Pmode, addr);
11523 set_mem_alias_set (addr, get_frame_alias_set ());
11524 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11526 else
11527 cfa_restores
11528 = alloc_reg_note (REG_CFA_RESTORE,
11529 gen_rtx_REG (Pmode, i), cfa_restores);
11532 /* Fetch return address from stack before load multiple,
11533 as this helps scheduling.
11535 Only do this if we already decided that r14 needs to be
11536 saved to a stack slot. (And not just because r14 happens to
11537 be in between two GPRs which need saving.) Otherwise it
11538 would be difficult to take that decision back in
11539 s390_optimize_prologue.
11541 This optimization is only helpful on in-order machines. */
11542 if (! sibcall
11543 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11544 && s390_tune <= PROCESSOR_2097_Z10)
11546 int return_regnum = find_unused_clobbered_reg();
11547 if (!return_regnum)
11548 return_regnum = 4;
11549 return_reg = gen_rtx_REG (Pmode, return_regnum);
11551 addr = plus_constant (Pmode, frame_pointer,
11552 offset + cfun_frame_layout.gprs_offset
11553 + (RETURN_REGNUM
11554 - cfun_frame_layout.first_save_gpr_slot)
11555 * UNITS_PER_LONG);
11556 addr = gen_rtx_MEM (Pmode, addr);
11557 set_mem_alias_set (addr, get_frame_alias_set ());
11558 emit_move_insn (return_reg, addr);
11560 /* Once we did that optimization we have to make sure
11561 s390_optimize_prologue does not try to remove the store
11562 of r14 since we will not be able to find the load issued
11563 here. */
11564 cfun_frame_layout.save_return_addr_p = true;
11567 insn = restore_gprs (frame_pointer,
11568 offset + cfun_frame_layout.gprs_offset
11569 + (cfun_frame_layout.first_restore_gpr
11570 - cfun_frame_layout.first_save_gpr_slot)
11571 * UNITS_PER_LONG,
11572 cfun_frame_layout.first_restore_gpr,
11573 cfun_frame_layout.last_restore_gpr);
11574 insn = emit_insn (insn);
11575 REG_NOTES (insn) = cfa_restores;
11576 add_reg_note (insn, REG_CFA_DEF_CFA,
11577 plus_constant (Pmode, stack_pointer_rtx,
11578 STACK_POINTER_OFFSET));
11579 RTX_FRAME_RELATED_P (insn) = 1;
11582 s390_restore_gprs_from_fprs ();
11584 if (! sibcall)
11587 /* Return to caller. */
11589 p = rtvec_alloc (2);
11591 RTVEC_ELT (p, 0) = ret_rtx;
11592 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11593 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11597 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11599 static void
11600 s300_set_up_by_prologue (hard_reg_set_container *regs)
11602 if (cfun->machine->base_reg
11603 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11604 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11607 /* -fsplit-stack support. */
11609 /* A SYMBOL_REF for __morestack. */
11610 static GTY(()) rtx morestack_ref;
11612 /* When using -fsplit-stack, the allocation routines set a field in
11613 the TCB to the bottom of the stack plus this much space, measured
11614 in bytes. */
11616 #define SPLIT_STACK_AVAILABLE 1024
11618 /* Emit -fsplit-stack prologue, which goes before the regular function
11619 prologue. */
11621 void
11622 s390_expand_split_stack_prologue (void)
11624 rtx r1, guard, cc = NULL;
11625 rtx_insn *insn;
11626 /* Offset from thread pointer to __private_ss. */
11627 int psso = TARGET_64BIT ? 0x38 : 0x20;
11628 /* Pointer size in bytes. */
11629 /* Frame size and argument size - the two parameters to __morestack. */
11630 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11631 /* Align argument size to 8 bytes - simplifies __morestack code. */
11632 HOST_WIDE_INT args_size = crtl->args.size >= 0
11633 ? ((crtl->args.size + 7) & ~7)
11634 : 0;
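 /* E.g. 13 bytes of arguments are rounded up to (13 + 7) & ~7 = 16
 bytes before being passed to __morestack. */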
11635 /* Label to be called by __morestack. */
11636 rtx_code_label *call_done = NULL;
11637 rtx_code_label *parm_base = NULL;
11638 rtx tmp;
11640 gcc_assert (flag_split_stack && reload_completed);
11641 if (!TARGET_CPU_ZARCH)
11643 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11644 return;
11647 r1 = gen_rtx_REG (Pmode, 1);
11649 /* If no stack frame will be allocated, don't do anything. */
11650 if (!frame_size)
11652 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11654 /* If va_start is used, just use r15. */
11655 emit_move_insn (r1,
11656 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11657 GEN_INT (STACK_POINTER_OFFSET)));
11660 return;
11663 if (morestack_ref == NULL_RTX)
11665 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11666 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11667 | SYMBOL_FLAG_FUNCTION);
11670 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11672 /* If frame_size will fit in an add instruction, do a stack space
11673 check, and only call __morestack if there's not enough space. */
11675 /* Get thread pointer. r1 is the only register we can always destroy - r0
11676 could contain a static chain (and cannot be used to address memory
11677 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11678 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11679 /* Aim at __private_ss. */
11680 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11682 /* If less than 1 KiB is used, skip the addition and compare directly with
11683 __private_ss. */
11684 if (frame_size > SPLIT_STACK_AVAILABLE)
11686 emit_move_insn (r1, guard);
11687 if (TARGET_64BIT)
11688 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11689 else
11690 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11691 guard = r1;
11694 /* Compare the (maybe adjusted) guard with the stack pointer. */
11695 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11698 call_done = gen_label_rtx ();
11699 parm_base = gen_label_rtx ();
11701 /* Emit the parameter block. */
11702 tmp = gen_split_stack_data (parm_base, call_done,
11703 GEN_INT (frame_size),
11704 GEN_INT (args_size));
11705 insn = emit_insn (tmp);
11706 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11707 LABEL_NUSES (call_done)++;
11708 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11709 LABEL_NUSES (parm_base)++;
11711 /* %r1 = litbase. */
11712 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11713 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11714 LABEL_NUSES (parm_base)++;
11716 /* Now, we need to call __morestack. It has very special calling
11717 conventions: it preserves param/return/static chain registers for
11718 calling main function body, and looks for its own parameters at %r1. */
11720 if (cc != NULL)
11722 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11724 insn = emit_jump_insn (tmp);
11725 JUMP_LABEL (insn) = call_done;
11726 LABEL_NUSES (call_done)++;
11728 /* Mark the jump as very unlikely to be taken. */
11729 add_reg_br_prob_note (insn,
11730 profile_probability::very_unlikely ());
11732 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11734 /* If va_start is used, and __morestack was not called, just use
11735 r15. */
11736 emit_move_insn (r1,
11737 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11738 GEN_INT (STACK_POINTER_OFFSET)));
11741 else
11743 tmp = gen_split_stack_call (morestack_ref, call_done);
11744 insn = emit_jump_insn (tmp);
11745 JUMP_LABEL (insn) = call_done;
11746 LABEL_NUSES (call_done)++;
11747 emit_barrier ();
11750 /* __morestack will call us here. */
11752 emit_label (call_done);
11755 /* We may have to tell the dataflow pass that the split stack prologue
11756 is initializing a register. */
11758 static void
11759 s390_live_on_entry (bitmap regs)
11761 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11763 gcc_assert (flag_split_stack);
11764 bitmap_set_bit (regs, 1);
11768 /* Return true if the function can use simple_return to return outside
11769 of a shrink-wrapped region. At present shrink-wrapping is supported
11770 in all cases. */
11772 bool
11773 s390_can_use_simple_return_insn (void)
11775 return true;
11778 /* Return true if the epilogue is guaranteed to contain only a return
11779 instruction and if a direct return can therefore be used instead.
11780 One of the main advantages of using direct return instructions
11781 is that we can then use conditional returns. */
11783 bool
11784 s390_can_use_return_insn (void)
11786 int i;
11788 if (!reload_completed)
11789 return false;
11791 if (crtl->profile)
11792 return false;
11794 if (TARGET_TPF_PROFILING)
11795 return false;
11797 for (i = 0; i < 16; i++)
11798 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11799 return false;
11801 /* For 31 bit this is not covered by the frame_size check below
11802 since f4, f6 are saved in the register save area without needing
11803 additional stack space. */
11804 if (!TARGET_64BIT
11805 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11806 return false;
11808 if (cfun->machine->base_reg
11809 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11810 return false;
11812 return cfun_frame_layout.frame_size == 0;
11815 /* The VX ABI differs for vararg functions. Therefore we need the
11816 prototype of the callee to be available when passing vector type
11817 values. */
11818 static const char *
11819 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11821 return ((TARGET_VX_ABI
11822 && typelist == 0
11823 && VECTOR_TYPE_P (TREE_TYPE (val))
11824 && (funcdecl == NULL_TREE
11825 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11826 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11827 ? N_("vector argument passed to unprototyped function")
11828 : NULL);
11832 /* Return the size in bytes of a function argument of
11833 type TYPE and/or mode MODE. At least one of TYPE or
11834 MODE must be specified. */
11836 static int
11837 s390_function_arg_size (machine_mode mode, const_tree type)
11839 if (type)
11840 return int_size_in_bytes (type);
11842 /* No type info available for some library calls ... */
11843 if (mode != BLKmode)
11844 return GET_MODE_SIZE (mode);
11846 /* If we have neither type nor mode, abort */
11847 gcc_unreachable ();
11850 /* Return true if a function argument of type TYPE and mode MODE
11851 is to be passed in a vector register, if available. */
11853 bool
11854 s390_function_arg_vector (machine_mode mode, const_tree type)
11856 if (!TARGET_VX_ABI)
11857 return false;
11859 if (s390_function_arg_size (mode, type) > 16)
11860 return false;
11862 /* No type info available for some library calls ... */
11863 if (!type)
11864 return VECTOR_MODE_P (mode);
11866 /* The ABI says that record types with a single member are treated
11867 just like that member would be. */
11868 while (TREE_CODE (type) == RECORD_TYPE)
11870 tree field, single = NULL_TREE;
11872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11874 if (TREE_CODE (field) != FIELD_DECL)
11875 continue;
11877 if (single == NULL_TREE)
11878 single = TREE_TYPE (field);
11879 else
11880 return false;
11883 if (single == NULL_TREE)
11884 return false;
11885 else
11887 /* If the field declaration adds extra bytes due to
11888 e.g. padding, this is not accepted as a vector type. */
11889 if (int_size_in_bytes (single) <= 0
11890 || int_size_in_bytes (single) != int_size_in_bytes (type))
11891 return false;
11892 type = single;
11896 return VECTOR_TYPE_P (type);
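 /* Example: a struct whose only member is a 16-byte vector (and which
 has no padding) is passed exactly like that vector; the size check
 above rejects wrappers where padding makes the struct larger than
 its single member. */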
11899 /* Return true if a function argument of type TYPE and mode MODE
11900 is to be passed in a floating-point register, if available. */
11902 static bool
11903 s390_function_arg_float (machine_mode mode, const_tree type)
11905 if (s390_function_arg_size (mode, type) > 8)
11906 return false;
11908 /* Soft-float changes the ABI: no floating-point registers are used. */
11909 if (TARGET_SOFT_FLOAT)
11910 return false;
11912 /* No type info available for some library calls ... */
11913 if (!type)
11914 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11916 /* The ABI says that record types with a single member are treated
11917 just like that member would be. */
11918 while (TREE_CODE (type) == RECORD_TYPE)
11920 tree field, single = NULL_TREE;
11922 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11924 if (TREE_CODE (field) != FIELD_DECL)
11925 continue;
11927 if (single == NULL_TREE)
11928 single = TREE_TYPE (field);
11929 else
11930 return false;
11933 if (single == NULL_TREE)
11934 return false;
11935 else
11936 type = single;
11939 return TREE_CODE (type) == REAL_TYPE;
11942 /* Return true if a function argument of type TYPE and mode MODE
11943 is to be passed in an integer register, or a pair of integer
11944 registers, if available. */
11946 static bool
11947 s390_function_arg_integer (machine_mode mode, const_tree type)
11949 int size = s390_function_arg_size (mode, type);
11950 if (size > 8)
11951 return false;
11953 /* No type info available for some library calls ... */
11954 if (!type)
11955 return GET_MODE_CLASS (mode) == MODE_INT
11956 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11958 /* We accept small integral (and similar) types. */
11959 if (INTEGRAL_TYPE_P (type)
11960 || POINTER_TYPE_P (type)
11961 || TREE_CODE (type) == NULLPTR_TYPE
11962 || TREE_CODE (type) == OFFSET_TYPE
11963 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11964 return true;
11966 /* We also accept structs of size 1, 2, 4, 8 that are not
11967 passed in floating-point registers. */
11968 if (AGGREGATE_TYPE_P (type)
11969 && exact_log2 (size) >= 0
11970 && !s390_function_arg_float (mode, type))
11971 return true;
11973 return false;
11976 /* Return 1 if a function argument of type TYPE and mode MODE
11977 is to be passed by reference. The ABI specifies that only
11978 structures of size 1, 2, 4, or 8 bytes are passed by value,
11979 all other structures (and complex numbers) are passed by
11980 reference. */
11982 static bool
11983 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11984 machine_mode mode, const_tree type,
11985 bool named ATTRIBUTE_UNUSED)
11987 int size = s390_function_arg_size (mode, type);
11989 if (s390_function_arg_vector (mode, type))
11990 return false;
11992 if (size > 8)
11993 return true;
11995 if (type)
11997 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11998 return true;
12000 if (TREE_CODE (type) == COMPLEX_TYPE
12001 || TREE_CODE (type) == VECTOR_TYPE)
12002 return true;
12005 return false;
12008 /* Update the data in CUM to advance over an argument of mode MODE and
12009 data type TYPE. (TYPE is null for libcalls where that information
12010 may not be available.) The boolean NAMED specifies whether the
12011 argument is a named argument (as opposed to an unnamed argument
12012 matching an ellipsis). */
12014 static void
12015 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
12016 const_tree type, bool named)
12018 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12020 if (s390_function_arg_vector (mode, type))
12022 /* We are called for unnamed vector stdarg arguments which are
12023 passed on the stack. In this case this hook does not have to
12024 do anything since stack arguments are tracked by common
12025 code. */
12026 if (!named)
12027 return;
12028 cum->vrs += 1;
12030 else if (s390_function_arg_float (mode, type))
12032 cum->fprs += 1;
12034 else if (s390_function_arg_integer (mode, type))
12036 int size = s390_function_arg_size (mode, type);
12037 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12039 else
12040 gcc_unreachable ();
12043 /* Define where to put the arguments to a function.
12044 Value is zero to push the argument on the stack,
12045 or a hard register in which to store the argument.
12047 MODE is the argument's machine mode.
12048 TYPE is the data type of the argument (as a tree).
12049 This is null for libcalls where that information may
12050 not be available.
12051 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12052 the preceding args and about the function being called.
12053 NAMED is nonzero if this argument is a named parameter
12054 (otherwise it is an extra parameter matching an ellipsis).
12056 On S/390, we use general purpose registers 2 through 6 to
12057 pass integer, pointer, and certain structure arguments, and
12058 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12059 to pass floating point arguments. All remaining arguments
12060 are pushed to the stack. */
12062 static rtx
12063 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
12064 const_tree type, bool named)
12066 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12068 if (!named)
12069 s390_check_type_for_vector_abi (type, true, false);
12071 if (s390_function_arg_vector (mode, type))
12073 /* Vector arguments being part of the ellipsis are passed on the
12074 stack. */
12075 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12076 return NULL_RTX;
12078 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12080 else if (s390_function_arg_float (mode, type))
12082 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12083 return NULL_RTX;
12084 else
12085 return gen_rtx_REG (mode, cum->fprs + 16);
12087 else if (s390_function_arg_integer (mode, type))
12089 int size = s390_function_arg_size (mode, type);
12090 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12092 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12093 return NULL_RTX;
12094 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12095 return gen_rtx_REG (mode, cum->gprs + 2);
12096 else if (n_gprs == 2)
12098 rtvec p = rtvec_alloc (2);
12100 RTVEC_ELT (p, 0)
12101 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12102 const0_rtx);
12103 RTVEC_ELT (p, 1)
12104 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12105 GEN_INT (4));
12107 return gen_rtx_PARALLEL (mode, p);
12111 /* After the real arguments, expand_call calls us once again
12112 with a void_type_node type. Whatever we return here is
12113 passed as operand 2 to the call expanders.
12115 We don't need this feature ... */
12116 else if (type == void_type_node)
12117 return const0_rtx;
12119 gcc_unreachable ();
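/* Example (illustrative, assuming -m64 and hard float): for

     void f (int a, double b, long c);

   the hooks above place a in %r2, b in %f0 (hard regno 16) and c in
   %r3.  The two-register PARALLEL case above is only reached when
   UNITS_PER_WORD != UNITS_PER_LONG, i.e. for -m31 -mzarch, where an
   8-byte integer argument is split into two SImode pieces.  */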
12122 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12123 left-justified when placed on the stack during parameter passing. */
12125 static pad_direction
12126 s390_function_arg_padding (machine_mode mode, const_tree type)
12128 if (s390_function_arg_vector (mode, type))
12129 return PAD_UPWARD;
12131 return default_function_arg_padding (mode, type);
12134 /* Return true if return values of type TYPE should be returned
12135 in a memory buffer whose address is passed by the caller as
12136 hidden first argument. */
12138 static bool
12139 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12141 /* We accept small integral (and similar) types. */
12142 if (INTEGRAL_TYPE_P (type)
12143 || POINTER_TYPE_P (type)
12144 || TREE_CODE (type) == OFFSET_TYPE
12145 || TREE_CODE (type) == REAL_TYPE)
12146 return int_size_in_bytes (type) > 8;
12148 /* vector types which fit into a VR. */
12149 if (TARGET_VX_ABI
12150 && VECTOR_TYPE_P (type)
12151 && int_size_in_bytes (type) <= 16)
12152 return false;
12154 /* Aggregates and similar constructs are always returned
12155 in memory. */
12156 if (AGGREGATE_TYPE_P (type)
12157 || TREE_CODE (type) == COMPLEX_TYPE
12158 || VECTOR_TYPE_P (type))
12159 return true;
12161 /* ??? We get called on all sorts of random stuff from
12162 aggregate_value_p. We can't abort, but it's not clear
12163 what's safe to return. Pretend it's a struct I guess. */
12164 return true;
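/* Illustrative consequences of the rules above: a scalar int or double
   is returned in a register, a 16-byte vector is returned in a VR when
   the vector ABI is in effect, while any aggregate (even a struct of
   size 8 that would be passed in a GPR as an argument) is returned via
   a hidden memory buffer.  */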
12167 /* Function arguments and return values are promoted to word size. */
12169 static machine_mode
12170 s390_promote_function_mode (const_tree type, machine_mode mode,
12171 int *punsignedp,
12172 const_tree fntype ATTRIBUTE_UNUSED,
12173 int for_return ATTRIBUTE_UNUSED)
12175 if (INTEGRAL_MODE_P (mode)
12176 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12178 if (type != NULL_TREE && POINTER_TYPE_P (type))
12179 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12180 return Pmode;
12183 return mode;
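/* Example (illustrative): with -m64, a char, short or int argument or
   return value is widened to DImode (Pmode); for pointer types the
   signedness of the extension is taken from POINTERS_EXTEND_UNSIGNED
   as set above.  */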
12186 /* Define where to return a (scalar) value of type RET_TYPE.
12187 If RET_TYPE is null, define where to return a (scalar)
12188 value of mode MODE from a libcall. */
12190 static rtx
12191 s390_function_and_libcall_value (machine_mode mode,
12192 const_tree ret_type,
12193 const_tree fntype_or_decl,
12194 bool outgoing ATTRIBUTE_UNUSED)
12196 /* For vector return types it is important to use the RET_TYPE
12197 argument whenever available since the middle-end might have
12198 changed the mode to a scalar mode. */
12199 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12200 || (!ret_type && VECTOR_MODE_P (mode)));
12202 /* For normal functions perform the promotion as
12203 promote_function_mode would do. */
12204 if (ret_type)
12206 int unsignedp = TYPE_UNSIGNED (ret_type);
12207 mode = promote_function_mode (ret_type, mode, &unsignedp,
12208 fntype_or_decl, 1);
12211 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12212 || SCALAR_FLOAT_MODE_P (mode)
12213 || (TARGET_VX_ABI && vector_ret_type_p));
12214 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12216 if (TARGET_VX_ABI && vector_ret_type_p)
12217 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12218 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12219 return gen_rtx_REG (mode, 16);
12220 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12221 || UNITS_PER_LONG == UNITS_PER_WORD)
12222 return gen_rtx_REG (mode, 2);
12223 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12225 /* This case is triggered when returning a 64 bit value with
12226 -m31 -mzarch. Although the value would fit into a single
12227 register it has to be forced into a 32 bit register pair in
12228 order to match the ABI. */
12229 rtvec p = rtvec_alloc (2);
12231 RTVEC_ELT (p, 0)
12232 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12233 RTVEC_ELT (p, 1)
12234 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12236 return gen_rtx_PARALLEL (mode, p);
12239 gcc_unreachable ();
12242 /* Define where to return a scalar return value of type RET_TYPE. */
12244 static rtx
12245 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12246 bool outgoing)
12248 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12249 fn_decl_or_type, outgoing);
12252 /* Define where to return a scalar libcall return value of mode
12253 MODE. */
12255 static rtx
12256 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12258 return s390_function_and_libcall_value (mode, NULL_TREE,
12259 NULL_TREE, true);
12263 /* Create and return the va_list datatype.
12265 On S/390, va_list is an array type equivalent to
12267 typedef struct __va_list_tag
12269 long __gpr;
12270 long __fpr;
12271 void *__overflow_arg_area;
12272 void *__reg_save_area;
12273 } va_list[1];
12275 where __gpr and __fpr hold the number of general purpose
12276 or floating point arguments used up to now, respectively,
12277 __overflow_arg_area points to the stack location of the
12278 next argument passed on the stack, and __reg_save_area
12279 always points to the start of the register area in the
12280 call frame of the current function. The function prologue
12281 saves all registers used for argument passing into this
12282 area if the function uses variable arguments. */
12284 static tree
12285 s390_build_builtin_va_list (void)
12287 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12289 record = lang_hooks.types.make_type (RECORD_TYPE);
12291 type_decl =
12292 build_decl (BUILTINS_LOCATION,
12293 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12295 f_gpr = build_decl (BUILTINS_LOCATION,
12296 FIELD_DECL, get_identifier ("__gpr"),
12297 long_integer_type_node);
12298 f_fpr = build_decl (BUILTINS_LOCATION,
12299 FIELD_DECL, get_identifier ("__fpr"),
12300 long_integer_type_node);
12301 f_ovf = build_decl (BUILTINS_LOCATION,
12302 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12303 ptr_type_node);
12304 f_sav = build_decl (BUILTINS_LOCATION,
12305 FIELD_DECL, get_identifier ("__reg_save_area"),
12306 ptr_type_node);
12308 va_list_gpr_counter_field = f_gpr;
12309 va_list_fpr_counter_field = f_fpr;
12311 DECL_FIELD_CONTEXT (f_gpr) = record;
12312 DECL_FIELD_CONTEXT (f_fpr) = record;
12313 DECL_FIELD_CONTEXT (f_ovf) = record;
12314 DECL_FIELD_CONTEXT (f_sav) = record;
12316 TYPE_STUB_DECL (record) = type_decl;
12317 TYPE_NAME (record) = type_decl;
12318 TYPE_FIELDS (record) = f_gpr;
12319 DECL_CHAIN (f_gpr) = f_fpr;
12320 DECL_CHAIN (f_fpr) = f_ovf;
12321 DECL_CHAIN (f_ovf) = f_sav;
12323 layout_type (record);
12325 /* The correct type is an array type of one element. */
12326 return build_array_type (record, build_index_type (size_zero_node));
12329 /* Implement va_start by filling the va_list structure VALIST.
12330 STDARG_P is always true, and ignored.
12331 NEXTARG points to the first anonymous stack argument.
12333 The following global variables are used to initialize
12334 the va_list structure:
12336 crtl->args.info:
12337 holds number of gprs and fprs used for named arguments.
12338 crtl->args.arg_offset_rtx:
12339 holds the offset of the first anonymous stack argument
12340 (relative to the virtual arg pointer). */
12342 static void
12343 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12345 HOST_WIDE_INT n_gpr, n_fpr;
12346 int off;
12347 tree f_gpr, f_fpr, f_ovf, f_sav;
12348 tree gpr, fpr, ovf, sav, t;
12350 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12351 f_fpr = DECL_CHAIN (f_gpr);
12352 f_ovf = DECL_CHAIN (f_fpr);
12353 f_sav = DECL_CHAIN (f_ovf);
12355 valist = build_simple_mem_ref (valist);
12356 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12357 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12358 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12359 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12361 /* Count number of gp and fp argument registers used. */
12363 n_gpr = crtl->args.info.gprs;
12364 n_fpr = crtl->args.info.fprs;
12366 if (cfun->va_list_gpr_size)
12368 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12369 build_int_cst (NULL_TREE, n_gpr));
12370 TREE_SIDE_EFFECTS (t) = 1;
12371 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12374 if (cfun->va_list_fpr_size)
12376 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12377 build_int_cst (NULL_TREE, n_fpr));
12378 TREE_SIDE_EFFECTS (t) = 1;
12379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12382 if (flag_split_stack
12383 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12384 == NULL)
12385 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12387 rtx reg;
12388 rtx_insn *seq;
12390 reg = gen_reg_rtx (Pmode);
12391 cfun->machine->split_stack_varargs_pointer = reg;
12393 start_sequence ();
12394 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12395 seq = get_insns ();
12396 end_sequence ();
12398 push_topmost_sequence ();
12399 emit_insn_after (seq, entry_of_function ());
12400 pop_topmost_sequence ();
12403 /* Find the overflow area.
12404 FIXME: This currently is too pessimistic when the vector ABI is
12405 enabled. In that case we *always* set up the overflow area
12406 pointer. */
12407 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12408 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12409 || TARGET_VX_ABI)
12411 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12412 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12413 else
12414 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12416 off = INTVAL (crtl->args.arg_offset_rtx);
12417 off = off < 0 ? 0 : off;
12418 if (TARGET_DEBUG_ARG)
12419 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12420 (int)n_gpr, (int)n_fpr, off);
12422 t = fold_build_pointer_plus_hwi (t, off);
12424 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12425 TREE_SIDE_EFFECTS (t) = 1;
12426 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12429 /* Find the register save area. */
12430 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12431 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12433 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12434 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12436 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12437 TREE_SIDE_EFFECTS (t) = 1;
12438 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
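/* Example (illustrative): for a function such as

     int f (int a, ...);

   crtl->args.info records one named GPR argument, so va_start stores
   __gpr = 1 and __fpr = 0; subsequent va_arg expansions fetch further
   register arguments from __reg_save_area and everything else from
   __overflow_arg_area.  */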
12442 /* Implement va_arg by updating the va_list structure
12443 VALIST as required to retrieve an argument of type
12444 TYPE, and returning that argument.
12446 Generates code equivalent to:
12448 if (integral value) {
12449 if (size <= 4 && args.gpr < 5 ||
12450 size > 4 && args.gpr < 4 )
12451 ret = args.reg_save_area[args.gpr+8]
12452 else
12453 ret = *args.overflow_arg_area++;
12454 } else if (vector value) {
12455 ret = *args.overflow_arg_area;
12456 args.overflow_arg_area += size / 8;
12457 } else if (float value) {
12458 if (args.fpr < 2)
12459 ret = args.reg_save_area[args.fpr+64]
12460 else
12461 ret = *args.overflow_arg_area++;
12462 } else if (aggregate value) {
12463 if (args.gpr < 5)
12464 ret = *args.reg_save_area[args.gpr]
12465 else
12466 ret = **args.overflow_arg_area++;
12467 } */
12469 static tree
12470 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12471 gimple_seq *post_p ATTRIBUTE_UNUSED)
12473 tree f_gpr, f_fpr, f_ovf, f_sav;
12474 tree gpr, fpr, ovf, sav, reg, t, u;
12475 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12476 tree lab_false, lab_over = NULL_TREE;
12477 tree addr = create_tmp_var (ptr_type_node, "addr");
12478 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12479 a stack slot. */
12481 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12482 f_fpr = DECL_CHAIN (f_gpr);
12483 f_ovf = DECL_CHAIN (f_fpr);
12484 f_sav = DECL_CHAIN (f_ovf);
12486 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12487 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12488 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12490 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12491 both appear on a lhs. */
12492 valist = unshare_expr (valist);
12493 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12495 size = int_size_in_bytes (type);
12497 s390_check_type_for_vector_abi (type, true, false);
12499 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12501 if (TARGET_DEBUG_ARG)
12503 fprintf (stderr, "va_arg: aggregate type");
12504 debug_tree (type);
12507 /* Aggregates are passed by reference. */
12508 indirect_p = 1;
12509 reg = gpr;
12510 n_reg = 1;
12512 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12513 will be added by s390_frame_info because for va_args an even
12514 number of GPRs always has to be saved (r15-r2 = 14 regs). */
12515 sav_ofs = 2 * UNITS_PER_LONG;
12516 sav_scale = UNITS_PER_LONG;
12517 size = UNITS_PER_LONG;
12518 max_reg = GP_ARG_NUM_REG - n_reg;
12519 left_align_p = false;
12521 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12523 if (TARGET_DEBUG_ARG)
12525 fprintf (stderr, "va_arg: vector type");
12526 debug_tree (type);
12529 indirect_p = 0;
12530 reg = NULL_TREE;
12531 n_reg = 0;
12532 sav_ofs = 0;
12533 sav_scale = 8;
12534 max_reg = 0;
12535 left_align_p = true;
12537 else if (s390_function_arg_float (TYPE_MODE (type), type))
12539 if (TARGET_DEBUG_ARG)
12541 fprintf (stderr, "va_arg: float type");
12542 debug_tree (type);
12545 /* FP args go in FP registers, if present. */
12546 indirect_p = 0;
12547 reg = fpr;
12548 n_reg = 1;
12549 sav_ofs = 16 * UNITS_PER_LONG;
12550 sav_scale = 8;
12551 max_reg = FP_ARG_NUM_REG - n_reg;
12552 left_align_p = false;
12554 else
12556 if (TARGET_DEBUG_ARG)
12558 fprintf (stderr, "va_arg: other type");
12559 debug_tree (type);
12562 /* Otherwise into GP registers. */
12563 indirect_p = 0;
12564 reg = gpr;
12565 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12567 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12568 will be added by s390_frame_info because for va_args an even
12569 number of GPRs always has to be saved (r15-r2 = 14 regs). */
12570 sav_ofs = 2 * UNITS_PER_LONG;
12572 if (size < UNITS_PER_LONG)
12573 sav_ofs += UNITS_PER_LONG - size;
12575 sav_scale = UNITS_PER_LONG;
12576 max_reg = GP_ARG_NUM_REG - n_reg;
12577 left_align_p = false;
12580 /* Pull the value out of the saved registers ... */
12582 if (reg != NULL_TREE)
12584 /* Pseudocode sketch of the code generated below:
12585 if (reg > ((typeof (reg))max_reg))
12586 goto lab_false;
12588 addr = sav + sav_ofs + reg * sav_scale;
12590 goto lab_over;
12592 lab_false:
12593 */
12595 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12596 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12598 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12599 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12600 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12601 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12602 gimplify_and_add (t, pre_p);
12604 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12605 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12606 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12607 t = fold_build_pointer_plus (t, u);
12609 gimplify_assign (addr, t, pre_p);
12611 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12613 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12616 /* ... Otherwise out of the overflow area. */
12618 t = ovf;
12619 if (size < UNITS_PER_LONG && !left_align_p)
12620 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12622 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12624 gimplify_assign (addr, t, pre_p);
12626 if (size < UNITS_PER_LONG && left_align_p)
12627 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12628 else
12629 t = fold_build_pointer_plus_hwi (t, size);
12631 gimplify_assign (ovf, t, pre_p);
12633 if (reg != NULL_TREE)
12634 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12637 /* Increment register save count. */
12639 if (n_reg > 0)
12641 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12642 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12643 gimplify_and_add (u, pre_p);
12646 if (indirect_p)
12648 t = build_pointer_type_for_mode (build_pointer_type (type),
12649 ptr_mode, true);
12650 addr = fold_convert (t, addr);
12651 addr = build_va_arg_indirect_ref (addr);
12653 else
12655 t = build_pointer_type_for_mode (type, ptr_mode, true);
12656 addr = fold_convert (t, addr);
12659 return build_va_arg_indirect_ref (addr);
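/* Worked example (illustrative, -m64): for va_arg (ap, int) the
   "other type" branch above uses size = 4, n_reg = 1, sav_scale = 8
   and sav_ofs = 2 * UNITS_PER_LONG + (UNITS_PER_LONG - 4) = 20, so a
   register argument is read right-justified from its 8-byte slot at
   __reg_save_area + 20 + __gpr * 8, while a stack argument is read
   from __overflow_arg_area + 4.  */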
12662 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12663 expanders.
12664 DEST - Register location where CC will be stored.
12665 TDB - Pointer to a 256 byte area where to store the transaction
12666 diagnostic block. NULL if TDB is not needed.
12667 RETRY - Retry count value. If non-NULL a retry loop for CC2
12668 is emitted.
12669 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12670 of the tbegin instruction pattern. */
12672 void
12673 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12675 rtx retry_plus_two = gen_reg_rtx (SImode);
12676 rtx retry_reg = gen_reg_rtx (SImode);
12677 rtx_code_label *retry_label = NULL;
12679 if (retry != NULL_RTX)
12681 emit_move_insn (retry_reg, retry);
12682 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12683 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12684 retry_label = gen_label_rtx ();
12685 emit_label (retry_label);
12688 if (clobber_fprs_p)
12690 if (TARGET_VX)
12691 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12692 tdb));
12693 else
12694 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12695 tdb));
12697 else
12698 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12699 tdb));
12701 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12702 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12703 CC_REGNUM)),
12704 UNSPEC_CC_TO_INT));
12705 if (retry != NULL_RTX)
12707 const int CC0 = 1 << 3;
12708 const int CC1 = 1 << 2;
12709 const int CC3 = 1 << 0;
12710 rtx jump;
12711 rtx count = gen_reg_rtx (SImode);
12712 rtx_code_label *leave_label = gen_label_rtx ();
12714 /* Exit for success and permanent failures. */
12715 jump = s390_emit_jump (leave_label,
12716 gen_rtx_EQ (VOIDmode,
12717 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12718 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12719 LABEL_NUSES (leave_label) = 1;
12721 /* CC2 - transient failure. Perform retry with ppa. */
12722 emit_move_insn (count, retry_plus_two);
12723 emit_insn (gen_subsi3 (count, count, retry_reg));
12724 emit_insn (gen_tx_assist (count));
12725 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12726 retry_reg,
12727 retry_reg));
12728 JUMP_LABEL (jump) = retry_label;
12729 LABEL_NUSES (retry_label) = 1;
12730 emit_label (leave_label);
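/* Rough sketch (illustrative) of the RTL emitted above when a retry
   count is given:

     retry_reg = retry + 1;  retry_plus_two = retry + 2;
   retry_label:
     CC = tbegin ...;
     if (CC is 0, 1 or 3)                      success or persistent failure
       goto leave_label;
     tx_assist (retry_plus_two - retry_reg);   CC2: transient failure
     if (--retry_reg != 0)
       goto retry_label;
   leave_label:
*/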
12735 /* Return the decl for the target specific builtin with the function
12736 code FCODE. */
12738 static tree
12739 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12741 if (fcode >= S390_BUILTIN_MAX)
12742 return error_mark_node;
12744 return s390_builtin_decls[fcode];
12747 /* We call mcount before the function prologue. So a profiled leaf
12748 function should stay a leaf function. */
12750 static bool
12751 s390_keep_leaf_when_profiled ()
12753 return true;
12756 /* Output assembly code for the trampoline template to
12757 stdio stream FILE.
12759 On S/390, we use gpr 1 internally in the trampoline code;
12760 gpr 0 is used to hold the static chain. */
12762 static void
12763 s390_asm_trampoline_template (FILE *file)
12765 rtx op[2];
12766 op[0] = gen_rtx_REG (Pmode, 0);
12767 op[1] = gen_rtx_REG (Pmode, 1);
12769 if (TARGET_64BIT)
12771 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12772 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12773 output_asm_insn ("br\t%1", op); /* 2 byte */
12774 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12776 else
12778 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12779 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12780 output_asm_insn ("br\t%1", op); /* 2 byte */
12781 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12785 /* Emit RTL insns to initialize the variable parts of a trampoline.
12786 FNADDR is an RTX for the address of the function's pure code.
12787 CXT is an RTX for the static chain value for the function. */
12789 static void
12790 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12792 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12793 rtx mem;
12795 emit_block_move (m_tramp, assemble_trampoline_template (),
12796 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12798 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12799 emit_move_insn (mem, cxt);
12800 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12801 emit_move_insn (mem, fnaddr);
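/* Resulting 64-bit trampoline layout (illustrative sketch derived from
   the template and the two stores above):

     offset  0:  basr %r1,0            %r1 = trampoline address + 2
     offset  2:  lmg  %r0,%r1,14(%r1)  load the two slots below
     offset  8:  br   %r1              branch to the target function
     offset 16:  static chain value       -> loaded into %r0
     offset 24:  target function address  -> loaded into %r1

   The 31-bit variant is analogous with 4-byte slots at offsets 8
   and 12.  */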
12804 /* Output assembler code to FILE to increment profiler label # LABELNO
12805 for profiling a function entry. */
12807 void
12808 s390_function_profiler (FILE *file, int labelno)
12810 rtx op[7];
12812 char label[128];
12813 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12815 fprintf (file, "# function profiler \n");
12817 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12818 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12819 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12821 op[2] = gen_rtx_REG (Pmode, 1);
12822 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12823 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12825 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12826 if (flag_pic)
12828 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12829 op[4] = gen_rtx_CONST (Pmode, op[4]);
12832 if (TARGET_64BIT)
12834 output_asm_insn ("stg\t%0,%1", op);
12835 output_asm_insn ("larl\t%2,%3", op);
12836 output_asm_insn ("brasl\t%0,%4", op);
12837 output_asm_insn ("lg\t%0,%1", op);
12839 else if (TARGET_CPU_ZARCH)
12841 output_asm_insn ("st\t%0,%1", op);
12842 output_asm_insn ("larl\t%2,%3", op);
12843 output_asm_insn ("brasl\t%0,%4", op);
12844 output_asm_insn ("l\t%0,%1", op);
12846 else if (!flag_pic)
12848 op[6] = gen_label_rtx ();
12850 output_asm_insn ("st\t%0,%1", op);
12851 output_asm_insn ("bras\t%2,%l6", op);
12852 output_asm_insn (".long\t%4", op);
12853 output_asm_insn (".long\t%3", op);
12854 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12855 output_asm_insn ("l\t%0,0(%2)", op);
12856 output_asm_insn ("l\t%2,4(%2)", op);
12857 output_asm_insn ("basr\t%0,%0", op);
12858 output_asm_insn ("l\t%0,%1", op);
12860 else
12862 op[5] = gen_label_rtx ();
12863 op[6] = gen_label_rtx ();
12865 output_asm_insn ("st\t%0,%1", op);
12866 output_asm_insn ("bras\t%2,%l6", op);
12867 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12868 output_asm_insn (".long\t%4-%l5", op);
12869 output_asm_insn (".long\t%3-%l5", op);
12870 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12871 output_asm_insn ("lr\t%0,%2", op);
12872 output_asm_insn ("a\t%0,0(%2)", op);
12873 output_asm_insn ("a\t%2,4(%2)", op);
12874 output_asm_insn ("basr\t%0,%0", op);
12875 output_asm_insn ("l\t%0,%1", op);
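/* For TARGET_64BIT the sequence emitted above looks roughly like
   (illustrative, non-PIC):

     stg   %r14,8(%r15)     save the return address register
     larl  %r1,.LP0         address of the profile label
     brasl %r14,_mcount     call the profiling routine
     lg    %r14,8(%r15)     restore the return address register
*/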
12879 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12880 into its SYMBOL_REF_FLAGS. */
12882 static void
12883 s390_encode_section_info (tree decl, rtx rtl, int first)
12885 default_encode_section_info (decl, rtl, first);
12887 if (TREE_CODE (decl) == VAR_DECL)
12889 /* Store the alignment to be able to check if we can use
12890 a larl/load-relative instruction. We only handle the cases
12891 that can go wrong (i.e. no FUNC_DECLs). */
12892 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12893 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12894 else if (DECL_ALIGN (decl) % 32)
12895 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12896 else if (DECL_ALIGN (decl) % 64)
12897 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12900 /* Literal pool references don't have a decl so they are handled
12901 differently here. We rely on the information in the MEM_ALIGN
12902 entry to decide upon the alignment. */
12903 if (MEM_P (rtl)
12904 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12905 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12907 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12908 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12909 else if (MEM_ALIGN (rtl) % 32)
12910 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12911 else if (MEM_ALIGN (rtl) % 64)
12912 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
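/* Example (illustrative): DECL_ALIGN and MEM_ALIGN are measured in
   bits, so a variable aligned to 4 bytes (DECL_ALIGN == 32) takes the
   last branch above and gets SYMBOL_FLAG_SET_NOTALIGN8, telling later
   code that it cannot be addressed with 8-byte load-relative
   instructions.  */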
12916 /* Output thunk to FILE that implements a C++ virtual function call (with
12917 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12918 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12919 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12920 relative to the resulting this pointer. */
12922 static void
12923 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12924 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12925 tree function)
12927 rtx op[10];
12928 int nonlocal = 0;
12930 /* Make sure unwind info is emitted for the thunk if needed. */
12931 final_start_function (emit_barrier (), file, 1);
12933 /* Operand 0 is the target function. */
12934 op[0] = XEXP (DECL_RTL (function), 0);
12935 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12937 nonlocal = 1;
12938 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12939 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12940 op[0] = gen_rtx_CONST (Pmode, op[0]);
12943 /* Operand 1 is the 'this' pointer. */
12944 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12945 op[1] = gen_rtx_REG (Pmode, 3);
12946 else
12947 op[1] = gen_rtx_REG (Pmode, 2);
12949 /* Operand 2 is the delta. */
12950 op[2] = GEN_INT (delta);
12952 /* Operand 3 is the vcall_offset. */
12953 op[3] = GEN_INT (vcall_offset);
12955 /* Operand 4 is the temporary register. */
12956 op[4] = gen_rtx_REG (Pmode, 1);
12958 /* Operands 5 to 8 can be used as labels. */
12959 op[5] = NULL_RTX;
12960 op[6] = NULL_RTX;
12961 op[7] = NULL_RTX;
12962 op[8] = NULL_RTX;
12964 /* Operand 9 can be used for temporary register. */
12965 op[9] = NULL_RTX;
12967 /* Generate code. */
12968 if (TARGET_64BIT)
12970 /* Setup literal pool pointer if required. */
12971 if ((!DISP_IN_RANGE (delta)
12972 && !CONST_OK_FOR_K (delta)
12973 && !CONST_OK_FOR_Os (delta))
12974 || (!DISP_IN_RANGE (vcall_offset)
12975 && !CONST_OK_FOR_K (vcall_offset)
12976 && !CONST_OK_FOR_Os (vcall_offset)))
12978 op[5] = gen_label_rtx ();
12979 output_asm_insn ("larl\t%4,%5", op);
12982 /* Add DELTA to this pointer. */
12983 if (delta)
12985 if (CONST_OK_FOR_J (delta))
12986 output_asm_insn ("la\t%1,%2(%1)", op);
12987 else if (DISP_IN_RANGE (delta))
12988 output_asm_insn ("lay\t%1,%2(%1)", op);
12989 else if (CONST_OK_FOR_K (delta))
12990 output_asm_insn ("aghi\t%1,%2", op);
12991 else if (CONST_OK_FOR_Os (delta))
12992 output_asm_insn ("agfi\t%1,%2", op);
12993 else
12995 op[6] = gen_label_rtx ();
12996 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13000 /* Perform vcall adjustment. */
13001 if (vcall_offset)
13003 if (DISP_IN_RANGE (vcall_offset))
13005 output_asm_insn ("lg\t%4,0(%1)", op);
13006 output_asm_insn ("ag\t%1,%3(%4)", op);
13008 else if (CONST_OK_FOR_K (vcall_offset))
13010 output_asm_insn ("lghi\t%4,%3", op);
13011 output_asm_insn ("ag\t%4,0(%1)", op);
13012 output_asm_insn ("ag\t%1,0(%4)", op);
13014 else if (CONST_OK_FOR_Os (vcall_offset))
13016 output_asm_insn ("lgfi\t%4,%3", op);
13017 output_asm_insn ("ag\t%4,0(%1)", op);
13018 output_asm_insn ("ag\t%1,0(%4)", op);
13020 else
13022 op[7] = gen_label_rtx ();
13023 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13024 output_asm_insn ("ag\t%4,0(%1)", op);
13025 output_asm_insn ("ag\t%1,0(%4)", op);
13029 /* Jump to target. */
13030 output_asm_insn ("jg\t%0", op);
13032 /* Output literal pool if required. */
13033 if (op[5])
13035 output_asm_insn (".align\t4", op);
13036 targetm.asm_out.internal_label (file, "L",
13037 CODE_LABEL_NUMBER (op[5]));
13039 if (op[6])
13041 targetm.asm_out.internal_label (file, "L",
13042 CODE_LABEL_NUMBER (op[6]));
13043 output_asm_insn (".long\t%2", op);
13045 if (op[7])
13047 targetm.asm_out.internal_label (file, "L",
13048 CODE_LABEL_NUMBER (op[7]));
13049 output_asm_insn (".long\t%3", op);
13052 else
13054 /* Setup base pointer if required. */
13055 if (!vcall_offset
13056 || (!DISP_IN_RANGE (delta)
13057 && !CONST_OK_FOR_K (delta)
13058 && !CONST_OK_FOR_Os (delta))
13059 || (!DISP_IN_RANGE (delta)
13060 && !CONST_OK_FOR_K (vcall_offset)
13061 && !CONST_OK_FOR_Os (vcall_offset)))
13063 op[5] = gen_label_rtx ();
13064 output_asm_insn ("basr\t%4,0", op);
13065 targetm.asm_out.internal_label (file, "L",
13066 CODE_LABEL_NUMBER (op[5]));
13069 /* Add DELTA to this pointer. */
13070 if (delta)
13072 if (CONST_OK_FOR_J (delta))
13073 output_asm_insn ("la\t%1,%2(%1)", op);
13074 else if (DISP_IN_RANGE (delta))
13075 output_asm_insn ("lay\t%1,%2(%1)", op);
13076 else if (CONST_OK_FOR_K (delta))
13077 output_asm_insn ("ahi\t%1,%2", op);
13078 else if (CONST_OK_FOR_Os (delta))
13079 output_asm_insn ("afi\t%1,%2", op);
13080 else
13082 op[6] = gen_label_rtx ();
13083 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13087 /* Perform vcall adjustment. */
13088 if (vcall_offset)
13090 if (CONST_OK_FOR_J (vcall_offset))
13092 output_asm_insn ("l\t%4,0(%1)", op);
13093 output_asm_insn ("a\t%1,%3(%4)", op);
13095 else if (DISP_IN_RANGE (vcall_offset))
13097 output_asm_insn ("l\t%4,0(%1)", op);
13098 output_asm_insn ("ay\t%1,%3(%4)", op);
13100 else if (CONST_OK_FOR_K (vcall_offset))
13102 output_asm_insn ("lhi\t%4,%3", op);
13103 output_asm_insn ("a\t%4,0(%1)", op);
13104 output_asm_insn ("a\t%1,0(%4)", op);
13106 else if (CONST_OK_FOR_Os (vcall_offset))
13108 output_asm_insn ("iilf\t%4,%3", op);
13109 output_asm_insn ("a\t%4,0(%1)", op);
13110 output_asm_insn ("a\t%1,0(%4)", op);
13112 else
13114 op[7] = gen_label_rtx ();
13115 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13116 output_asm_insn ("a\t%4,0(%1)", op);
13117 output_asm_insn ("a\t%1,0(%4)", op);
13120 /* We had to clobber the base pointer register.
13121 Re-setup the base pointer (with a different base). */
13122 op[5] = gen_label_rtx ();
13123 output_asm_insn ("basr\t%4,0", op);
13124 targetm.asm_out.internal_label (file, "L",
13125 CODE_LABEL_NUMBER (op[5]));
13128 /* Jump to target. */
13129 op[8] = gen_label_rtx ();
13131 if (!flag_pic)
13132 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13133 else if (!nonlocal)
13134 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13135 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13136 else if (flag_pic == 1)
13138 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13139 output_asm_insn ("l\t%4,%0(%4)", op);
13141 else if (flag_pic == 2)
13143 op[9] = gen_rtx_REG (Pmode, 0);
13144 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13145 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13146 output_asm_insn ("ar\t%4,%9", op);
13147 output_asm_insn ("l\t%4,0(%4)", op);
13150 output_asm_insn ("br\t%4", op);
13152 /* Output literal pool. */
13153 output_asm_insn (".align\t4", op);
13155 if (nonlocal && flag_pic == 2)
13156 output_asm_insn (".long\t%0", op);
13157 if (nonlocal)
13159 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13160 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13163 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13164 if (!flag_pic)
13165 output_asm_insn (".long\t%0", op);
13166 else
13167 output_asm_insn (".long\t%0-%5", op);
13169 if (op[6])
13171 targetm.asm_out.internal_label (file, "L",
13172 CODE_LABEL_NUMBER (op[6]));
13173 output_asm_insn (".long\t%2", op);
13175 if (op[7])
13177 targetm.asm_out.internal_label (file, "L",
13178 CODE_LABEL_NUMBER (op[7]));
13179 output_asm_insn (".long\t%3", op);
13182 final_end_function ();
13185 static bool
13186 s390_valid_pointer_mode (scalar_int_mode mode)
13188 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13191 /* Checks whether the given CALL_EXPR would use a caller
13192 saved register. This is used to decide whether sibling call
13193 optimization could be performed on the respective function
13194 call. */
13196 static bool
13197 s390_call_saved_register_used (tree call_expr)
13199 CUMULATIVE_ARGS cum_v;
13200 cumulative_args_t cum;
13201 tree parameter;
13202 machine_mode mode;
13203 tree type;
13204 rtx parm_rtx;
13205 int reg, i;
13207 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13208 cum = pack_cumulative_args (&cum_v);
13210 for (i = 0; i < call_expr_nargs (call_expr); i++)
13212 parameter = CALL_EXPR_ARG (call_expr, i);
13213 gcc_assert (parameter);
13215 /* For an undeclared variable passed as parameter we will get
13216 an ERROR_MARK node here. */
13217 if (TREE_CODE (parameter) == ERROR_MARK)
13218 return true;
13220 type = TREE_TYPE (parameter);
13221 gcc_assert (type);
13223 mode = TYPE_MODE (type);
13224 gcc_assert (mode);
13226 /* We assume that in the target function all parameters are
13227 named. This only has an impact on vector argument register
13228 usage, none of which is call-saved. */
13229 if (pass_by_reference (&cum_v, mode, type, true))
13231 mode = Pmode;
13232 type = build_pointer_type (type);
13235 parm_rtx = s390_function_arg (cum, mode, type, true);
13237 s390_function_arg_advance (cum, mode, type, true);
13239 if (!parm_rtx)
13240 continue;
13242 if (REG_P (parm_rtx))
13244 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13245 if (!call_used_regs[reg + REGNO (parm_rtx)])
13246 return true;
13249 if (GET_CODE (parm_rtx) == PARALLEL)
13251 int i;
13253 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13255 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13257 gcc_assert (REG_P (r));
13259 for (reg = 0; reg < REG_NREGS (r); reg++)
13260 if (!call_used_regs[reg + REGNO (r)])
13261 return true;
13266 return false;
13269 /* Return true if the given call expression can be
13270 turned into a sibling call.
13271 DECL holds the declaration of the function to be called whereas
13272 EXP is the call expression itself. */
13274 static bool
13275 s390_function_ok_for_sibcall (tree decl, tree exp)
13277 /* The TPF epilogue uses register 1. */
13278 if (TARGET_TPF_PROFILING)
13279 return false;
13281 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13282 which would have to be restored before the sibcall. */
13283 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13284 return false;
13286 /* Register 6 on s390 is available as an argument register but is
13287 unfortunately call-saved. This makes functions needing this register
13288 for arguments not suitable for sibcalls. */
13289 return !s390_call_saved_register_used (exp);
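/* Illustrative example: for a call such as

     extern void callee (long a, long b, long c, long d, long e);

   the fifth argument is passed in %r6.  Since %r6 is call-saved,
   s390_call_saved_register_used returns true and the call is not
   eligible for sibcall optimization.  */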
13292 /* Return the fixed registers used for condition codes. */
13294 static bool
13295 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13297 *p1 = CC_REGNUM;
13298 *p2 = INVALID_REGNUM;
13300 return true;
13303 /* This function is used by the call expanders of the machine description.
13304 It emits the call insn itself together with the necessary operations
13305 to adjust the target address and returns the emitted insn.
13306 ADDR_LOCATION is the target address rtx
13307 TLS_CALL the location of the thread-local symbol
13308 RESULT_REG the register where the result of the call should be stored
13309 RETADDR_REG the register where the return address should be stored
13310 If this parameter is NULL_RTX the call is considered
13311 to be a sibling call. */
13313 rtx_insn *
13314 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13315 rtx retaddr_reg)
13317 bool plt_call = false;
13318 rtx_insn *insn;
13319 rtx call;
13320 rtx clobber;
13321 rtvec vec;
13323 /* Direct function calls need special treatment. */
13324 if (GET_CODE (addr_location) == SYMBOL_REF)
13326 /* When calling a global routine in PIC mode, we must
13327 replace the symbol itself with the PLT stub. */
13328 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13330 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13332 addr_location = gen_rtx_UNSPEC (Pmode,
13333 gen_rtvec (1, addr_location),
13334 UNSPEC_PLT);
13335 addr_location = gen_rtx_CONST (Pmode, addr_location);
13336 plt_call = true;
13338 else
13339 /* For -fpic code the PLT entries might use r12 which is
13340 call-saved. Therefore we cannot do a sibcall when
13341 calling directly using a symbol ref. When reaching
13342 this point we decided (in s390_function_ok_for_sibcall)
13343 to do a sibcall for a function pointer but one of the
13344 optimizers was able to get rid of the function pointer
13345 by propagating the symbol ref into the call. This
13346 optimization is illegal for S/390 so we turn the direct
13347 call into an indirect call again. */
13348 addr_location = force_reg (Pmode, addr_location);
13351 /* Unless we can use the bras(l) insn, force the
13352 routine address into a register. */
13353 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13355 if (flag_pic)
13356 addr_location = legitimize_pic_address (addr_location, 0);
13357 else
13358 addr_location = force_reg (Pmode, addr_location);
13362 /* If it is already an indirect call or the code above moved the
13363 SYMBOL_REF to somewhere else, make sure the address can be found in
13364 register 1. */
13365 if (retaddr_reg == NULL_RTX
13366 && GET_CODE (addr_location) != SYMBOL_REF
13367 && !plt_call)
13369 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13370 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13373 addr_location = gen_rtx_MEM (QImode, addr_location);
13374 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13376 if (result_reg != NULL_RTX)
13377 call = gen_rtx_SET (result_reg, call);
13379 if (retaddr_reg != NULL_RTX)
13381 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13383 if (tls_call != NULL_RTX)
13384 vec = gen_rtvec (3, call, clobber,
13385 gen_rtx_USE (VOIDmode, tls_call));
13386 else
13387 vec = gen_rtvec (2, call, clobber);
13389 call = gen_rtx_PARALLEL (VOIDmode, vec);
13392 insn = emit_call_insn (call);
13394 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13395 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13397 /* s390_function_ok_for_sibcall should
13398 have denied sibcalls in this case. */
13399 gcc_assert (retaddr_reg != NULL_RTX);
13400 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13402 return insn;
13405 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13407 static void
13408 s390_conditional_register_usage (void)
13410 int i;
13412 if (flag_pic)
13414 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13415 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13417 if (TARGET_CPU_ZARCH)
13419 fixed_regs[BASE_REGNUM] = 0;
13420 call_used_regs[BASE_REGNUM] = 0;
13421 fixed_regs[RETURN_REGNUM] = 0;
13422 call_used_regs[RETURN_REGNUM] = 0;
13424 if (TARGET_64BIT)
13426 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13427 call_used_regs[i] = call_really_used_regs[i] = 0;
13429 else
13431 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13432 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13435 if (TARGET_SOFT_FLOAT)
13437 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13438 call_used_regs[i] = fixed_regs[i] = 1;
13441 /* Disable v16 - v31 for non-vector target. */
13442 if (!TARGET_VX)
13444 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13445 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13449 /* Corresponding function to eh_return expander. */
13451 static GTY(()) rtx s390_tpf_eh_return_symbol;
13452 void
13453 s390_emit_tpf_eh_return (rtx target)
13455 rtx_insn *insn;
13456 rtx reg, orig_ra;
13458 if (!s390_tpf_eh_return_symbol)
13459 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13461 reg = gen_rtx_REG (Pmode, 2);
13462 orig_ra = gen_rtx_REG (Pmode, 3);
13464 emit_move_insn (reg, target);
13465 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13466 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13467 gen_rtx_REG (Pmode, RETURN_REGNUM));
13468 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13469 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13471 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13474 /* Rework the prologue/epilogue to avoid saving/restoring
13475 registers unnecessarily. */
13477 static void
13478 s390_optimize_prologue (void)
13480 rtx_insn *insn, *new_insn, *next_insn;
13482 /* Do a final recompute of the frame-related data. */
13483 s390_optimize_register_info ();
13485 /* If all special registers are in fact used, there's nothing we
13486 can do, so no point in walking the insn list. */
13488 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13489 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13490 && (TARGET_CPU_ZARCH
13491 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13492 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13493 return;
13495 /* Search for prologue/epilogue insns and replace them. */
13497 for (insn = get_insns (); insn; insn = next_insn)
13499 int first, last, off;
13500 rtx set, base, offset;
13501 rtx pat;
13503 next_insn = NEXT_INSN (insn);
13505 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13506 continue;
13508 pat = PATTERN (insn);
13510 /* Remove ldgr/lgdr instructions used for saving and restoring
13511 GPRs if possible. */
13512 if (TARGET_Z10)
13514 rtx tmp_pat = pat;
13516 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13517 tmp_pat = XVECEXP (pat, 0, 0);
13519 if (GET_CODE (tmp_pat) == SET
13520 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13521 && REG_P (SET_SRC (tmp_pat))
13522 && REG_P (SET_DEST (tmp_pat)))
13524 int src_regno = REGNO (SET_SRC (tmp_pat));
13525 int dest_regno = REGNO (SET_DEST (tmp_pat));
13526 int gpr_regno;
13527 int fpr_regno;
13529 if (!((GENERAL_REGNO_P (src_regno)
13530 && FP_REGNO_P (dest_regno))
13531 || (FP_REGNO_P (src_regno)
13532 && GENERAL_REGNO_P (dest_regno))))
13533 continue;
13535 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13536 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13538 /* GPR must be call-saved, FPR must be call-clobbered. */
13539 if (!call_really_used_regs[fpr_regno]
13540 || call_really_used_regs[gpr_regno])
13541 continue;
13543 /* It must not happen that what we once saved in an FPR now
13544 needs a stack slot. */
13545 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13547 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13549 remove_insn (insn);
13550 continue;
13555 if (GET_CODE (pat) == PARALLEL
13556 && store_multiple_operation (pat, VOIDmode))
13558 set = XVECEXP (pat, 0, 0);
13559 first = REGNO (SET_SRC (set));
13560 last = first + XVECLEN (pat, 0) - 1;
13561 offset = const0_rtx;
13562 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13563 off = INTVAL (offset);
13565 if (GET_CODE (base) != REG || off < 0)
13566 continue;
13567 if (cfun_frame_layout.first_save_gpr != -1
13568 && (cfun_frame_layout.first_save_gpr < first
13569 || cfun_frame_layout.last_save_gpr > last))
13570 continue;
13571 if (REGNO (base) != STACK_POINTER_REGNUM
13572 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13573 continue;
13574 if (first > BASE_REGNUM || last < BASE_REGNUM)
13575 continue;
13577 if (cfun_frame_layout.first_save_gpr != -1)
13579 rtx s_pat = save_gprs (base,
13580 off + (cfun_frame_layout.first_save_gpr
13581 - first) * UNITS_PER_LONG,
13582 cfun_frame_layout.first_save_gpr,
13583 cfun_frame_layout.last_save_gpr);
13584 new_insn = emit_insn_before (s_pat, insn);
13585 INSN_ADDRESSES_NEW (new_insn, -1);
13588 remove_insn (insn);
13589 continue;
13592 if (cfun_frame_layout.first_save_gpr == -1
13593 && GET_CODE (pat) == SET
13594 && GENERAL_REG_P (SET_SRC (pat))
13595 && GET_CODE (SET_DEST (pat)) == MEM)
13597 set = pat;
13598 first = REGNO (SET_SRC (set));
13599 offset = const0_rtx;
13600 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13601 off = INTVAL (offset);
13603 if (GET_CODE (base) != REG || off < 0)
13604 continue;
13605 if (REGNO (base) != STACK_POINTER_REGNUM
13606 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13607 continue;
13609 remove_insn (insn);
13610 continue;
13613 if (GET_CODE (pat) == PARALLEL
13614 && load_multiple_operation (pat, VOIDmode))
13616 set = XVECEXP (pat, 0, 0);
13617 first = REGNO (SET_DEST (set));
13618 last = first + XVECLEN (pat, 0) - 1;
13619 offset = const0_rtx;
13620 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13621 off = INTVAL (offset);
13623 if (GET_CODE (base) != REG || off < 0)
13624 continue;
13626 if (cfun_frame_layout.first_restore_gpr != -1
13627 && (cfun_frame_layout.first_restore_gpr < first
13628 || cfun_frame_layout.last_restore_gpr > last))
13629 continue;
13630 if (REGNO (base) != STACK_POINTER_REGNUM
13631 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13632 continue;
13633 if (first > BASE_REGNUM || last < BASE_REGNUM)
13634 continue;
13636 if (cfun_frame_layout.first_restore_gpr != -1)
13638 rtx rpat = restore_gprs (base,
13639 off + (cfun_frame_layout.first_restore_gpr
13640 - first) * UNITS_PER_LONG,
13641 cfun_frame_layout.first_restore_gpr,
13642 cfun_frame_layout.last_restore_gpr);
13644 /* Remove REG_CFA_RESTOREs for registers that we no
13645 longer need to save. */
13646 REG_NOTES (rpat) = REG_NOTES (insn);
13647 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13648 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13649 && ((int) REGNO (XEXP (*ptr, 0))
13650 < cfun_frame_layout.first_restore_gpr))
13651 *ptr = XEXP (*ptr, 1);
13652 else
13653 ptr = &XEXP (*ptr, 1);
13654 new_insn = emit_insn_before (rpat, insn);
13655 RTX_FRAME_RELATED_P (new_insn) = 1;
13656 INSN_ADDRESSES_NEW (new_insn, -1);
13659 remove_insn (insn);
13660 continue;
13663 if (cfun_frame_layout.first_restore_gpr == -1
13664 && GET_CODE (pat) == SET
13665 && GENERAL_REG_P (SET_DEST (pat))
13666 && GET_CODE (SET_SRC (pat)) == MEM)
13668 set = pat;
13669 first = REGNO (SET_DEST (set));
13670 offset = const0_rtx;
13671 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13672 off = INTVAL (offset);
13674 if (GET_CODE (base) != REG || off < 0)
13675 continue;
13677 if (REGNO (base) != STACK_POINTER_REGNUM
13678 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13679 continue;
13681 remove_insn (insn);
13682 continue;
13687 /* On z10 and later the dynamic branch prediction must see the
13688 backward jump within a certain window. If not, it falls back to
13689 the static prediction. This function rearranges the loop backward
13690 branch in a way which makes the static prediction always correct.
13691 The function returns true if it added an instruction. */
13692 static bool
13693 s390_fix_long_loop_prediction (rtx_insn *insn)
13695 rtx set = single_set (insn);
13696 rtx code_label, label_ref;
13697 rtx_insn *uncond_jump;
13698 rtx_insn *cur_insn;
13699 rtx tmp;
13700 int distance;
13702 /* This will exclude branch on count and branch on index patterns
13703 since these are correctly statically predicted. */
13704 if (!set
13705 || SET_DEST (set) != pc_rtx
13706 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13707 return false;
13709 /* Skip conditional returns. */
13710 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13711 && XEXP (SET_SRC (set), 2) == pc_rtx)
13712 return false;
13714 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13715 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13717 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13719 code_label = XEXP (label_ref, 0);
13721 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13722 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13723 || (INSN_ADDRESSES (INSN_UID (insn))
13724 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13725 return false;
13727 for (distance = 0, cur_insn = PREV_INSN (insn);
13728 distance < PREDICT_DISTANCE - 6;
13729 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13730 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13731 return false;
13733 rtx_code_label *new_label = gen_label_rtx ();
13734 uncond_jump = emit_jump_insn_after (
13735 gen_rtx_SET (pc_rtx,
13736 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13737 insn);
13738 emit_label_after (new_label, uncond_jump);
13740 tmp = XEXP (SET_SRC (set), 1);
13741 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13742 XEXP (SET_SRC (set), 2) = tmp;
13743 INSN_CODE (insn) = -1;
13745 XEXP (label_ref, 0) = new_label;
13746 JUMP_LABEL (insn) = new_label;
13747 JUMP_LABEL (uncond_jump) = code_label;
13749 return true;
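/* Sketch (illustrative) of the rewrite performed above: a conditional
   backward branch

     if (cond) goto loop_head;

   whose target is too far away for the dynamic predictor becomes

     if (cond is false) goto new_label;   sense inverted by swapping the
     goto loop_head;                      IF_THEN_ELSE arms; the backward
   new_label:                             jump is now unconditional and
                                          statically predicted as taken
*/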
13752 /* Returns 1 if INSN reads the value of REG for purposes not related
13753 to addressing of memory, and 0 otherwise. */
13754 static int
13755 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13757 return reg_referenced_p (reg, PATTERN (insn))
13758 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13761 /* Starting from INSN find_cond_jump looks downwards in the insn
13762 stream for a single jump insn which is the last user of the
13763 condition code set in INSN. */
13764 static rtx_insn *
13765 find_cond_jump (rtx_insn *insn)
13767 for (; insn; insn = NEXT_INSN (insn))
13769 rtx ite, cc;
13771 if (LABEL_P (insn))
13772 break;
13774 if (!JUMP_P (insn))
13776 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13777 break;
13778 continue;
13781 /* This will be triggered by a return. */
13782 if (GET_CODE (PATTERN (insn)) != SET)
13783 break;
13785 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13786 ite = SET_SRC (PATTERN (insn));
13788 if (GET_CODE (ite) != IF_THEN_ELSE)
13789 break;
13791 cc = XEXP (XEXP (ite, 0), 0);
13792 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13793 break;
13795 if (find_reg_note (insn, REG_DEAD, cc))
13796 return insn;
13797 break;
13800 return NULL;
13803 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13804 the semantics does not change. If NULL_RTX is passed as COND the
13805 function tries to find the conditional jump starting with INSN. */
13806 static void
13807 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13809 rtx tmp = *op0;
13811 if (cond == NULL_RTX)
13813 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13814 rtx set = jump ? single_set (jump) : NULL_RTX;
13816 if (set == NULL_RTX)
13817 return;
13819 cond = XEXP (SET_SRC (set), 0);
13822 *op0 = *op1;
13823 *op1 = tmp;
13824 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13827 /* On z10, instructions of the compare-and-branch family have the
13828 property of accessing the register occurring as the second operand with
13829 its bits complemented. If such a compare is grouped with a second
13830 instruction that accesses the same register non-complemented, and
13831 if that register's value is delivered via a bypass, then the
13832 pipeline recycles, thereby causing significant performance decline.
13833 This function locates such situations and exchanges the two
13834 operands of the compare. The function returns true whenever it
13835 added an insn. */
13836 static bool
13837 s390_z10_optimize_cmp (rtx_insn *insn)
13839 rtx_insn *prev_insn, *next_insn;
13840 bool insn_added_p = false;
13841 rtx cond, *op0, *op1;
13843 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13845 /* Handle compare and branch and branch on count
13846 instructions. */
13847 rtx pattern = single_set (insn);
13849 if (!pattern
13850 || SET_DEST (pattern) != pc_rtx
13851 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13852 return false;
13854 cond = XEXP (SET_SRC (pattern), 0);
13855 op0 = &XEXP (cond, 0);
13856 op1 = &XEXP (cond, 1);
13858 else if (GET_CODE (PATTERN (insn)) == SET)
13860 rtx src, dest;
13862 /* Handle normal compare instructions. */
13863 src = SET_SRC (PATTERN (insn));
13864 dest = SET_DEST (PATTERN (insn));
13866 if (!REG_P (dest)
13867 || !CC_REGNO_P (REGNO (dest))
13868 || GET_CODE (src) != COMPARE)
13869 return false;
13871 /* s390_swap_cmp will try to find the conditional
13872 jump when passing NULL_RTX as condition. */
13873 cond = NULL_RTX;
13874 op0 = &XEXP (src, 0);
13875 op1 = &XEXP (src, 1);
13877 else
13878 return false;
13880 if (!REG_P (*op0) || !REG_P (*op1))
13881 return false;
13883 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13884 return false;
13886 /* Swap the COMPARE arguments and its mask if there is a
13887 conflicting access in the previous insn. */
13888 prev_insn = prev_active_insn (insn);
13889 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13890 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13891 s390_swap_cmp (cond, op0, op1, insn);
13893 /* Check if there is a conflict with the next insn. If there
13894 was no conflict with the previous insn, then swap the
13895 COMPARE arguments and its mask. If we already swapped
13896 the operands, or if swapping them would cause a conflict
13897 with the previous insn, issue a NOP after the COMPARE in
13898 order to separate the two instructions. */
13899 next_insn = next_active_insn (insn);
13900 if (next_insn != NULL_RTX && INSN_P (next_insn)
13901 && s390_non_addr_reg_read_p (*op1, next_insn))
13903 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13904 && s390_non_addr_reg_read_p (*op0, prev_insn))
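/* Separate the two conflicting insns by a nop.  Which of the two nop
   flavors (the "nop" and "nop1" patterns) gets emitted depends on
   whether the second operand is r0, presumably so that the filler insn
   does not itself access that register.  */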
13906 if (REGNO (*op1) == 0)
13907 emit_insn_after (gen_nop1 (), insn);
13908 else
13909 emit_insn_after (gen_nop (), insn);
13910 insn_added_p = true;
13912 else
13913 s390_swap_cmp (cond, op0, op1, insn);
13915 return insn_added_p;
13918 /* Number of INSNs to be scanned backward in the last BB of the loop
13919 and forward in the first BB of the loop. This usually should be a
13920 bit more than the number of INSNs which could go into one
13921 group. */
13922 #define S390_OSC_SCAN_INSN_NUM 5
13924 /* Scan LOOP for static OSC collisions and return true if an osc_break
13925 should be issued for this loop. */
13926 static bool
13927 s390_adjust_loop_scan_osc (struct loop* loop)
13930 HARD_REG_SET modregs, newregs;
13931 rtx_insn *insn, *store_insn = NULL;
13932 rtx set;
13933 struct s390_address addr_store, addr_load;
13934 subrtx_iterator::array_type array;
13935 int insn_count;
13937 CLEAR_HARD_REG_SET (modregs);
13939 insn_count = 0;
13940 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13942 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13943 continue;
13945 insn_count++;
13946 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13947 return false;
13949 find_all_hard_reg_sets (insn, &newregs, true);
13950 IOR_HARD_REG_SET (modregs, newregs);
13952 set = single_set (insn);
13953 if (!set)
13954 continue;
13956 if (MEM_P (SET_DEST (set))
13957 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13959 store_insn = insn;
13960 break;
13964 if (store_insn == NULL_RTX)
13965 return false;
13967 insn_count = 0;
13968 FOR_BB_INSNS (loop->header, insn)
13970 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13971 continue;
13973 if (insn == store_insn)
13974 return false;
13976 insn_count++;
13977 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13978 return false;
13980 find_all_hard_reg_sets (insn, &newregs, true);
13981 IOR_HARD_REG_SET (modregs, newregs);
13983 set = single_set (insn);
13984 if (!set)
13985 continue;
13987 /* An intermediate store disrupts static OSC checking
13988 anyway. */
13989 if (MEM_P (SET_DEST (set))
13990 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13991 return false;
13993 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13994 if (MEM_P (*iter)
13995 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13996 && rtx_equal_p (addr_load.base, addr_store.base)
13997 && rtx_equal_p (addr_load.indx, addr_store.indx)
13998 && rtx_equal_p (addr_load.disp, addr_store.disp))
14000 if ((addr_load.base != NULL_RTX
14001 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14002 || (addr_load.indx != NULL_RTX
14003 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14004 return true;
14007 return false;
14010 /* Look for adjustments which can be done on simple innermost
14011 loops. */
14012 static void
14013 s390_adjust_loops ()
14015 struct loop *loop = NULL;
14017 df_analyze ();
14018 compute_bb_for_insn ();
14020 /* Find the loops. */
14021 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14023 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14025 if (dump_file)
14027 flow_loop_dump (loop, dump_file, NULL, 0);
14028 fprintf (dump_file, ";; OSC loop scan Loop: ");
14030 if (loop->latch == NULL
14031 || pc_set (BB_END (loop->latch)) == NULL_RTX
14032 || !s390_adjust_loop_scan_osc (loop))
14034 if (dump_file)
14036 if (loop->latch == NULL)
14037 fprintf (dump_file, " multiple backward jumps\n");
14038 else
14040 fprintf (dump_file, " header insn: %d latch insn: %d ",
14041 INSN_UID (BB_HEAD (loop->header)),
14042 INSN_UID (BB_END (loop->latch)));
14043 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14044 fprintf (dump_file, " loop does not end with jump\n");
14045 else
14046 fprintf (dump_file, " not instrumented\n");
14050 else
14052 rtx_insn *new_insn;
14054 if (dump_file)
14055 fprintf (dump_file, " adding OSC break insn: ");
14056 new_insn = emit_insn_before (gen_osc_break (),
14057 BB_END (loop->latch));
14058 INSN_ADDRESSES_NEW (new_insn, -1);
14062 loop_optimizer_finalize ();
14064 df_finish_pass (false);
14067 /* Perform machine-dependent processing. */
14069 static void
14070 s390_reorg (void)
14072 bool pool_overflow = false;
14073 int hw_before, hw_after;
14075 if (s390_tune == PROCESSOR_2964_Z13)
14076 s390_adjust_loops ();
14078 /* Make sure all splits have been performed; splits after
14079 machine_dependent_reorg might confuse insn length counts. */
14080 split_all_insns_noflow ();
14082 /* Install the main literal pool and the associated base
14083 register load insns.
14085 In addition, there are two problematic situations we need
14086 to correct:
14088 - the literal pool might be > 4096 bytes in size, so that
14089 some of its elements cannot be directly accessed
14091 - a branch target might be > 64K away from the branch, so that
14092 it is not possible to use a PC-relative instruction.
14094 To fix those, we split the single literal pool into multiple
14095 pool chunks, reloading the pool base register at various
14096 points throughout the function to ensure it always points to
14097 the pool chunk the following code expects, and / or replace
14098 PC-relative branches by absolute branches.
14100 However, the two problems are interdependent: splitting the
14101 literal pool can move a branch further away from its target,
14102 causing the 64K limit to overflow, and on the other hand,
14103 replacing a PC-relative branch by an absolute branch means
14104 we need to put the branch target address into the literal
14105 pool, possibly causing it to overflow.
14107 So, we loop trying to fix up both problems until we manage
14108 to satisfy both conditions at the same time. Note that the
14109 loop is guaranteed to terminate as every pass of the loop
14110 strictly decreases the total number of PC-relative branches
14111 in the function. (This is not completely true as there
14112 might be branch-over-pool insns introduced by chunkify_start.
14113 Those never need to be split however.) */
14115 for (;;)
14117 struct constant_pool *pool = NULL;
14119 /* Collect the literal pool. */
14120 if (!pool_overflow)
14122 pool = s390_mainpool_start ();
14123 if (!pool)
14124 pool_overflow = true;
14127 /* If literal pool overflowed, start to chunkify it. */
14128 if (pool_overflow)
14129 pool = s390_chunkify_start ();
14131 /* Split out-of-range branches. If this has created new
14132 literal pool entries, cancel current chunk list and
14133 recompute it. zSeries machines have large branch
14134 instructions, so we never need to split a branch. */
14135 if (!TARGET_CPU_ZARCH && s390_split_branches ())
14137 if (pool_overflow)
14138 s390_chunkify_cancel (pool);
14139 else
14140 s390_mainpool_cancel (pool);
14142 continue;
14145 /* If we made it up to here, both conditions are satisfied.
14146 Finish up literal pool related changes. */
14147 if (pool_overflow)
14148 s390_chunkify_finish (pool);
14149 else
14150 s390_mainpool_finish (pool);
14152 /* We're done splitting branches. */
14153 cfun->machine->split_branches_pending_p = false;
14154 break;
14157 /* Generate out-of-pool execute target insns. */
14158 if (TARGET_CPU_ZARCH)
14160 rtx_insn *insn, *target;
14161 rtx label;
14163 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14165 label = s390_execute_label (insn);
14166 if (!label)
14167 continue;
14169 gcc_assert (label != const0_rtx);
14171 target = emit_label (XEXP (label, 0));
14172 INSN_ADDRESSES_NEW (target, -1);
14174 target = emit_insn (s390_execute_target (insn));
14175 INSN_ADDRESSES_NEW (target, -1);
14179 /* Try to optimize prologue and epilogue further. */
14180 s390_optimize_prologue ();
14182 /* Walk over the insns and do some >=z10 specific changes. */
14183 if (s390_tune >= PROCESSOR_2097_Z10)
14185 rtx_insn *insn;
14186 bool insn_added_p = false;
14188 /* The insn lengths and addresses have to be up to date for the
14189 following manipulations. */
14190 shorten_branches (get_insns ());
14192 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14194 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14195 continue;
14197 if (JUMP_P (insn))
14198 insn_added_p |= s390_fix_long_loop_prediction (insn);
14200 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14201 || GET_CODE (PATTERN (insn)) == SET)
14202 && s390_tune == PROCESSOR_2097_Z10)
14203 insn_added_p |= s390_z10_optimize_cmp (insn);
14206 /* Adjust branches if we added new instructions. */
14207 if (insn_added_p)
14208 shorten_branches (get_insns ());
14211 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14212 if (hw_after > 0)
14214 rtx_insn *insn;
14216 /* Insert NOPs for hotpatching. */
14217 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14218 /* Emit NOPs
14219 1. inside the area covered by debug information to allow setting
14220 breakpoints at the NOPs,
14221 2. before any insn which results in an asm instruction,
14222 3. before in-function labels to avoid jumping to the NOPs, for
14223 example as part of a loop,
14224 4. before any barrier in case the function is completely empty
14225 (__builtin_unreachable ()) and has neither internal labels nor
14226 active insns.
14228 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14229 break;
14230 /* Output a series of NOPs before the first active insn. */
14231 while (insn && hw_after > 0)
14233 if (hw_after >= 3 && TARGET_CPU_ZARCH)
14235 emit_insn_before (gen_nop_6_byte (), insn);
14236 hw_after -= 3;
14238 else if (hw_after >= 2)
14240 emit_insn_before (gen_nop_4_byte (), insn);
14241 hw_after -= 2;
14243 else
14245 emit_insn_before (gen_nop_2_byte (), insn);
14246 hw_after -= 1;
14252 /* Return true if INSN is a fp load insn writing register REGNO. */
14253 static inline bool
14254 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14256 rtx set;
14257 enum attr_type flag = s390_safe_attr_type (insn);
14259 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14260 return false;
14262 set = single_set (insn);
14264 if (set == NULL_RTX)
14265 return false;
14267 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14268 return false;
14270 if (REGNO (SET_DEST (set)) != regno)
14271 return false;
14273 return true;
14276 /* This value describes the distance to be avoided between an
14277 arithmetic fp instruction and an fp load writing the same register.
14278 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14279 fine, but the exact value has to be avoided. Otherwise the FP
14280 pipeline will throw an exception causing a major penalty. */
14281 #define Z10_EARLYLOAD_DISTANCE 7
14283 /* Rearrange the ready list in order to avoid the situation described
14284 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14285 moved to the very end of the ready list. */
14286 static void
14287 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14289 unsigned int regno;
14290 int nready = *nready_p;
14291 rtx_insn *tmp;
14292 int i;
14293 rtx_insn *insn;
14294 rtx set;
14295 enum attr_type flag;
14296 int distance;
14298 /* Skip DISTANCE - 1 active insns. */
14299 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14300 distance > 0 && insn != NULL_RTX;
14301 distance--, insn = prev_active_insn (insn))
14302 if (CALL_P (insn) || JUMP_P (insn))
14303 return;
14305 if (insn == NULL_RTX)
14306 return;
14308 set = single_set (insn);
14310 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14311 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14312 return;
14314 flag = s390_safe_attr_type (insn);
14316 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14317 return;
14319 regno = REGNO (SET_DEST (set));
14320 i = nready - 1;
14322 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14323 i--;
14325 if (!i)
14326 return;
14328 tmp = ready[i];
14329 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14330 ready[0] = tmp;
14334 /* The s390_sched_state variable tracks the state of the current or
14335 the last instruction group.
14337 0,1,2 number of instructions scheduled in the current group
14338 3 the last group is complete - normal insns
14339 4 the last group was a cracked/expanded insn */
14341 static int s390_sched_state;
14343 #define S390_SCHED_STATE_NORMAL 3
14344 #define S390_SCHED_STATE_CRACKED 4
14346 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14347 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14348 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14349 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
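/* Return the S390_SCHED_ATTR_MASK_* flags describing the OOO scheduling
   properties (cracked, expanded, endgroup, groupalone) of INSN for the
   processor we are tuning for.  */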
14351 static unsigned int
14352 s390_get_sched_attrmask (rtx_insn *insn)
14354 unsigned int mask = 0;
14356 switch (s390_tune)
14358 case PROCESSOR_2827_ZEC12:
14359 if (get_attr_zEC12_cracked (insn))
14360 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14361 if (get_attr_zEC12_expanded (insn))
14362 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14363 if (get_attr_zEC12_endgroup (insn))
14364 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14365 if (get_attr_zEC12_groupalone (insn))
14366 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14367 break;
14368 case PROCESSOR_2964_Z13:
14369 case PROCESSOR_3906_Z14:
14370 if (get_attr_z13_cracked (insn))
14371 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14372 if (get_attr_z13_expanded (insn))
14373 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14374 if (get_attr_z13_endgroup (insn))
14375 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14376 if (get_attr_z13_groupalone (insn))
14377 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14378 break;
14379 default:
14380 gcc_unreachable ();
14382 return mask;
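/* Return a bit mask with one bit per execution unit (LSU, FXU, VFU)
   used by INSN and store the number of units in *UNITS.  */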
14385 static unsigned int
14386 s390_get_unit_mask (rtx_insn *insn, int *units)
14388 unsigned int mask = 0;
14390 switch (s390_tune)
14392 case PROCESSOR_2964_Z13:
14393 case PROCESSOR_3906_Z14:
14394 *units = 3;
14395 if (get_attr_z13_unit_lsu (insn))
14396 mask |= 1 << 0;
14397 if (get_attr_z13_unit_fxu (insn))
14398 mask |= 1 << 1;
14399 if (get_attr_z13_unit_vfu (insn))
14400 mask |= 1 << 2;
14401 break;
14402 default:
14403 gcc_unreachable ();
14405 return mask;
14408 /* Return the scheduling score for INSN. The higher the score the
14409 better. The score is calculated from the OOO scheduling attributes
14410 of INSN and the scheduling state s390_sched_state. */
14411 static int
14412 s390_sched_score (rtx_insn *insn)
14414 unsigned int mask = s390_get_sched_attrmask (insn);
14415 int score = 0;
14417 switch (s390_sched_state)
14419 case 0:
14420 /* Try to put insns into the first slot which would otherwise
14421 break a group. */
14422 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14423 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14424 score += 5;
14425 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14426 score += 10;
14427 /* fallthrough */
14428 case 1:
14429 /* Prefer not cracked insns while trying to put together a
14430 group. */
14431 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14432 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14433 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14434 score += 10;
14435 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14436 score += 5;
14437 break;
14438 case 2:
14439 /* Prefer not cracked insns while trying to put together a
14440 group. */
14441 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14442 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14443 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14444 score += 10;
14445 /* Prefer endgroup insns in the last slot. */
14446 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14447 score += 10;
14448 break;
14449 case S390_SCHED_STATE_NORMAL:
14450 /* Prefer not cracked insns if the last was not cracked. */
14451 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14452 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14453 score += 5;
14454 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14455 score += 10;
14456 break;
14457 case S390_SCHED_STATE_CRACKED:
14458 /* Try to keep cracked insns together to prevent them from
14459 interrupting groups. */
14460 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14461 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14462 score += 5;
14463 break;
14466 if (s390_tune >= PROCESSOR_2964_Z13)
14468 int units, i;
14469 unsigned unit_mask, m = 1;
14471 unit_mask = s390_get_unit_mask (insn, &units);
14472 gcc_assert (units <= MAX_SCHED_UNITS);
14474 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14475 ago the last insn of this unit type got scheduled. This is
14476 supposed to help provide a proper instruction mix to the
14477 CPU. */
14478 for (i = 0; i < units; i++, m <<= 1)
14479 if (m & unit_mask)
14480 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14481 MAX_SCHED_MIX_DISTANCE);
14483 return score;
14486 /* This function is called via hook TARGET_SCHED_REORDER before
14487 issuing one insn from list READY which contains *NREADYP entries.
14488 For target z10 it reorders load instructions to avoid early load
14489 conflicts in the floating point pipeline. */
14490 static int
14491 s390_sched_reorder (FILE *file, int verbose,
14492 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14494 if (s390_tune == PROCESSOR_2097_Z10
14495 && reload_completed
14496 && *nreadyp > 1)
14497 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14499 if (s390_tune >= PROCESSOR_2827_ZEC12
14500 && reload_completed
14501 && *nreadyp > 1)
14503 int i;
14504 int last_index = *nreadyp - 1;
14505 int max_index = -1;
14506 int max_score = -1;
14507 rtx_insn *tmp;
14509 /* Just move the insn with the highest score to the top (the
14510 end) of the list. A full sort is not needed since a conflict
14511 in the hazard recognition cannot happen. So the top insn in
14512 the ready list will always be taken. */
14513 for (i = last_index; i >= 0; i--)
14515 int score;
14517 if (recog_memoized (ready[i]) < 0)
14518 continue;
14520 score = s390_sched_score (ready[i]);
14521 if (score > max_score)
14523 max_score = score;
14524 max_index = i;
14528 if (max_index != -1)
14530 if (max_index != last_index)
14532 tmp = ready[max_index];
14533 ready[max_index] = ready[last_index];
14534 ready[last_index] = tmp;
14536 if (verbose > 5)
14537 fprintf (file,
14538 ";;\t\tBACKEND: move insn %d to the top of list\n",
14539 INSN_UID (ready[last_index]));
14541 else if (verbose > 5)
14542 fprintf (file,
14543 ";;\t\tBACKEND: best insn %d already on top\n",
14544 INSN_UID (ready[last_index]));
14547 if (verbose > 5)
14549 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14550 s390_sched_state);
14552 for (i = last_index; i >= 0; i--)
14554 unsigned int sched_mask;
14555 rtx_insn *insn = ready[i];
14557 if (recog_memoized (insn) < 0)
14558 continue;
14560 sched_mask = s390_get_sched_attrmask (insn);
14561 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14562 INSN_UID (insn),
14563 s390_sched_score (insn));
14564 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14565 ((M) & sched_mask) ? #ATTR : "");
14566 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14567 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14568 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14569 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14570 #undef PRINT_SCHED_ATTR
14571 if (s390_tune >= PROCESSOR_2964_Z13)
14573 unsigned int unit_mask, m = 1;
14574 int units, j;
14576 unit_mask = s390_get_unit_mask (insn, &units);
14577 fprintf (file, "(units:");
14578 for (j = 0; j < units; j++, m <<= 1)
14579 if (m & unit_mask)
14580 fprintf (file, " u%d", j);
14581 fprintf (file, ")");
14583 fprintf (file, "\n");
14588 return s390_issue_rate ();
14592 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14593 the scheduler has issued INSN. It stores the last issued insn into
14594 last_scheduled_insn in order to make it available for
14595 s390_sched_reorder. */
14596 static int
14597 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14599 last_scheduled_insn = insn;
14601 if (s390_tune >= PROCESSOR_2827_ZEC12
14602 && reload_completed
14603 && recog_memoized (insn) >= 0)
14605 unsigned int mask = s390_get_sched_attrmask (insn);
14607 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14608 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14609 s390_sched_state = S390_SCHED_STATE_CRACKED;
14610 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14611 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14612 s390_sched_state = S390_SCHED_STATE_NORMAL;
14613 else
14615 /* Only normal insns are left (mask == 0). */
14616 switch (s390_sched_state)
14618 case 0:
14619 case 1:
14620 case 2:
14621 case S390_SCHED_STATE_NORMAL:
14622 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14623 s390_sched_state = 1;
14624 else
14625 s390_sched_state++;
14627 break;
14628 case S390_SCHED_STATE_CRACKED:
14629 s390_sched_state = S390_SCHED_STATE_NORMAL;
14630 break;
14634 if (s390_tune >= PROCESSOR_2964_Z13)
14636 int units, i;
14637 unsigned unit_mask, m = 1;
14639 unit_mask = s390_get_unit_mask (insn, &units);
14640 gcc_assert (units <= MAX_SCHED_UNITS);
14642 for (i = 0; i < units; i++, m <<= 1)
14643 if (m & unit_mask)
14644 last_scheduled_unit_distance[i] = 0;
14645 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14646 last_scheduled_unit_distance[i]++;
14649 if (verbose > 5)
14651 unsigned int sched_mask;
14653 sched_mask = s390_get_sched_attrmask (insn);
14655 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14656 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14657 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14658 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14659 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14660 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14661 #undef PRINT_SCHED_ATTR
14663 if (s390_tune >= PROCESSOR_2964_Z13)
14665 unsigned int unit_mask, m = 1;
14666 int units, j;
14668 unit_mask = s390_get_unit_mask (insn, &units);
14669 fprintf (file, "(units:");
14670 for (j = 0; j < units; j++, m <<= 1)
14671 if (m & unit_mask)
14672 fprintf (file, " %d", j);
14673 fprintf (file, ")");
14675 fprintf (file, " sched state: %d\n", s390_sched_state);
14677 if (s390_tune >= PROCESSOR_2964_Z13)
14679 int units, j;
14681 s390_get_unit_mask (insn, &units);
14683 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14684 for (j = 0; j < units; j++)
14685 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14686 fprintf (file, "\n");
14691 if (GET_CODE (PATTERN (insn)) != USE
14692 && GET_CODE (PATTERN (insn)) != CLOBBER)
14693 return more - 1;
14694 else
14695 return more;
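/* This function is called via hook TARGET_SCHED_INIT before scheduling
   a new block.  It resets the last scheduled insn, the per-unit
   distances and the instruction group state.  */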
14698 static void
14699 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14700 int verbose ATTRIBUTE_UNUSED,
14701 int max_ready ATTRIBUTE_UNUSED)
14703 last_scheduled_insn = NULL;
14704 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14705 s390_sched_state = 0;
14708 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14709 a new number of times struct loop *loop should be unrolled if tuned for cpus with
14710 a built-in stride prefetcher.
14711 The loop is analyzed for memory accesses by calling check_dpu for
14712 each rtx of the loop. Depending on the loop_depth and the number of
14713 memory accesses, a new number <= nunroll is returned to improve the
14714 behavior of the hardware prefetch unit. */
14715 static unsigned
14716 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14718 basic_block *bbs;
14719 rtx_insn *insn;
14720 unsigned i;
14721 unsigned mem_count = 0;
14723 if (s390_tune < PROCESSOR_2097_Z10)
14724 return nunroll;
14726 /* Count the number of memory references within the loop body. */
14727 bbs = get_loop_body (loop);
14728 subrtx_iterator::array_type array;
14729 for (i = 0; i < loop->num_nodes; i++)
14730 FOR_BB_INSNS (bbs[i], insn)
14731 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14732 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14733 if (MEM_P (*iter))
14734 mem_count += 1;
14735 free (bbs);
14737 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
14738 if (mem_count == 0)
14739 return nunroll;
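/* Pick the unroll factor such that the unrolled loop body contains at
   most roughly 28 (depth 1), 22 (depth 2) or 16 (deeper nests) memory
   accesses.  */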
14741 switch (loop_depth(loop))
14743 case 1:
14744 return MIN (nunroll, 28 / mem_count);
14745 case 2:
14746 return MIN (nunroll, 22 / mem_count);
14747 default:
14748 return MIN (nunroll, 16 / mem_count);
14752 /* Restore the current options. This is a hook function and also called
14753 internally. */
14755 static void
14756 s390_function_specific_restore (struct gcc_options *opts,
14757 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14759 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
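/* Validate and adjust the target options held in OPTS.  OPTS_SET
   records which options have been explicitly specified.  MAIN_ARGS_P is
   true when processing the command line options and false for attribute
   (target) / pragma processing; it is used to word the error messages
   accordingly.  */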
14762 static void
14763 s390_option_override_internal (bool main_args_p,
14764 struct gcc_options *opts,
14765 const struct gcc_options *opts_set)
14767 const char *prefix;
14768 const char *suffix;
14770 /* Set up prefix/suffix so the error messages refer to either the command
14771 line argument, or the attribute(target). */
14772 if (main_args_p)
14774 prefix = "-m";
14775 suffix = "";
14777 else
14779 prefix = "option(\"";
14780 suffix = "\")";
14784 /* Architecture mode defaults according to ABI. */
14785 if (!(opts_set->x_target_flags & MASK_ZARCH))
14787 if (TARGET_64BIT)
14788 opts->x_target_flags |= MASK_ZARCH;
14789 else
14790 opts->x_target_flags &= ~MASK_ZARCH;
14793 /* Set the march default in case it hasn't been specified on cmdline. */
14794 if (!opts_set->x_s390_arch)
14795 opts->x_s390_arch = PROCESSOR_2064_Z900;
14796 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14797 || opts->x_s390_arch == PROCESSOR_9672_G6)
14798 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14799 "in future releases; use at least %sarch=z900%s",
14800 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14801 suffix, prefix, suffix);
14803 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14805 /* Determine processor to tune for. */
14806 if (!opts_set->x_s390_tune)
14807 opts->x_s390_tune = opts->x_s390_arch;
14808 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14809 || opts->x_s390_tune == PROCESSOR_9672_G6)
14810 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14811 "in future releases; use at least %stune=z900%s",
14812 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14813 suffix, prefix, suffix);
14815 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14817 /* Sanity checks. */
14818 if (opts->x_s390_arch == PROCESSOR_NATIVE
14819 || opts->x_s390_tune == PROCESSOR_NATIVE)
14820 gcc_unreachable ();
14821 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14822 error ("z/Architecture mode not supported on %s",
14823 processor_table[(int)opts->x_s390_arch].name);
14824 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14825 error ("64-bit ABI not supported in ESA/390 mode");
14827 /* Enable hardware transactions if available and not explicitly
14828 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14829 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14831 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14832 opts->x_target_flags |= MASK_OPT_HTM;
14833 else
14834 opts->x_target_flags &= ~MASK_OPT_HTM;
14837 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14839 if (TARGET_OPT_VX_P (opts->x_target_flags))
14841 if (!TARGET_CPU_VX_P (opts))
14842 error ("hardware vector support not available on %s",
14843 processor_table[(int)opts->x_s390_arch].name);
14844 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14845 error ("hardware vector support not available with -msoft-float");
14848 else
14850 if (TARGET_CPU_VX_P (opts))
14851 /* Enable vector support if available and not explicitly disabled
14852 by user. E.g. with -m31 -march=z13 -mzarch */
14853 opts->x_target_flags |= MASK_OPT_VX;
14854 else
14855 opts->x_target_flags &= ~MASK_OPT_VX;
14858 /* Use hardware DFP if available and not explicitly disabled by
14859 user. E.g. with -m31 -march=z10 -mzarch */
14860 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14862 if (TARGET_DFP_P (opts))
14863 opts->x_target_flags |= MASK_HARD_DFP;
14864 else
14865 opts->x_target_flags &= ~MASK_HARD_DFP;
14868 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14870 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14872 if (!TARGET_CPU_DFP_P (opts))
14873 error ("hardware decimal floating point instructions"
14874 " not available on %s",
14875 processor_table[(int)opts->x_s390_arch].name);
14876 if (!TARGET_ZARCH_P (opts->x_target_flags))
14877 error ("hardware decimal floating point instructions"
14878 " not available in ESA/390 mode");
14880 else
14881 opts->x_target_flags &= ~MASK_HARD_DFP;
14884 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14885 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14887 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14888 && TARGET_HARD_DFP_P (opts->x_target_flags))
14889 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14891 opts->x_target_flags &= ~MASK_HARD_DFP;
14894 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14895 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14896 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14897 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14898 "in combination");
14900 if (opts->x_s390_stack_size)
14902 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14903 error ("stack size must be greater than the stack guard value");
14904 else if (opts->x_s390_stack_size > 1 << 16)
14905 error ("stack size must not be greater than 64k");
14907 else if (opts->x_s390_stack_guard)
14908 error ("-mstack-guard implies use of -mstack-size");
14910 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14911 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14912 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14913 #endif
14915 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14917 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14918 opts->x_param_values,
14919 opts_set->x_param_values);
14920 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14921 opts->x_param_values,
14922 opts_set->x_param_values);
14923 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14924 opts->x_param_values,
14925 opts_set->x_param_values);
14926 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14927 opts->x_param_values,
14928 opts_set->x_param_values);
14931 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14932 opts->x_param_values,
14933 opts_set->x_param_values);
14934 /* values for loop prefetching */
14935 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14936 opts->x_param_values,
14937 opts_set->x_param_values);
14938 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14939 opts->x_param_values,
14940 opts_set->x_param_values);
14941 /* s390 has more than 2 levels and the size is much larger. Since
14942 we are always running virtualized, assume that we only get a small
14943 part of the caches above l1. */
14944 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14945 opts->x_param_values,
14946 opts_set->x_param_values);
14947 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14948 opts->x_param_values,
14949 opts_set->x_param_values);
14950 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14951 opts->x_param_values,
14952 opts_set->x_param_values);
14954 /* Use the alternative scheduling-pressure algorithm by default. */
14955 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14956 opts->x_param_values,
14957 opts_set->x_param_values);
14959 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
14960 opts->x_param_values,
14961 opts_set->x_param_values);
14963 /* Call target specific restore function to do post-init work. At the moment,
14964 this just sets opts->x_s390_cost_pointer. */
14965 s390_function_specific_restore (opts, NULL);
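/* Implement the TARGET_OPTION_OVERRIDE hook: process deferred options,
   run the common option override code and register the target specific
   pass.  */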
14968 static void
14969 s390_option_override (void)
14971 unsigned int i;
14972 cl_deferred_option *opt;
14973 vec<cl_deferred_option> *v =
14974 (vec<cl_deferred_option> *) s390_deferred_options;
14976 if (v)
14977 FOR_EACH_VEC_ELT (*v, i, opt)
14979 switch (opt->opt_index)
14981 case OPT_mhotpatch_:
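/* Parse the two comma separated values of -mhotpatch=n,m: the number
   of halfwords to reserve before and after the function label.  */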
14983 int val1;
14984 int val2;
14985 char s[256];
14986 char *t;
14988 strncpy (s, opt->arg, 256);
14989 s[255] = 0;
14990 t = strchr (s, ',');
14991 if (t != NULL)
14993 *t = 0;
14994 t++;
14995 val1 = integral_argument (s);
14996 val2 = integral_argument (t);
14998 else
15000 val1 = -1;
15001 val2 = -1;
15003 if (val1 == -1 || val2 == -1)
15005 /* argument is not a plain number */
15006 error ("arguments to %qs should be non-negative integers",
15007 "-mhotpatch=n,m");
15008 break;
15010 else if (val1 > s390_hotpatch_hw_max
15011 || val2 > s390_hotpatch_hw_max)
15013 error ("argument to %qs is too large (max. %d)",
15014 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15015 break;
15017 s390_hotpatch_hw_before_label = val1;
15018 s390_hotpatch_hw_after_label = val2;
15019 break;
15021 default:
15022 gcc_unreachable ();
15026 /* Set up function hooks. */
15027 init_machine_status = s390_init_machine_status;
15029 s390_option_override_internal (true, &global_options, &global_options_set);
15031 /* Save the initial options in case the user does function specific
15032 options. */
15033 target_option_default_node = build_target_option_node (&global_options);
15034 target_option_current_node = target_option_default_node;
15036 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15037 requires the arch flags to be evaluated already. Since prefetching
15038 is beneficial on s390, we enable it if available. */
15039 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15040 flag_prefetch_loop_arrays = 1;
15042 if (!s390_pic_data_is_text_relative && !flag_pic)
15043 error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15045 if (TARGET_TPF)
15047 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15048 debuggers do not yet support DWARF 3/4. */
15049 if (!global_options_set.x_dwarf_strict)
15050 dwarf_strict = 1;
15051 if (!global_options_set.x_dwarf_version)
15052 dwarf_version = 2;
15055 /* Register a target-specific optimization-and-lowering pass
15056 to run immediately before prologue and epilogue generation.
15058 Registering the pass must be done at start up. It's
15059 convenient to do it here. */
15060 opt_pass *new_pass = new pass_s390_early_mach (g);
15061 struct register_pass_info insert_pass_s390_early_mach =
15063 new_pass, /* pass */
15064 "pro_and_epilogue", /* reference_pass_name */
15065 1, /* ref_pass_instance_number */
15066 PASS_POS_INSERT_BEFORE /* po_op */
15068 register_pass (&insert_pass_s390_early_mach);
15071 #if S390_USE_TARGET_ATTRIBUTE
15072 /* Inner function to process the attribute((target(...))), take an argument and
15073 set the current options from the argument. If we have a list, recursively go
15074 over the list. */
15076 static bool
15077 s390_valid_target_attribute_inner_p (tree args,
15078 struct gcc_options *opts,
15079 struct gcc_options *new_opts_set,
15080 bool force_pragma)
15082 char *next_optstr;
15083 bool ret = true;
15085 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15086 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15087 static const struct
15089 const char *string;
15090 size_t len;
15091 int opt;
15092 int has_arg;
15093 int only_as_pragma;
15094 } attrs[] = {
15095 /* enum options */
15096 S390_ATTRIB ("arch=", OPT_march_, 1),
15097 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15098 /* uinteger options */
15099 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15100 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15101 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15102 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15103 /* flag options */
15104 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15105 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15106 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15107 S390_ATTRIB ("htm", OPT_mhtm, 0),
15108 S390_ATTRIB ("vx", OPT_mvx, 0),
15109 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15110 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15111 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15112 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15113 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15114 /* boolean options */
15115 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15117 #undef S390_ATTRIB
15118 #undef S390_PRAGMA
15120 /* If this is a list, recurse to get the options. */
15121 if (TREE_CODE (args) == TREE_LIST)
15123 bool ret = true;
15124 int num_pragma_values;
15125 int i;
15127 /* Note: attribs.c:decl_attributes prepends the values from
15128 current_target_pragma to the list of target attributes. To determine
15129 whether we're looking at a value of the attribute or the pragma we
15130 assume that the first [list_length (current_target_pragma)] values in
15131 the list are the values from the pragma. */
15132 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15133 ? list_length (current_target_pragma) : 0;
15134 for (i = 0; args; args = TREE_CHAIN (args), i++)
15136 bool is_pragma;
15138 is_pragma = (force_pragma || i < num_pragma_values);
15139 if (TREE_VALUE (args)
15140 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15141 opts, new_opts_set,
15142 is_pragma))
15144 ret = false;
15147 return ret;
15150 else if (TREE_CODE (args) != STRING_CST)
15152 error ("attribute %<target%> argument not a string");
15153 return false;
15156 /* Handle multiple arguments separated by commas. */
15157 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15159 while (next_optstr && *next_optstr != '\0')
15161 char *p = next_optstr;
15162 char *orig_p = p;
15163 char *comma = strchr (next_optstr, ',');
15164 size_t len, opt_len;
15165 int opt;
15166 bool opt_set_p;
15167 char ch;
15168 unsigned i;
15169 int mask = 0;
15170 enum cl_var_type var_type;
15171 bool found;
15173 if (comma)
15175 *comma = '\0';
15176 len = comma - next_optstr;
15177 next_optstr = comma + 1;
15179 else
15181 len = strlen (p);
15182 next_optstr = NULL;
15185 /* Recognize no-xxx. */
15186 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15188 opt_set_p = false;
15189 p += 3;
15190 len -= 3;
15192 else
15193 opt_set_p = true;
15195 /* Find the option. */
15196 ch = *p;
15197 found = false;
15198 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15200 opt_len = attrs[i].len;
15201 if (ch == attrs[i].string[0]
15202 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15203 && memcmp (p, attrs[i].string, opt_len) == 0)
15205 opt = attrs[i].opt;
15206 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15207 continue;
15208 mask = cl_options[opt].var_value;
15209 var_type = cl_options[opt].var_type;
15210 found = true;
15211 break;
15215 /* Process the option. */
15216 if (!found)
15218 error ("attribute(target(\"%s\")) is unknown", orig_p);
15219 return false;
15221 else if (attrs[i].only_as_pragma && !force_pragma)
15223 /* Value is not allowed for the target attribute. */
15224 error ("value %qs is not supported by attribute %<target%>",
15225 attrs[i].string);
15226 return false;
15229 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15231 if (var_type == CLVC_BIT_CLEAR)
15232 opt_set_p = !opt_set_p;
15234 if (opt_set_p)
15235 opts->x_target_flags |= mask;
15236 else
15237 opts->x_target_flags &= ~mask;
15238 new_opts_set->x_target_flags |= mask;
15241 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15243 int value;
15245 if (cl_options[opt].cl_uinteger)
15247 /* Unsigned integer argument. Code based on the function
15248 decode_cmdline_option () in opts-common.c. */
15249 value = integral_argument (p + opt_len);
15251 else
15252 value = (opt_set_p) ? 1 : 0;
15254 if (value != -1)
15256 struct cl_decoded_option decoded;
15258 /* Value range check; only implemented for numeric and boolean
15259 options at the moment. */
15260 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15261 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15262 set_option (opts, new_opts_set, opt, value,
15263 p + opt_len, DK_UNSPECIFIED, input_location,
15264 global_dc);
15266 else
15268 error ("attribute(target(\"%s\")) is unknown", orig_p);
15269 ret = false;
15273 else if (cl_options[opt].var_type == CLVC_ENUM)
15275 bool arg_ok;
15276 int value;
15278 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15279 if (arg_ok)
15280 set_option (opts, new_opts_set, opt, value,
15281 p + opt_len, DK_UNSPECIFIED, input_location,
15282 global_dc);
15283 else
15285 error ("attribute(target(\"%s\")) is unknown", orig_p);
15286 ret = false;
15290 else
15291 gcc_unreachable ();
15293 return ret;
15296 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15298 tree
15299 s390_valid_target_attribute_tree (tree args,
15300 struct gcc_options *opts,
15301 const struct gcc_options *opts_set,
15302 bool force_pragma)
15304 tree t = NULL_TREE;
15305 struct gcc_options new_opts_set;
15307 memset (&new_opts_set, 0, sizeof (new_opts_set));
15309 /* Process each of the options on the chain. */
15310 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15311 force_pragma))
15312 return error_mark_node;
15314 /* If some option was set (even if it has not changed), rerun
15315 s390_option_override_internal, and then save the options away. */
15316 if (new_opts_set.x_target_flags
15317 || new_opts_set.x_s390_arch
15318 || new_opts_set.x_s390_tune
15319 || new_opts_set.x_s390_stack_guard
15320 || new_opts_set.x_s390_stack_size
15321 || new_opts_set.x_s390_branch_cost
15322 || new_opts_set.x_s390_warn_framesize
15323 || new_opts_set.x_s390_warn_dynamicstack_p)
15325 const unsigned char *src = (const unsigned char *)opts_set;
15326 unsigned char *dest = (unsigned char *)&new_opts_set;
15327 unsigned int i;
15329 /* Merge the original option flags into the new ones. */
15330 for (i = 0; i < sizeof(*opts_set); i++)
15331 dest[i] |= src[i];
15333 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15334 s390_option_override_internal (false, opts, &new_opts_set);
15335 /* Save the current options unless we are validating options for
15336 #pragma. */
15337 t = build_target_option_node (opts);
15339 return t;
15342 /* Hook to validate attribute((target("string"))). */
15344 static bool
15345 s390_valid_target_attribute_p (tree fndecl,
15346 tree ARG_UNUSED (name),
15347 tree args,
15348 int ARG_UNUSED (flags))
15350 struct gcc_options func_options;
15351 tree new_target, new_optimize;
15352 bool ret = true;
15354 /* attribute((target("default"))) does nothing, beyond
15355 affecting multi-versioning. */
15356 if (TREE_VALUE (args)
15357 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15358 && TREE_CHAIN (args) == NULL_TREE
15359 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15360 return true;
15362 tree old_optimize = build_optimization_node (&global_options);
15364 /* Get the optimization options of the current function. */
15365 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15367 if (!func_optimize)
15368 func_optimize = old_optimize;
15370 /* Init func_options. */
15371 memset (&func_options, 0, sizeof (func_options));
15372 init_options_struct (&func_options, NULL);
15373 lang_hooks.init_options_struct (&func_options);
15375 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15377 /* Initialize func_options to the default before its target options can
15378 be set. */
15379 cl_target_option_restore (&func_options,
15380 TREE_TARGET_OPTION (target_option_default_node));
15382 new_target = s390_valid_target_attribute_tree (args, &func_options,
15383 &global_options_set,
15384 (args ==
15385 current_target_pragma));
15386 new_optimize = build_optimization_node (&func_options);
15387 if (new_target == error_mark_node)
15388 ret = false;
15389 else if (fndecl && new_target)
15391 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15392 if (old_optimize != new_optimize)
15393 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15395 return ret;
15398 /* Hook to determine if one function can safely inline another. */
15400 static bool
15401 s390_can_inline_p (tree caller, tree callee)
15403 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15404 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15406 if (!callee_tree)
15407 callee_tree = target_option_default_node;
15408 if (!caller_tree)
15409 caller_tree = target_option_default_node;
15410 if (callee_tree == caller_tree)
15411 return true;
15413 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15414 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15415 bool ret = true;
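/* Apart from the soft-float and hard-dfp flags, which are handled
   separately below, caller and callee must have been compiled with the
   same target flags.  */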
15417 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15418 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15419 ret = false;
15421 /* Don't inline functions to be compiled for a more recent arch into a
15422 function for an older arch. */
15423 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15424 ret = false;
15426 /* Inlining a hard float function into a soft float function is only
15427 allowed if the hard float function doesn't actually make use of
15428 floating point.
15430 We are called from FEs for multi-versioning call optimization, so
15431 beware of ipa_fn_summaries not available. */
15432 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15433 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15434 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15435 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15436 && (! ipa_fn_summaries
15437 || ipa_fn_summaries->get
15438 (cgraph_node::get (callee))->fp_expressions))
15439 ret = false;
15441 return ret;
15444 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15445 cache. */
15447 void
15448 s390_activate_target_options (tree new_tree)
15450 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15451 if (TREE_TARGET_GLOBALS (new_tree))
15452 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15453 else if (new_tree == target_option_default_node)
15454 restore_target_globals (&default_target_globals);
15455 else
15456 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15457 s390_previous_fndecl = NULL_TREE;
15460 /* Establish appropriate back-end context for processing the function
15461 FNDECL. The argument might be NULL to indicate processing at top
15462 level, outside of any function scope. */
15463 static void
15464 s390_set_current_function (tree fndecl)
15466 /* Only change the context if the function changes. This hook is called
15467 several times in the course of compiling a function, and we don't want to
15468 slow things down too much or call target_reinit when it isn't safe. */
15469 if (fndecl == s390_previous_fndecl)
15470 return;
15472 tree old_tree;
15473 if (s390_previous_fndecl == NULL_TREE)
15474 old_tree = target_option_current_node;
15475 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15476 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15477 else
15478 old_tree = target_option_default_node;
15480 if (fndecl == NULL_TREE)
15482 if (old_tree != target_option_current_node)
15483 s390_activate_target_options (target_option_current_node);
15484 return;
15487 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15488 if (new_tree == NULL_TREE)
15489 new_tree = target_option_default_node;
15491 if (old_tree != new_tree)
15492 s390_activate_target_options (new_tree);
15493 s390_previous_fndecl = fndecl;
15495 #endif
15497 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15499 static bool
15500 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15501 unsigned int align ATTRIBUTE_UNUSED,
15502 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15503 bool speed_p ATTRIBUTE_UNUSED)
15505 return (size == 1 || size == 2
15506 || size == 4 || (TARGET_ZARCH && size == 8));
15509 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15511 static void
15512 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15514 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15515 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15516 tree call_efpc = build_call_expr (efpc, 0);
15517 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
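/* Bit masks and shift counts for the exception mask, flag and DXC
   fields of the floating point control (FPC) register.  */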
15519 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15520 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15521 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15522 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15523 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15524 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15526 /* Generates the equivalent of feholdexcept (&fenv_var)
15528 fenv_var = __builtin_s390_efpc ();
15529 __builtin_s390_sfpc (fenv_var & mask) */
15530 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15531 tree new_fpc =
15532 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15533 build_int_cst (unsigned_type_node,
15534 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15535 FPC_EXCEPTION_MASK)));
15536 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15537 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15539 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15541 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15542 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15543 build_int_cst (unsigned_type_node,
15544 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15545 *clear = build_call_expr (sfpc, 1, new_fpc);
15547 /* Generates the equivalent of feupdateenv (fenv_var)
15549 old_fpc = __builtin_s390_efpc ();
15550 __builtin_s390_sfpc (fenv_var);
15551 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15553 old_fpc = create_tmp_var_raw (unsigned_type_node);
15554 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15555 old_fpc, call_efpc);
15557 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15559 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15560 build_int_cst (unsigned_type_node,
15561 FPC_FLAGS_MASK));
15562 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15563 build_int_cst (unsigned_type_node,
15564 FPC_FLAGS_SHIFT));
15565 tree atomic_feraiseexcept
15566 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15567 raise_old_except = build_call_expr (atomic_feraiseexcept,
15568 1, raise_old_except);
15570 *update = build2 (COMPOUND_EXPR, void_type_node,
15571 build2 (COMPOUND_EXPR, void_type_node,
15572 store_old_fpc, set_new_fpc),
15573 raise_old_except);
15575 #undef FPC_EXCEPTION_MASK
15576 #undef FPC_FLAGS_MASK
15577 #undef FPC_DXC_MASK
15578 #undef FPC_EXCEPTION_MASK_SHIFT
15579 #undef FPC_FLAGS_SHIFT
15580 #undef FPC_DXC_SHIFT
15583 /* Return the vector mode to be used for inner mode MODE when doing
15584 vectorization. */
15585 static machine_mode
15586 s390_preferred_simd_mode (scalar_mode mode)
15588 if (TARGET_VX)
15589 switch (mode)
15591 case E_DFmode:
15592 return V2DFmode;
15593 case E_DImode:
15594 return V2DImode;
15595 case E_SImode:
15596 return V4SImode;
15597 case E_HImode:
15598 return V8HImode;
15599 case E_QImode:
15600 return V16QImode;
15601 default:;
15603 return word_mode;
15606 /* Our hardware does not require vectors to be strictly aligned. */
15607 static bool
15608 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15609 const_tree type ATTRIBUTE_UNUSED,
15610 int misalignment ATTRIBUTE_UNUSED,
15611 bool is_packed ATTRIBUTE_UNUSED)
15613 if (TARGET_VX)
15614 return true;
15616 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15617 is_packed);
15620 /* The vector ABI requires vector types to be aligned on an 8 byte
15621 boundary (our stack alignment). However, we allow this to be
15622 overridden by the user, while this definitely breaks the ABI. */
15623 static HOST_WIDE_INT
15624 s390_vector_alignment (const_tree type)
15626 if (!TARGET_VX_ABI)
15627 return default_vector_alignment (type);
15629 if (TYPE_USER_ALIGN (type))
15630 return TYPE_ALIGN (type);
15632 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
15635 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15636 /* Implement TARGET_ASM_FILE_START. */
15637 static void
15638 s390_asm_file_start (void)
15640 default_file_start ();
15641 s390_asm_output_machine_for_arch (asm_out_file);
15643 #endif
15645 /* Implement TARGET_ASM_FILE_END. */
15646 static void
15647 s390_asm_file_end (void)
15649 #ifdef HAVE_AS_GNU_ATTRIBUTE
15650 varpool_node *vnode;
15651 cgraph_node *cnode;
15653 FOR_EACH_VARIABLE (vnode)
15654 if (TREE_PUBLIC (vnode->decl))
15655 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15657 FOR_EACH_FUNCTION (cnode)
15658 if (TREE_PUBLIC (cnode->decl))
15659 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
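/* Record the vector ABI in use as a GNU object attribute so that
   consumers of the object file can detect ABI mismatches.  */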
15662 if (s390_vector_abi != 0)
15663 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15664 s390_vector_abi);
15665 #endif
15666 file_end_indicate_exec_stack ();
15668 if (flag_split_stack)
15669 file_end_indicate_split_stack ();
15672 /* Return true if TYPE is a vector bool type. */
15673 static inline bool
15674 s390_vector_bool_type_p (const_tree type)
15676 return TYPE_VECTOR_OPAQUE (type);
15679 /* Return the diagnostic message string if the binary operation OP is
15680 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15681 static const char*
15682 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15684 bool bool1_p, bool2_p;
15685 bool plusminus_p;
15686 bool muldiv_p;
15687 bool compare_p;
15688 machine_mode mode1, mode2;
15690 if (!TARGET_ZVECTOR)
15691 return NULL;
15693 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15694 return NULL;
15696 bool1_p = s390_vector_bool_type_p (type1);
15697 bool2_p = s390_vector_bool_type_p (type2);
15699 /* Mixing signed and unsigned types is forbidden for all
15700 operators. */
15701 if (!bool1_p && !bool2_p
15702 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15703 return N_("types differ in signedness");
15705 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15706 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15707 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15708 || op == ROUND_DIV_EXPR);
15709 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15710 || op == EQ_EXPR || op == NE_EXPR);
15712 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15713 return N_("binary operator does not support two vector bool operands");
15715 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15716 return N_("binary operator does not support vector bool operand");
15718 mode1 = TYPE_MODE (type1);
15719 mode2 = TYPE_MODE (type2);
15721 if (bool1_p != bool2_p && plusminus_p
15722 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15723 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15724 return N_("binary operator does not support mixing vector "
15725 "bool with floating point vector operands");
15727 return NULL;
/* Implement TARGET_C_EXCESS_PRECISION.

   FIXME: For historical reasons, float_t and double_t are typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   That means when we are compiling with -fexcess-precision=fast, the value
   we set for FLT_EVAL_METHOD will be out of line with the actual precision of
   float_t (though they would be correct for -fexcess-precision=standard).

   A complete fix would modify glibc to remove the unnecessary typedef
   of float_t to double.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

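/* A minimal sketch of the user-visible effect, assuming glibc keeps
   float_t typedef'ed to double on s390: with -fexcess-precision=standard
   the hook above reports FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE, which roughly
   corresponds to __FLT_EVAL_METHOD__ == 1, so user code such as

     float a, b, c;
     c = a * b;   // multiply carried out in double, then rounded to float

   matches the glibc typedef, whereas with -fexcess-precision=fast the
   hook reports promotion to float only and the multiply stays in SFmode.  */
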
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}

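/* A minimal sketch of how this value is used, not taken from this file:
   AddressSanitizer instrumentation computes a shadow address roughly as

     shadow = (addr >> ASAN_SHADOW_SHIFT) + s390_asan_shadow_offset ()

   so the shadow region starts at 1 << 52 for 64-bit code and at
   0x20000000 for 31-bit code.  */
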
/* Initialize GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER s390_assemble_integer

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""

#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE s390_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS s390_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN s390_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL s390_builtin_decl

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION s390_excess_precision

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

/* PR 79421 */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op

#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end

#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-s390.h"