S/390: Disallow SImode in s390_decompose_address
[official-gcc.git] / gcc / config / s390 / s390.c
blob8924367a3baf9816620c96421ae338266035dbb4
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2016 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "diagnostic.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "print-tree.h"
48 #include "stor-layout.h"
49 #include "varasm.h"
50 #include "calls.h"
51 #include "conditions.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "flags.h"
55 #include "except.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "stmt.h"
59 #include "expr.h"
60 #include "reload.h"
61 #include "cfgrtl.h"
62 #include "cfganal.h"
63 #include "lcm.h"
64 #include "cfgbuild.h"
65 #include "cfgcleanup.h"
66 #include "debug.h"
67 #include "langhooks.h"
68 #include "internal-fn.h"
69 #include "gimple-fold.h"
70 #include "tree-eh.h"
71 #include "gimplify.h"
72 #include "params.h"
73 #include "opts.h"
74 #include "tree-pass.h"
75 #include "context.h"
76 #include "builtins.h"
77 #include "rtl-iter.h"
78 #include "intl.h"
79 #include "tm-constrs.h"
81 /* This file should be included last. */
82 #include "target-def.h"
84 /* Remember the last target of s390_set_current_function. */
85 static GTY(()) tree s390_previous_fndecl;
87 /* Define the specific costs for a given cpu. */
89 struct processor_costs
91 /* multiplication */
92 const int m; /* cost of an M instruction. */
93 const int mghi; /* cost of an MGHI instruction. */
94 const int mh; /* cost of an MH instruction. */
95 const int mhi; /* cost of an MHI instruction. */
96 const int ml; /* cost of an ML instruction. */
97 const int mr; /* cost of an MR instruction. */
98 const int ms; /* cost of an MS instruction. */
99 const int msg; /* cost of an MSG instruction. */
100 const int msgf; /* cost of an MSGF instruction. */
101 const int msgfr; /* cost of an MSGFR instruction. */
102 const int msgr; /* cost of an MSGR instruction. */
103 const int msr; /* cost of an MSR instruction. */
104 const int mult_df; /* cost of multiplication in DFmode. */
105 const int mxbr;
106 /* square root */
107 const int sqxbr; /* cost of square root in TFmode. */
108 const int sqdbr; /* cost of square root in DFmode. */
109 const int sqebr; /* cost of square root in SFmode. */
110 /* multiply and add */
111 const int madbr; /* cost of multiply and add in DFmode. */
112 const int maebr; /* cost of multiply and add in SFmode. */
113 /* division */
114 const int dxbr;
115 const int ddbr;
116 const int debr;
117 const int dlgr;
118 const int dlr;
119 const int dr;
120 const int dsgfr;
121 const int dsgr;
124 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
126 static const
127 struct processor_costs z900_cost =
129 COSTS_N_INSNS (5), /* M */
130 COSTS_N_INSNS (10), /* MGHI */
131 COSTS_N_INSNS (5), /* MH */
132 COSTS_N_INSNS (4), /* MHI */
133 COSTS_N_INSNS (5), /* ML */
134 COSTS_N_INSNS (5), /* MR */
135 COSTS_N_INSNS (4), /* MS */
136 COSTS_N_INSNS (15), /* MSG */
137 COSTS_N_INSNS (7), /* MSGF */
138 COSTS_N_INSNS (7), /* MSGFR */
139 COSTS_N_INSNS (10), /* MSGR */
140 COSTS_N_INSNS (4), /* MSR */
141 COSTS_N_INSNS (7), /* multiplication in DFmode */
142 COSTS_N_INSNS (13), /* MXBR */
143 COSTS_N_INSNS (136), /* SQXBR */
144 COSTS_N_INSNS (44), /* SQDBR */
145 COSTS_N_INSNS (35), /* SQEBR */
146 COSTS_N_INSNS (18), /* MADBR */
147 COSTS_N_INSNS (13), /* MAEBR */
148 COSTS_N_INSNS (134), /* DXBR */
149 COSTS_N_INSNS (30), /* DDBR */
150 COSTS_N_INSNS (27), /* DEBR */
151 COSTS_N_INSNS (220), /* DLGR */
152 COSTS_N_INSNS (34), /* DLR */
153 COSTS_N_INSNS (34), /* DR */
154 COSTS_N_INSNS (32), /* DSGFR */
155 COSTS_N_INSNS (32), /* DSGR */
158 static const
159 struct processor_costs z990_cost =
161 COSTS_N_INSNS (4), /* M */
162 COSTS_N_INSNS (2), /* MGHI */
163 COSTS_N_INSNS (2), /* MH */
164 COSTS_N_INSNS (2), /* MHI */
165 COSTS_N_INSNS (4), /* ML */
166 COSTS_N_INSNS (4), /* MR */
167 COSTS_N_INSNS (5), /* MS */
168 COSTS_N_INSNS (6), /* MSG */
169 COSTS_N_INSNS (4), /* MSGF */
170 COSTS_N_INSNS (4), /* MSGFR */
171 COSTS_N_INSNS (4), /* MSGR */
172 COSTS_N_INSNS (4), /* MSR */
173 COSTS_N_INSNS (1), /* multiplication in DFmode */
174 COSTS_N_INSNS (28), /* MXBR */
175 COSTS_N_INSNS (130), /* SQXBR */
176 COSTS_N_INSNS (66), /* SQDBR */
177 COSTS_N_INSNS (38), /* SQEBR */
178 COSTS_N_INSNS (1), /* MADBR */
179 COSTS_N_INSNS (1), /* MAEBR */
180 COSTS_N_INSNS (60), /* DXBR */
181 COSTS_N_INSNS (40), /* DDBR */
182 COSTS_N_INSNS (26), /* DEBR */
183 COSTS_N_INSNS (176), /* DLGR */
184 COSTS_N_INSNS (31), /* DLR */
185 COSTS_N_INSNS (31), /* DR */
186 COSTS_N_INSNS (31), /* DSGFR */
187 COSTS_N_INSNS (31), /* DSGR */
190 static const
191 struct processor_costs z9_109_cost =
193 COSTS_N_INSNS (4), /* M */
194 COSTS_N_INSNS (2), /* MGHI */
195 COSTS_N_INSNS (2), /* MH */
196 COSTS_N_INSNS (2), /* MHI */
197 COSTS_N_INSNS (4), /* ML */
198 COSTS_N_INSNS (4), /* MR */
199 COSTS_N_INSNS (5), /* MS */
200 COSTS_N_INSNS (6), /* MSG */
201 COSTS_N_INSNS (4), /* MSGF */
202 COSTS_N_INSNS (4), /* MSGFR */
203 COSTS_N_INSNS (4), /* MSGR */
204 COSTS_N_INSNS (4), /* MSR */
205 COSTS_N_INSNS (1), /* multiplication in DFmode */
206 COSTS_N_INSNS (28), /* MXBR */
207 COSTS_N_INSNS (130), /* SQXBR */
208 COSTS_N_INSNS (66), /* SQDBR */
209 COSTS_N_INSNS (38), /* SQEBR */
210 COSTS_N_INSNS (1), /* MADBR */
211 COSTS_N_INSNS (1), /* MAEBR */
212 COSTS_N_INSNS (60), /* DXBR */
213 COSTS_N_INSNS (40), /* DDBR */
214 COSTS_N_INSNS (26), /* DEBR */
215 COSTS_N_INSNS (30), /* DLGR */
216 COSTS_N_INSNS (23), /* DLR */
217 COSTS_N_INSNS (23), /* DR */
218 COSTS_N_INSNS (24), /* DSGFR */
219 COSTS_N_INSNS (24), /* DSGR */
222 static const
223 struct processor_costs z10_cost =
225 COSTS_N_INSNS (10), /* M */
226 COSTS_N_INSNS (10), /* MGHI */
227 COSTS_N_INSNS (10), /* MH */
228 COSTS_N_INSNS (10), /* MHI */
229 COSTS_N_INSNS (10), /* ML */
230 COSTS_N_INSNS (10), /* MR */
231 COSTS_N_INSNS (10), /* MS */
232 COSTS_N_INSNS (10), /* MSG */
233 COSTS_N_INSNS (10), /* MSGF */
234 COSTS_N_INSNS (10), /* MSGFR */
235 COSTS_N_INSNS (10), /* MSGR */
236 COSTS_N_INSNS (10), /* MSR */
237 COSTS_N_INSNS (1) , /* multiplication in DFmode */
238 COSTS_N_INSNS (50), /* MXBR */
239 COSTS_N_INSNS (120), /* SQXBR */
240 COSTS_N_INSNS (52), /* SQDBR */
241 COSTS_N_INSNS (38), /* SQEBR */
242 COSTS_N_INSNS (1), /* MADBR */
243 COSTS_N_INSNS (1), /* MAEBR */
244 COSTS_N_INSNS (111), /* DXBR */
245 COSTS_N_INSNS (39), /* DDBR */
246 COSTS_N_INSNS (32), /* DEBR */
247 COSTS_N_INSNS (160), /* DLGR */
248 COSTS_N_INSNS (71), /* DLR */
249 COSTS_N_INSNS (71), /* DR */
250 COSTS_N_INSNS (71), /* DSGFR */
251 COSTS_N_INSNS (71), /* DSGR */
254 static const
255 struct processor_costs z196_cost =
257 COSTS_N_INSNS (7), /* M */
258 COSTS_N_INSNS (5), /* MGHI */
259 COSTS_N_INSNS (5), /* MH */
260 COSTS_N_INSNS (5), /* MHI */
261 COSTS_N_INSNS (7), /* ML */
262 COSTS_N_INSNS (7), /* MR */
263 COSTS_N_INSNS (6), /* MS */
264 COSTS_N_INSNS (8), /* MSG */
265 COSTS_N_INSNS (6), /* MSGF */
266 COSTS_N_INSNS (6), /* MSGFR */
267 COSTS_N_INSNS (8), /* MSGR */
268 COSTS_N_INSNS (6), /* MSR */
269 COSTS_N_INSNS (1) , /* multiplication in DFmode */
270 COSTS_N_INSNS (40), /* MXBR B+40 */
271 COSTS_N_INSNS (100), /* SQXBR B+100 */
272 COSTS_N_INSNS (42), /* SQDBR B+42 */
273 COSTS_N_INSNS (28), /* SQEBR B+28 */
274 COSTS_N_INSNS (1), /* MADBR B */
275 COSTS_N_INSNS (1), /* MAEBR B */
276 COSTS_N_INSNS (101), /* DXBR B+101 */
277 COSTS_N_INSNS (29), /* DDBR */
278 COSTS_N_INSNS (22), /* DEBR */
279 COSTS_N_INSNS (160), /* DLGR cracked */
280 COSTS_N_INSNS (160), /* DLR cracked */
281 COSTS_N_INSNS (160), /* DR expanded */
282 COSTS_N_INSNS (160), /* DSGFR cracked */
283 COSTS_N_INSNS (160), /* DSGR cracked */
286 static const
287 struct processor_costs zEC12_cost =
289 COSTS_N_INSNS (7), /* M */
290 COSTS_N_INSNS (5), /* MGHI */
291 COSTS_N_INSNS (5), /* MH */
292 COSTS_N_INSNS (5), /* MHI */
293 COSTS_N_INSNS (7), /* ML */
294 COSTS_N_INSNS (7), /* MR */
295 COSTS_N_INSNS (6), /* MS */
296 COSTS_N_INSNS (8), /* MSG */
297 COSTS_N_INSNS (6), /* MSGF */
298 COSTS_N_INSNS (6), /* MSGFR */
299 COSTS_N_INSNS (8), /* MSGR */
300 COSTS_N_INSNS (6), /* MSR */
301 COSTS_N_INSNS (1) , /* multiplication in DFmode */
302 COSTS_N_INSNS (40), /* MXBR B+40 */
303 COSTS_N_INSNS (100), /* SQXBR B+100 */
304 COSTS_N_INSNS (42), /* SQDBR B+42 */
305 COSTS_N_INSNS (28), /* SQEBR B+28 */
306 COSTS_N_INSNS (1), /* MADBR B */
307 COSTS_N_INSNS (1), /* MAEBR B */
308 COSTS_N_INSNS (131), /* DXBR B+131 */
309 COSTS_N_INSNS (29), /* DDBR */
310 COSTS_N_INSNS (22), /* DEBR */
311 COSTS_N_INSNS (160), /* DLGR cracked */
312 COSTS_N_INSNS (160), /* DLR cracked */
313 COSTS_N_INSNS (160), /* DR expanded */
314 COSTS_N_INSNS (160), /* DSGFR cracked */
315 COSTS_N_INSNS (160), /* DSGR cracked */
318 static struct
320 const char *const name;
321 const enum processor_type processor;
322 const struct processor_costs *cost;
324 const processor_table[] =
326 { "g5", PROCESSOR_9672_G5, &z900_cost },
327 { "g6", PROCESSOR_9672_G6, &z900_cost },
328 { "z900", PROCESSOR_2064_Z900, &z900_cost },
329 { "z990", PROCESSOR_2084_Z990, &z990_cost },
330 { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
331 { "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
332 { "z10", PROCESSOR_2097_Z10, &z10_cost },
333 { "z196", PROCESSOR_2817_Z196, &z196_cost },
334 { "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
335 { "z13", PROCESSOR_2964_Z13, &zEC12_cost },
336 { "native", PROCESSOR_NATIVE, NULL }
339 extern int reload_completed;
341 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
342 static rtx_insn *last_scheduled_insn;
343 #define MAX_SCHED_UNITS 3
344 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
346 /* The maximum score added for an instruction whose unit hasn't been
347 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
348 give instruction mix scheduling more priority over instruction
349 grouping. */
350 #define MAX_SCHED_MIX_SCORE 8
352 /* The maximum distance up to which individual scores will be
353 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
354 Increase this with the OOO windows size of the machine. */
355 #define MAX_SCHED_MIX_DISTANCE 100
357 /* Structure used to hold the components of a S/390 memory
358 address. A legitimate address on S/390 is of the general
359 form
360 base + index + displacement
361 where any of the components is optional.
363 base and index are registers of the class ADDR_REGS,
364 displacement is an unsigned 12-bit immediate constant. */
366 struct s390_address
368 rtx base;
369 rtx indx;
370 rtx disp;
371 bool pointer;
372 bool literal_pool;
375 /* The following structure is embedded in the machine
376 specific part of struct function. */
378 struct GTY (()) s390_frame_layout
380 /* Offset within stack frame. */
381 HOST_WIDE_INT gprs_offset;
382 HOST_WIDE_INT f0_offset;
383 HOST_WIDE_INT f4_offset;
384 HOST_WIDE_INT f8_offset;
385 HOST_WIDE_INT backchain_offset;
387 /* Number of first and last gpr where slots in the register
388 save area are reserved for. */
389 int first_save_gpr_slot;
390 int last_save_gpr_slot;
392 /* Location (FP register number) where GPRs (r0-r15) should
393 be saved to.
394 0 - does not need to be saved at all
395 -1 - stack slot */
396 #define SAVE_SLOT_NONE 0
397 #define SAVE_SLOT_STACK -1
398 signed char gpr_save_slots[16];
400 /* Number of first and last gpr to be saved, restored. */
401 int first_save_gpr;
402 int first_restore_gpr;
403 int last_save_gpr;
404 int last_restore_gpr;
406 /* Bits standing for floating point registers. Set, if the
407 respective register has to be saved. Starting with reg 16 (f0)
408 at the rightmost bit.
409 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
410 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
411 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
412 unsigned int fpr_bitmap;
414 /* Number of floating point registers f8-f15 which must be saved. */
415 int high_fprs;
417 /* Set if return address needs to be saved.
418 This flag is set by s390_return_addr_rtx if it could not use
419 the initial value of r14 and therefore depends on r14 saved
420 to the stack. */
421 bool save_return_addr_p;
423 /* Size of stack frame. */
424 HOST_WIDE_INT frame_size;
427 /* Define the structure for the machine field in struct function. */
429 struct GTY(()) machine_function
431 struct s390_frame_layout frame_layout;
433 /* Literal pool base register. */
434 rtx base_reg;
436 /* True if we may need to perform branch splitting. */
437 bool split_branches_pending_p;
439 bool has_landing_pad_p;
441 /* True if the current function may contain a tbegin clobbering
442 FPRs. */
443 bool tbegin_p;
445 /* For -fsplit-stack support: A stack local which holds a pointer to
446 the stack arguments for a function with a variable number of
447 arguments. This is set at the start of the function and is used
448 to initialize the overflow_arg_area field of the va_list
449 structure. */
450 rtx split_stack_varargs_pointer;
453 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
455 #define cfun_frame_layout (cfun->machine->frame_layout)
456 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
457 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
458 ? cfun_frame_layout.fpr_bitmap & 0x0f \
459 : cfun_frame_layout.fpr_bitmap & 0x03))
460 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
461 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
462 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
463 (1 << (REGNO - FPR0_REGNUM)))
464 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
465 (1 << (REGNO - FPR0_REGNUM))))
466 #define cfun_gpr_save_slot(REGNO) \
467 cfun->machine->frame_layout.gpr_save_slots[REGNO]
469 /* Number of GPRs and FPRs used for argument passing. */
470 #define GP_ARG_NUM_REG 5
471 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
472 #define VEC_ARG_NUM_REG 8
474 /* A couple of shortcuts. */
475 #define CONST_OK_FOR_J(x) \
476 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
477 #define CONST_OK_FOR_K(x) \
478 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
479 #define CONST_OK_FOR_Os(x) \
480 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
481 #define CONST_OK_FOR_Op(x) \
482 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
483 #define CONST_OK_FOR_On(x) \
484 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
486 #define REGNO_PAIR_OK(REGNO, MODE) \
487 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
489 /* That's the read ahead of the dynamic branch prediction unit in
490 bytes on a z10 (or higher) CPU. */
491 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
494 /* Indicate which ABI has been used for passing vector args.
495 0 - no vector type arguments have been passed where the ABI is relevant
496 1 - the old ABI has been used
497 2 - a vector type argument has been passed either in a vector register
498 or on the stack by value */
499 static int s390_vector_abi = 0;
501 /* Set the vector ABI marker if TYPE is subject to the vector ABI
502 switch. The vector ABI affects only vector data types. There are
503 two aspects of the vector ABI relevant here:
505 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
506 ABI and natural alignment with the old.
508 2. vector <= 16 bytes are passed in VRs or by value on the stack
509 with the new ABI but by reference on the stack with the old.
511 If ARG_P is true TYPE is used for a function argument or return
512 value. The ABI marker then is set for all vector data types. If
513 ARG_P is false only type 1 vectors are being checked. */
515 static void
516 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
518 static hash_set<const_tree> visited_types_hash;
520 if (s390_vector_abi)
521 return;
523 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
524 return;
526 if (visited_types_hash.contains (type))
527 return;
529 visited_types_hash.add (type);
531 if (VECTOR_TYPE_P (type))
533 int type_size = int_size_in_bytes (type);
535 /* Outside arguments only the alignment is changing and this
536 only happens for vector types >= 16 bytes. */
537 if (!arg_p && type_size < 16)
538 return;
540 /* In arguments vector types > 16 are passed as before (GCC
541 never enforced the bigger alignment for arguments which was
542 required by the old vector ABI). However, it might still be
543 ABI relevant due to the changed alignment if it is a struct
544 member. */
545 if (arg_p && type_size > 16 && !in_struct_p)
546 return;
548 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
550 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
552 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
553 natural alignment there will never be ABI dependent padding
554 in an array type. That's why we do not set in_struct_p to
555 true here. */
556 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
558 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
560 tree arg_chain;
562 /* Check the return type. */
563 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
565 for (arg_chain = TYPE_ARG_TYPES (type);
566 arg_chain;
567 arg_chain = TREE_CHAIN (arg_chain))
568 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
570 else if (RECORD_OR_UNION_TYPE_P (type))
572 tree field;
574 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
576 if (TREE_CODE (field) != FIELD_DECL)
577 continue;
579 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
585 /* System z builtins. */
587 #include "s390-builtins.h"
589 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
591 #undef B_DEF
592 #undef OB_DEF
593 #undef OB_DEF_VAR
594 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
595 #define OB_DEF(...)
596 #define OB_DEF_VAR(...)
597 #include "s390-builtins.def"
601 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
603 #undef B_DEF
604 #undef OB_DEF
605 #undef OB_DEF_VAR
606 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
607 #define OB_DEF(...)
608 #define OB_DEF_VAR(...)
609 #include "s390-builtins.def"
613 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
615 #undef B_DEF
616 #undef OB_DEF
617 #undef OB_DEF_VAR
618 #define B_DEF(...)
619 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
620 #define OB_DEF_VAR(...)
621 #include "s390-builtins.def"
625 const unsigned int
626 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
628 #undef B_DEF
629 #undef OB_DEF
630 #undef OB_DEF_VAR
631 #define B_DEF(...)
632 #define OB_DEF(...)
633 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
634 #include "s390-builtins.def"
638 tree s390_builtin_types[BT_MAX];
639 tree s390_builtin_fn_types[BT_FN_MAX];
640 tree s390_builtin_decls[S390_BUILTIN_MAX +
641 S390_OVERLOADED_BUILTIN_MAX +
642 S390_OVERLOADED_BUILTIN_VAR_MAX];
644 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
645 #undef B_DEF
646 #undef OB_DEF
647 #undef OB_DEF_VAR
648 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
649 #define OB_DEF(...)
650 #define OB_DEF_VAR(...)
652 #include "s390-builtins.def"
653 CODE_FOR_nothing
656 static void
657 s390_init_builtins (void)
659 /* These definitions are being used in s390-builtins.def. */
660 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
661 NULL, NULL);
662 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
663 tree c_uint64_type_node;
665 /* The uint64_type_node from tree.c is not compatible to the C99
666 uint64_t data type. What we want is c_uint64_type_node from
667 c-common.c. But since backend code is not supposed to interface
668 with the frontend we recreate it here. */
669 if (TARGET_64BIT)
670 c_uint64_type_node = long_unsigned_type_node;
671 else
672 c_uint64_type_node = long_long_unsigned_type_node;
674 #undef DEF_TYPE
675 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
676 if (s390_builtin_types[INDEX] == NULL) \
677 s390_builtin_types[INDEX] = (!CONST_P) ? \
678 (NODE) : build_type_variant ((NODE), 1, 0);
680 #undef DEF_POINTER_TYPE
681 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
682 if (s390_builtin_types[INDEX] == NULL) \
683 s390_builtin_types[INDEX] = \
684 build_pointer_type (s390_builtin_types[INDEX_BASE]);
686 #undef DEF_DISTINCT_TYPE
687 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
688 if (s390_builtin_types[INDEX] == NULL) \
689 s390_builtin_types[INDEX] = \
690 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
692 #undef DEF_VECTOR_TYPE
693 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
694 if (s390_builtin_types[INDEX] == NULL) \
695 s390_builtin_types[INDEX] = \
696 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
698 #undef DEF_OPAQUE_VECTOR_TYPE
699 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
700 if (s390_builtin_types[INDEX] == NULL) \
701 s390_builtin_types[INDEX] = \
702 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
704 #undef DEF_FN_TYPE
705 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
706 if (s390_builtin_fn_types[INDEX] == NULL) \
707 s390_builtin_fn_types[INDEX] = \
708 build_function_type_list (args, NULL_TREE);
709 #undef DEF_OV_TYPE
710 #define DEF_OV_TYPE(...)
711 #include "s390-builtin-types.def"
713 #undef B_DEF
714 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
715 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
716 s390_builtin_decls[S390_BUILTIN_##NAME] = \
717 add_builtin_function ("__builtin_" #NAME, \
718 s390_builtin_fn_types[FNTYPE], \
719 S390_BUILTIN_##NAME, \
720 BUILT_IN_MD, \
721 NULL, \
722 ATTRS);
723 #undef OB_DEF
724 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
725 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
726 == NULL) \
727 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
728 add_builtin_function ("__builtin_" #NAME, \
729 s390_builtin_fn_types[FNTYPE], \
730 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
731 BUILT_IN_MD, \
732 NULL, \
734 #undef OB_DEF_VAR
735 #define OB_DEF_VAR(...)
736 #include "s390-builtins.def"
740 /* Return true if ARG is appropriate as argument number ARGNUM of
741 builtin DECL. The operand flags from s390-builtins.def have to
742 passed as OP_FLAGS. */
743 bool
744 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
746 if (O_UIMM_P (op_flags))
748 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
749 int bitwidth = bitwidths[op_flags - O_U1];
751 if (!tree_fits_uhwi_p (arg)
752 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
754 error("constant argument %d for builtin %qF is out of range (0.."
755 HOST_WIDE_INT_PRINT_UNSIGNED ")",
756 argnum, decl,
757 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
758 return false;
762 if (O_SIMM_P (op_flags))
764 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
765 int bitwidth = bitwidths[op_flags - O_S2];
767 if (!tree_fits_shwi_p (arg)
768 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
769 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
771 error("constant argument %d for builtin %qF is out of range ("
772 HOST_WIDE_INT_PRINT_DEC ".."
773 HOST_WIDE_INT_PRINT_DEC ")",
774 argnum, decl,
775 -((HOST_WIDE_INT)1 << (bitwidth - 1)),
776 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
777 return false;
780 return true;
783 /* Expand an expression EXP that calls a built-in function,
784 with result going to TARGET if that's convenient
785 (and in mode MODE if that's convenient).
786 SUBTARGET may be used as the target for computing one of EXP's operands.
787 IGNORE is nonzero if the value is to be ignored. */
789 static rtx
790 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
791 machine_mode mode ATTRIBUTE_UNUSED,
792 int ignore ATTRIBUTE_UNUSED)
794 #define MAX_ARGS 5
796 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
797 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
798 enum insn_code icode;
799 rtx op[MAX_ARGS], pat;
800 int arity;
801 bool nonvoid;
802 tree arg;
803 call_expr_arg_iterator iter;
804 unsigned int all_op_flags = opflags_for_builtin (fcode);
805 machine_mode last_vec_mode = VOIDmode;
807 if (TARGET_DEBUG_ARG)
809 fprintf (stderr,
810 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
811 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
812 bflags_for_builtin (fcode));
815 if (S390_USE_TARGET_ATTRIBUTE)
817 unsigned int bflags;
819 bflags = bflags_for_builtin (fcode);
820 if ((bflags & B_HTM) && !TARGET_HTM)
822 error ("Builtin %qF is not supported without -mhtm "
823 "(default with -march=zEC12 and higher).", fndecl);
824 return const0_rtx;
826 if ((bflags & B_VX) && !TARGET_VX)
828 error ("Builtin %qF is not supported without -mvx "
829 "(default with -march=z13 and higher).", fndecl);
830 return const0_rtx;
833 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
834 && fcode < S390_ALL_BUILTIN_MAX)
836 gcc_unreachable ();
838 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
840 icode = code_for_builtin[fcode];
841 /* Set a flag in the machine specific cfun part in order to support
842 saving/restoring of FPRs. */
843 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
844 cfun->machine->tbegin_p = true;
846 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
848 error ("Unresolved overloaded builtin");
849 return const0_rtx;
851 else
852 internal_error ("bad builtin fcode");
854 if (icode == 0)
855 internal_error ("bad builtin icode");
857 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
859 if (nonvoid)
861 machine_mode tmode = insn_data[icode].operand[0].mode;
862 if (!target
863 || GET_MODE (target) != tmode
864 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
865 target = gen_reg_rtx (tmode);
867 /* There are builtins (e.g. vec_promote) with no vector
868 arguments but an element selector. So we have to also look
869 at the vector return type when emitting the modulo
870 operation. */
871 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
872 last_vec_mode = insn_data[icode].operand[0].mode;
875 arity = 0;
876 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
878 const struct insn_operand_data *insn_op;
879 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
881 all_op_flags = all_op_flags >> O_SHIFT;
883 if (arg == error_mark_node)
884 return NULL_RTX;
885 if (arity >= MAX_ARGS)
886 return NULL_RTX;
888 if (O_IMM_P (op_flags)
889 && TREE_CODE (arg) != INTEGER_CST)
891 error ("constant value required for builtin %qF argument %d",
892 fndecl, arity + 1);
893 return const0_rtx;
896 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
897 return const0_rtx;
899 insn_op = &insn_data[icode].operand[arity + nonvoid];
900 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
902 /* expand_expr truncates constants to the target mode only if it
903 is "convenient". However, our checks below rely on this
904 being done. */
905 if (CONST_INT_P (op[arity])
906 && SCALAR_INT_MODE_P (insn_op->mode)
907 && GET_MODE (op[arity]) != insn_op->mode)
908 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
909 insn_op->mode));
911 /* Wrap the expanded RTX for pointer types into a MEM expr with
912 the proper mode. This allows us to use e.g. (match_operand
913 "memory_operand"..) in the insn patterns instead of (mem
914 (match_operand "address_operand)). This is helpful for
915 patterns not just accepting MEMs. */
916 if (POINTER_TYPE_P (TREE_TYPE (arg))
917 && insn_op->predicate != address_operand)
918 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
920 /* Expand the module operation required on element selectors. */
921 if (op_flags == O_ELEM)
923 gcc_assert (last_vec_mode != VOIDmode);
924 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
925 op[arity],
926 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
927 NULL_RTX, 1, OPTAB_DIRECT);
930 /* Record the vector mode used for an element selector. This assumes:
931 1. There is no builtin with two different vector modes and an element selector
932 2. The element selector comes after the vector type it is referring to.
933 This currently the true for all the builtins but FIXME we
934 should better check for that. */
935 if (VECTOR_MODE_P (insn_op->mode))
936 last_vec_mode = insn_op->mode;
938 if (insn_op->predicate (op[arity], insn_op->mode))
940 arity++;
941 continue;
944 if (MEM_P (op[arity])
945 && insn_op->predicate == memory_operand
946 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
947 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
949 op[arity] = replace_equiv_address (op[arity],
950 copy_to_mode_reg (Pmode,
951 XEXP (op[arity], 0)));
953 else if (GET_MODE (op[arity]) == insn_op->mode
954 || GET_MODE (op[arity]) == VOIDmode
955 || (insn_op->predicate == address_operand
956 && GET_MODE (op[arity]) == Pmode))
958 /* An address_operand usually has VOIDmode in the expander
959 so we cannot use this. */
960 machine_mode target_mode =
961 (insn_op->predicate == address_operand
962 ? Pmode : insn_op->mode);
963 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
966 if (!insn_op->predicate (op[arity], insn_op->mode))
968 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
969 return const0_rtx;
971 arity++;
974 switch (arity)
976 case 0:
977 pat = GEN_FCN (icode) (target);
978 break;
979 case 1:
980 if (nonvoid)
981 pat = GEN_FCN (icode) (target, op[0]);
982 else
983 pat = GEN_FCN (icode) (op[0]);
984 break;
985 case 2:
986 if (nonvoid)
987 pat = GEN_FCN (icode) (target, op[0], op[1]);
988 else
989 pat = GEN_FCN (icode) (op[0], op[1]);
990 break;
991 case 3:
992 if (nonvoid)
993 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
994 else
995 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
996 break;
997 case 4:
998 if (nonvoid)
999 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1000 else
1001 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1002 break;
1003 case 5:
1004 if (nonvoid)
1005 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1006 else
1007 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1008 break;
1009 case 6:
1010 if (nonvoid)
1011 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1012 else
1013 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1014 break;
1015 default:
1016 gcc_unreachable ();
1018 if (!pat)
1019 return NULL_RTX;
1020 emit_insn (pat);
1022 if (nonvoid)
1023 return target;
1024 else
1025 return const0_rtx;
1029 static const int s390_hotpatch_hw_max = 1000000;
1030 static int s390_hotpatch_hw_before_label = 0;
1031 static int s390_hotpatch_hw_after_label = 0;
1033 /* Check whether the hotpatch attribute is applied to a function and, if it has
1034 an argument, the argument is valid. */
1036 static tree
1037 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1038 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1040 tree expr;
1041 tree expr2;
1042 int err;
1044 if (TREE_CODE (*node) != FUNCTION_DECL)
1046 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1047 name);
1048 *no_add_attrs = true;
1050 if (args != NULL && TREE_CHAIN (args) != NULL)
1052 expr = TREE_VALUE (args);
1053 expr2 = TREE_VALUE (TREE_CHAIN (args));
1055 if (args == NULL || TREE_CHAIN (args) == NULL)
1056 err = 1;
1057 else if (TREE_CODE (expr) != INTEGER_CST
1058 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1059 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1060 err = 1;
1061 else if (TREE_CODE (expr2) != INTEGER_CST
1062 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1063 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1064 err = 1;
1065 else
1066 err = 0;
1067 if (err)
1069 error ("requested %qE attribute is not a comma separated pair of"
1070 " non-negative integer constants or too large (max. %d)", name,
1071 s390_hotpatch_hw_max);
1072 *no_add_attrs = true;
1075 return NULL_TREE;
1078 /* Expand the s390_vector_bool type attribute. */
1080 static tree
1081 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1082 tree args ATTRIBUTE_UNUSED,
1083 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1085 tree type = *node, result = NULL_TREE;
1086 machine_mode mode;
1088 while (POINTER_TYPE_P (type)
1089 || TREE_CODE (type) == FUNCTION_TYPE
1090 || TREE_CODE (type) == METHOD_TYPE
1091 || TREE_CODE (type) == ARRAY_TYPE)
1092 type = TREE_TYPE (type);
1094 mode = TYPE_MODE (type);
1095 switch (mode)
1097 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1098 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1099 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1100 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
1101 default: break;
1104 *no_add_attrs = true; /* No need to hang on to the attribute. */
1106 if (result)
1107 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1109 return NULL_TREE;
1112 static const struct attribute_spec s390_attribute_table[] = {
1113 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1114 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1115 /* End element. */
1116 { NULL, 0, 0, false, false, false, NULL, false }
1119 /* Return the alignment for LABEL. We default to the -falign-labels
1120 value except for the literal pool base label. */
1122 s390_label_align (rtx label)
1124 rtx_insn *prev_insn = prev_active_insn (label);
1125 rtx set, src;
1127 if (prev_insn == NULL_RTX)
1128 goto old;
1130 set = single_set (prev_insn);
1132 if (set == NULL_RTX)
1133 goto old;
1135 src = SET_SRC (set);
1137 /* Don't align literal pool base labels. */
1138 if (GET_CODE (src) == UNSPEC
1139 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1140 return 0;
1142 old:
1143 return align_labels_log;
1146 static machine_mode
1147 s390_libgcc_cmp_return_mode (void)
1149 return TARGET_64BIT ? DImode : SImode;
1152 static machine_mode
1153 s390_libgcc_shift_count_mode (void)
1155 return TARGET_64BIT ? DImode : SImode;
1158 static machine_mode
1159 s390_unwind_word_mode (void)
1161 return TARGET_64BIT ? DImode : SImode;
1164 /* Return true if the back end supports mode MODE. */
1165 static bool
1166 s390_scalar_mode_supported_p (machine_mode mode)
1168 /* In contrast to the default implementation reject TImode constants on 31bit
1169 TARGET_ZARCH for ABI compliance. */
1170 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1171 return false;
1173 if (DECIMAL_FLOAT_MODE_P (mode))
1174 return default_decimal_float_supported_p ();
1176 return default_scalar_mode_supported_p (mode);
1179 /* Return true if the back end supports vector mode MODE. */
1180 static bool
1181 s390_vector_mode_supported_p (machine_mode mode)
1183 machine_mode inner;
1185 if (!VECTOR_MODE_P (mode)
1186 || !TARGET_VX
1187 || GET_MODE_SIZE (mode) > 16)
1188 return false;
1190 inner = GET_MODE_INNER (mode);
1192 switch (inner)
1194 case QImode:
1195 case HImode:
1196 case SImode:
1197 case DImode:
1198 case TImode:
1199 case SFmode:
1200 case DFmode:
1201 case TFmode:
1202 return true;
1203 default:
1204 return false;
1208 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1210 void
1211 s390_set_has_landing_pad_p (bool value)
1213 cfun->machine->has_landing_pad_p = value;
1216 /* If two condition code modes are compatible, return a condition code
1217 mode which is compatible with both. Otherwise, return
1218 VOIDmode. */
1220 static machine_mode
1221 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1223 if (m1 == m2)
1224 return m1;
1226 switch (m1)
1228 case CCZmode:
1229 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1230 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1231 return m2;
1232 return VOIDmode;
1234 case CCSmode:
1235 case CCUmode:
1236 case CCTmode:
1237 case CCSRmode:
1238 case CCURmode:
1239 case CCZ1mode:
1240 if (m2 == CCZmode)
1241 return m1;
1243 return VOIDmode;
1245 default:
1246 return VOIDmode;
1248 return VOIDmode;
1251 /* Return true if SET either doesn't set the CC register, or else
1252 the source and destination have matching CC modes and that
1253 CC mode is at least as constrained as REQ_MODE. */
1255 static bool
1256 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1258 machine_mode set_mode;
1260 gcc_assert (GET_CODE (set) == SET);
1262 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1263 return 1;
1265 set_mode = GET_MODE (SET_DEST (set));
1266 switch (set_mode)
1268 case CCSmode:
1269 case CCSRmode:
1270 case CCUmode:
1271 case CCURmode:
1272 case CCLmode:
1273 case CCL1mode:
1274 case CCL2mode:
1275 case CCL3mode:
1276 case CCT1mode:
1277 case CCT2mode:
1278 case CCT3mode:
1279 case CCVEQmode:
1280 case CCVHmode:
1281 case CCVHUmode:
1282 case CCVFHmode:
1283 case CCVFHEmode:
1284 if (req_mode != set_mode)
1285 return 0;
1286 break;
1288 case CCZmode:
1289 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1290 && req_mode != CCSRmode && req_mode != CCURmode)
1291 return 0;
1292 break;
1294 case CCAPmode:
1295 case CCANmode:
1296 if (req_mode != CCAmode)
1297 return 0;
1298 break;
1300 default:
1301 gcc_unreachable ();
1304 return (GET_MODE (SET_SRC (set)) == set_mode);
1307 /* Return true if every SET in INSN that sets the CC register
1308 has source and destination with matching CC modes and that
1309 CC mode is at least as constrained as REQ_MODE.
1310 If REQ_MODE is VOIDmode, always return false. */
1312 bool
1313 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1315 int i;
1317 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1318 if (req_mode == VOIDmode)
1319 return false;
1321 if (GET_CODE (PATTERN (insn)) == SET)
1322 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1324 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1325 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1327 rtx set = XVECEXP (PATTERN (insn), 0, i);
1328 if (GET_CODE (set) == SET)
1329 if (!s390_match_ccmode_set (set, req_mode))
1330 return false;
1333 return true;
1336 /* If a test-under-mask instruction can be used to implement
1337 (compare (and ... OP1) OP2), return the CC mode required
1338 to do that. Otherwise, return VOIDmode.
1339 MIXED is true if the instruction can distinguish between
1340 CC1 and CC2 for mixed selected bits (TMxx), it is false
1341 if the instruction cannot (TM). */
1343 machine_mode
1344 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1346 int bit0, bit1;
1348 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1349 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1350 return VOIDmode;
1352 /* Selected bits all zero: CC0.
1353 e.g.: int a; if ((a & (16 + 128)) == 0) */
1354 if (INTVAL (op2) == 0)
1355 return CCTmode;
1357 /* Selected bits all one: CC3.
1358 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1359 if (INTVAL (op2) == INTVAL (op1))
1360 return CCT3mode;
1362 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1363 int a;
1364 if ((a & (16 + 128)) == 16) -> CCT1
1365 if ((a & (16 + 128)) == 128) -> CCT2 */
1366 if (mixed)
1368 bit1 = exact_log2 (INTVAL (op2));
1369 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1370 if (bit0 != -1 && bit1 != -1)
1371 return bit0 > bit1 ? CCT1mode : CCT2mode;
1374 return VOIDmode;
1377 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1378 OP0 and OP1 of a COMPARE, return the mode to be used for the
1379 comparison. */
1381 machine_mode
1382 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1384 if (TARGET_VX
1385 && register_operand (op0, DFmode)
1386 && register_operand (op1, DFmode))
1388 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1389 s390_emit_compare or s390_canonicalize_comparison will take
1390 care of it. */
1391 switch (code)
1393 case EQ:
1394 case NE:
1395 return CCVEQmode;
1396 case GT:
1397 case UNLE:
1398 return CCVFHmode;
1399 case GE:
1400 case UNLT:
1401 return CCVFHEmode;
1402 default:
1407 switch (code)
1409 case EQ:
1410 case NE:
1411 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1412 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1413 return CCAPmode;
1414 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1415 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1416 return CCAPmode;
1417 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1418 || GET_CODE (op1) == NEG)
1419 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1420 return CCLmode;
1422 if (GET_CODE (op0) == AND)
1424 /* Check whether we can potentially do it via TM. */
1425 machine_mode ccmode;
1426 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1427 if (ccmode != VOIDmode)
1429 /* Relax CCTmode to CCZmode to allow fall-back to AND
1430 if that turns out to be beneficial. */
1431 return ccmode == CCTmode ? CCZmode : ccmode;
1435 if (register_operand (op0, HImode)
1436 && GET_CODE (op1) == CONST_INT
1437 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1438 return CCT3mode;
1439 if (register_operand (op0, QImode)
1440 && GET_CODE (op1) == CONST_INT
1441 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1442 return CCT3mode;
1444 return CCZmode;
1446 case LE:
1447 case LT:
1448 case GE:
1449 case GT:
1450 /* The only overflow condition of NEG and ABS happens when
1451 -INT_MAX is used as parameter, which stays negative. So
1452 we have an overflow from a positive value to a negative.
1453 Using CCAP mode the resulting cc can be used for comparisons. */
1454 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1455 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1456 return CCAPmode;
1458 /* If constants are involved in an add instruction it is possible to use
1459 the resulting cc for comparisons with zero. Knowing the sign of the
1460 constant the overflow behavior gets predictable. e.g.:
1461 int a, b; if ((b = a + c) > 0)
1462 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1463 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1464 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1465 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1466 /* Avoid INT32_MIN on 32 bit. */
1467 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1469 if (INTVAL (XEXP((op0), 1)) < 0)
1470 return CCANmode;
1471 else
1472 return CCAPmode;
1474 /* Fall through. */
1475 case UNORDERED:
1476 case ORDERED:
1477 case UNEQ:
1478 case UNLE:
1479 case UNLT:
1480 case UNGE:
1481 case UNGT:
1482 case LTGT:
1483 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1484 && GET_CODE (op1) != CONST_INT)
1485 return CCSRmode;
1486 return CCSmode;
1488 case LTU:
1489 case GEU:
1490 if (GET_CODE (op0) == PLUS
1491 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1492 return CCL1mode;
1494 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1495 && GET_CODE (op1) != CONST_INT)
1496 return CCURmode;
1497 return CCUmode;
1499 case LEU:
1500 case GTU:
1501 if (GET_CODE (op0) == MINUS
1502 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1503 return CCL2mode;
1505 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1506 && GET_CODE (op1) != CONST_INT)
1507 return CCURmode;
1508 return CCUmode;
1510 default:
1511 gcc_unreachable ();
1515 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1516 that we can implement more efficiently. */
1518 static void
1519 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1520 bool op0_preserve_value)
1522 if (op0_preserve_value)
1523 return;
1525 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1526 if ((*code == EQ || *code == NE)
1527 && *op1 == const0_rtx
1528 && GET_CODE (*op0) == ZERO_EXTRACT
1529 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1530 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1531 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1533 rtx inner = XEXP (*op0, 0);
1534 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1535 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1536 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1538 if (len > 0 && len < modesize
1539 && pos >= 0 && pos + len <= modesize
1540 && modesize <= HOST_BITS_PER_WIDE_INT)
1542 unsigned HOST_WIDE_INT block;
1543 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1544 block <<= modesize - pos - len;
1546 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1547 gen_int_mode (block, GET_MODE (inner)));
1551 /* Narrow AND of memory against immediate to enable TM. */
1552 if ((*code == EQ || *code == NE)
1553 && *op1 == const0_rtx
1554 && GET_CODE (*op0) == AND
1555 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1556 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1558 rtx inner = XEXP (*op0, 0);
1559 rtx mask = XEXP (*op0, 1);
1561 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1562 if (GET_CODE (inner) == SUBREG
1563 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1564 && (GET_MODE_SIZE (GET_MODE (inner))
1565 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1566 && ((INTVAL (mask)
1567 & GET_MODE_MASK (GET_MODE (inner))
1568 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1569 == 0))
1570 inner = SUBREG_REG (inner);
1572 /* Do not change volatile MEMs. */
1573 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1575 int part = s390_single_part (XEXP (*op0, 1),
1576 GET_MODE (inner), QImode, 0);
1577 if (part >= 0)
1579 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1580 inner = adjust_address_nv (inner, QImode, part);
1581 *op0 = gen_rtx_AND (QImode, inner, mask);
1586 /* Narrow comparisons against 0xffff to HImode if possible. */
1587 if ((*code == EQ || *code == NE)
1588 && GET_CODE (*op1) == CONST_INT
1589 && INTVAL (*op1) == 0xffff
1590 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1591 && (nonzero_bits (*op0, GET_MODE (*op0))
1592 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1594 *op0 = gen_lowpart (HImode, *op0);
1595 *op1 = constm1_rtx;
1598 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1599 if (GET_CODE (*op0) == UNSPEC
1600 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1601 && XVECLEN (*op0, 0) == 1
1602 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1603 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1604 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1605 && *op1 == const0_rtx)
1607 enum rtx_code new_code = UNKNOWN;
1608 switch (*code)
1610 case EQ: new_code = EQ; break;
1611 case NE: new_code = NE; break;
1612 case LT: new_code = GTU; break;
1613 case GT: new_code = LTU; break;
1614 case LE: new_code = GEU; break;
1615 case GE: new_code = LEU; break;
1616 default: break;
1619 if (new_code != UNKNOWN)
1621 *op0 = XVECEXP (*op0, 0, 0);
1622 *code = new_code;
1626 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1627 if (GET_CODE (*op0) == UNSPEC
1628 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1629 && XVECLEN (*op0, 0) == 1
1630 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1631 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1632 && CONST_INT_P (*op1))
1634 enum rtx_code new_code = UNKNOWN;
1635 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1637 case CCZmode:
1638 case CCRAWmode:
1639 switch (*code)
1641 case EQ: new_code = EQ; break;
1642 case NE: new_code = NE; break;
1643 default: break;
1645 break;
1646 default: break;
1649 if (new_code != UNKNOWN)
1651 /* For CCRAWmode put the required cc mask into the second
1652 operand. */
1653 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1654 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1655 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1656 *op0 = XVECEXP (*op0, 0, 0);
1657 *code = new_code;
1661 /* Simplify cascaded EQ, NE with const0_rtx. */
1662 if ((*code == NE || *code == EQ)
1663 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1664 && GET_MODE (*op0) == SImode
1665 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1666 && REG_P (XEXP (*op0, 0))
1667 && XEXP (*op0, 1) == const0_rtx
1668 && *op1 == const0_rtx)
1670 if ((*code == EQ && GET_CODE (*op0) == NE)
1671 || (*code == NE && GET_CODE (*op0) == EQ))
1672 *code = EQ;
1673 else
1674 *code = NE;
1675 *op0 = XEXP (*op0, 0);
1678 /* Prefer register over memory as first operand. */
1679 if (MEM_P (*op0) && REG_P (*op1))
1681 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1682 *code = (int)swap_condition ((enum rtx_code)*code);
1685 /* Using the scalar variants of vector instructions for 64 bit FP
1686 comparisons might require swapping the operands. */
1687 if (TARGET_VX
1688 && register_operand (*op0, DFmode)
1689 && register_operand (*op1, DFmode)
1690 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1692 rtx tmp;
1694 switch (*code)
1696 case LT: *code = GT; break;
1697 case LE: *code = GE; break;
1698 case UNGT: *code = UNLE; break;
1699 case UNGE: *code = UNLT; break;
1700 default: ;
1702 tmp = *op0; *op0 = *op1; *op1 = tmp;
1706 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1707 FP compare using the single element variant of vector instructions.
1708 Replace CODE with the comparison code to be used in the CC reg
1709 compare and return the condition code register RTX in CC. */
1711 static bool
1712 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1713 rtx *cc)
1715 machine_mode cmp_mode;
1716 bool swap_p = false;
1718 switch (*code)
1720 case EQ: cmp_mode = CCVEQmode; break;
1721 case NE: cmp_mode = CCVEQmode; break;
1722 case GT: cmp_mode = CCVFHmode; break;
1723 case GE: cmp_mode = CCVFHEmode; break;
1724 case UNLE: cmp_mode = CCVFHmode; break;
1725 case UNLT: cmp_mode = CCVFHEmode; break;
1726 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1727 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1728 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1729 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1730 default: return false;
1733 if (swap_p)
1735 rtx tmp = cmp2;
1736 cmp2 = cmp1;
1737 cmp1 = tmp;
1739 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1740 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1741 gen_rtvec (2,
1742 gen_rtx_SET (*cc,
1743 gen_rtx_COMPARE (cmp_mode, cmp1,
1744 cmp2)),
1745 gen_rtx_CLOBBER (VOIDmode,
1746 gen_rtx_SCRATCH (V2DImode)))));
1747 return true;
1751 /* Emit a compare instruction suitable to implement the comparison
1752 OP0 CODE OP1. Return the correct condition RTL to be placed in
1753 the IF_THEN_ELSE of the conditional branch testing the result. */
1756 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1758 machine_mode mode = s390_select_ccmode (code, op0, op1);
1759 rtx cc;
1761 if (TARGET_VX
1762 && register_operand (op0, DFmode)
1763 && register_operand (op1, DFmode)
1764 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1766 /* Work has been done by s390_expand_vec_compare_scalar already. */
1768 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1770 /* Do not output a redundant compare instruction if a
1771 compare_and_swap pattern already computed the result and the
1772 machine modes are compatible. */
1773 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1774 == GET_MODE (op0));
1775 cc = op0;
1777 else
1779 cc = gen_rtx_REG (mode, CC_REGNUM);
1780 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1783 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1786 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1787 matches CMP.
1788 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1789 conditional branch testing the result. */
1791 static rtx
1792 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1793 rtx cmp, rtx new_rtx)
1795 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1796 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1797 const0_rtx);
1800 /* Emit a jump instruction to TARGET and return it. If COND is
1801 NULL_RTX, emit an unconditional jump, else a conditional jump under
1802 condition COND. */
1804 rtx_insn *
1805 s390_emit_jump (rtx target, rtx cond)
1807 rtx insn;
1809 target = gen_rtx_LABEL_REF (VOIDmode, target);
1810 if (cond)
1811 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1813 insn = gen_rtx_SET (pc_rtx, target);
1814 return emit_jump_insn (insn);
1817 /* Return branch condition mask to implement a branch
1818 specified by CODE. Return -1 for invalid comparisons. */
1821 s390_branch_condition_mask (rtx code)
1823 const int CC0 = 1 << 3;
1824 const int CC1 = 1 << 2;
1825 const int CC2 = 1 << 1;
1826 const int CC3 = 1 << 0;
1828 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1829 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1830 gcc_assert (XEXP (code, 1) == const0_rtx
1831 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1832 && CONST_INT_P (XEXP (code, 1))));
1835 switch (GET_MODE (XEXP (code, 0)))
1837 case CCZmode:
1838 case CCZ1mode:
1839 switch (GET_CODE (code))
1841 case EQ: return CC0;
1842 case NE: return CC1 | CC2 | CC3;
1843 default: return -1;
1845 break;
1847 case CCT1mode:
1848 switch (GET_CODE (code))
1850 case EQ: return CC1;
1851 case NE: return CC0 | CC2 | CC3;
1852 default: return -1;
1854 break;
1856 case CCT2mode:
1857 switch (GET_CODE (code))
1859 case EQ: return CC2;
1860 case NE: return CC0 | CC1 | CC3;
1861 default: return -1;
1863 break;
1865 case CCT3mode:
1866 switch (GET_CODE (code))
1868 case EQ: return CC3;
1869 case NE: return CC0 | CC1 | CC2;
1870 default: return -1;
1872 break;
1874 case CCLmode:
1875 switch (GET_CODE (code))
1877 case EQ: return CC0 | CC2;
1878 case NE: return CC1 | CC3;
1879 default: return -1;
1881 break;
1883 case CCL1mode:
1884 switch (GET_CODE (code))
1886 case LTU: return CC2 | CC3; /* carry */
1887 case GEU: return CC0 | CC1; /* no carry */
1888 default: return -1;
1890 break;
1892 case CCL2mode:
1893 switch (GET_CODE (code))
1895 case GTU: return CC0 | CC1; /* borrow */
1896 case LEU: return CC2 | CC3; /* no borrow */
1897 default: return -1;
1899 break;
1901 case CCL3mode:
1902 switch (GET_CODE (code))
1904 case EQ: return CC0 | CC2;
1905 case NE: return CC1 | CC3;
1906 case LTU: return CC1;
1907 case GTU: return CC3;
1908 case LEU: return CC1 | CC2;
1909 case GEU: return CC2 | CC3;
1910 default: return -1;
1913 case CCUmode:
1914 switch (GET_CODE (code))
1916 case EQ: return CC0;
1917 case NE: return CC1 | CC2 | CC3;
1918 case LTU: return CC1;
1919 case GTU: return CC2;
1920 case LEU: return CC0 | CC1;
1921 case GEU: return CC0 | CC2;
1922 default: return -1;
1924 break;
1926 case CCURmode:
1927 switch (GET_CODE (code))
1929 case EQ: return CC0;
1930 case NE: return CC2 | CC1 | CC3;
1931 case LTU: return CC2;
1932 case GTU: return CC1;
1933 case LEU: return CC0 | CC2;
1934 case GEU: return CC0 | CC1;
1935 default: return -1;
1937 break;
1939 case CCAPmode:
1940 switch (GET_CODE (code))
1942 case EQ: return CC0;
1943 case NE: return CC1 | CC2 | CC3;
1944 case LT: return CC1 | CC3;
1945 case GT: return CC2;
1946 case LE: return CC0 | CC1 | CC3;
1947 case GE: return CC0 | CC2;
1948 default: return -1;
1950 break;
1952 case CCANmode:
1953 switch (GET_CODE (code))
1955 case EQ: return CC0;
1956 case NE: return CC1 | CC2 | CC3;
1957 case LT: return CC1;
1958 case GT: return CC2 | CC3;
1959 case LE: return CC0 | CC1;
1960 case GE: return CC0 | CC2 | CC3;
1961 default: return -1;
1963 break;
1965 case CCSmode:
1966 switch (GET_CODE (code))
1968 case EQ: return CC0;
1969 case NE: return CC1 | CC2 | CC3;
1970 case LT: return CC1;
1971 case GT: return CC2;
1972 case LE: return CC0 | CC1;
1973 case GE: return CC0 | CC2;
1974 case UNORDERED: return CC3;
1975 case ORDERED: return CC0 | CC1 | CC2;
1976 case UNEQ: return CC0 | CC3;
1977 case UNLT: return CC1 | CC3;
1978 case UNGT: return CC2 | CC3;
1979 case UNLE: return CC0 | CC1 | CC3;
1980 case UNGE: return CC0 | CC2 | CC3;
1981 case LTGT: return CC1 | CC2;
1982 default: return -1;
1984 break;
1986 case CCSRmode:
1987 switch (GET_CODE (code))
1989 case EQ: return CC0;
1990 case NE: return CC2 | CC1 | CC3;
1991 case LT: return CC2;
1992 case GT: return CC1;
1993 case LE: return CC0 | CC2;
1994 case GE: return CC0 | CC1;
1995 case UNORDERED: return CC3;
1996 case ORDERED: return CC0 | CC2 | CC1;
1997 case UNEQ: return CC0 | CC3;
1998 case UNLT: return CC2 | CC3;
1999 case UNGT: return CC1 | CC3;
2000 case UNLE: return CC0 | CC2 | CC3;
2001 case UNGE: return CC0 | CC1 | CC3;
2002 case LTGT: return CC2 | CC1;
2003 default: return -1;
2005 break;
2007 /* Vector comparison modes. */
2009 case CCVEQmode:
2010 switch (GET_CODE (code))
2012 case EQ: return CC0;
2013 case NE: return CC3;
2014 default: return -1;
2017 case CCVEQANYmode:
2018 switch (GET_CODE (code))
2020 case EQ: return CC0 | CC1;
2021 case NE: return CC3 | CC1;
2022 default: return -1;
2025 /* Integer vector compare modes. */
2027 case CCVHmode:
2028 switch (GET_CODE (code))
2030 case GT: return CC0;
2031 case LE: return CC3;
2032 default: return -1;
2035 case CCVHANYmode:
2036 switch (GET_CODE (code))
2038 case GT: return CC0 | CC1;
2039 case LE: return CC3 | CC1;
2040 default: return -1;
2043 case CCVHUmode:
2044 switch (GET_CODE (code))
2046 case GTU: return CC0;
2047 case LEU: return CC3;
2048 default: return -1;
2051 case CCVHUANYmode:
2052 switch (GET_CODE (code))
2054 case GTU: return CC0 | CC1;
2055 case LEU: return CC3 | CC1;
2056 default: return -1;
2059 /* FP vector compare modes. */
2061 case CCVFHmode:
2062 switch (GET_CODE (code))
2064 case GT: return CC0;
2065 case UNLE: return CC3;
2066 default: return -1;
2069 case CCVFHANYmode:
2070 switch (GET_CODE (code))
2072 case GT: return CC0 | CC1;
2073 case UNLE: return CC3 | CC1;
2074 default: return -1;
2077 case CCVFHEmode:
2078 switch (GET_CODE (code))
2080 case GE: return CC0;
2081 case UNLT: return CC3;
2082 default: return -1;
2085 case CCVFHEANYmode:
2086 switch (GET_CODE (code))
2088 case GE: return CC0 | CC1;
2089 case UNLT: return CC3 | CC1;
2090 default: return -1;
2094 case CCRAWmode:
2095 switch (GET_CODE (code))
2097 case EQ:
2098 return INTVAL (XEXP (code, 1));
2099 case NE:
2100 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2101 default:
2102 gcc_unreachable ();
2105 default:
2106 return -1;
2111 /* Return branch condition mask to implement a compare and branch
2112 specified by CODE. Return -1 for invalid comparisons. */
2115 s390_compare_and_branch_condition_mask (rtx code)
2117 const int CC0 = 1 << 3;
2118 const int CC1 = 1 << 2;
2119 const int CC2 = 1 << 1;
2121 switch (GET_CODE (code))
2123 case EQ:
2124 return CC0;
2125 case NE:
2126 return CC1 | CC2;
2127 case LT:
2128 case LTU:
2129 return CC1;
2130 case GT:
2131 case GTU:
2132 return CC2;
2133 case LE:
2134 case LEU:
2135 return CC0 | CC1;
2136 case GE:
2137 case GEU:
2138 return CC0 | CC2;
2139 default:
2140 gcc_unreachable ();
2142 return -1;
2145 /* If INV is false, return assembler mnemonic string to implement
2146 a branch specified by CODE. If INV is true, return mnemonic
2147 for the corresponding inverted branch. */
2149 static const char *
2150 s390_branch_condition_mnemonic (rtx code, int inv)
2152 int mask;
2154 static const char *const mnemonic[16] =
2156 NULL, "o", "h", "nle",
2157 "l", "nhe", "lh", "ne",
2158 "e", "nlh", "he", "nl",
2159 "le", "nh", "no", NULL
2162 if (GET_CODE (XEXP (code, 0)) == REG
2163 && REGNO (XEXP (code, 0)) == CC_REGNUM
2164 && (XEXP (code, 1) == const0_rtx
2165 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2166 && CONST_INT_P (XEXP (code, 1)))))
2167 mask = s390_branch_condition_mask (code);
2168 else
2169 mask = s390_compare_and_branch_condition_mask (code);
2171 gcc_assert (mask >= 0);
2173 if (inv)
2174 mask ^= 15;
2176 gcc_assert (mask >= 1 && mask <= 14);
2178 return mnemonic[mask];
2181 /* Return the part of op which has a value different from def.
2182 The size of the part is determined by mode.
2183 Use this function only if you already know that op really
2184 contains such a part. */
2186 unsigned HOST_WIDE_INT
2187 s390_extract_part (rtx op, machine_mode mode, int def)
2189 unsigned HOST_WIDE_INT value = 0;
2190 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2191 int part_bits = GET_MODE_BITSIZE (mode);
2192 unsigned HOST_WIDE_INT part_mask
2193 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2194 int i;
2196 for (i = 0; i < max_parts; i++)
2198 if (i == 0)
2199 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2200 else
2201 value >>= part_bits;
2203 if ((value & part_mask) != (def & part_mask))
2204 return value & part_mask;
2207 gcc_unreachable ();
2210 /* If OP is an integer constant of mode MODE with exactly one
2211 part of mode PART_MODE unequal to DEF, return the number of that
2212 part. Otherwise, return -1. */
2215 s390_single_part (rtx op,
2216 machine_mode mode,
2217 machine_mode part_mode,
2218 int def)
2220 unsigned HOST_WIDE_INT value = 0;
2221 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2222 unsigned HOST_WIDE_INT part_mask
2223 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2224 int i, part = -1;
2226 if (GET_CODE (op) != CONST_INT)
2227 return -1;
2229 for (i = 0; i < n_parts; i++)
2231 if (i == 0)
2232 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2233 else
2234 value >>= GET_MODE_BITSIZE (part_mode);
2236 if ((value & part_mask) != (def & part_mask))
2238 if (part != -1)
2239 return -1;
2240 else
2241 part = i;
2244 return part == -1 ? -1 : n_parts - 1 - part;
2247 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2248 bits and no other bits are set in IN. POS and LENGTH can be used
2249 to obtain the start position and the length of the bitfield.
2251 POS gives the position of the first bit of the bitfield counting
2252 from the lowest order bit starting with zero. In order to use this
2253 value for S/390 instructions this has to be converted to "bits big
2254 endian" style. */
2256 bool
2257 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2258 int *pos, int *length)
2260 int tmp_pos = 0;
2261 int tmp_length = 0;
2262 int i;
2263 unsigned HOST_WIDE_INT mask = 1ULL;
2264 bool contiguous = false;
2266 for (i = 0; i < size; mask <<= 1, i++)
2268 if (contiguous)
2270 if (mask & in)
2271 tmp_length++;
2272 else
2273 break;
2275 else
2277 if (mask & in)
2279 contiguous = true;
2280 tmp_length++;
2282 else
2283 tmp_pos++;
2287 if (!tmp_length)
2288 return false;
2290 /* Calculate a mask for all bits beyond the contiguous bits. */
2291 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2293 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2294 mask &= (HOST_WIDE_INT_1U << size) - 1;
2296 if (mask & in)
2297 return false;
2299 if (tmp_length + tmp_pos - 1 > size)
2300 return false;
2302 if (length)
2303 *length = tmp_length;
2305 if (pos)
2306 *pos = tmp_pos;
2308 return true;
2311 /* Return true if OP contains the same contiguous bitfield in *all*
2312 its elements. START and END can be used to obtain the start and
2313 end position of the bitfield.
2315 START/STOP give the position of the first/last bit of the bitfield
2316 counting from the lowest order bit starting with zero. In order to
2317 use these values for S/390 instructions this has to be converted to
2318 "bits big endian" style. */
2320 bool
2321 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2323 unsigned HOST_WIDE_INT mask;
2324 int length, size;
2325 rtx elt;
2327 if (!const_vec_duplicate_p (op, &elt)
2328 || !CONST_INT_P (elt))
2329 return false;
2331 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2333 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2334 if (size > 64)
2335 return false;
2337 mask = UINTVAL (elt);
2338 if (s390_contiguous_bitmask_p (mask, size, start,
2339 end != NULL ? &length : NULL))
2341 if (end != NULL)
2342 *end = *start + length - 1;
2343 return true;
2345 /* 0xff00000f style immediates can be covered by swapping start and
2346 end indices in vgm. */
2347 if (s390_contiguous_bitmask_p (~mask, size, start,
2348 end != NULL ? &length : NULL))
2350 if (end != NULL)
2351 *end = *start - 1;
2352 if (start != NULL)
2353 *start = *start + length;
2354 return true;
2356 return false;
2359 /* Return true if C consists only of byte chunks being either 0 or
2360 0xff. If MASK is !=NULL a byte mask is generated which is
2361 appropriate for the vector generate byte mask instruction. */
2363 bool
2364 s390_bytemask_vector_p (rtx op, unsigned *mask)
2366 int i;
2367 unsigned tmp_mask = 0;
2368 int nunit, unit_size;
2370 if (!VECTOR_MODE_P (GET_MODE (op))
2371 || GET_CODE (op) != CONST_VECTOR
2372 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2373 return false;
2375 nunit = GET_MODE_NUNITS (GET_MODE (op));
2376 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2378 for (i = 0; i < nunit; i++)
2380 unsigned HOST_WIDE_INT c;
2381 int j;
2383 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2384 return false;
2386 c = UINTVAL (XVECEXP (op, 0, i));
2387 for (j = 0; j < unit_size; j++)
2389 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2390 return false;
2391 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2392 c = c >> BITS_PER_UNIT;
2396 if (mask != NULL)
2397 *mask = tmp_mask;
2399 return true;
2402 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2403 equivalent to a shift followed by the AND. In particular, CONTIG
2404 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2405 for ROTL indicate a rotate to the right. */
2407 bool
2408 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2410 int pos, len;
2411 bool ok;
2413 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2414 gcc_assert (ok);
2416 return ((rotl >= 0 && rotl <= pos)
2417 || (rotl < 0 && -rotl <= bitsize - len - pos));
2420 /* Check whether we can (and want to) split a double-word
2421 move in mode MODE from SRC to DST into two single-word
2422 moves, moving the subword FIRST_SUBWORD first. */
2424 bool
2425 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2427 /* Floating point and vector registers cannot be split. */
2428 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2429 return false;
2431 /* We don't need to split if operands are directly accessible. */
2432 if (s_operand (src, mode) || s_operand (dst, mode))
2433 return false;
2435 /* Non-offsettable memory references cannot be split. */
2436 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2437 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2438 return false;
2440 /* Moving the first subword must not clobber a register
2441 needed to move the second subword. */
2442 if (register_operand (dst, mode))
2444 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2445 if (reg_overlap_mentioned_p (subreg, src))
2446 return false;
2449 return true;
2452 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2453 and [MEM2, MEM2 + SIZE] do overlap and false
2454 otherwise. */
2456 bool
2457 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2459 rtx addr1, addr2, addr_delta;
2460 HOST_WIDE_INT delta;
2462 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2463 return true;
2465 if (size == 0)
2466 return false;
2468 addr1 = XEXP (mem1, 0);
2469 addr2 = XEXP (mem2, 0);
2471 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2473 /* This overlapping check is used by peepholes merging memory block operations.
2474 Overlapping operations would otherwise be recognized by the S/390 hardware
2475 and would fall back to a slower implementation. Allowing overlapping
2476 operations would lead to slow code but not to wrong code. Therefore we are
2477 somewhat optimistic if we cannot prove that the memory blocks are
2478 overlapping.
2479 That's why we return false here although this may accept operations on
2480 overlapping memory areas. */
2481 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2482 return false;
2484 delta = INTVAL (addr_delta);
2486 if (delta == 0
2487 || (delta > 0 && delta < size)
2488 || (delta < 0 && -delta < size))
2489 return true;
2491 return false;
2494 /* Check whether the address of memory reference MEM2 equals exactly
2495 the address of memory reference MEM1 plus DELTA. Return true if
2496 we can prove this to be the case, false otherwise. */
2498 bool
2499 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2501 rtx addr1, addr2, addr_delta;
2503 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2504 return false;
2506 addr1 = XEXP (mem1, 0);
2507 addr2 = XEXP (mem2, 0);
2509 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2510 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2511 return false;
2513 return true;
2516 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2518 void
2519 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2520 rtx *operands)
2522 machine_mode wmode = mode;
2523 rtx dst = operands[0];
2524 rtx src1 = operands[1];
2525 rtx src2 = operands[2];
2526 rtx op, clob, tem;
2528 /* If we cannot handle the operation directly, use a temp register. */
2529 if (!s390_logical_operator_ok_p (operands))
2530 dst = gen_reg_rtx (mode);
2532 /* QImode and HImode patterns make sense only if we have a destination
2533 in memory. Otherwise perform the operation in SImode. */
2534 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2535 wmode = SImode;
2537 /* Widen operands if required. */
2538 if (mode != wmode)
2540 if (GET_CODE (dst) == SUBREG
2541 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2542 dst = tem;
2543 else if (REG_P (dst))
2544 dst = gen_rtx_SUBREG (wmode, dst, 0);
2545 else
2546 dst = gen_reg_rtx (wmode);
2548 if (GET_CODE (src1) == SUBREG
2549 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2550 src1 = tem;
2551 else if (GET_MODE (src1) != VOIDmode)
2552 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2554 if (GET_CODE (src2) == SUBREG
2555 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2556 src2 = tem;
2557 else if (GET_MODE (src2) != VOIDmode)
2558 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2561 /* Emit the instruction. */
2562 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2563 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2564 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2566 /* Fix up the destination if needed. */
2567 if (dst != operands[0])
2568 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2571 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2573 bool
2574 s390_logical_operator_ok_p (rtx *operands)
2576 /* If the destination operand is in memory, it needs to coincide
2577 with one of the source operands. After reload, it has to be
2578 the first source operand. */
2579 if (GET_CODE (operands[0]) == MEM)
2580 return rtx_equal_p (operands[0], operands[1])
2581 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2583 return true;
2586 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2587 operand IMMOP to switch from SS to SI type instructions. */
2589 void
2590 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2592 int def = code == AND ? -1 : 0;
2593 HOST_WIDE_INT mask;
2594 int part;
2596 gcc_assert (GET_CODE (*memop) == MEM);
2597 gcc_assert (!MEM_VOLATILE_P (*memop));
2599 mask = s390_extract_part (*immop, QImode, def);
2600 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2601 gcc_assert (part >= 0);
2603 *memop = adjust_address (*memop, QImode, part);
2604 *immop = gen_int_mode (mask, QImode);
2608 /* How to allocate a 'struct machine_function'. */
2610 static struct machine_function *
2611 s390_init_machine_status (void)
2613 return ggc_cleared_alloc<machine_function> ();
2616 /* Map for smallest class containing reg regno. */
2618 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2619 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2620 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2621 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2622 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2623 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2624 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2625 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2626 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2627 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2628 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2629 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2630 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2631 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2632 VEC_REGS, VEC_REGS /* 52 */
2635 /* Return attribute type of insn. */
2637 static enum attr_type
2638 s390_safe_attr_type (rtx_insn *insn)
2640 if (recog_memoized (insn) >= 0)
2641 return get_attr_type (insn);
2642 else
2643 return TYPE_NONE;
2646 /* Return true if DISP is a valid short displacement. */
2648 static bool
2649 s390_short_displacement (rtx disp)
2651 /* No displacement is OK. */
2652 if (!disp)
2653 return true;
2655 /* Without the long displacement facility we don't need to
2656 distingiush between long and short displacement. */
2657 if (!TARGET_LONG_DISPLACEMENT)
2658 return true;
2660 /* Integer displacement in range. */
2661 if (GET_CODE (disp) == CONST_INT)
2662 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2664 /* GOT offset is not OK, the GOT can be large. */
2665 if (GET_CODE (disp) == CONST
2666 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2667 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2668 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2669 return false;
2671 /* All other symbolic constants are literal pool references,
2672 which are OK as the literal pool must be small. */
2673 if (GET_CODE (disp) == CONST)
2674 return true;
2676 return false;
2679 /* Decompose a RTL expression ADDR for a memory address into
2680 its components, returned in OUT.
2682 Returns false if ADDR is not a valid memory address, true
2683 otherwise. If OUT is NULL, don't return the components,
2684 but check for validity only.
2686 Note: Only addresses in canonical form are recognized.
2687 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2688 canonical form so that they will be recognized. */
2690 static int
2691 s390_decompose_address (rtx addr, struct s390_address *out)
2693 HOST_WIDE_INT offset = 0;
2694 rtx base = NULL_RTX;
2695 rtx indx = NULL_RTX;
2696 rtx disp = NULL_RTX;
2697 rtx orig_disp;
2698 bool pointer = false;
2699 bool base_ptr = false;
2700 bool indx_ptr = false;
2701 bool literal_pool = false;
2703 /* We may need to substitute the literal pool base register into the address
2704 below. However, at this point we do not know which register is going to
2705 be used as base, so we substitute the arg pointer register. This is going
2706 to be treated as holding a pointer below -- it shouldn't be used for any
2707 other purpose. */
2708 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2710 /* Decompose address into base + index + displacement. */
2712 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2713 base = addr;
2715 else if (GET_CODE (addr) == PLUS)
2717 rtx op0 = XEXP (addr, 0);
2718 rtx op1 = XEXP (addr, 1);
2719 enum rtx_code code0 = GET_CODE (op0);
2720 enum rtx_code code1 = GET_CODE (op1);
2722 if (code0 == REG || code0 == UNSPEC)
2724 if (code1 == REG || code1 == UNSPEC)
2726 indx = op0; /* index + base */
2727 base = op1;
2730 else
2732 base = op0; /* base + displacement */
2733 disp = op1;
2737 else if (code0 == PLUS)
2739 indx = XEXP (op0, 0); /* index + base + disp */
2740 base = XEXP (op0, 1);
2741 disp = op1;
2744 else
2746 return false;
2750 else
2751 disp = addr; /* displacement */
2753 /* Extract integer part of displacement. */
2754 orig_disp = disp;
2755 if (disp)
2757 if (GET_CODE (disp) == CONST_INT)
2759 offset = INTVAL (disp);
2760 disp = NULL_RTX;
2762 else if (GET_CODE (disp) == CONST
2763 && GET_CODE (XEXP (disp, 0)) == PLUS
2764 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2766 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2767 disp = XEXP (XEXP (disp, 0), 0);
2771 /* Strip off CONST here to avoid special case tests later. */
2772 if (disp && GET_CODE (disp) == CONST)
2773 disp = XEXP (disp, 0);
2775 /* We can convert literal pool addresses to
2776 displacements by basing them off the base register. */
2777 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2779 /* Either base or index must be free to hold the base register. */
2780 if (!base)
2781 base = fake_pool_base, literal_pool = true;
2782 else if (!indx)
2783 indx = fake_pool_base, literal_pool = true;
2784 else
2785 return false;
2787 /* Mark up the displacement. */
2788 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2789 UNSPEC_LTREL_OFFSET);
2792 /* Validate base register. */
2793 if (base)
2795 if (GET_CODE (base) == UNSPEC)
2796 switch (XINT (base, 1))
2798 case UNSPEC_LTREF:
2799 if (!disp)
2800 disp = gen_rtx_UNSPEC (Pmode,
2801 gen_rtvec (1, XVECEXP (base, 0, 0)),
2802 UNSPEC_LTREL_OFFSET);
2803 else
2804 return false;
2806 base = XVECEXP (base, 0, 1);
2807 break;
2809 case UNSPEC_LTREL_BASE:
2810 if (XVECLEN (base, 0) == 1)
2811 base = fake_pool_base, literal_pool = true;
2812 else
2813 base = XVECEXP (base, 0, 1);
2814 break;
2816 default:
2817 return false;
2820 if (!REG_P (base) || GET_MODE (base) != Pmode)
2821 return false;
2823 if (REGNO (base) == STACK_POINTER_REGNUM
2824 || REGNO (base) == FRAME_POINTER_REGNUM
2825 || ((reload_completed || reload_in_progress)
2826 && frame_pointer_needed
2827 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2828 || REGNO (base) == ARG_POINTER_REGNUM
2829 || (flag_pic
2830 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2831 pointer = base_ptr = true;
2833 if ((reload_completed || reload_in_progress)
2834 && base == cfun->machine->base_reg)
2835 pointer = base_ptr = literal_pool = true;
2838 /* Validate index register. */
2839 if (indx)
2841 if (GET_CODE (indx) == UNSPEC)
2842 switch (XINT (indx, 1))
2844 case UNSPEC_LTREF:
2845 if (!disp)
2846 disp = gen_rtx_UNSPEC (Pmode,
2847 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2848 UNSPEC_LTREL_OFFSET);
2849 else
2850 return false;
2852 indx = XVECEXP (indx, 0, 1);
2853 break;
2855 case UNSPEC_LTREL_BASE:
2856 if (XVECLEN (indx, 0) == 1)
2857 indx = fake_pool_base, literal_pool = true;
2858 else
2859 indx = XVECEXP (indx, 0, 1);
2860 break;
2862 default:
2863 return false;
2866 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2867 return false;
2869 if (REGNO (indx) == STACK_POINTER_REGNUM
2870 || REGNO (indx) == FRAME_POINTER_REGNUM
2871 || ((reload_completed || reload_in_progress)
2872 && frame_pointer_needed
2873 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2874 || REGNO (indx) == ARG_POINTER_REGNUM
2875 || (flag_pic
2876 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2877 pointer = indx_ptr = true;
2879 if ((reload_completed || reload_in_progress)
2880 && indx == cfun->machine->base_reg)
2881 pointer = indx_ptr = literal_pool = true;
2884 /* Prefer to use pointer as base, not index. */
2885 if (base && indx && !base_ptr
2886 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2888 rtx tmp = base;
2889 base = indx;
2890 indx = tmp;
2893 /* Validate displacement. */
2894 if (!disp)
2896 /* If virtual registers are involved, the displacement will change later
2897 anyway as the virtual registers get eliminated. This could make a
2898 valid displacement invalid, but it is more likely to make an invalid
2899 displacement valid, because we sometimes access the register save area
2900 via negative offsets to one of those registers.
2901 Thus we don't check the displacement for validity here. If after
2902 elimination the displacement turns out to be invalid after all,
2903 this is fixed up by reload in any case. */
2904 /* LRA maintains always displacements up to date and we need to
2905 know the displacement is right during all LRA not only at the
2906 final elimination. */
2907 if (lra_in_progress
2908 || (base != arg_pointer_rtx
2909 && indx != arg_pointer_rtx
2910 && base != return_address_pointer_rtx
2911 && indx != return_address_pointer_rtx
2912 && base != frame_pointer_rtx
2913 && indx != frame_pointer_rtx
2914 && base != virtual_stack_vars_rtx
2915 && indx != virtual_stack_vars_rtx))
2916 if (!DISP_IN_RANGE (offset))
2917 return false;
2919 else
2921 /* All the special cases are pointers. */
2922 pointer = true;
2924 /* In the small-PIC case, the linker converts @GOT
2925 and @GOTNTPOFF offsets to possible displacements. */
2926 if (GET_CODE (disp) == UNSPEC
2927 && (XINT (disp, 1) == UNSPEC_GOT
2928 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2929 && flag_pic == 1)
2934 /* Accept pool label offsets. */
2935 else if (GET_CODE (disp) == UNSPEC
2936 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2939 /* Accept literal pool references. */
2940 else if (GET_CODE (disp) == UNSPEC
2941 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2943 /* In case CSE pulled a non literal pool reference out of
2944 the pool we have to reject the address. This is
2945 especially important when loading the GOT pointer on non
2946 zarch CPUs. In this case the literal pool contains an lt
2947 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2948 will most likely exceed the displacement. */
2949 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2950 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2951 return false;
2953 orig_disp = gen_rtx_CONST (Pmode, disp);
2954 if (offset)
2956 /* If we have an offset, make sure it does not
2957 exceed the size of the constant pool entry. */
2958 rtx sym = XVECEXP (disp, 0, 0);
2959 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2960 return false;
2962 orig_disp = plus_constant (Pmode, orig_disp, offset);
2966 else
2967 return false;
2970 if (!base && !indx)
2971 pointer = true;
2973 if (out)
2975 out->base = base;
2976 out->indx = indx;
2977 out->disp = orig_disp;
2978 out->pointer = pointer;
2979 out->literal_pool = literal_pool;
2982 return true;
2985 /* Decompose a RTL expression OP for a shift count into its components,
2986 and return the base register in BASE and the offset in OFFSET.
2988 Return true if OP is a valid shift count, false if not. */
2990 bool
2991 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2993 rtx off = NULL_RTX;
2995 /* We can have an integer constant, an address register,
2996 or a sum of the two. */
2997 if (CONST_SCALAR_INT_P (op))
2999 off = op;
3000 op = NULL_RTX;
3002 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3004 off = XEXP (op, 1);
3005 op = XEXP (op, 0);
3007 while (op && GET_CODE (op) == SUBREG)
3008 op = SUBREG_REG (op);
3010 if (op && GET_CODE (op) != REG)
3011 return false;
3013 if (offset)
3015 if (off == NULL_RTX)
3016 *offset = 0;
3017 else if (CONST_INT_P (off))
3018 *offset = INTVAL (off);
3019 else if (CONST_WIDE_INT_P (off))
3020 /* The offset will anyway be cut down to 12 bits so take just
3021 the lowest order chunk of the wide int. */
3022 *offset = CONST_WIDE_INT_ELT (off, 0);
3023 else
3024 gcc_unreachable ();
3026 if (base)
3027 *base = op;
3029 return true;
3033 /* Return true if CODE is a valid address without index. */
3035 bool
3036 s390_legitimate_address_without_index_p (rtx op)
3038 struct s390_address addr;
3040 if (!s390_decompose_address (XEXP (op, 0), &addr))
3041 return false;
3042 if (addr.indx)
3043 return false;
3045 return true;
3049 /* Return TRUE if ADDR is an operand valid for a load/store relative
3050 instruction. Be aware that the alignment of the operand needs to
3051 be checked separately.
3052 Valid addresses are single references or a sum of a reference and a
3053 constant integer. Return these parts in SYMREF and ADDEND. You can
3054 pass NULL in REF and/or ADDEND if you are not interested in these
3055 values. Literal pool references are *not* considered symbol
3056 references. */
3058 static bool
3059 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3061 HOST_WIDE_INT tmpaddend = 0;
3063 if (GET_CODE (addr) == CONST)
3064 addr = XEXP (addr, 0);
3066 if (GET_CODE (addr) == PLUS)
3068 if (!CONST_INT_P (XEXP (addr, 1)))
3069 return false;
3071 tmpaddend = INTVAL (XEXP (addr, 1));
3072 addr = XEXP (addr, 0);
3075 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3076 || (GET_CODE (addr) == UNSPEC
3077 && (XINT (addr, 1) == UNSPEC_GOTENT
3078 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3080 if (symref)
3081 *symref = addr;
3082 if (addend)
3083 *addend = tmpaddend;
3085 return true;
3087 return false;
3090 /* Return true if the address in OP is valid for constraint letter C
3091 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3092 pool MEMs should be accepted. Only the Q, R, S, T constraint
3093 letters are allowed for C. */
3095 static int
3096 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3098 struct s390_address addr;
3099 bool decomposed = false;
3101 /* This check makes sure that no symbolic address (except literal
3102 pool references) are accepted by the R or T constraints. */
3103 if (s390_loadrelative_operand_p (op, NULL, NULL))
3104 return 0;
3106 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3107 if (!lit_pool_ok)
3109 if (!s390_decompose_address (op, &addr))
3110 return 0;
3111 if (addr.literal_pool)
3112 return 0;
3113 decomposed = true;
3116 switch (c)
3118 case 'Q': /* no index short displacement */
3119 if (!decomposed && !s390_decompose_address (op, &addr))
3120 return 0;
3121 if (addr.indx)
3122 return 0;
3123 if (!s390_short_displacement (addr.disp))
3124 return 0;
3125 break;
3127 case 'R': /* with index short displacement */
3128 if (TARGET_LONG_DISPLACEMENT)
3130 if (!decomposed && !s390_decompose_address (op, &addr))
3131 return 0;
3132 if (!s390_short_displacement (addr.disp))
3133 return 0;
3135 /* Any invalid address here will be fixed up by reload,
3136 so accept it for the most generic constraint. */
3137 break;
3139 case 'S': /* no index long displacement */
3140 if (!TARGET_LONG_DISPLACEMENT)
3141 return 0;
3142 if (!decomposed && !s390_decompose_address (op, &addr))
3143 return 0;
3144 if (addr.indx)
3145 return 0;
3146 if (s390_short_displacement (addr.disp))
3147 return 0;
3148 break;
3150 case 'T': /* with index long displacement */
3151 if (!TARGET_LONG_DISPLACEMENT)
3152 return 0;
3153 /* Any invalid address here will be fixed up by reload,
3154 so accept it for the most generic constraint. */
3155 if ((decomposed || s390_decompose_address (op, &addr))
3156 && s390_short_displacement (addr.disp))
3157 return 0;
3158 break;
3159 default:
3160 return 0;
3162 return 1;
3166 /* Evaluates constraint strings described by the regular expression
3167 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
3168 the constraint given in STR, or 0 else. */
3171 s390_mem_constraint (const char *str, rtx op)
3173 char c = str[0];
3175 switch (c)
3177 case 'A':
3178 /* Check for offsettable variants of memory constraints. */
3179 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3180 return 0;
3181 if ((reload_completed || reload_in_progress)
3182 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3183 return 0;
3184 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3185 case 'B':
3186 /* Check for non-literal-pool variants of memory constraints. */
3187 if (!MEM_P (op))
3188 return 0;
3189 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3190 case 'Q':
3191 case 'R':
3192 case 'S':
3193 case 'T':
3194 if (GET_CODE (op) != MEM)
3195 return 0;
3196 return s390_check_qrst_address (c, XEXP (op, 0), true);
3197 case 'U':
3198 return (s390_check_qrst_address ('Q', op, true)
3199 || s390_check_qrst_address ('R', op, true));
3200 case 'W':
3201 return (s390_check_qrst_address ('S', op, true)
3202 || s390_check_qrst_address ('T', op, true));
3203 case 'Y':
3204 /* Simply check for the basic form of a shift count. Reload will
3205 take care of making sure we have a proper base register. */
3206 if (!s390_decompose_shift_count (op, NULL, NULL))
3207 return 0;
3208 break;
3209 case 'Z':
3210 return s390_check_qrst_address (str[1], op, true);
3211 default:
3212 return 0;
3214 return 1;
3218 /* Evaluates constraint strings starting with letter O. Input
3219 parameter C is the second letter following the "O" in the constraint
3220 string. Returns 1 if VALUE meets the respective constraint and 0
3221 otherwise. */
3224 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3226 if (!TARGET_EXTIMM)
3227 return 0;
3229 switch (c)
3231 case 's':
3232 return trunc_int_for_mode (value, SImode) == value;
3234 case 'p':
3235 return value == 0
3236 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3238 case 'n':
3239 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3241 default:
3242 gcc_unreachable ();
3247 /* Evaluates constraint strings starting with letter N. Parameter STR
3248 contains the letters following letter "N" in the constraint string.
3249 Returns true if VALUE matches the constraint. */
3252 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3254 machine_mode mode, part_mode;
3255 int def;
3256 int part, part_goal;
3259 if (str[0] == 'x')
3260 part_goal = -1;
3261 else
3262 part_goal = str[0] - '0';
3264 switch (str[1])
3266 case 'Q':
3267 part_mode = QImode;
3268 break;
3269 case 'H':
3270 part_mode = HImode;
3271 break;
3272 case 'S':
3273 part_mode = SImode;
3274 break;
3275 default:
3276 return 0;
3279 switch (str[2])
3281 case 'H':
3282 mode = HImode;
3283 break;
3284 case 'S':
3285 mode = SImode;
3286 break;
3287 case 'D':
3288 mode = DImode;
3289 break;
3290 default:
3291 return 0;
3294 switch (str[3])
3296 case '0':
3297 def = 0;
3298 break;
3299 case 'F':
3300 def = -1;
3301 break;
3302 default:
3303 return 0;
3306 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3307 return 0;
3309 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3310 if (part < 0)
3311 return 0;
3312 if (part_goal != -1 && part_goal != part)
3313 return 0;
3315 return 1;
3319 /* Returns true if the input parameter VALUE is a float zero. */
3322 s390_float_const_zero_p (rtx value)
3324 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3325 && value == CONST0_RTX (GET_MODE (value)));
3328 /* Implement TARGET_REGISTER_MOVE_COST. */
3330 static int
3331 s390_register_move_cost (machine_mode mode,
3332 reg_class_t from, reg_class_t to)
3334 /* On s390, copy between fprs and gprs is expensive. */
3336 /* It becomes somewhat faster having ldgr/lgdr. */
3337 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3339 /* ldgr is single cycle. */
3340 if (reg_classes_intersect_p (from, GENERAL_REGS)
3341 && reg_classes_intersect_p (to, FP_REGS))
3342 return 1;
3343 /* lgdr needs 3 cycles. */
3344 if (reg_classes_intersect_p (to, GENERAL_REGS)
3345 && reg_classes_intersect_p (from, FP_REGS))
3346 return 3;
3349 /* Otherwise copying is done via memory. */
3350 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3351 && reg_classes_intersect_p (to, FP_REGS))
3352 || (reg_classes_intersect_p (from, FP_REGS)
3353 && reg_classes_intersect_p (to, GENERAL_REGS)))
3354 return 10;
3356 return 1;
3359 /* Implement TARGET_MEMORY_MOVE_COST. */
3361 static int
3362 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3363 reg_class_t rclass ATTRIBUTE_UNUSED,
3364 bool in ATTRIBUTE_UNUSED)
3366 return 2;
3369 /* Compute a (partial) cost for rtx X. Return true if the complete
3370 cost has been computed, and false if subexpressions should be
3371 scanned. In either case, *TOTAL contains the cost result.
3372 OUTER_CODE contains the code of the superexpression of x. */
3374 static bool
3375 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3376 int opno ATTRIBUTE_UNUSED,
3377 int *total, bool speed ATTRIBUTE_UNUSED)
3379 int code = GET_CODE (x);
3380 switch (code)
3382 case CONST:
3383 case CONST_INT:
3384 case LABEL_REF:
3385 case SYMBOL_REF:
3386 case CONST_DOUBLE:
3387 case CONST_WIDE_INT:
3388 case MEM:
3389 *total = 0;
3390 return true;
3392 case IOR:
3393 /* risbg */
3394 if (GET_CODE (XEXP (x, 0)) == AND
3395 && GET_CODE (XEXP (x, 1)) == ASHIFT
3396 && REG_P (XEXP (XEXP (x, 0), 0))
3397 && REG_P (XEXP (XEXP (x, 1), 0))
3398 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3399 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3400 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3401 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3403 *total = COSTS_N_INSNS (2);
3404 return true;
3406 case ASHIFT:
3407 case ASHIFTRT:
3408 case LSHIFTRT:
3409 case ROTATE:
3410 case ROTATERT:
3411 case AND:
3412 case XOR:
3413 case NEG:
3414 case NOT:
3415 *total = COSTS_N_INSNS (1);
3416 return false;
3418 case PLUS:
3419 case MINUS:
3420 *total = COSTS_N_INSNS (1);
3421 return false;
3423 case MULT:
3424 switch (mode)
3426 case SImode:
3428 rtx left = XEXP (x, 0);
3429 rtx right = XEXP (x, 1);
3430 if (GET_CODE (right) == CONST_INT
3431 && CONST_OK_FOR_K (INTVAL (right)))
3432 *total = s390_cost->mhi;
3433 else if (GET_CODE (left) == SIGN_EXTEND)
3434 *total = s390_cost->mh;
3435 else
3436 *total = s390_cost->ms; /* msr, ms, msy */
3437 break;
3439 case DImode:
3441 rtx left = XEXP (x, 0);
3442 rtx right = XEXP (x, 1);
3443 if (TARGET_ZARCH)
3445 if (GET_CODE (right) == CONST_INT
3446 && CONST_OK_FOR_K (INTVAL (right)))
3447 *total = s390_cost->mghi;
3448 else if (GET_CODE (left) == SIGN_EXTEND)
3449 *total = s390_cost->msgf;
3450 else
3451 *total = s390_cost->msg; /* msgr, msg */
3453 else /* TARGET_31BIT */
3455 if (GET_CODE (left) == SIGN_EXTEND
3456 && GET_CODE (right) == SIGN_EXTEND)
3457 /* mulsidi case: mr, m */
3458 *total = s390_cost->m;
3459 else if (GET_CODE (left) == ZERO_EXTEND
3460 && GET_CODE (right) == ZERO_EXTEND
3461 && TARGET_CPU_ZARCH)
3462 /* umulsidi case: ml, mlr */
3463 *total = s390_cost->ml;
3464 else
3465 /* Complex calculation is required. */
3466 *total = COSTS_N_INSNS (40);
3468 break;
3470 case SFmode:
3471 case DFmode:
3472 *total = s390_cost->mult_df;
3473 break;
3474 case TFmode:
3475 *total = s390_cost->mxbr;
3476 break;
3477 default:
3478 return false;
3480 return false;
3482 case FMA:
3483 switch (mode)
3485 case DFmode:
3486 *total = s390_cost->madbr;
3487 break;
3488 case SFmode:
3489 *total = s390_cost->maebr;
3490 break;
3491 default:
3492 return false;
3494 /* Negate in the third argument is free: FMSUB. */
3495 if (GET_CODE (XEXP (x, 2)) == NEG)
3497 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3498 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3499 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3500 return true;
3502 return false;
3504 case UDIV:
3505 case UMOD:
3506 if (mode == TImode) /* 128 bit division */
3507 *total = s390_cost->dlgr;
3508 else if (mode == DImode)
3510 rtx right = XEXP (x, 1);
3511 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3512 *total = s390_cost->dlr;
3513 else /* 64 by 64 bit division */
3514 *total = s390_cost->dlgr;
3516 else if (mode == SImode) /* 32 bit division */
3517 *total = s390_cost->dlr;
3518 return false;
3520 case DIV:
3521 case MOD:
3522 if (mode == DImode)
3524 rtx right = XEXP (x, 1);
3525 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3526 if (TARGET_ZARCH)
3527 *total = s390_cost->dsgfr;
3528 else
3529 *total = s390_cost->dr;
3530 else /* 64 by 64 bit division */
3531 *total = s390_cost->dsgr;
3533 else if (mode == SImode) /* 32 bit division */
3534 *total = s390_cost->dlr;
3535 else if (mode == SFmode)
3537 *total = s390_cost->debr;
3539 else if (mode == DFmode)
3541 *total = s390_cost->ddbr;
3543 else if (mode == TFmode)
3545 *total = s390_cost->dxbr;
3547 return false;
3549 case SQRT:
3550 if (mode == SFmode)
3551 *total = s390_cost->sqebr;
3552 else if (mode == DFmode)
3553 *total = s390_cost->sqdbr;
3554 else /* TFmode */
3555 *total = s390_cost->sqxbr;
3556 return false;
3558 case SIGN_EXTEND:
3559 case ZERO_EXTEND:
3560 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3561 || outer_code == PLUS || outer_code == MINUS
3562 || outer_code == COMPARE)
3563 *total = 0;
3564 return false;
3566 case COMPARE:
3567 *total = COSTS_N_INSNS (1);
3568 if (GET_CODE (XEXP (x, 0)) == AND
3569 && GET_CODE (XEXP (x, 1)) == CONST_INT
3570 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3572 rtx op0 = XEXP (XEXP (x, 0), 0);
3573 rtx op1 = XEXP (XEXP (x, 0), 1);
3574 rtx op2 = XEXP (x, 1);
3576 if (memory_operand (op0, GET_MODE (op0))
3577 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3578 return true;
3579 if (register_operand (op0, GET_MODE (op0))
3580 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3581 return true;
3583 return false;
3585 default:
3586 return false;
3590 /* Return the cost of an address rtx ADDR. */
3592 static int
3593 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3594 addr_space_t as ATTRIBUTE_UNUSED,
3595 bool speed ATTRIBUTE_UNUSED)
3597 struct s390_address ad;
3598 if (!s390_decompose_address (addr, &ad))
3599 return 1000;
3601 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3604 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3605 otherwise return 0. */
3608 tls_symbolic_operand (rtx op)
3610 if (GET_CODE (op) != SYMBOL_REF)
3611 return 0;
3612 return SYMBOL_REF_TLS_MODEL (op);
3615 /* Split DImode access register reference REG (on 64-bit) into its constituent
3616 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3617 gen_highpart cannot be used as they assume all registers are word-sized,
3618 while our access registers have only half that size. */
3620 void
3621 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3623 gcc_assert (TARGET_64BIT);
3624 gcc_assert (ACCESS_REG_P (reg));
3625 gcc_assert (GET_MODE (reg) == DImode);
3626 gcc_assert (!(REGNO (reg) & 1));
3628 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3629 *hi = gen_rtx_REG (SImode, REGNO (reg));
3632 /* Return true if OP contains a symbol reference */
3634 bool
3635 symbolic_reference_mentioned_p (rtx op)
3637 const char *fmt;
3638 int i;
3640 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3641 return 1;
3643 fmt = GET_RTX_FORMAT (GET_CODE (op));
3644 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3646 if (fmt[i] == 'E')
3648 int j;
3650 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3651 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3652 return 1;
3655 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3656 return 1;
3659 return 0;
3662 /* Return true if OP contains a reference to a thread-local symbol. */
3664 bool
3665 tls_symbolic_reference_mentioned_p (rtx op)
3667 const char *fmt;
3668 int i;
3670 if (GET_CODE (op) == SYMBOL_REF)
3671 return tls_symbolic_operand (op);
3673 fmt = GET_RTX_FORMAT (GET_CODE (op));
3674 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3676 if (fmt[i] == 'E')
3678 int j;
3680 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3681 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3682 return true;
3685 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3686 return true;
3689 return false;
3693 /* Return true if OP is a legitimate general operand when
3694 generating PIC code. It is given that flag_pic is on
3695 and that OP satisfies CONSTANT_P. */
3698 legitimate_pic_operand_p (rtx op)
3700 /* Accept all non-symbolic constants. */
3701 if (!SYMBOLIC_CONST (op))
3702 return 1;
3704 /* Reject everything else; must be handled
3705 via emit_symbolic_move. */
3706 return 0;
3709 /* Returns true if the constant value OP is a legitimate general operand.
3710 It is given that OP satisfies CONSTANT_P. */
3712 static bool
3713 s390_legitimate_constant_p (machine_mode mode, rtx op)
3715 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3717 if (GET_MODE_SIZE (mode) != 16)
3718 return 0;
3720 if (!satisfies_constraint_j00 (op)
3721 && !satisfies_constraint_jm1 (op)
3722 && !satisfies_constraint_jKK (op)
3723 && !satisfies_constraint_jxx (op)
3724 && !satisfies_constraint_jyy (op))
3725 return 0;
3728 /* Accept all non-symbolic constants. */
3729 if (!SYMBOLIC_CONST (op))
3730 return 1;
3732 /* Accept immediate LARL operands. */
3733 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3734 return 1;
3736 /* Thread-local symbols are never legal constants. This is
3737 so that emit_call knows that computing such addresses
3738 might require a function call. */
3739 if (TLS_SYMBOLIC_CONST (op))
3740 return 0;
3742 /* In the PIC case, symbolic constants must *not* be
3743 forced into the literal pool. We accept them here,
3744 so that they will be handled by emit_symbolic_move. */
3745 if (flag_pic)
3746 return 1;
3748 /* All remaining non-PIC symbolic constants are
3749 forced into the literal pool. */
3750 return 0;
3753 /* Determine if it's legal to put X into the constant pool. This
3754 is not possible if X contains the address of a symbol that is
3755 not constant (TLS) or not known at final link time (PIC). */
3757 static bool
3758 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3760 switch (GET_CODE (x))
3762 case CONST_INT:
3763 case CONST_DOUBLE:
3764 case CONST_WIDE_INT:
3765 case CONST_VECTOR:
3766 /* Accept all non-symbolic constants. */
3767 return false;
3769 case LABEL_REF:
3770 /* Labels are OK iff we are non-PIC. */
3771 return flag_pic != 0;
3773 case SYMBOL_REF:
3774 /* 'Naked' TLS symbol references are never OK,
3775 non-TLS symbols are OK iff we are non-PIC. */
3776 if (tls_symbolic_operand (x))
3777 return true;
3778 else
3779 return flag_pic != 0;
3781 case CONST:
3782 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3783 case PLUS:
3784 case MINUS:
3785 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3786 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3788 case UNSPEC:
3789 switch (XINT (x, 1))
3791 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3792 case UNSPEC_LTREL_OFFSET:
3793 case UNSPEC_GOT:
3794 case UNSPEC_GOTOFF:
3795 case UNSPEC_PLTOFF:
3796 case UNSPEC_TLSGD:
3797 case UNSPEC_TLSLDM:
3798 case UNSPEC_NTPOFF:
3799 case UNSPEC_DTPOFF:
3800 case UNSPEC_GOTNTPOFF:
3801 case UNSPEC_INDNTPOFF:
3802 return false;
3804 /* If the literal pool shares the code section, be put
3805 execute template placeholders into the pool as well. */
3806 case UNSPEC_INSN:
3807 return TARGET_CPU_ZARCH;
3809 default:
3810 return true;
3812 break;
3814 default:
3815 gcc_unreachable ();
3819 /* Returns true if the constant value OP is a legitimate general
3820 operand during and after reload. The difference to
3821 legitimate_constant_p is that this function will not accept
3822 a constant that would need to be forced to the literal pool
3823 before it can be used as operand.
3824 This function accepts all constants which can be loaded directly
3825 into a GPR. */
3827 bool
3828 legitimate_reload_constant_p (rtx op)
3830 /* Accept la(y) operands. */
3831 if (GET_CODE (op) == CONST_INT
3832 && DISP_IN_RANGE (INTVAL (op)))
3833 return true;
3835 /* Accept l(g)hi/l(g)fi operands. */
3836 if (GET_CODE (op) == CONST_INT
3837 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3838 return true;
3840 /* Accept lliXX operands. */
3841 if (TARGET_ZARCH
3842 && GET_CODE (op) == CONST_INT
3843 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3844 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3845 return true;
3847 if (TARGET_EXTIMM
3848 && GET_CODE (op) == CONST_INT
3849 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3850 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3851 return true;
3853 /* Accept larl operands. */
3854 if (TARGET_CPU_ZARCH
3855 && larl_operand (op, VOIDmode))
3856 return true;
3858 /* Accept floating-point zero operands that fit into a single GPR. */
3859 if (GET_CODE (op) == CONST_DOUBLE
3860 && s390_float_const_zero_p (op)
3861 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3862 return true;
3864 /* Accept double-word operands that can be split. */
3865 if (GET_CODE (op) == CONST_WIDE_INT
3866 || (GET_CODE (op) == CONST_INT
3867 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3869 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3870 rtx hi = operand_subword (op, 0, 0, dword_mode);
3871 rtx lo = operand_subword (op, 1, 0, dword_mode);
3872 return legitimate_reload_constant_p (hi)
3873 && legitimate_reload_constant_p (lo);
3876 /* Everything else cannot be handled without reload. */
3877 return false;
3880 /* Returns true if the constant value OP is a legitimate fp operand
3881 during and after reload.
3882 This function accepts all constants which can be loaded directly
3883 into an FPR. */
3885 static bool
3886 legitimate_reload_fp_constant_p (rtx op)
3888 /* Accept floating-point zero operands if the load zero instruction
3889 can be used. Prior to z196 the load fp zero instruction caused a
3890 performance penalty if the result is used as BFP number. */
3891 if (TARGET_Z196
3892 && GET_CODE (op) == CONST_DOUBLE
3893 && s390_float_const_zero_p (op))
3894 return true;
3896 return false;
3899 /* Returns true if the constant value OP is a legitimate vector operand
3900 during and after reload.
3901 This function accepts all constants which can be loaded directly
3902 into an VR. */
3904 static bool
3905 legitimate_reload_vector_constant_p (rtx op)
3907 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3908 && (satisfies_constraint_j00 (op)
3909 || satisfies_constraint_jm1 (op)
3910 || satisfies_constraint_jKK (op)
3911 || satisfies_constraint_jxx (op)
3912 || satisfies_constraint_jyy (op)))
3913 return true;
3915 return false;
3918 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3919 return the class of reg to actually use. */
3921 static reg_class_t
3922 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3924 switch (GET_CODE (op))
3926 /* Constants we cannot reload into general registers
3927 must be forced into the literal pool. */
3928 case CONST_VECTOR:
3929 case CONST_DOUBLE:
3930 case CONST_INT:
3931 case CONST_WIDE_INT:
3932 if (reg_class_subset_p (GENERAL_REGS, rclass)
3933 && legitimate_reload_constant_p (op))
3934 return GENERAL_REGS;
3935 else if (reg_class_subset_p (ADDR_REGS, rclass)
3936 && legitimate_reload_constant_p (op))
3937 return ADDR_REGS;
3938 else if (reg_class_subset_p (FP_REGS, rclass)
3939 && legitimate_reload_fp_constant_p (op))
3940 return FP_REGS;
3941 else if (reg_class_subset_p (VEC_REGS, rclass)
3942 && legitimate_reload_vector_constant_p (op))
3943 return VEC_REGS;
3945 return NO_REGS;
3947 /* If a symbolic constant or a PLUS is reloaded,
3948 it is most likely being used as an address, so
3949 prefer ADDR_REGS. If 'class' is not a superset
3950 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3951 case CONST:
3952 /* Symrefs cannot be pushed into the literal pool with -fPIC
3953 so we *MUST NOT* return NO_REGS for these cases
3954 (s390_cannot_force_const_mem will return true).
3956 On the other hand we MUST return NO_REGS for symrefs with
3957 invalid addend which might have been pushed to the literal
3958 pool (no -fPIC). Usually we would expect them to be
3959 handled via secondary reload but this does not happen if
3960 they are used as literal pool slot replacement in reload
3961 inheritance (see emit_input_reload_insns). */
3962 if (TARGET_CPU_ZARCH
3963 && GET_CODE (XEXP (op, 0)) == PLUS
3964 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3965 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3967 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3968 return ADDR_REGS;
3969 else
3970 return NO_REGS;
3972 /* fallthrough */
3973 case LABEL_REF:
3974 case SYMBOL_REF:
3975 if (!legitimate_reload_constant_p (op))
3976 return NO_REGS;
3977 /* fallthrough */
3978 case PLUS:
3979 /* load address will be used. */
3980 if (reg_class_subset_p (ADDR_REGS, rclass))
3981 return ADDR_REGS;
3982 else
3983 return NO_REGS;
3985 default:
3986 break;
3989 return rclass;
3992 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3993 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3994 aligned. */
3996 bool
3997 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3999 HOST_WIDE_INT addend;
4000 rtx symref;
4002 /* The "required alignment" might be 0 (e.g. for certain structs
4003 accessed via BLKmode). Early abort in this case, as well as when
4004 an alignment > 8 is required. */
4005 if (alignment < 2 || alignment > 8)
4006 return false;
4008 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4009 return false;
4011 if (addend & (alignment - 1))
4012 return false;
4014 if (GET_CODE (symref) == SYMBOL_REF)
4016 /* We have load-relative instructions for 2-byte, 4-byte, and
4017 8-byte alignment so allow only these. */
4018 switch (alignment)
4020 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4021 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4022 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4023 default: return false;
4027 if (GET_CODE (symref) == UNSPEC
4028 && alignment <= UNITS_PER_LONG)
4029 return true;
4031 return false;
4034 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4035 operand SCRATCH is used to reload the even part of the address and
4036 adding one. */
4038 void
4039 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4041 HOST_WIDE_INT addend;
4042 rtx symref;
4044 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4045 gcc_unreachable ();
4047 if (!(addend & 1))
4048 /* Easy case. The addend is even so larl will do fine. */
4049 emit_move_insn (reg, addr);
4050 else
4052 /* We can leave the scratch register untouched if the target
4053 register is a valid base register. */
4054 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4055 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4056 scratch = reg;
4058 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4059 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4061 if (addend != 1)
4062 emit_move_insn (scratch,
4063 gen_rtx_CONST (Pmode,
4064 gen_rtx_PLUS (Pmode, symref,
4065 GEN_INT (addend - 1))));
4066 else
4067 emit_move_insn (scratch, symref);
4069 /* Increment the address using la in order to avoid clobbering cc. */
4070 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4074 /* Generate what is necessary to move between REG and MEM using
4075 SCRATCH. The direction is given by TOMEM. */
4077 void
4078 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4080 /* Reload might have pulled a constant out of the literal pool.
4081 Force it back in. */
4082 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4083 || GET_CODE (mem) == CONST_WIDE_INT
4084 || GET_CODE (mem) == CONST_VECTOR
4085 || GET_CODE (mem) == CONST)
4086 mem = force_const_mem (GET_MODE (reg), mem);
4088 gcc_assert (MEM_P (mem));
4090 /* For a load from memory we can leave the scratch register
4091 untouched if the target register is a valid base register. */
4092 if (!tomem
4093 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4094 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4095 && GET_MODE (reg) == GET_MODE (scratch))
4096 scratch = reg;
4098 /* Load address into scratch register. Since we can't have a
4099 secondary reload for a secondary reload we have to cover the case
4100 where larl would need a secondary reload here as well. */
4101 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4103 /* Now we can use a standard load/store to do the move. */
4104 if (tomem)
4105 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4106 else
4107 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4110 /* Inform reload about cases where moving X with a mode MODE to a register in
4111 RCLASS requires an extra scratch or immediate register. Return the class
4112 needed for the immediate register. */
4114 static reg_class_t
4115 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4116 machine_mode mode, secondary_reload_info *sri)
4118 enum reg_class rclass = (enum reg_class) rclass_i;
4120 /* Intermediate register needed. */
4121 if (reg_classes_intersect_p (CC_REGS, rclass))
4122 return GENERAL_REGS;
4124 if (TARGET_VX)
4126 /* The vst/vl vector move instructions allow only for short
4127 displacements. */
4128 if (MEM_P (x)
4129 && GET_CODE (XEXP (x, 0)) == PLUS
4130 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4131 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4132 && reg_class_subset_p (rclass, VEC_REGS)
4133 && (!reg_class_subset_p (rclass, FP_REGS)
4134 || (GET_MODE_SIZE (mode) > 8
4135 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4137 if (in_p)
4138 sri->icode = (TARGET_64BIT ?
4139 CODE_FOR_reloaddi_la_in :
4140 CODE_FOR_reloadsi_la_in);
4141 else
4142 sri->icode = (TARGET_64BIT ?
4143 CODE_FOR_reloaddi_la_out :
4144 CODE_FOR_reloadsi_la_out);
4148 if (TARGET_Z10)
4150 HOST_WIDE_INT offset;
4151 rtx symref;
4153 /* On z10 several optimizer steps may generate larl operands with
4154 an odd addend. */
4155 if (in_p
4156 && s390_loadrelative_operand_p (x, &symref, &offset)
4157 && mode == Pmode
4158 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4159 && (offset & 1) == 1)
4160 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4161 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4163 /* Handle all the (mem (symref)) accesses we cannot use the z10
4164 instructions for. */
4165 if (MEM_P (x)
4166 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4167 && (mode == QImode
4168 || !reg_class_subset_p (rclass, GENERAL_REGS)
4169 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4170 || !s390_check_symref_alignment (XEXP (x, 0),
4171 GET_MODE_SIZE (mode))))
4173 #define __SECONDARY_RELOAD_CASE(M,m) \
4174 case M##mode: \
4175 if (TARGET_64BIT) \
4176 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4177 CODE_FOR_reload##m##di_tomem_z10; \
4178 else \
4179 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4180 CODE_FOR_reload##m##si_tomem_z10; \
4181 break;
4183 switch (GET_MODE (x))
4185 __SECONDARY_RELOAD_CASE (QI, qi);
4186 __SECONDARY_RELOAD_CASE (HI, hi);
4187 __SECONDARY_RELOAD_CASE (SI, si);
4188 __SECONDARY_RELOAD_CASE (DI, di);
4189 __SECONDARY_RELOAD_CASE (TI, ti);
4190 __SECONDARY_RELOAD_CASE (SF, sf);
4191 __SECONDARY_RELOAD_CASE (DF, df);
4192 __SECONDARY_RELOAD_CASE (TF, tf);
4193 __SECONDARY_RELOAD_CASE (SD, sd);
4194 __SECONDARY_RELOAD_CASE (DD, dd);
4195 __SECONDARY_RELOAD_CASE (TD, td);
4196 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4197 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4198 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4199 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4200 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4201 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4202 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4203 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4204 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4205 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4206 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4207 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4208 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4209 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4210 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4211 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4212 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4213 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4214 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4215 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4216 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4217 default:
4218 gcc_unreachable ();
4220 #undef __SECONDARY_RELOAD_CASE
4224 /* We need a scratch register when loading a PLUS expression which
4225 is not a legitimate operand of the LOAD ADDRESS instruction. */
4226 /* LRA can deal with transformation of plus op very well -- so we
4227 don't need to prompt LRA in this case. */
4228 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4229 sri->icode = (TARGET_64BIT ?
4230 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4232 /* Performing a multiword move from or to memory we have to make sure the
4233 second chunk in memory is addressable without causing a displacement
4234 overflow. If that would be the case we calculate the address in
4235 a scratch register. */
4236 if (MEM_P (x)
4237 && GET_CODE (XEXP (x, 0)) == PLUS
4238 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4239 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4240 + GET_MODE_SIZE (mode) - 1))
4242 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4243 in a s_operand address since we may fallback to lm/stm. So we only
4244 have to care about overflows in the b+i+d case. */
4245 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4246 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4247 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4248 /* For FP_REGS no lm/stm is available so this check is triggered
4249 for displacement overflows in b+i+d and b+d like addresses. */
4250 || (reg_classes_intersect_p (FP_REGS, rclass)
4251 && s390_class_max_nregs (FP_REGS, mode) > 1))
4253 if (in_p)
4254 sri->icode = (TARGET_64BIT ?
4255 CODE_FOR_reloaddi_la_in :
4256 CODE_FOR_reloadsi_la_in);
4257 else
4258 sri->icode = (TARGET_64BIT ?
4259 CODE_FOR_reloaddi_la_out :
4260 CODE_FOR_reloadsi_la_out);
4264 /* A scratch address register is needed when a symbolic constant is
4265 copied to r0 compiling with -fPIC. In other cases the target
4266 register might be used as temporary (see legitimize_pic_address). */
4267 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4268 sri->icode = (TARGET_64BIT ?
4269 CODE_FOR_reloaddi_PIC_addr :
4270 CODE_FOR_reloadsi_PIC_addr);
4272 /* Either scratch or no register needed. */
4273 return NO_REGS;
4276 /* Generate code to load SRC, which is PLUS that is not a
4277 legitimate operand for the LA instruction, into TARGET.
4278 SCRATCH may be used as scratch register. */
4280 void
4281 s390_expand_plus_operand (rtx target, rtx src,
4282 rtx scratch)
4284 rtx sum1, sum2;
4285 struct s390_address ad;
4287 /* src must be a PLUS; get its two operands. */
4288 gcc_assert (GET_CODE (src) == PLUS);
4289 gcc_assert (GET_MODE (src) == Pmode);
4291 /* Check if any of the two operands is already scheduled
4292 for replacement by reload. This can happen e.g. when
4293 float registers occur in an address. */
4294 sum1 = find_replacement (&XEXP (src, 0));
4295 sum2 = find_replacement (&XEXP (src, 1));
4296 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4298 /* If the address is already strictly valid, there's nothing to do. */
4299 if (!s390_decompose_address (src, &ad)
4300 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4301 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4303 /* Otherwise, one of the operands cannot be an address register;
4304 we reload its value into the scratch register. */
4305 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4307 emit_move_insn (scratch, sum1);
4308 sum1 = scratch;
4310 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4312 emit_move_insn (scratch, sum2);
4313 sum2 = scratch;
4316 /* According to the way these invalid addresses are generated
4317 in reload.c, it should never happen (at least on s390) that
4318 *neither* of the PLUS components, after find_replacements
4319 was applied, is an address register. */
4320 if (sum1 == scratch && sum2 == scratch)
4322 debug_rtx (src);
4323 gcc_unreachable ();
4326 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4329 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4330 is only ever performed on addresses, so we can mark the
4331 sum as legitimate for LA in any case. */
4332 s390_load_address (target, src);
4336 /* Return true if ADDR is a valid memory address.
4337 STRICT specifies whether strict register checking applies. */
4339 static bool
4340 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4342 struct s390_address ad;
4344 if (TARGET_Z10
4345 && larl_operand (addr, VOIDmode)
4346 && (mode == VOIDmode
4347 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4348 return true;
4350 if (!s390_decompose_address (addr, &ad))
4351 return false;
4353 if (strict)
4355 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4356 return false;
4358 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4359 return false;
4361 else
4363 if (ad.base
4364 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4365 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4366 return false;
4368 if (ad.indx
4369 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4370 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4371 return false;
4373 return true;
4376 /* Return true if OP is a valid operand for the LA instruction.
4377 In 31-bit, we need to prove that the result is used as an
4378 address, as LA performs only a 31-bit addition. */
4380 bool
4381 legitimate_la_operand_p (rtx op)
4383 struct s390_address addr;
4384 if (!s390_decompose_address (op, &addr))
4385 return false;
4387 return (TARGET_64BIT || addr.pointer);
4390 /* Return true if it is valid *and* preferable to use LA to
4391 compute the sum of OP1 and OP2. */
4393 bool
4394 preferred_la_operand_p (rtx op1, rtx op2)
4396 struct s390_address addr;
4398 if (op2 != const0_rtx)
4399 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4401 if (!s390_decompose_address (op1, &addr))
4402 return false;
4403 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4404 return false;
4405 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4406 return false;
4408 /* Avoid LA instructions with index register on z196; it is
4409 preferable to use regular add instructions when possible.
4410 Starting with zEC12 the la with index register is "uncracked"
4411 again. */
4412 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4413 return false;
4415 if (!TARGET_64BIT && !addr.pointer)
4416 return false;
4418 if (addr.pointer)
4419 return true;
4421 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4422 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4423 return true;
4425 return false;
4428 /* Emit a forced load-address operation to load SRC into DST.
4429 This will use the LOAD ADDRESS instruction even in situations
4430 where legitimate_la_operand_p (SRC) returns false. */
4432 void
4433 s390_load_address (rtx dst, rtx src)
4435 if (TARGET_64BIT)
4436 emit_move_insn (dst, src);
4437 else
4438 emit_insn (gen_force_la_31 (dst, src));
4441 /* Return a legitimate reference for ORIG (an address) using the
4442 register REG. If REG is 0, a new pseudo is generated.
4444 There are two types of references that must be handled:
4446 1. Global data references must load the address from the GOT, via
4447 the PIC reg. An insn is emitted to do this load, and the reg is
4448 returned.
4450 2. Static data references, constant pool addresses, and code labels
4451 compute the address as an offset from the GOT, whose base is in
4452 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4453 differentiate them from global data objects. The returned
4454 address is the PIC reg + an unspec constant.
4456 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4457 reg also appears in the address. */
4460 legitimize_pic_address (rtx orig, rtx reg)
4462 rtx addr = orig;
4463 rtx addend = const0_rtx;
4464 rtx new_rtx = orig;
4466 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4468 if (GET_CODE (addr) == CONST)
4469 addr = XEXP (addr, 0);
4471 if (GET_CODE (addr) == PLUS)
4473 addend = XEXP (addr, 1);
4474 addr = XEXP (addr, 0);
4477 if ((GET_CODE (addr) == LABEL_REF
4478 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4479 || (GET_CODE (addr) == UNSPEC &&
4480 (XINT (addr, 1) == UNSPEC_GOTENT
4481 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4482 && GET_CODE (addend) == CONST_INT)
4484 /* This can be locally addressed. */
4486 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4487 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4488 gen_rtx_CONST (Pmode, addr) : addr);
4490 if (TARGET_CPU_ZARCH
4491 && larl_operand (const_addr, VOIDmode)
4492 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4493 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4495 if (INTVAL (addend) & 1)
4497 /* LARL can't handle odd offsets, so emit a pair of LARL
4498 and LA. */
4499 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4501 if (!DISP_IN_RANGE (INTVAL (addend)))
4503 HOST_WIDE_INT even = INTVAL (addend) - 1;
4504 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4505 addr = gen_rtx_CONST (Pmode, addr);
4506 addend = const1_rtx;
4509 emit_move_insn (temp, addr);
4510 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4512 if (reg != 0)
4514 s390_load_address (reg, new_rtx);
4515 new_rtx = reg;
4518 else
4520 /* If the offset is even, we can just use LARL. This
4521 will happen automatically. */
4524 else
4526 /* No larl - Access local symbols relative to the GOT. */
4528 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4530 if (reload_in_progress || reload_completed)
4531 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4533 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4534 if (addend != const0_rtx)
4535 addr = gen_rtx_PLUS (Pmode, addr, addend);
4536 addr = gen_rtx_CONST (Pmode, addr);
4537 addr = force_const_mem (Pmode, addr);
4538 emit_move_insn (temp, addr);
4540 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4541 if (reg != 0)
4543 s390_load_address (reg, new_rtx);
4544 new_rtx = reg;
4548 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4550 /* A non-local symbol reference without addend.
4552 The symbol ref is wrapped into an UNSPEC to make sure the
4553 proper operand modifier (@GOT or @GOTENT) will be emitted.
4554 This will tell the linker to put the symbol into the GOT.
4556 Additionally the code dereferencing the GOT slot is emitted here.
4558 An addend to the symref needs to be added afterwards.
4559 legitimize_pic_address calls itself recursively to handle
4560 that case. So no need to do it here. */
4562 if (reg == 0)
4563 reg = gen_reg_rtx (Pmode);
4565 if (TARGET_Z10)
4567 /* Use load relative if possible.
4568 lgrl <target>, sym@GOTENT */
4569 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4570 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4571 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4573 emit_move_insn (reg, new_rtx);
4574 new_rtx = reg;
4576 else if (flag_pic == 1)
4578 /* Assume GOT offset is a valid displacement operand (< 4k
4579 or < 512k with z990). This is handled the same way in
4580 both 31- and 64-bit code (@GOT).
4581 lg <target>, sym@GOT(r12) */
4583 if (reload_in_progress || reload_completed)
4584 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4586 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4587 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4588 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4589 new_rtx = gen_const_mem (Pmode, new_rtx);
4590 emit_move_insn (reg, new_rtx);
4591 new_rtx = reg;
4593 else if (TARGET_CPU_ZARCH)
4595 /* If the GOT offset might be >= 4k, we determine the position
4596 of the GOT entry via a PC-relative LARL (@GOTENT).
4597 larl temp, sym@GOTENT
4598 lg <target>, 0(temp) */
4600 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4602 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4603 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4605 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4606 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4607 emit_move_insn (temp, new_rtx);
4609 new_rtx = gen_const_mem (Pmode, temp);
4610 emit_move_insn (reg, new_rtx);
4612 new_rtx = reg;
4614 else
4616 /* If the GOT offset might be >= 4k, we have to load it
4617 from the literal pool (@GOT).
4619 lg temp, lit-litbase(r13)
4620 lg <target>, 0(temp)
4621 lit: .long sym@GOT */
4623 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4625 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4626 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4628 if (reload_in_progress || reload_completed)
4629 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4631 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4632 addr = gen_rtx_CONST (Pmode, addr);
4633 addr = force_const_mem (Pmode, addr);
4634 emit_move_insn (temp, addr);
4636 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4637 new_rtx = gen_const_mem (Pmode, new_rtx);
4638 emit_move_insn (reg, new_rtx);
4639 new_rtx = reg;
4642 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4644 gcc_assert (XVECLEN (addr, 0) == 1);
4645 switch (XINT (addr, 1))
4647 /* These address symbols (or PLT slots) relative to the GOT
4648 (not GOT slots!). In general this will exceed the
4649 displacement range so these value belong into the literal
4650 pool. */
4651 case UNSPEC_GOTOFF:
4652 case UNSPEC_PLTOFF:
4653 new_rtx = force_const_mem (Pmode, orig);
4654 break;
4656 /* For -fPIC the GOT size might exceed the displacement
4657 range so make sure the value is in the literal pool. */
4658 case UNSPEC_GOT:
4659 if (flag_pic == 2)
4660 new_rtx = force_const_mem (Pmode, orig);
4661 break;
4663 /* For @GOTENT larl is used. This is handled like local
4664 symbol refs. */
4665 case UNSPEC_GOTENT:
4666 gcc_unreachable ();
4667 break;
4669 /* @PLT is OK as is on 64-bit, must be converted to
4670 GOT-relative @PLTOFF on 31-bit. */
4671 case UNSPEC_PLT:
4672 if (!TARGET_CPU_ZARCH)
4674 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4676 if (reload_in_progress || reload_completed)
4677 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4679 addr = XVECEXP (addr, 0, 0);
4680 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4681 UNSPEC_PLTOFF);
4682 if (addend != const0_rtx)
4683 addr = gen_rtx_PLUS (Pmode, addr, addend);
4684 addr = gen_rtx_CONST (Pmode, addr);
4685 addr = force_const_mem (Pmode, addr);
4686 emit_move_insn (temp, addr);
4688 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4689 if (reg != 0)
4691 s390_load_address (reg, new_rtx);
4692 new_rtx = reg;
4695 else
4696 /* On 64 bit larl can be used. This case is handled like
4697 local symbol refs. */
4698 gcc_unreachable ();
4699 break;
4701 /* Everything else cannot happen. */
4702 default:
4703 gcc_unreachable ();
4706 else if (addend != const0_rtx)
4708 /* Otherwise, compute the sum. */
4710 rtx base = legitimize_pic_address (addr, reg);
4711 new_rtx = legitimize_pic_address (addend,
4712 base == reg ? NULL_RTX : reg);
4713 if (GET_CODE (new_rtx) == CONST_INT)
4714 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4715 else
4717 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4719 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4720 new_rtx = XEXP (new_rtx, 1);
4722 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4725 if (GET_CODE (new_rtx) == CONST)
4726 new_rtx = XEXP (new_rtx, 0);
4727 new_rtx = force_operand (new_rtx, 0);
4730 return new_rtx;
4733 /* Load the thread pointer into a register. */
4736 s390_get_thread_pointer (void)
4738 rtx tp = gen_reg_rtx (Pmode);
4740 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4741 mark_reg_pointer (tp, BITS_PER_WORD);
4743 return tp;
4746 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4747 in s390_tls_symbol which always refers to __tls_get_offset.
4748 The returned offset is written to RESULT_REG and an USE rtx is
4749 generated for TLS_CALL. */
4751 static GTY(()) rtx s390_tls_symbol;
4753 static void
4754 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4756 rtx insn;
4758 if (!flag_pic)
4759 emit_insn (s390_load_got ());
4761 if (!s390_tls_symbol)
4762 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4764 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4765 gen_rtx_REG (Pmode, RETURN_REGNUM));
4767 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4768 RTL_CONST_CALL_P (insn) = 1;
4771 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4772 this (thread-local) address. REG may be used as temporary. */
4774 static rtx
4775 legitimize_tls_address (rtx addr, rtx reg)
4777 rtx new_rtx, tls_call, temp, base, r2, insn;
4779 if (GET_CODE (addr) == SYMBOL_REF)
4780 switch (tls_symbolic_operand (addr))
4782 case TLS_MODEL_GLOBAL_DYNAMIC:
4783 start_sequence ();
4784 r2 = gen_rtx_REG (Pmode, 2);
4785 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4786 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4787 new_rtx = force_const_mem (Pmode, new_rtx);
4788 emit_move_insn (r2, new_rtx);
4789 s390_emit_tls_call_insn (r2, tls_call);
4790 insn = get_insns ();
4791 end_sequence ();
4793 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4794 temp = gen_reg_rtx (Pmode);
4795 emit_libcall_block (insn, temp, r2, new_rtx);
4797 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4798 if (reg != 0)
4800 s390_load_address (reg, new_rtx);
4801 new_rtx = reg;
4803 break;
4805 case TLS_MODEL_LOCAL_DYNAMIC:
4806 start_sequence ();
4807 r2 = gen_rtx_REG (Pmode, 2);
4808 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4809 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4810 new_rtx = force_const_mem (Pmode, new_rtx);
4811 emit_move_insn (r2, new_rtx);
4812 s390_emit_tls_call_insn (r2, tls_call);
4813 insn = get_insns ();
4814 end_sequence ();
4816 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4817 temp = gen_reg_rtx (Pmode);
4818 emit_libcall_block (insn, temp, r2, new_rtx);
4820 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4821 base = gen_reg_rtx (Pmode);
4822 s390_load_address (base, new_rtx);
4824 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4825 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4826 new_rtx = force_const_mem (Pmode, new_rtx);
4827 temp = gen_reg_rtx (Pmode);
4828 emit_move_insn (temp, new_rtx);
4830 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4831 if (reg != 0)
4833 s390_load_address (reg, new_rtx);
4834 new_rtx = reg;
4836 break;
4838 case TLS_MODEL_INITIAL_EXEC:
4839 if (flag_pic == 1)
4841 /* Assume GOT offset < 4k. This is handled the same way
4842 in both 31- and 64-bit code. */
4844 if (reload_in_progress || reload_completed)
4845 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4847 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4848 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4849 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4850 new_rtx = gen_const_mem (Pmode, new_rtx);
4851 temp = gen_reg_rtx (Pmode);
4852 emit_move_insn (temp, new_rtx);
4854 else if (TARGET_CPU_ZARCH)
4856 /* If the GOT offset might be >= 4k, we determine the position
4857 of the GOT entry via a PC-relative LARL. */
4859 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4860 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4861 temp = gen_reg_rtx (Pmode);
4862 emit_move_insn (temp, new_rtx);
4864 new_rtx = gen_const_mem (Pmode, temp);
4865 temp = gen_reg_rtx (Pmode);
4866 emit_move_insn (temp, new_rtx);
4868 else if (flag_pic)
4870 /* If the GOT offset might be >= 4k, we have to load it
4871 from the literal pool. */
4873 if (reload_in_progress || reload_completed)
4874 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4876 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4877 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4878 new_rtx = force_const_mem (Pmode, new_rtx);
4879 temp = gen_reg_rtx (Pmode);
4880 emit_move_insn (temp, new_rtx);
4882 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4883 new_rtx = gen_const_mem (Pmode, new_rtx);
4885 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4886 temp = gen_reg_rtx (Pmode);
4887 emit_insn (gen_rtx_SET (temp, new_rtx));
4889 else
4891 /* In position-dependent code, load the absolute address of
4892 the GOT entry from the literal pool. */
4894 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4895 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4896 new_rtx = force_const_mem (Pmode, new_rtx);
4897 temp = gen_reg_rtx (Pmode);
4898 emit_move_insn (temp, new_rtx);
4900 new_rtx = temp;
4901 new_rtx = gen_const_mem (Pmode, new_rtx);
4902 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4903 temp = gen_reg_rtx (Pmode);
4904 emit_insn (gen_rtx_SET (temp, new_rtx));
4907 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4908 if (reg != 0)
4910 s390_load_address (reg, new_rtx);
4911 new_rtx = reg;
4913 break;
4915 case TLS_MODEL_LOCAL_EXEC:
4916 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4917 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4918 new_rtx = force_const_mem (Pmode, new_rtx);
4919 temp = gen_reg_rtx (Pmode);
4920 emit_move_insn (temp, new_rtx);
4922 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4923 if (reg != 0)
4925 s390_load_address (reg, new_rtx);
4926 new_rtx = reg;
4928 break;
4930 default:
4931 gcc_unreachable ();
4934 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4936 switch (XINT (XEXP (addr, 0), 1))
4938 case UNSPEC_INDNTPOFF:
4939 gcc_assert (TARGET_CPU_ZARCH);
4940 new_rtx = addr;
4941 break;
4943 default:
4944 gcc_unreachable ();
4948 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4949 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4951 new_rtx = XEXP (XEXP (addr, 0), 0);
4952 if (GET_CODE (new_rtx) != SYMBOL_REF)
4953 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4955 new_rtx = legitimize_tls_address (new_rtx, reg);
4956 new_rtx = plus_constant (Pmode, new_rtx,
4957 INTVAL (XEXP (XEXP (addr, 0), 1)));
4958 new_rtx = force_operand (new_rtx, 0);
4961 else
4962 gcc_unreachable (); /* for now ... */
4964 return new_rtx;
4967 /* Emit insns making the address in operands[1] valid for a standard
4968 move to operands[0]. operands[1] is replaced by an address which
4969 should be used instead of the former RTX to emit the move
4970 pattern. */
4972 void
4973 emit_symbolic_move (rtx *operands)
4975 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4977 if (GET_CODE (operands[0]) == MEM)
4978 operands[1] = force_reg (Pmode, operands[1]);
4979 else if (TLS_SYMBOLIC_CONST (operands[1]))
4980 operands[1] = legitimize_tls_address (operands[1], temp);
4981 else if (flag_pic)
4982 operands[1] = legitimize_pic_address (operands[1], temp);
4985 /* Try machine-dependent ways of modifying an illegitimate address X
4986 to be legitimate. If we find one, return the new, valid address.
4988 OLDX is the address as it was before break_out_memory_refs was called.
4989 In some cases it is useful to look at this to decide what needs to be done.
4991 MODE is the mode of the operand pointed to by X.
4993 When -fpic is used, special handling is needed for symbolic references.
4994 See comments by legitimize_pic_address for details. */
4996 static rtx
4997 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4998 machine_mode mode ATTRIBUTE_UNUSED)
5000 rtx constant_term = const0_rtx;
5002 if (TLS_SYMBOLIC_CONST (x))
5004 x = legitimize_tls_address (x, 0);
5006 if (s390_legitimate_address_p (mode, x, FALSE))
5007 return x;
5009 else if (GET_CODE (x) == PLUS
5010 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5011 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5013 return x;
5015 else if (flag_pic)
5017 if (SYMBOLIC_CONST (x)
5018 || (GET_CODE (x) == PLUS
5019 && (SYMBOLIC_CONST (XEXP (x, 0))
5020 || SYMBOLIC_CONST (XEXP (x, 1)))))
5021 x = legitimize_pic_address (x, 0);
5023 if (s390_legitimate_address_p (mode, x, FALSE))
5024 return x;
5027 x = eliminate_constant_term (x, &constant_term);
5029 /* Optimize loading of large displacements by splitting them
5030 into the multiple of 4K and the rest; this allows the
5031 former to be CSE'd if possible.
5033 Don't do this if the displacement is added to a register
5034 pointing into the stack frame, as the offsets will
5035 change later anyway. */
5037 if (GET_CODE (constant_term) == CONST_INT
5038 && !TARGET_LONG_DISPLACEMENT
5039 && !DISP_IN_RANGE (INTVAL (constant_term))
5040 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5042 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5043 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5045 rtx temp = gen_reg_rtx (Pmode);
5046 rtx val = force_operand (GEN_INT (upper), temp);
5047 if (val != temp)
5048 emit_move_insn (temp, val);
5050 x = gen_rtx_PLUS (Pmode, x, temp);
5051 constant_term = GEN_INT (lower);
5054 if (GET_CODE (x) == PLUS)
5056 if (GET_CODE (XEXP (x, 0)) == REG)
5058 rtx temp = gen_reg_rtx (Pmode);
5059 rtx val = force_operand (XEXP (x, 1), temp);
5060 if (val != temp)
5061 emit_move_insn (temp, val);
5063 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5066 else if (GET_CODE (XEXP (x, 1)) == REG)
5068 rtx temp = gen_reg_rtx (Pmode);
5069 rtx val = force_operand (XEXP (x, 0), temp);
5070 if (val != temp)
5071 emit_move_insn (temp, val);
5073 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5077 if (constant_term != const0_rtx)
5078 x = gen_rtx_PLUS (Pmode, x, constant_term);
5080 return x;
5083 /* Try a machine-dependent way of reloading an illegitimate address AD
5084 operand. If we find one, push the reload and return the new address.
5086 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5087 and TYPE is the reload type of the current reload. */
5090 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5091 int opnum, int type)
5093 if (!optimize || TARGET_LONG_DISPLACEMENT)
5094 return NULL_RTX;
5096 if (GET_CODE (ad) == PLUS)
5098 rtx tem = simplify_binary_operation (PLUS, Pmode,
5099 XEXP (ad, 0), XEXP (ad, 1));
5100 if (tem)
5101 ad = tem;
5104 if (GET_CODE (ad) == PLUS
5105 && GET_CODE (XEXP (ad, 0)) == REG
5106 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5107 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5109 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5110 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5111 rtx cst, tem, new_rtx;
5113 cst = GEN_INT (upper);
5114 if (!legitimate_reload_constant_p (cst))
5115 cst = force_const_mem (Pmode, cst);
5117 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5118 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5120 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5121 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5122 opnum, (enum reload_type) type);
5123 return new_rtx;
5126 return NULL_RTX;
5129 /* Emit code to move LEN bytes from DST to SRC. */
5131 bool
5132 s390_expand_movmem (rtx dst, rtx src, rtx len)
5134 /* When tuning for z10 or higher we rely on the Glibc functions to
5135 do the right thing. Only for constant lengths below 64k we will
5136 generate inline code. */
5137 if (s390_tune >= PROCESSOR_2097_Z10
5138 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5139 return false;
5141 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5143 if (INTVAL (len) > 0)
5144 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5147 else if (TARGET_MVCLE)
5149 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5152 else
5154 rtx dst_addr, src_addr, count, blocks, temp;
5155 rtx_code_label *loop_start_label = gen_label_rtx ();
5156 rtx_code_label *loop_end_label = gen_label_rtx ();
5157 rtx_code_label *end_label = gen_label_rtx ();
5158 machine_mode mode;
5160 mode = GET_MODE (len);
5161 if (mode == VOIDmode)
5162 mode = Pmode;
5164 dst_addr = gen_reg_rtx (Pmode);
5165 src_addr = gen_reg_rtx (Pmode);
5166 count = gen_reg_rtx (mode);
5167 blocks = gen_reg_rtx (mode);
5169 convert_move (count, len, 1);
5170 emit_cmp_and_jump_insns (count, const0_rtx,
5171 EQ, NULL_RTX, mode, 1, end_label);
5173 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5174 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5175 dst = change_address (dst, VOIDmode, dst_addr);
5176 src = change_address (src, VOIDmode, src_addr);
5178 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5179 OPTAB_DIRECT);
5180 if (temp != count)
5181 emit_move_insn (count, temp);
5183 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5184 OPTAB_DIRECT);
5185 if (temp != blocks)
5186 emit_move_insn (blocks, temp);
5188 emit_cmp_and_jump_insns (blocks, const0_rtx,
5189 EQ, NULL_RTX, mode, 1, loop_end_label);
5191 emit_label (loop_start_label);
5193 if (TARGET_Z10
5194 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5196 rtx prefetch;
5198 /* Issue a read prefetch for the +3 cache line. */
5199 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5200 const0_rtx, const0_rtx);
5201 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5202 emit_insn (prefetch);
5204 /* Issue a write prefetch for the +3 cache line. */
5205 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5206 const1_rtx, const0_rtx);
5207 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5208 emit_insn (prefetch);
5211 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5212 s390_load_address (dst_addr,
5213 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5214 s390_load_address (src_addr,
5215 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5217 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5218 OPTAB_DIRECT);
5219 if (temp != blocks)
5220 emit_move_insn (blocks, temp);
5222 emit_cmp_and_jump_insns (blocks, const0_rtx,
5223 EQ, NULL_RTX, mode, 1, loop_end_label);
5225 emit_jump (loop_start_label);
5226 emit_label (loop_end_label);
5228 emit_insn (gen_movmem_short (dst, src,
5229 convert_to_mode (Pmode, count, 1)));
5230 emit_label (end_label);
5232 return true;
5235 /* Emit code to set LEN bytes at DST to VAL.
5236 Make use of clrmem if VAL is zero. */
5238 void
5239 s390_expand_setmem (rtx dst, rtx len, rtx val)
5241 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5242 return;
5244 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5246 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5248 if (val == const0_rtx && INTVAL (len) <= 256)
5249 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5250 else
5252 /* Initialize memory by storing the first byte. */
5253 emit_move_insn (adjust_address (dst, QImode, 0), val);
5255 if (INTVAL (len) > 1)
5257 /* Initiate 1 byte overlap move.
5258 The first byte of DST is propagated through DSTP1.
5259 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5260 DST is set to size 1 so the rest of the memory location
5261 does not count as source operand. */
5262 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5263 set_mem_size (dst, 1);
5265 emit_insn (gen_movmem_short (dstp1, dst,
5266 GEN_INT (INTVAL (len) - 2)));
5271 else if (TARGET_MVCLE)
5273 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5274 if (TARGET_64BIT)
5275 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5276 val));
5277 else
5278 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5279 val));
5282 else
5284 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5285 rtx_code_label *loop_start_label = gen_label_rtx ();
5286 rtx_code_label *loop_end_label = gen_label_rtx ();
5287 rtx_code_label *end_label = gen_label_rtx ();
5288 machine_mode mode;
5290 mode = GET_MODE (len);
5291 if (mode == VOIDmode)
5292 mode = Pmode;
5294 dst_addr = gen_reg_rtx (Pmode);
5295 count = gen_reg_rtx (mode);
5296 blocks = gen_reg_rtx (mode);
5298 convert_move (count, len, 1);
5299 emit_cmp_and_jump_insns (count, const0_rtx,
5300 EQ, NULL_RTX, mode, 1, end_label);
5302 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5303 dst = change_address (dst, VOIDmode, dst_addr);
5305 if (val == const0_rtx)
5306 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5307 OPTAB_DIRECT);
5308 else
5310 dstp1 = adjust_address (dst, VOIDmode, 1);
5311 set_mem_size (dst, 1);
5313 /* Initialize memory by storing the first byte. */
5314 emit_move_insn (adjust_address (dst, QImode, 0), val);
5316 /* If count is 1 we are done. */
5317 emit_cmp_and_jump_insns (count, const1_rtx,
5318 EQ, NULL_RTX, mode, 1, end_label);
5320 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5321 OPTAB_DIRECT);
5323 if (temp != count)
5324 emit_move_insn (count, temp);
5326 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5327 OPTAB_DIRECT);
5328 if (temp != blocks)
5329 emit_move_insn (blocks, temp);
5331 emit_cmp_and_jump_insns (blocks, const0_rtx,
5332 EQ, NULL_RTX, mode, 1, loop_end_label);
5334 emit_label (loop_start_label);
5336 if (TARGET_Z10
5337 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5339 /* Issue a write prefetch for the +4 cache line. */
5340 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5341 GEN_INT (1024)),
5342 const1_rtx, const0_rtx);
5343 emit_insn (prefetch);
5344 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5347 if (val == const0_rtx)
5348 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5349 else
5350 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5351 s390_load_address (dst_addr,
5352 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5354 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5355 OPTAB_DIRECT);
5356 if (temp != blocks)
5357 emit_move_insn (blocks, temp);
5359 emit_cmp_and_jump_insns (blocks, const0_rtx,
5360 EQ, NULL_RTX, mode, 1, loop_end_label);
5362 emit_jump (loop_start_label);
5363 emit_label (loop_end_label);
5365 if (val == const0_rtx)
5366 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5367 else
5368 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5369 emit_label (end_label);
5373 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5374 and return the result in TARGET. */
5376 bool
5377 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5379 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5380 rtx tmp;
5382 /* When tuning for z10 or higher we rely on the Glibc functions to
5383 do the right thing. Only for constant lengths below 64k we will
5384 generate inline code. */
5385 if (s390_tune >= PROCESSOR_2097_Z10
5386 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5387 return false;
5389 /* As the result of CMPINT is inverted compared to what we need,
5390 we have to swap the operands. */
5391 tmp = op0; op0 = op1; op1 = tmp;
5393 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5395 if (INTVAL (len) > 0)
5397 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5398 emit_insn (gen_cmpint (target, ccreg));
5400 else
5401 emit_move_insn (target, const0_rtx);
5403 else if (TARGET_MVCLE)
5405 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5406 emit_insn (gen_cmpint (target, ccreg));
5408 else
5410 rtx addr0, addr1, count, blocks, temp;
5411 rtx_code_label *loop_start_label = gen_label_rtx ();
5412 rtx_code_label *loop_end_label = gen_label_rtx ();
5413 rtx_code_label *end_label = gen_label_rtx ();
5414 machine_mode mode;
5416 mode = GET_MODE (len);
5417 if (mode == VOIDmode)
5418 mode = Pmode;
5420 addr0 = gen_reg_rtx (Pmode);
5421 addr1 = gen_reg_rtx (Pmode);
5422 count = gen_reg_rtx (mode);
5423 blocks = gen_reg_rtx (mode);
5425 convert_move (count, len, 1);
5426 emit_cmp_and_jump_insns (count, const0_rtx,
5427 EQ, NULL_RTX, mode, 1, end_label);
5429 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5430 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5431 op0 = change_address (op0, VOIDmode, addr0);
5432 op1 = change_address (op1, VOIDmode, addr1);
5434 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5435 OPTAB_DIRECT);
5436 if (temp != count)
5437 emit_move_insn (count, temp);
5439 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5440 OPTAB_DIRECT);
5441 if (temp != blocks)
5442 emit_move_insn (blocks, temp);
5444 emit_cmp_and_jump_insns (blocks, const0_rtx,
5445 EQ, NULL_RTX, mode, 1, loop_end_label);
5447 emit_label (loop_start_label);
5449 if (TARGET_Z10
5450 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5452 rtx prefetch;
5454 /* Issue a read prefetch for the +2 cache line of operand 1. */
5455 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5456 const0_rtx, const0_rtx);
5457 emit_insn (prefetch);
5458 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5460 /* Issue a read prefetch for the +2 cache line of operand 2. */
5461 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5462 const0_rtx, const0_rtx);
5463 emit_insn (prefetch);
5464 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5467 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5468 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5469 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5470 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5471 temp = gen_rtx_SET (pc_rtx, temp);
5472 emit_jump_insn (temp);
5474 s390_load_address (addr0,
5475 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5476 s390_load_address (addr1,
5477 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5479 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5480 OPTAB_DIRECT);
5481 if (temp != blocks)
5482 emit_move_insn (blocks, temp);
5484 emit_cmp_and_jump_insns (blocks, const0_rtx,
5485 EQ, NULL_RTX, mode, 1, loop_end_label);
5487 emit_jump (loop_start_label);
5488 emit_label (loop_end_label);
5490 emit_insn (gen_cmpmem_short (op0, op1,
5491 convert_to_mode (Pmode, count, 1)));
5492 emit_label (end_label);
5494 emit_insn (gen_cmpint (target, ccreg));
5496 return true;
5499 /* Emit a conditional jump to LABEL for condition code mask MASK using
5500 comparsion operator COMPARISON. Return the emitted jump insn. */
5502 static rtx
5503 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5505 rtx temp;
5507 gcc_assert (comparison == EQ || comparison == NE);
5508 gcc_assert (mask > 0 && mask < 15);
5510 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5511 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5512 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5513 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5514 temp = gen_rtx_SET (pc_rtx, temp);
5515 return emit_jump_insn (temp);
5518 /* Emit the instructions to implement strlen of STRING and store the
5519 result in TARGET. The string has the known ALIGNMENT. This
5520 version uses vector instructions and is therefore not appropriate
5521 for targets prior to z13. */
5523 void
5524 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5526 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5527 int very_likely = REG_BR_PROB_BASE - 1;
5528 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5529 rtx str_reg = gen_reg_rtx (V16QImode);
5530 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5531 rtx str_idx_reg = gen_reg_rtx (Pmode);
5532 rtx result_reg = gen_reg_rtx (V16QImode);
5533 rtx is_aligned_label = gen_label_rtx ();
5534 rtx into_loop_label = NULL_RTX;
5535 rtx loop_start_label = gen_label_rtx ();
5536 rtx temp;
5537 rtx len = gen_reg_rtx (QImode);
5538 rtx cond;
5540 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5541 emit_move_insn (str_idx_reg, const0_rtx);
5543 if (INTVAL (alignment) < 16)
5545 /* Check whether the address happens to be aligned properly so
5546 jump directly to the aligned loop. */
5547 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5548 str_addr_base_reg, GEN_INT (15)),
5549 const0_rtx, EQ, NULL_RTX,
5550 Pmode, 1, is_aligned_label);
5552 temp = gen_reg_rtx (Pmode);
5553 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5554 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5555 gcc_assert (REG_P (temp));
5556 highest_index_to_load_reg =
5557 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5558 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5559 gcc_assert (REG_P (highest_index_to_load_reg));
5560 emit_insn (gen_vllv16qi (str_reg,
5561 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5562 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5564 into_loop_label = gen_label_rtx ();
5565 s390_emit_jump (into_loop_label, NULL_RTX);
5566 emit_barrier ();
5569 emit_label (is_aligned_label);
5570 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5572 /* Reaching this point we are only performing 16 bytes aligned
5573 loads. */
5574 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5576 emit_label (loop_start_label);
5577 LABEL_NUSES (loop_start_label) = 1;
5579 /* Load 16 bytes of the string into VR. */
5580 emit_move_insn (str_reg,
5581 gen_rtx_MEM (V16QImode,
5582 gen_rtx_PLUS (Pmode, str_idx_reg,
5583 str_addr_base_reg)));
5584 if (into_loop_label != NULL_RTX)
5586 emit_label (into_loop_label);
5587 LABEL_NUSES (into_loop_label) = 1;
5590 /* Increment string index by 16 bytes. */
5591 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5592 str_idx_reg, 1, OPTAB_DIRECT);
5594 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5595 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5597 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5598 REG_BR_PROB, very_likely);
5599 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5601 /* If the string pointer wasn't aligned we have loaded less then 16
5602 bytes and the remaining bytes got filled with zeros (by vll).
5603 Now we have to check whether the resulting index lies within the
5604 bytes actually part of the string. */
5606 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5607 highest_index_to_load_reg);
5608 s390_load_address (highest_index_to_load_reg,
5609 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5610 const1_rtx));
5611 if (TARGET_64BIT)
5612 emit_insn (gen_movdicc (str_idx_reg, cond,
5613 highest_index_to_load_reg, str_idx_reg));
5614 else
5615 emit_insn (gen_movsicc (str_idx_reg, cond,
5616 highest_index_to_load_reg, str_idx_reg));
5618 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5619 very_unlikely);
5621 expand_binop (Pmode, add_optab, str_idx_reg,
5622 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5623 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5624 here. */
5625 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5626 convert_to_mode (Pmode, len, 1),
5627 target, 1, OPTAB_DIRECT);
5628 if (temp != target)
5629 emit_move_insn (target, temp);
5632 void
5633 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5635 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5636 rtx temp = gen_reg_rtx (Pmode);
5637 rtx src_addr = XEXP (src, 0);
5638 rtx dst_addr = XEXP (dst, 0);
5639 rtx src_addr_reg = gen_reg_rtx (Pmode);
5640 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5641 rtx offset = gen_reg_rtx (Pmode);
5642 rtx vsrc = gen_reg_rtx (V16QImode);
5643 rtx vpos = gen_reg_rtx (V16QImode);
5644 rtx loadlen = gen_reg_rtx (SImode);
5645 rtx gpos_qi = gen_reg_rtx(QImode);
5646 rtx gpos = gen_reg_rtx (SImode);
5647 rtx done_label = gen_label_rtx ();
5648 rtx loop_label = gen_label_rtx ();
5649 rtx exit_label = gen_label_rtx ();
5650 rtx full_label = gen_label_rtx ();
5652 /* Perform a quick check for string ending on the first up to 16
5653 bytes and exit early if successful. */
5655 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5656 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5657 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5658 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5659 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5660 /* gpos is the byte index if a zero was found and 16 otherwise.
5661 So if it is lower than the loaded bytes we have a hit. */
5662 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5663 full_label);
5664 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5666 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5667 1, OPTAB_DIRECT);
5668 emit_jump (exit_label);
5669 emit_barrier ();
5671 emit_label (full_label);
5672 LABEL_NUSES (full_label) = 1;
5674 /* Calculate `offset' so that src + offset points to the last byte
5675 before 16 byte alignment. */
5677 /* temp = src_addr & 0xf */
5678 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5679 1, OPTAB_DIRECT);
5681 /* offset = 0xf - temp */
5682 emit_move_insn (offset, GEN_INT (15));
5683 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5684 1, OPTAB_DIRECT);
5686 /* Store `offset' bytes in the dstination string. The quick check
5687 has loaded at least `offset' bytes into vsrc. */
5689 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5691 /* Advance to the next byte to be loaded. */
5692 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5693 1, OPTAB_DIRECT);
5695 /* Make sure the addresses are single regs which can be used as a
5696 base. */
5697 emit_move_insn (src_addr_reg, src_addr);
5698 emit_move_insn (dst_addr_reg, dst_addr);
5700 /* MAIN LOOP */
5702 emit_label (loop_label);
5703 LABEL_NUSES (loop_label) = 1;
5705 emit_move_insn (vsrc,
5706 gen_rtx_MEM (V16QImode,
5707 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5709 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5710 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5711 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5712 REG_BR_PROB, very_unlikely);
5714 emit_move_insn (gen_rtx_MEM (V16QImode,
5715 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5716 vsrc);
5717 /* offset += 16 */
5718 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5719 offset, 1, OPTAB_DIRECT);
5721 emit_jump (loop_label);
5722 emit_barrier ();
5724 /* REGULAR EXIT */
5726 /* We are done. Add the offset of the zero character to the dst_addr
5727 pointer to get the result. */
5729 emit_label (done_label);
5730 LABEL_NUSES (done_label) = 1;
5732 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5733 1, OPTAB_DIRECT);
5735 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5736 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5738 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5740 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5741 1, OPTAB_DIRECT);
5743 /* EARLY EXIT */
5745 emit_label (exit_label);
5746 LABEL_NUSES (exit_label) = 1;
5750 /* Expand conditional increment or decrement using alc/slb instructions.
5751 Should generate code setting DST to either SRC or SRC + INCREMENT,
5752 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5753 Returns true if successful, false otherwise.
5755 That makes it possible to implement some if-constructs without jumps e.g.:
5756 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5757 unsigned int a, b, c;
5758 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5759 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5760 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5761 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5763 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5764 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5765 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5766 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5767 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5769 bool
5770 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5771 rtx dst, rtx src, rtx increment)
5773 machine_mode cmp_mode;
5774 machine_mode cc_mode;
5775 rtx op_res;
5776 rtx insn;
5777 rtvec p;
5778 int ret;
5780 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5781 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5782 cmp_mode = SImode;
5783 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5784 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5785 cmp_mode = DImode;
5786 else
5787 return false;
5789 /* Try ADD LOGICAL WITH CARRY. */
5790 if (increment == const1_rtx)
5792 /* Determine CC mode to use. */
5793 if (cmp_code == EQ || cmp_code == NE)
5795 if (cmp_op1 != const0_rtx)
5797 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5798 NULL_RTX, 0, OPTAB_WIDEN);
5799 cmp_op1 = const0_rtx;
5802 cmp_code = cmp_code == EQ ? LEU : GTU;
5805 if (cmp_code == LTU || cmp_code == LEU)
5807 rtx tem = cmp_op0;
5808 cmp_op0 = cmp_op1;
5809 cmp_op1 = tem;
5810 cmp_code = swap_condition (cmp_code);
5813 switch (cmp_code)
5815 case GTU:
5816 cc_mode = CCUmode;
5817 break;
5819 case GEU:
5820 cc_mode = CCL3mode;
5821 break;
5823 default:
5824 return false;
5827 /* Emit comparison instruction pattern. */
5828 if (!register_operand (cmp_op0, cmp_mode))
5829 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5831 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5832 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5833 /* We use insn_invalid_p here to add clobbers if required. */
5834 ret = insn_invalid_p (emit_insn (insn), false);
5835 gcc_assert (!ret);
5837 /* Emit ALC instruction pattern. */
5838 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5839 gen_rtx_REG (cc_mode, CC_REGNUM),
5840 const0_rtx);
5842 if (src != const0_rtx)
5844 if (!register_operand (src, GET_MODE (dst)))
5845 src = force_reg (GET_MODE (dst), src);
5847 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5848 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5851 p = rtvec_alloc (2);
5852 RTVEC_ELT (p, 0) =
5853 gen_rtx_SET (dst, op_res);
5854 RTVEC_ELT (p, 1) =
5855 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5856 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5858 return true;
5861 /* Try SUBTRACT LOGICAL WITH BORROW. */
5862 if (increment == constm1_rtx)
5864 /* Determine CC mode to use. */
5865 if (cmp_code == EQ || cmp_code == NE)
5867 if (cmp_op1 != const0_rtx)
5869 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5870 NULL_RTX, 0, OPTAB_WIDEN);
5871 cmp_op1 = const0_rtx;
5874 cmp_code = cmp_code == EQ ? LEU : GTU;
5877 if (cmp_code == GTU || cmp_code == GEU)
5879 rtx tem = cmp_op0;
5880 cmp_op0 = cmp_op1;
5881 cmp_op1 = tem;
5882 cmp_code = swap_condition (cmp_code);
5885 switch (cmp_code)
5887 case LEU:
5888 cc_mode = CCUmode;
5889 break;
5891 case LTU:
5892 cc_mode = CCL3mode;
5893 break;
5895 default:
5896 return false;
5899 /* Emit comparison instruction pattern. */
5900 if (!register_operand (cmp_op0, cmp_mode))
5901 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5903 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5904 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5905 /* We use insn_invalid_p here to add clobbers if required. */
5906 ret = insn_invalid_p (emit_insn (insn), false);
5907 gcc_assert (!ret);
5909 /* Emit SLB instruction pattern. */
5910 if (!register_operand (src, GET_MODE (dst)))
5911 src = force_reg (GET_MODE (dst), src);
5913 op_res = gen_rtx_MINUS (GET_MODE (dst),
5914 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5915 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5916 gen_rtx_REG (cc_mode, CC_REGNUM),
5917 const0_rtx));
5918 p = rtvec_alloc (2);
5919 RTVEC_ELT (p, 0) =
5920 gen_rtx_SET (dst, op_res);
5921 RTVEC_ELT (p, 1) =
5922 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5923 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5925 return true;
5928 return false;
5931 /* Expand code for the insv template. Return true if successful. */
5933 bool
5934 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5936 int bitsize = INTVAL (op1);
5937 int bitpos = INTVAL (op2);
5938 machine_mode mode = GET_MODE (dest);
5939 machine_mode smode;
5940 int smode_bsize, mode_bsize;
5941 rtx op, clobber;
5943 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5944 return false;
5946 /* Generate INSERT IMMEDIATE (IILL et al). */
5947 /* (set (ze (reg)) (const_int)). */
5948 if (TARGET_ZARCH
5949 && register_operand (dest, word_mode)
5950 && (bitpos % 16) == 0
5951 && (bitsize % 16) == 0
5952 && const_int_operand (src, VOIDmode))
5954 HOST_WIDE_INT val = INTVAL (src);
5955 int regpos = bitpos + bitsize;
5957 while (regpos > bitpos)
5959 machine_mode putmode;
5960 int putsize;
5962 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5963 putmode = SImode;
5964 else
5965 putmode = HImode;
5967 putsize = GET_MODE_BITSIZE (putmode);
5968 regpos -= putsize;
5969 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5970 GEN_INT (putsize),
5971 GEN_INT (regpos)),
5972 gen_int_mode (val, putmode));
5973 val >>= putsize;
5975 gcc_assert (regpos == bitpos);
5976 return true;
5979 smode = smallest_mode_for_size (bitsize, MODE_INT);
5980 smode_bsize = GET_MODE_BITSIZE (smode);
5981 mode_bsize = GET_MODE_BITSIZE (mode);
5983 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5984 if (bitpos == 0
5985 && (bitsize % BITS_PER_UNIT) == 0
5986 && MEM_P (dest)
5987 && (register_operand (src, word_mode)
5988 || const_int_operand (src, VOIDmode)))
5990 /* Emit standard pattern if possible. */
5991 if (smode_bsize == bitsize)
5993 emit_move_insn (adjust_address (dest, smode, 0),
5994 gen_lowpart (smode, src));
5995 return true;
5998 /* (set (ze (mem)) (const_int)). */
5999 else if (const_int_operand (src, VOIDmode))
6001 int size = bitsize / BITS_PER_UNIT;
6002 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6003 BLKmode,
6004 UNITS_PER_WORD - size);
6006 dest = adjust_address (dest, BLKmode, 0);
6007 set_mem_size (dest, size);
6008 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6009 return true;
6012 /* (set (ze (mem)) (reg)). */
6013 else if (register_operand (src, word_mode))
6015 if (bitsize <= 32)
6016 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6017 const0_rtx), src);
6018 else
6020 /* Emit st,stcmh sequence. */
6021 int stcmh_width = bitsize - 32;
6022 int size = stcmh_width / BITS_PER_UNIT;
6024 emit_move_insn (adjust_address (dest, SImode, size),
6025 gen_lowpart (SImode, src));
6026 set_mem_size (dest, size);
6027 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6028 GEN_INT (stcmh_width),
6029 const0_rtx),
6030 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6032 return true;
6036 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6037 if ((bitpos % BITS_PER_UNIT) == 0
6038 && (bitsize % BITS_PER_UNIT) == 0
6039 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6040 && MEM_P (src)
6041 && (mode == DImode || mode == SImode)
6042 && register_operand (dest, mode))
6044 /* Emit a strict_low_part pattern if possible. */
6045 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6047 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6048 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6049 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6050 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6051 return true;
6054 /* ??? There are more powerful versions of ICM that are not
6055 completely represented in the md file. */
6058 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6059 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6061 machine_mode mode_s = GET_MODE (src);
6063 if (mode_s == VOIDmode)
6065 /* For constant zero values the representation with AND
6066 appears to be folded in more situations than the (set
6067 (zero_extract) ...).
6068 We only do this when the start and end of the bitfield
6069 remain in the same SImode chunk. That way nihf or nilf
6070 can be used.
6071 The AND patterns might still generate a risbg for this. */
6072 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6073 return false;
6074 else
6075 src = force_reg (mode, src);
6077 else if (mode_s != mode)
6079 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6080 src = force_reg (mode_s, src);
6081 src = gen_lowpart (mode, src);
6084 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6085 op = gen_rtx_SET (op, src);
6087 if (!TARGET_ZEC12)
6089 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6090 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6092 emit_insn (op);
6094 return true;
6097 return false;
6100 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6101 register that holds VAL of mode MODE shifted by COUNT bits. */
6103 static inline rtx
6104 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6106 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6107 NULL_RTX, 1, OPTAB_DIRECT);
6108 return expand_simple_binop (SImode, ASHIFT, val, count,
6109 NULL_RTX, 1, OPTAB_DIRECT);
6112 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6113 the result in TARGET. */
6115 void
6116 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6117 rtx cmp_op1, rtx cmp_op2)
6119 machine_mode mode = GET_MODE (target);
6120 bool neg_p = false, swap_p = false;
6121 rtx tmp;
6123 if (GET_MODE (cmp_op1) == V2DFmode)
6125 switch (cond)
6127 /* NE a != b -> !(a == b) */
6128 case NE: cond = EQ; neg_p = true; break;
6129 /* UNGT a u> b -> !(b >= a) */
6130 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6131 /* UNGE a u>= b -> !(b > a) */
6132 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6133 /* LE: a <= b -> b >= a */
6134 case LE: cond = GE; swap_p = true; break;
6135 /* UNLE: a u<= b -> !(a > b) */
6136 case UNLE: cond = GT; neg_p = true; break;
6137 /* LT: a < b -> b > a */
6138 case LT: cond = GT; swap_p = true; break;
6139 /* UNLT: a u< b -> !(a >= b) */
6140 case UNLT: cond = GE; neg_p = true; break;
6141 case UNEQ:
6142 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6143 return;
6144 case LTGT:
6145 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6146 return;
6147 case ORDERED:
6148 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6149 return;
6150 case UNORDERED:
6151 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6152 return;
6153 default: break;
6156 else
6158 switch (cond)
6160 /* NE: a != b -> !(a == b) */
6161 case NE: cond = EQ; neg_p = true; break;
6162 /* GE: a >= b -> !(b > a) */
6163 case GE: cond = GT; neg_p = true; swap_p = true; break;
6164 /* GEU: a >= b -> !(b > a) */
6165 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6166 /* LE: a <= b -> !(a > b) */
6167 case LE: cond = GT; neg_p = true; break;
6168 /* LEU: a <= b -> !(a > b) */
6169 case LEU: cond = GTU; neg_p = true; break;
6170 /* LT: a < b -> b > a */
6171 case LT: cond = GT; swap_p = true; break;
6172 /* LTU: a < b -> b > a */
6173 case LTU: cond = GTU; swap_p = true; break;
6174 default: break;
6178 if (swap_p)
6180 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6183 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6184 mode,
6185 cmp_op1, cmp_op2)));
6186 if (neg_p)
6187 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6190 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6191 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6192 elements in CMP1 and CMP2 fulfill the comparison. */
6193 void
6194 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6195 rtx cmp1, rtx cmp2, bool all_p)
6197 enum rtx_code new_code = code;
6198 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
6199 rtx tmp_reg = gen_reg_rtx (SImode);
6200 bool swap_p = false;
6202 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6204 switch (code)
6206 case EQ: cmp_mode = CCVEQmode; break;
6207 case NE: cmp_mode = CCVEQmode; break;
6208 case GT: cmp_mode = CCVHmode; break;
6209 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
6210 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
6211 case LE: cmp_mode = CCVHmode; new_code = LE; break;
6212 case GTU: cmp_mode = CCVHUmode; break;
6213 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
6214 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
6215 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6216 default: gcc_unreachable ();
6218 scratch_mode = GET_MODE (cmp1);
6220 else if (GET_MODE (cmp1) == V2DFmode)
6222 switch (code)
6224 case EQ: cmp_mode = CCVEQmode; break;
6225 case NE: cmp_mode = CCVEQmode; break;
6226 case GT: cmp_mode = CCVFHmode; break;
6227 case GE: cmp_mode = CCVFHEmode; break;
6228 case UNLE: cmp_mode = CCVFHmode; break;
6229 case UNLT: cmp_mode = CCVFHEmode; break;
6230 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6231 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6232 default: gcc_unreachable ();
6234 scratch_mode = V2DImode;
6236 else
6237 gcc_unreachable ();
6239 if (!all_p)
6240 switch (cmp_mode)
6242 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6243 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6244 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6245 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6246 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6247 default: gcc_unreachable ();
6249 else
6250 /* The modes without ANY match the ALL modes. */
6251 full_cmp_mode = cmp_mode;
6253 if (swap_p)
6255 rtx tmp = cmp2;
6256 cmp2 = cmp1;
6257 cmp1 = tmp;
6260 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6261 gen_rtvec (2, gen_rtx_SET (
6262 gen_rtx_REG (cmp_mode, CC_REGNUM),
6263 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6264 gen_rtx_CLOBBER (VOIDmode,
6265 gen_rtx_SCRATCH (scratch_mode)))));
6266 emit_move_insn (target, const0_rtx);
6267 emit_move_insn (tmp_reg, const1_rtx);
6269 emit_move_insn (target,
6270 gen_rtx_IF_THEN_ELSE (SImode,
6271 gen_rtx_fmt_ee (new_code, VOIDmode,
6272 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6273 const0_rtx),
6274 target, tmp_reg));
6277 /* Generate a vector comparison expression loading either elements of
6278 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6279 and CMP_OP2. */
6281 void
6282 s390_expand_vcond (rtx target, rtx then, rtx els,
6283 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6285 rtx tmp;
6286 machine_mode result_mode;
6287 rtx result_target;
6289 machine_mode target_mode = GET_MODE (target);
6290 machine_mode cmp_mode = GET_MODE (cmp_op1);
6291 rtx op = (cond == LT) ? els : then;
6293 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6294 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6295 for short and byte (x >> 15 and x >> 7 respectively). */
6296 if ((cond == LT || cond == GE)
6297 && target_mode == cmp_mode
6298 && cmp_op2 == CONST0_RTX (cmp_mode)
6299 && op == CONST0_RTX (target_mode)
6300 && s390_vector_mode_supported_p (target_mode)
6301 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6303 rtx negop = (cond == LT) ? then : els;
6305 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6307 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6308 if (negop == CONST1_RTX (target_mode))
6310 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6311 GEN_INT (shift), target,
6312 1, OPTAB_DIRECT);
6313 if (res != target)
6314 emit_move_insn (target, res);
6315 return;
6318 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6319 else if (all_ones_operand (negop, target_mode))
6321 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6322 GEN_INT (shift), target,
6323 0, OPTAB_DIRECT);
6324 if (res != target)
6325 emit_move_insn (target, res);
6326 return;
6330 /* We always use an integral type vector to hold the comparison
6331 result. */
6332 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6333 result_target = gen_reg_rtx (result_mode);
6335 /* We allow vector immediates as comparison operands that
6336 can be handled by the optimization above but not by the
6337 following code. Hence, force them into registers here. */
6338 if (!REG_P (cmp_op1))
6339 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6341 if (!REG_P (cmp_op2))
6342 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6344 s390_expand_vec_compare (result_target, cond,
6345 cmp_op1, cmp_op2);
6347 /* If the results are supposed to be either -1 or 0 we are done
6348 since this is what our compare instructions generate anyway. */
6349 if (all_ones_operand (then, GET_MODE (then))
6350 && const0_operand (els, GET_MODE (els)))
6352 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6353 result_target, 0));
6354 return;
6357 /* Otherwise we will do a vsel afterwards. */
6358 /* This gets triggered e.g.
6359 with gcc.c-torture/compile/pr53410-1.c */
6360 if (!REG_P (then))
6361 then = force_reg (target_mode, then);
6363 if (!REG_P (els))
6364 els = force_reg (target_mode, els);
6366 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6367 result_target,
6368 CONST0_RTX (result_mode));
6370 /* We compared the result against zero above so we have to swap then
6371 and els here. */
6372 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6374 gcc_assert (target_mode == GET_MODE (then));
6375 emit_insn (gen_rtx_SET (target, tmp));
6378 /* Emit the RTX necessary to initialize the vector TARGET with values
6379 in VALS. */
6380 void
6381 s390_expand_vec_init (rtx target, rtx vals)
6383 machine_mode mode = GET_MODE (target);
6384 machine_mode inner_mode = GET_MODE_INNER (mode);
6385 int n_elts = GET_MODE_NUNITS (mode);
6386 bool all_same = true, all_regs = true, all_const_int = true;
6387 rtx x;
6388 int i;
6390 for (i = 0; i < n_elts; ++i)
6392 x = XVECEXP (vals, 0, i);
6394 if (!CONST_INT_P (x))
6395 all_const_int = false;
6397 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6398 all_same = false;
6400 if (!REG_P (x))
6401 all_regs = false;
6404 /* Use vector gen mask or vector gen byte mask if possible. */
6405 if (all_same && all_const_int
6406 && (XVECEXP (vals, 0, 0) == const0_rtx
6407 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6408 NULL, NULL)
6409 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6411 emit_insn (gen_rtx_SET (target,
6412 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6413 return;
6416 if (all_same)
6418 emit_insn (gen_rtx_SET (target,
6419 gen_rtx_VEC_DUPLICATE (mode,
6420 XVECEXP (vals, 0, 0))));
6421 return;
6424 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6426 /* Use vector load pair. */
6427 emit_insn (gen_rtx_SET (target,
6428 gen_rtx_VEC_CONCAT (mode,
6429 XVECEXP (vals, 0, 0),
6430 XVECEXP (vals, 0, 1))));
6431 return;
6434 /* We are about to set the vector elements one by one. Zero out the
6435 full register first in order to help the data flow framework to
6436 detect it as full VR set. */
6437 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6439 /* Unfortunately the vec_init expander is not allowed to fail. So
6440 we have to implement the fallback ourselves. */
6441 for (i = 0; i < n_elts; i++)
6442 emit_insn (gen_rtx_SET (target,
6443 gen_rtx_UNSPEC (mode,
6444 gen_rtvec (3, XVECEXP (vals, 0, i),
6445 GEN_INT (i), target),
6446 UNSPEC_VEC_SET)));
6449 /* Structure to hold the initial parameters for a compare_and_swap operation
6450 in HImode and QImode. */
6452 struct alignment_context
6454 rtx memsi; /* SI aligned memory location. */
6455 rtx shift; /* Bit offset with regard to lsb. */
6456 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6457 rtx modemaski; /* ~modemask */
6458 bool aligned; /* True if memory is aligned, false else. */
6461 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6462 structure AC for transparent simplifying, if the memory alignment is known
6463 to be at least 32bit. MEM is the memory location for the actual operation
6464 and MODE its mode. */
6466 static void
6467 init_alignment_context (struct alignment_context *ac, rtx mem,
6468 machine_mode mode)
6470 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6471 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6473 if (ac->aligned)
6474 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6475 else
6477 /* Alignment is unknown. */
6478 rtx byteoffset, addr, align;
6480 /* Force the address into a register. */
6481 addr = force_reg (Pmode, XEXP (mem, 0));
6483 /* Align it to SImode. */
6484 align = expand_simple_binop (Pmode, AND, addr,
6485 GEN_INT (-GET_MODE_SIZE (SImode)),
6486 NULL_RTX, 1, OPTAB_DIRECT);
6487 /* Generate MEM. */
6488 ac->memsi = gen_rtx_MEM (SImode, align);
6489 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6490 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6491 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6493 /* Calculate shiftcount. */
6494 byteoffset = expand_simple_binop (Pmode, AND, addr,
6495 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6496 NULL_RTX, 1, OPTAB_DIRECT);
6497 /* As we already have some offset, evaluate the remaining distance. */
6498 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6499 NULL_RTX, 1, OPTAB_DIRECT);
6502 /* Shift is the byte count, but we need the bitcount. */
6503 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6504 NULL_RTX, 1, OPTAB_DIRECT);
6506 /* Calculate masks. */
6507 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6508 GEN_INT (GET_MODE_MASK (mode)),
6509 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6510 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6511 NULL_RTX, 1);
6514 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6515 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6516 perform the merge in SEQ2. */
6518 static rtx
6519 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6520 machine_mode mode, rtx val, rtx ins)
6522 rtx tmp;
6524 if (ac->aligned)
6526 start_sequence ();
6527 tmp = copy_to_mode_reg (SImode, val);
6528 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6529 const0_rtx, ins))
6531 *seq1 = NULL;
6532 *seq2 = get_insns ();
6533 end_sequence ();
6534 return tmp;
6536 end_sequence ();
6539 /* Failed to use insv. Generate a two part shift and mask. */
6540 start_sequence ();
6541 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6542 *seq1 = get_insns ();
6543 end_sequence ();
6545 start_sequence ();
6546 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6547 *seq2 = get_insns ();
6548 end_sequence ();
6550 return tmp;
6553 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6554 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6555 value to set if CMP == MEM. */
6557 void
6558 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6559 rtx cmp, rtx new_rtx, bool is_weak)
6561 struct alignment_context ac;
6562 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6563 rtx res = gen_reg_rtx (SImode);
6564 rtx_code_label *csloop = NULL, *csend = NULL;
6566 gcc_assert (MEM_P (mem));
6568 init_alignment_context (&ac, mem, mode);
6570 /* Load full word. Subsequent loads are performed by CS. */
6571 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6572 NULL_RTX, 1, OPTAB_DIRECT);
6574 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6575 possible, we try to use insv to make this happen efficiently. If
6576 that fails we'll generate code both inside and outside the loop. */
6577 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6578 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6580 if (seq0)
6581 emit_insn (seq0);
6582 if (seq1)
6583 emit_insn (seq1);
6585 /* Start CS loop. */
6586 if (!is_weak)
6588 /* Begin assuming success. */
6589 emit_move_insn (btarget, const1_rtx);
6591 csloop = gen_label_rtx ();
6592 csend = gen_label_rtx ();
6593 emit_label (csloop);
6596 /* val = "<mem>00..0<mem>"
6597 * cmp = "00..0<cmp>00..0"
6598 * new = "00..0<new>00..0"
6601 emit_insn (seq2);
6602 emit_insn (seq3);
6604 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6605 if (is_weak)
6606 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6607 else
6609 rtx tmp;
6611 /* Jump to end if we're done (likely?). */
6612 s390_emit_jump (csend, cc);
6614 /* Check for changes outside mode, and loop internal if so.
6615 Arrange the moves so that the compare is adjacent to the
6616 branch so that we can generate CRJ. */
6617 tmp = copy_to_reg (val);
6618 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6619 1, OPTAB_DIRECT);
6620 cc = s390_emit_compare (NE, val, tmp);
6621 s390_emit_jump (csloop, cc);
6623 /* Failed. */
6624 emit_move_insn (btarget, const0_rtx);
6625 emit_label (csend);
6628 /* Return the correct part of the bitfield. */
6629 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6630 NULL_RTX, 1, OPTAB_DIRECT), 1);
6633 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6634 and VAL the value to play with. If AFTER is true then store the value
6635 MEM holds after the operation, if AFTER is false then store the value MEM
6636 holds before the operation. If TARGET is zero then discard that value, else
6637 store it to TARGET. */
6639 void
6640 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6641 rtx target, rtx mem, rtx val, bool after)
6643 struct alignment_context ac;
6644 rtx cmp;
6645 rtx new_rtx = gen_reg_rtx (SImode);
6646 rtx orig = gen_reg_rtx (SImode);
6647 rtx_code_label *csloop = gen_label_rtx ();
6649 gcc_assert (!target || register_operand (target, VOIDmode));
6650 gcc_assert (MEM_P (mem));
6652 init_alignment_context (&ac, mem, mode);
6654 /* Shift val to the correct bit positions.
6655 Preserve "icm", but prevent "ex icm". */
6656 if (!(ac.aligned && code == SET && MEM_P (val)))
6657 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6659 /* Further preparation insns. */
6660 if (code == PLUS || code == MINUS)
6661 emit_move_insn (orig, val);
6662 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6663 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6664 NULL_RTX, 1, OPTAB_DIRECT);
6666 /* Load full word. Subsequent loads are performed by CS. */
6667 cmp = force_reg (SImode, ac.memsi);
6669 /* Start CS loop. */
6670 emit_label (csloop);
6671 emit_move_insn (new_rtx, cmp);
6673 /* Patch new with val at correct position. */
6674 switch (code)
6676 case PLUS:
6677 case MINUS:
6678 val = expand_simple_binop (SImode, code, new_rtx, orig,
6679 NULL_RTX, 1, OPTAB_DIRECT);
6680 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6681 NULL_RTX, 1, OPTAB_DIRECT);
6682 /* FALLTHRU */
6683 case SET:
6684 if (ac.aligned && MEM_P (val))
6685 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6686 0, 0, SImode, val, false);
6687 else
6689 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6690 NULL_RTX, 1, OPTAB_DIRECT);
6691 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6692 NULL_RTX, 1, OPTAB_DIRECT);
6694 break;
6695 case AND:
6696 case IOR:
6697 case XOR:
6698 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6699 NULL_RTX, 1, OPTAB_DIRECT);
6700 break;
6701 case MULT: /* NAND */
6702 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6703 NULL_RTX, 1, OPTAB_DIRECT);
6704 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6705 NULL_RTX, 1, OPTAB_DIRECT);
6706 break;
6707 default:
6708 gcc_unreachable ();
6711 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6712 ac.memsi, cmp, new_rtx));
6714 /* Return the correct part of the bitfield. */
6715 if (target)
6716 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6717 after ? new_rtx : cmp, ac.shift,
6718 NULL_RTX, 1, OPTAB_DIRECT), 1);
6721 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6722 We need to emit DTP-relative relocations. */
6724 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6726 static void
6727 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6729 switch (size)
6731 case 4:
6732 fputs ("\t.long\t", file);
6733 break;
6734 case 8:
6735 fputs ("\t.quad\t", file);
6736 break;
6737 default:
6738 gcc_unreachable ();
6740 output_addr_const (file, x);
6741 fputs ("@DTPOFF", file);
6744 /* Return the proper mode for REGNO being represented in the dwarf
6745 unwind table. */
6746 machine_mode
6747 s390_dwarf_frame_reg_mode (int regno)
6749 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6751 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6752 if (GENERAL_REGNO_P (regno))
6753 save_mode = Pmode;
6755 /* The rightmost 64 bits of vector registers are call-clobbered. */
6756 if (GET_MODE_SIZE (save_mode) > 8)
6757 save_mode = DImode;
6759 return save_mode;
6762 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6763 /* Implement TARGET_MANGLE_TYPE. */
6765 static const char *
6766 s390_mangle_type (const_tree type)
6768 type = TYPE_MAIN_VARIANT (type);
6770 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6771 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6772 return NULL;
6774 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6775 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6776 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6777 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6779 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6780 && TARGET_LONG_DOUBLE_128)
6781 return "g";
6783 /* For all other types, use normal C++ mangling. */
6784 return NULL;
6786 #endif
6788 /* In the name of slightly smaller debug output, and to cater to
6789 general assembler lossage, recognize various UNSPEC sequences
6790 and turn them back into a direct symbol reference. */
6792 static rtx
6793 s390_delegitimize_address (rtx orig_x)
6795 rtx x, y;
6797 orig_x = delegitimize_mem_from_attrs (orig_x);
6798 x = orig_x;
6800 /* Extract the symbol ref from:
6801 (plus:SI (reg:SI 12 %r12)
6802 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6803 UNSPEC_GOTOFF/PLTOFF)))
6805 (plus:SI (reg:SI 12 %r12)
6806 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6807 UNSPEC_GOTOFF/PLTOFF)
6808 (const_int 4 [0x4])))) */
6809 if (GET_CODE (x) == PLUS
6810 && REG_P (XEXP (x, 0))
6811 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6812 && GET_CODE (XEXP (x, 1)) == CONST)
6814 HOST_WIDE_INT offset = 0;
6816 /* The const operand. */
6817 y = XEXP (XEXP (x, 1), 0);
6819 if (GET_CODE (y) == PLUS
6820 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6822 offset = INTVAL (XEXP (y, 1));
6823 y = XEXP (y, 0);
6826 if (GET_CODE (y) == UNSPEC
6827 && (XINT (y, 1) == UNSPEC_GOTOFF
6828 || XINT (y, 1) == UNSPEC_PLTOFF))
6829 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6832 if (GET_CODE (x) != MEM)
6833 return orig_x;
6835 x = XEXP (x, 0);
6836 if (GET_CODE (x) == PLUS
6837 && GET_CODE (XEXP (x, 1)) == CONST
6838 && GET_CODE (XEXP (x, 0)) == REG
6839 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6841 y = XEXP (XEXP (x, 1), 0);
6842 if (GET_CODE (y) == UNSPEC
6843 && XINT (y, 1) == UNSPEC_GOT)
6844 y = XVECEXP (y, 0, 0);
6845 else
6846 return orig_x;
6848 else if (GET_CODE (x) == CONST)
6850 /* Extract the symbol ref from:
6851 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6852 UNSPEC_PLT/GOTENT))) */
6854 y = XEXP (x, 0);
6855 if (GET_CODE (y) == UNSPEC
6856 && (XINT (y, 1) == UNSPEC_GOTENT
6857 || XINT (y, 1) == UNSPEC_PLT))
6858 y = XVECEXP (y, 0, 0);
6859 else
6860 return orig_x;
6862 else
6863 return orig_x;
6865 if (GET_MODE (orig_x) != Pmode)
6867 if (GET_MODE (orig_x) == BLKmode)
6868 return orig_x;
6869 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6870 if (y == NULL_RTX)
6871 return orig_x;
6873 return y;
6876 /* Output operand OP to stdio stream FILE.
6877 OP is an address (register + offset) which is not used to address data;
6878 instead the rightmost bits are interpreted as the value. */
6880 static void
6881 print_shift_count_operand (FILE *file, rtx op)
6883 HOST_WIDE_INT offset;
6884 rtx base;
6886 /* Extract base register and offset. */
6887 if (!s390_decompose_shift_count (op, &base, &offset))
6888 gcc_unreachable ();
6890 /* Sanity check. */
6891 if (base)
6893 gcc_assert (GET_CODE (base) == REG);
6894 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6895 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6898 /* Offsets are constricted to twelve bits. */
6899 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6900 if (base)
6901 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6904 /* Assigns the number of NOP halfwords to be emitted before and after the
6905 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
6906 If hotpatching is disabled for the function, the values are set to zero.
6909 static void
6910 s390_function_num_hotpatch_hw (tree decl,
6911 int *hw_before,
6912 int *hw_after)
6914 tree attr;
6916 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6918 /* Handle the arguments of the hotpatch attribute. The values
6919 specified via attribute might override the cmdline argument
6920 values. */
6921 if (attr)
6923 tree args = TREE_VALUE (attr);
6925 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6926 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6928 else
6930 /* Use the values specified by the cmdline arguments. */
6931 *hw_before = s390_hotpatch_hw_before_label;
6932 *hw_after = s390_hotpatch_hw_after_label;
6936 /* Write the current .machine and .machinemode specification to the assembler
6937 file. */
6939 #ifdef HAVE_AS_MACHINE_MACHINEMODE
6940 static void
6941 s390_asm_output_machine_for_arch (FILE *asm_out_file)
6943 fprintf (asm_out_file, "\t.machinemode %s\n",
6944 (TARGET_ZARCH) ? "zarch" : "esa");
6945 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
6946 if (S390_USE_ARCHITECTURE_MODIFIERS)
6948 int cpu_flags;
6950 cpu_flags = processor_flags_table[(int) s390_arch];
6951 if (TARGET_HTM && !(cpu_flags & PF_TX))
6952 fprintf (asm_out_file, "+htm");
6953 else if (!TARGET_HTM && (cpu_flags & PF_TX))
6954 fprintf (asm_out_file, "+nohtm");
6955 if (TARGET_VX && !(cpu_flags & PF_VX))
6956 fprintf (asm_out_file, "+vx");
6957 else if (!TARGET_VX && (cpu_flags & PF_VX))
6958 fprintf (asm_out_file, "+novx");
6960 fprintf (asm_out_file, "\"\n");
6963 /* Write an extra function header before the very start of the function. */
6965 void
6966 s390_asm_output_function_prefix (FILE *asm_out_file,
6967 const char *fnname ATTRIBUTE_UNUSED)
6969 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
6970 return;
6971 /* Since only the function specific options are saved but not the indications
6972 which options are set, it's too much work here to figure out which options
6973 have actually changed. Thus, generate .machine and .machinemode whenever a
6974 function has the target attribute or pragma. */
6975 fprintf (asm_out_file, "\t.machinemode push\n");
6976 fprintf (asm_out_file, "\t.machine push\n");
6977 s390_asm_output_machine_for_arch (asm_out_file);
6980 /* Write an extra function footer after the very end of the function. */
6982 void
6983 s390_asm_declare_function_size (FILE *asm_out_file,
6984 const char *fnname, tree decl)
6986 if (!flag_inhibit_size_directive)
6987 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
6988 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
6989 return;
6990 fprintf (asm_out_file, "\t.machine pop\n");
6991 fprintf (asm_out_file, "\t.machinemode pop\n");
6993 #endif
6995 /* Write the extra assembler code needed to declare a function properly. */
6997 void
6998 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6999 tree decl)
7001 int hw_before, hw_after;
7003 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7004 if (hw_before > 0)
7006 unsigned int function_alignment;
7007 int i;
7009 /* Add a trampoline code area before the function label and initialize it
7010 with two-byte nop instructions. This area can be overwritten with code
7011 that jumps to a patched version of the function. */
7012 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
7013 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7014 hw_before);
7015 for (i = 1; i < hw_before; i++)
7016 fputs ("\tnopr\t%r7\n", asm_out_file);
7018 /* Note: The function label must be aligned so that (a) the bytes of the
7019 following nop do not cross a cacheline boundary, and (b) a jump address
7020 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7021 stored directly before the label without crossing a cacheline
7022 boundary. All this is necessary to make sure the trampoline code can
7023 be changed atomically.
7024 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7025 if there are NOPs before the function label, the alignment is placed
7026 before them. So it is necessary to duplicate the alignment after the
7027 NOPs. */
7028 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7029 if (! DECL_USER_ALIGN (decl))
7030 function_alignment = MAX (function_alignment,
7031 (unsigned int) align_functions);
7032 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7033 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7036 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7038 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7039 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7040 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7041 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7042 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7043 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7044 s390_warn_framesize);
7045 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7046 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7047 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7048 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7049 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7050 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7051 TARGET_PACKED_STACK);
7052 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7053 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7054 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7055 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7056 s390_warn_dynamicstack_p);
7058 ASM_OUTPUT_LABEL (asm_out_file, fname);
7059 if (hw_after > 0)
7060 asm_fprintf (asm_out_file,
7061 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7062 hw_after);
7065 /* Output machine-dependent UNSPECs occurring in address constant X
7066 in assembler syntax to stdio stream FILE. Returns true if the
7067 constant X could be recognized, false otherwise. */
7069 static bool
7070 s390_output_addr_const_extra (FILE *file, rtx x)
7072 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7073 switch (XINT (x, 1))
7075 case UNSPEC_GOTENT:
7076 output_addr_const (file, XVECEXP (x, 0, 0));
7077 fprintf (file, "@GOTENT");
7078 return true;
7079 case UNSPEC_GOT:
7080 output_addr_const (file, XVECEXP (x, 0, 0));
7081 fprintf (file, "@GOT");
7082 return true;
7083 case UNSPEC_GOTOFF:
7084 output_addr_const (file, XVECEXP (x, 0, 0));
7085 fprintf (file, "@GOTOFF");
7086 return true;
7087 case UNSPEC_PLT:
7088 output_addr_const (file, XVECEXP (x, 0, 0));
7089 fprintf (file, "@PLT");
7090 return true;
7091 case UNSPEC_PLTOFF:
7092 output_addr_const (file, XVECEXP (x, 0, 0));
7093 fprintf (file, "@PLTOFF");
7094 return true;
7095 case UNSPEC_TLSGD:
7096 output_addr_const (file, XVECEXP (x, 0, 0));
7097 fprintf (file, "@TLSGD");
7098 return true;
7099 case UNSPEC_TLSLDM:
7100 assemble_name (file, get_some_local_dynamic_name ());
7101 fprintf (file, "@TLSLDM");
7102 return true;
7103 case UNSPEC_DTPOFF:
7104 output_addr_const (file, XVECEXP (x, 0, 0));
7105 fprintf (file, "@DTPOFF");
7106 return true;
7107 case UNSPEC_NTPOFF:
7108 output_addr_const (file, XVECEXP (x, 0, 0));
7109 fprintf (file, "@NTPOFF");
7110 return true;
7111 case UNSPEC_GOTNTPOFF:
7112 output_addr_const (file, XVECEXP (x, 0, 0));
7113 fprintf (file, "@GOTNTPOFF");
7114 return true;
7115 case UNSPEC_INDNTPOFF:
7116 output_addr_const (file, XVECEXP (x, 0, 0));
7117 fprintf (file, "@INDNTPOFF");
7118 return true;
7121 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7122 switch (XINT (x, 1))
7124 case UNSPEC_POOL_OFFSET:
7125 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7126 output_addr_const (file, x);
7127 return true;
7129 return false;
7132 /* Output address operand ADDR in assembler syntax to
7133 stdio stream FILE. */
7135 void
7136 print_operand_address (FILE *file, rtx addr)
7138 struct s390_address ad;
7140 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7142 if (!TARGET_Z10)
7144 output_operand_lossage ("symbolic memory references are "
7145 "only supported on z10 or later");
7146 return;
7148 output_addr_const (file, addr);
7149 return;
7152 if (!s390_decompose_address (addr, &ad)
7153 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7154 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7155 output_operand_lossage ("cannot decompose address");
7157 if (ad.disp)
7158 output_addr_const (file, ad.disp);
7159 else
7160 fprintf (file, "0");
7162 if (ad.base && ad.indx)
7163 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7164 reg_names[REGNO (ad.base)]);
7165 else if (ad.base)
7166 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7169 /* Output operand X in assembler syntax to stdio stream FILE.
7170 CODE specified the format flag. The following format flags
7171 are recognized:
7173 'C': print opcode suffix for branch condition.
7174 'D': print opcode suffix for inverse branch condition.
7175 'E': print opcode suffix for branch on index instruction.
7176 'G': print the size of the operand in bytes.
7177 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7178 'M': print the second word of a TImode operand.
7179 'N': print the second word of a DImode operand.
7180 'O': print only the displacement of a memory reference or address.
7181 'R': print only the base register of a memory reference or address.
7182 'S': print S-type memory reference (base+displacement).
7183 'Y': print shift count operand.
7185 'b': print integer X as if it's an unsigned byte.
7186 'c': print integer X as if it's an signed byte.
7187 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7188 'f': "end" contiguous bitmask X in SImode.
7189 'h': print integer X as if it's a signed halfword.
7190 'i': print the first nonzero HImode part of X.
7191 'j': print the first HImode part unequal to -1 of X.
7192 'k': print the first nonzero SImode part of X.
7193 'm': print the first SImode part unequal to -1 of X.
7194 'o': print integer X as if it's an unsigned 32bit word.
7195 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7196 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7197 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7198 'x': print integer X as if it's an unsigned halfword.
7199 'v': print register number as vector register (v1 instead of f1).
7202 void
7203 print_operand (FILE *file, rtx x, int code)
7205 HOST_WIDE_INT ival;
7207 switch (code)
7209 case 'C':
7210 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7211 return;
7213 case 'D':
7214 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7215 return;
7217 case 'E':
7218 if (GET_CODE (x) == LE)
7219 fprintf (file, "l");
7220 else if (GET_CODE (x) == GT)
7221 fprintf (file, "h");
7222 else
7223 output_operand_lossage ("invalid comparison operator "
7224 "for 'E' output modifier");
7225 return;
7227 case 'J':
7228 if (GET_CODE (x) == SYMBOL_REF)
7230 fprintf (file, "%s", ":tls_load:");
7231 output_addr_const (file, x);
7233 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7235 fprintf (file, "%s", ":tls_gdcall:");
7236 output_addr_const (file, XVECEXP (x, 0, 0));
7238 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7240 fprintf (file, "%s", ":tls_ldcall:");
7241 const char *name = get_some_local_dynamic_name ();
7242 gcc_assert (name);
7243 assemble_name (file, name);
7245 else
7246 output_operand_lossage ("invalid reference for 'J' output modifier");
7247 return;
7249 case 'G':
7250 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7251 return;
7253 case 'O':
7255 struct s390_address ad;
7256 int ret;
7258 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7260 if (!ret
7261 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7262 || ad.indx)
7264 output_operand_lossage ("invalid address for 'O' output modifier");
7265 return;
7268 if (ad.disp)
7269 output_addr_const (file, ad.disp);
7270 else
7271 fprintf (file, "0");
7273 return;
7275 case 'R':
7277 struct s390_address ad;
7278 int ret;
7280 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7282 if (!ret
7283 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7284 || ad.indx)
7286 output_operand_lossage ("invalid address for 'R' output modifier");
7287 return;
7290 if (ad.base)
7291 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7292 else
7293 fprintf (file, "0");
7295 return;
7297 case 'S':
7299 struct s390_address ad;
7300 int ret;
7302 if (!MEM_P (x))
7304 output_operand_lossage ("memory reference expected for "
7305 "'S' output modifier");
7306 return;
7308 ret = s390_decompose_address (XEXP (x, 0), &ad);
7310 if (!ret
7311 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7312 || ad.indx)
7314 output_operand_lossage ("invalid address for 'S' output modifier");
7315 return;
7318 if (ad.disp)
7319 output_addr_const (file, ad.disp);
7320 else
7321 fprintf (file, "0");
7323 if (ad.base)
7324 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7326 return;
7328 case 'N':
7329 if (GET_CODE (x) == REG)
7330 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7331 else if (GET_CODE (x) == MEM)
7332 x = change_address (x, VOIDmode,
7333 plus_constant (Pmode, XEXP (x, 0), 4));
7334 else
7335 output_operand_lossage ("register or memory expression expected "
7336 "for 'N' output modifier");
7337 break;
7339 case 'M':
7340 if (GET_CODE (x) == REG)
7341 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7342 else if (GET_CODE (x) == MEM)
7343 x = change_address (x, VOIDmode,
7344 plus_constant (Pmode, XEXP (x, 0), 8));
7345 else
7346 output_operand_lossage ("register or memory expression expected "
7347 "for 'M' output modifier");
7348 break;
7350 case 'Y':
7351 print_shift_count_operand (file, x);
7352 return;
7355 switch (GET_CODE (x))
7357 case REG:
7358 /* Print FP regs as fx instead of vx when they are accessed
7359 through non-vector mode. */
7360 if (code == 'v'
7361 || VECTOR_NOFP_REG_P (x)
7362 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7363 || (VECTOR_REG_P (x)
7364 && (GET_MODE_SIZE (GET_MODE (x)) /
7365 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7366 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7367 else
7368 fprintf (file, "%s", reg_names[REGNO (x)]);
7369 break;
7371 case MEM:
7372 output_address (GET_MODE (x), XEXP (x, 0));
7373 break;
7375 case CONST:
7376 case CODE_LABEL:
7377 case LABEL_REF:
7378 case SYMBOL_REF:
7379 output_addr_const (file, x);
7380 break;
7382 case CONST_INT:
7383 ival = INTVAL (x);
7384 switch (code)
7386 case 0:
7387 break;
7388 case 'b':
7389 ival &= 0xff;
7390 break;
7391 case 'c':
7392 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7393 break;
7394 case 'x':
7395 ival &= 0xffff;
7396 break;
7397 case 'h':
7398 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7399 break;
7400 case 'i':
7401 ival = s390_extract_part (x, HImode, 0);
7402 break;
7403 case 'j':
7404 ival = s390_extract_part (x, HImode, -1);
7405 break;
7406 case 'k':
7407 ival = s390_extract_part (x, SImode, 0);
7408 break;
7409 case 'm':
7410 ival = s390_extract_part (x, SImode, -1);
7411 break;
7412 case 'o':
7413 ival &= 0xffffffff;
7414 break;
7415 case 'e': case 'f':
7416 case 's': case 't':
7418 int pos, len;
7419 bool ok;
7421 len = (code == 's' || code == 'e' ? 64 : 32);
7422 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7423 gcc_assert (ok);
7424 if (code == 's' || code == 't')
7425 ival = 64 - pos - len;
7426 else
7427 ival = 64 - 1 - pos;
7429 break;
7430 default:
7431 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7433 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7434 break;
7436 case CONST_WIDE_INT:
7437 if (code == 'b')
7438 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7439 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7440 else if (code == 'x')
7441 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7442 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7443 else if (code == 'h')
7444 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7445 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7446 else
7448 if (code == 0)
7449 output_operand_lossage ("invalid constant - try using "
7450 "an output modifier");
7451 else
7452 output_operand_lossage ("invalid constant for output modifier '%c'",
7453 code);
7455 break;
7456 case CONST_VECTOR:
7457 switch (code)
7459 case 'h':
7460 gcc_assert (const_vec_duplicate_p (x));
7461 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7462 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7463 break;
7464 case 'e':
7465 case 's':
7467 int start, stop, inner_len;
7468 bool ok;
7470 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7471 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7472 gcc_assert (ok);
7473 if (code == 's' || code == 't')
7474 ival = inner_len - stop - 1;
7475 else
7476 ival = inner_len - start - 1;
7477 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7479 break;
7480 case 't':
7482 unsigned mask;
7483 bool ok = s390_bytemask_vector_p (x, &mask);
7484 gcc_assert (ok);
7485 fprintf (file, "%u", mask);
7487 break;
7489 default:
7490 output_operand_lossage ("invalid constant vector for output "
7491 "modifier '%c'", code);
7493 break;
7495 default:
7496 if (code == 0)
7497 output_operand_lossage ("invalid expression - try using "
7498 "an output modifier");
7499 else
7500 output_operand_lossage ("invalid expression for output "
7501 "modifier '%c'", code);
7502 break;
7506 /* Target hook for assembling integer objects. We need to define it
7507 here to work a round a bug in some versions of GAS, which couldn't
7508 handle values smaller than INT_MIN when printed in decimal. */
7510 static bool
7511 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7513 if (size == 8 && aligned_p
7514 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7516 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7517 INTVAL (x));
7518 return true;
7520 return default_assemble_integer (x, size, aligned_p);
7523 /* Returns true if register REGNO is used for forming
7524 a memory address in expression X. */
7526 static bool
7527 reg_used_in_mem_p (int regno, rtx x)
7529 enum rtx_code code = GET_CODE (x);
7530 int i, j;
7531 const char *fmt;
7533 if (code == MEM)
7535 if (refers_to_regno_p (regno, XEXP (x, 0)))
7536 return true;
7538 else if (code == SET
7539 && GET_CODE (SET_DEST (x)) == PC)
7541 if (refers_to_regno_p (regno, SET_SRC (x)))
7542 return true;
7545 fmt = GET_RTX_FORMAT (code);
7546 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7548 if (fmt[i] == 'e'
7549 && reg_used_in_mem_p (regno, XEXP (x, i)))
7550 return true;
7552 else if (fmt[i] == 'E')
7553 for (j = 0; j < XVECLEN (x, i); j++)
7554 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7555 return true;
7557 return false;
7560 /* Returns true if expression DEP_RTX sets an address register
7561 used by instruction INSN to address memory. */
7563 static bool
7564 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7566 rtx target, pat;
7568 if (NONJUMP_INSN_P (dep_rtx))
7569 dep_rtx = PATTERN (dep_rtx);
7571 if (GET_CODE (dep_rtx) == SET)
7573 target = SET_DEST (dep_rtx);
7574 if (GET_CODE (target) == STRICT_LOW_PART)
7575 target = XEXP (target, 0);
7576 while (GET_CODE (target) == SUBREG)
7577 target = SUBREG_REG (target);
7579 if (GET_CODE (target) == REG)
7581 int regno = REGNO (target);
7583 if (s390_safe_attr_type (insn) == TYPE_LA)
7585 pat = PATTERN (insn);
7586 if (GET_CODE (pat) == PARALLEL)
7588 gcc_assert (XVECLEN (pat, 0) == 2);
7589 pat = XVECEXP (pat, 0, 0);
7591 gcc_assert (GET_CODE (pat) == SET);
7592 return refers_to_regno_p (regno, SET_SRC (pat));
7594 else if (get_attr_atype (insn) == ATYPE_AGEN)
7595 return reg_used_in_mem_p (regno, PATTERN (insn));
7598 return false;
7601 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
7604 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7606 rtx dep_rtx = PATTERN (dep_insn);
7607 int i;
7609 if (GET_CODE (dep_rtx) == SET
7610 && addr_generation_dependency_p (dep_rtx, insn))
7611 return 1;
7612 else if (GET_CODE (dep_rtx) == PARALLEL)
7614 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7616 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7617 return 1;
7620 return 0;
7624 /* A C statement (sans semicolon) to update the integer scheduling priority
7625 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7626 reduce the priority to execute INSN later. Do not define this macro if
7627 you do not need to adjust the scheduling priorities of insns.
7629 A STD instruction should be scheduled earlier,
7630 in order to use the bypass. */
7631 static int
7632 s390_adjust_priority (rtx_insn *insn, int priority)
7634 if (! INSN_P (insn))
7635 return priority;
7637 if (s390_tune <= PROCESSOR_2064_Z900)
7638 return priority;
7640 switch (s390_safe_attr_type (insn))
7642 case TYPE_FSTOREDF:
7643 case TYPE_FSTORESF:
7644 priority = priority << 3;
7645 break;
7646 case TYPE_STORE:
7647 case TYPE_STM:
7648 priority = priority << 1;
7649 break;
7650 default:
7651 break;
7653 return priority;
7657 /* The number of instructions that can be issued per cycle. */
7659 static int
7660 s390_issue_rate (void)
7662 switch (s390_tune)
7664 case PROCESSOR_2084_Z990:
7665 case PROCESSOR_2094_Z9_109:
7666 case PROCESSOR_2094_Z9_EC:
7667 case PROCESSOR_2817_Z196:
7668 return 3;
7669 case PROCESSOR_2097_Z10:
7670 return 2;
7671 case PROCESSOR_9672_G5:
7672 case PROCESSOR_9672_G6:
7673 case PROCESSOR_2064_Z900:
7674 /* Starting with EC12 we use the sched_reorder hook to take care
7675 of instruction dispatch constraints. The algorithm only
7676 picks the best instruction and assumes only a single
7677 instruction gets issued per cycle. */
7678 case PROCESSOR_2827_ZEC12:
7679 case PROCESSOR_2964_Z13:
7680 default:
7681 return 1;
7685 static int
7686 s390_first_cycle_multipass_dfa_lookahead (void)
7688 return 4;
7691 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7692 Fix up MEMs as required. */
7694 static void
7695 annotate_constant_pool_refs (rtx *x)
7697 int i, j;
7698 const char *fmt;
7700 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7701 || !CONSTANT_POOL_ADDRESS_P (*x));
7703 /* Literal pool references can only occur inside a MEM ... */
7704 if (GET_CODE (*x) == MEM)
7706 rtx memref = XEXP (*x, 0);
7708 if (GET_CODE (memref) == SYMBOL_REF
7709 && CONSTANT_POOL_ADDRESS_P (memref))
7711 rtx base = cfun->machine->base_reg;
7712 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7713 UNSPEC_LTREF);
7715 *x = replace_equiv_address (*x, addr);
7716 return;
7719 if (GET_CODE (memref) == CONST
7720 && GET_CODE (XEXP (memref, 0)) == PLUS
7721 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7722 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7723 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7725 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7726 rtx sym = XEXP (XEXP (memref, 0), 0);
7727 rtx base = cfun->machine->base_reg;
7728 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7729 UNSPEC_LTREF);
7731 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7732 return;
7736 /* ... or a load-address type pattern. */
7737 if (GET_CODE (*x) == SET)
7739 rtx addrref = SET_SRC (*x);
7741 if (GET_CODE (addrref) == SYMBOL_REF
7742 && CONSTANT_POOL_ADDRESS_P (addrref))
7744 rtx base = cfun->machine->base_reg;
7745 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7746 UNSPEC_LTREF);
7748 SET_SRC (*x) = addr;
7749 return;
7752 if (GET_CODE (addrref) == CONST
7753 && GET_CODE (XEXP (addrref, 0)) == PLUS
7754 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7755 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7756 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7758 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7759 rtx sym = XEXP (XEXP (addrref, 0), 0);
7760 rtx base = cfun->machine->base_reg;
7761 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7762 UNSPEC_LTREF);
7764 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7765 return;
7769 /* Annotate LTREL_BASE as well. */
7770 if (GET_CODE (*x) == UNSPEC
7771 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7773 rtx base = cfun->machine->base_reg;
7774 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7775 UNSPEC_LTREL_BASE);
7776 return;
7779 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7780 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7782 if (fmt[i] == 'e')
7784 annotate_constant_pool_refs (&XEXP (*x, i));
7786 else if (fmt[i] == 'E')
7788 for (j = 0; j < XVECLEN (*x, i); j++)
7789 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7794 /* Split all branches that exceed the maximum distance.
7795 Returns true if this created a new literal pool entry. */
7797 static int
7798 s390_split_branches (void)
7800 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7801 int new_literal = 0, ret;
7802 rtx_insn *insn;
7803 rtx pat, target;
7804 rtx *label;
7806 /* We need correct insn addresses. */
7808 shorten_branches (get_insns ());
7810 /* Find all branches that exceed 64KB, and split them. */
7812 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7814 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7815 continue;
7817 pat = PATTERN (insn);
7818 if (GET_CODE (pat) == PARALLEL)
7819 pat = XVECEXP (pat, 0, 0);
7820 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7821 continue;
7823 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7825 label = &SET_SRC (pat);
7827 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7829 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7830 label = &XEXP (SET_SRC (pat), 1);
7831 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7832 label = &XEXP (SET_SRC (pat), 2);
7833 else
7834 continue;
7836 else
7837 continue;
7839 if (get_attr_length (insn) <= 4)
7840 continue;
7842 /* We are going to use the return register as scratch register,
7843 make sure it will be saved/restored by the prologue/epilogue. */
7844 cfun_frame_layout.save_return_addr_p = 1;
7846 if (!flag_pic)
7848 new_literal = 1;
7849 rtx mem = force_const_mem (Pmode, *label);
7850 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7851 insn);
7852 INSN_ADDRESSES_NEW (set_insn, -1);
7853 annotate_constant_pool_refs (&PATTERN (set_insn));
7855 target = temp_reg;
7857 else
7859 new_literal = 1;
7860 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7861 UNSPEC_LTREL_OFFSET);
7862 target = gen_rtx_CONST (Pmode, target);
7863 target = force_const_mem (Pmode, target);
7864 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7865 insn);
7866 INSN_ADDRESSES_NEW (set_insn, -1);
7867 annotate_constant_pool_refs (&PATTERN (set_insn));
7869 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7870 cfun->machine->base_reg),
7871 UNSPEC_LTREL_BASE);
7872 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7875 ret = validate_change (insn, label, target, 0);
7876 gcc_assert (ret);
7879 return new_literal;
7883 /* Find an annotated literal pool symbol referenced in RTX X,
7884 and store it at REF. Will abort if X contains references to
7885 more than one such pool symbol; multiple references to the same
7886 symbol are allowed, however.
7888 The rtx pointed to by REF must be initialized to NULL_RTX
7889 by the caller before calling this routine. */
7891 static void
7892 find_constant_pool_ref (rtx x, rtx *ref)
7894 int i, j;
7895 const char *fmt;
7897 /* Ignore LTREL_BASE references. */
7898 if (GET_CODE (x) == UNSPEC
7899 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7900 return;
7901 /* Likewise POOL_ENTRY insns. */
7902 if (GET_CODE (x) == UNSPEC_VOLATILE
7903 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7904 return;
7906 gcc_assert (GET_CODE (x) != SYMBOL_REF
7907 || !CONSTANT_POOL_ADDRESS_P (x));
7909 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7911 rtx sym = XVECEXP (x, 0, 0);
7912 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7913 && CONSTANT_POOL_ADDRESS_P (sym));
7915 if (*ref == NULL_RTX)
7916 *ref = sym;
7917 else
7918 gcc_assert (*ref == sym);
7920 return;
7923 fmt = GET_RTX_FORMAT (GET_CODE (x));
7924 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7926 if (fmt[i] == 'e')
7928 find_constant_pool_ref (XEXP (x, i), ref);
7930 else if (fmt[i] == 'E')
7932 for (j = 0; j < XVECLEN (x, i); j++)
7933 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7938 /* Replace every reference to the annotated literal pool
7939 symbol REF in X by its base plus OFFSET. */
7941 static void
7942 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7944 int i, j;
7945 const char *fmt;
7947 gcc_assert (*x != ref);
7949 if (GET_CODE (*x) == UNSPEC
7950 && XINT (*x, 1) == UNSPEC_LTREF
7951 && XVECEXP (*x, 0, 0) == ref)
7953 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7954 return;
7957 if (GET_CODE (*x) == PLUS
7958 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7959 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7960 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7961 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7963 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7964 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7965 return;
7968 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7969 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7971 if (fmt[i] == 'e')
7973 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7975 else if (fmt[i] == 'E')
7977 for (j = 0; j < XVECLEN (*x, i); j++)
7978 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7983 /* Check whether X contains an UNSPEC_LTREL_BASE.
7984 Return its constant pool symbol if found, NULL_RTX otherwise. */
7986 static rtx
7987 find_ltrel_base (rtx x)
7989 int i, j;
7990 const char *fmt;
7992 if (GET_CODE (x) == UNSPEC
7993 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7994 return XVECEXP (x, 0, 0);
7996 fmt = GET_RTX_FORMAT (GET_CODE (x));
7997 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7999 if (fmt[i] == 'e')
8001 rtx fnd = find_ltrel_base (XEXP (x, i));
8002 if (fnd)
8003 return fnd;
8005 else if (fmt[i] == 'E')
8007 for (j = 0; j < XVECLEN (x, i); j++)
8009 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8010 if (fnd)
8011 return fnd;
8016 return NULL_RTX;
8019 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8021 static void
8022 replace_ltrel_base (rtx *x)
8024 int i, j;
8025 const char *fmt;
8027 if (GET_CODE (*x) == UNSPEC
8028 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8030 *x = XVECEXP (*x, 0, 1);
8031 return;
8034 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8035 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8037 if (fmt[i] == 'e')
8039 replace_ltrel_base (&XEXP (*x, i));
8041 else if (fmt[i] == 'E')
8043 for (j = 0; j < XVECLEN (*x, i); j++)
8044 replace_ltrel_base (&XVECEXP (*x, i, j));
8050 /* We keep a list of constants which we have to add to internal
8051 constant tables in the middle of large functions. */
8053 #define NR_C_MODES 32
8054 machine_mode constant_modes[NR_C_MODES] =
8056 TFmode, TImode, TDmode,
8057 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8058 V4SFmode, V2DFmode, V1TFmode,
8059 DFmode, DImode, DDmode,
8060 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8061 SFmode, SImode, SDmode,
8062 V4QImode, V2HImode, V1SImode, V1SFmode,
8063 HImode,
8064 V2QImode, V1HImode,
8065 QImode,
8066 V1QImode
8069 struct constant
8071 struct constant *next;
8072 rtx value;
8073 rtx_code_label *label;
8076 struct constant_pool
8078 struct constant_pool *next;
8079 rtx_insn *first_insn;
8080 rtx_insn *pool_insn;
8081 bitmap insns;
8082 rtx_insn *emit_pool_after;
8084 struct constant *constants[NR_C_MODES];
8085 struct constant *execute;
8086 rtx_code_label *label;
8087 int size;
8090 /* Allocate new constant_pool structure. */
8092 static struct constant_pool *
8093 s390_alloc_pool (void)
8095 struct constant_pool *pool;
8096 int i;
8098 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8099 pool->next = NULL;
8100 for (i = 0; i < NR_C_MODES; i++)
8101 pool->constants[i] = NULL;
8103 pool->execute = NULL;
8104 pool->label = gen_label_rtx ();
8105 pool->first_insn = NULL;
8106 pool->pool_insn = NULL;
8107 pool->insns = BITMAP_ALLOC (NULL);
8108 pool->size = 0;
8109 pool->emit_pool_after = NULL;
8111 return pool;
8114 /* Create new constant pool covering instructions starting at INSN
8115 and chain it to the end of POOL_LIST. */
8117 static struct constant_pool *
8118 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8120 struct constant_pool *pool, **prev;
8122 pool = s390_alloc_pool ();
8123 pool->first_insn = insn;
8125 for (prev = pool_list; *prev; prev = &(*prev)->next)
8127 *prev = pool;
8129 return pool;
8132 /* End range of instructions covered by POOL at INSN and emit
8133 placeholder insn representing the pool. */
8135 static void
8136 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8138 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8140 if (!insn)
8141 insn = get_last_insn ();
8143 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8144 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8147 /* Add INSN to the list of insns covered by POOL. */
8149 static void
8150 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8152 bitmap_set_bit (pool->insns, INSN_UID (insn));
8155 /* Return pool out of POOL_LIST that covers INSN. */
8157 static struct constant_pool *
8158 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8160 struct constant_pool *pool;
8162 for (pool = pool_list; pool; pool = pool->next)
8163 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8164 break;
8166 return pool;
8169 /* Add constant VAL of mode MODE to the constant pool POOL. */
8171 static void
8172 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8174 struct constant *c;
8175 int i;
8177 for (i = 0; i < NR_C_MODES; i++)
8178 if (constant_modes[i] == mode)
8179 break;
8180 gcc_assert (i != NR_C_MODES);
8182 for (c = pool->constants[i]; c != NULL; c = c->next)
8183 if (rtx_equal_p (val, c->value))
8184 break;
8186 if (c == NULL)
8188 c = (struct constant *) xmalloc (sizeof *c);
8189 c->value = val;
8190 c->label = gen_label_rtx ();
8191 c->next = pool->constants[i];
8192 pool->constants[i] = c;
8193 pool->size += GET_MODE_SIZE (mode);
8197 /* Return an rtx that represents the offset of X from the start of
8198 pool POOL. */
8200 static rtx
8201 s390_pool_offset (struct constant_pool *pool, rtx x)
8203 rtx label;
8205 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8206 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8207 UNSPEC_POOL_OFFSET);
8208 return gen_rtx_CONST (GET_MODE (x), x);
8211 /* Find constant VAL of mode MODE in the constant pool POOL.
8212 Return an RTX describing the distance from the start of
8213 the pool to the location of the new constant. */
8215 static rtx
8216 s390_find_constant (struct constant_pool *pool, rtx val,
8217 machine_mode mode)
8219 struct constant *c;
8220 int i;
8222 for (i = 0; i < NR_C_MODES; i++)
8223 if (constant_modes[i] == mode)
8224 break;
8225 gcc_assert (i != NR_C_MODES);
8227 for (c = pool->constants[i]; c != NULL; c = c->next)
8228 if (rtx_equal_p (val, c->value))
8229 break;
8231 gcc_assert (c);
8233 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8236 /* Check whether INSN is an execute. Return the label_ref to its
8237 execute target template if so, NULL_RTX otherwise. */
8239 static rtx
8240 s390_execute_label (rtx insn)
8242 if (NONJUMP_INSN_P (insn)
8243 && GET_CODE (PATTERN (insn)) == PARALLEL
8244 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8245 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8246 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8248 return NULL_RTX;
8251 /* Add execute target for INSN to the constant pool POOL. */
8253 static void
8254 s390_add_execute (struct constant_pool *pool, rtx insn)
8256 struct constant *c;
8258 for (c = pool->execute; c != NULL; c = c->next)
8259 if (INSN_UID (insn) == INSN_UID (c->value))
8260 break;
8262 if (c == NULL)
8264 c = (struct constant *) xmalloc (sizeof *c);
8265 c->value = insn;
8266 c->label = gen_label_rtx ();
8267 c->next = pool->execute;
8268 pool->execute = c;
8269 pool->size += 6;
8273 /* Find execute target for INSN in the constant pool POOL.
8274 Return an RTX describing the distance from the start of
8275 the pool to the location of the execute target. */
8277 static rtx
8278 s390_find_execute (struct constant_pool *pool, rtx insn)
8280 struct constant *c;
8282 for (c = pool->execute; c != NULL; c = c->next)
8283 if (INSN_UID (insn) == INSN_UID (c->value))
8284 break;
8286 gcc_assert (c);
8288 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8291 /* For an execute INSN, extract the execute target template. */
8293 static rtx
8294 s390_execute_target (rtx insn)
8296 rtx pattern = PATTERN (insn);
8297 gcc_assert (s390_execute_label (insn));
8299 if (XVECLEN (pattern, 0) == 2)
8301 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8303 else
8305 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8306 int i;
8308 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8309 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8311 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8314 return pattern;
8317 /* Indicate that INSN cannot be duplicated. This is the case for
8318 execute insns that carry a unique label. */
8320 static bool
8321 s390_cannot_copy_insn_p (rtx_insn *insn)
8323 rtx label = s390_execute_label (insn);
8324 return label && label != const0_rtx;
8327 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8328 do not emit the pool base label. */
8330 static void
8331 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8333 struct constant *c;
8334 rtx_insn *insn = pool->pool_insn;
8335 int i;
8337 /* Switch to rodata section. */
8338 if (TARGET_CPU_ZARCH)
8340 insn = emit_insn_after (gen_pool_section_start (), insn);
8341 INSN_ADDRESSES_NEW (insn, -1);
8344 /* Ensure minimum pool alignment. */
8345 if (TARGET_CPU_ZARCH)
8346 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8347 else
8348 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8349 INSN_ADDRESSES_NEW (insn, -1);
8351 /* Emit pool base label. */
8352 if (!remote_label)
8354 insn = emit_label_after (pool->label, insn);
8355 INSN_ADDRESSES_NEW (insn, -1);
8358 /* Dump constants in descending alignment requirement order,
8359 ensuring proper alignment for every constant. */
8360 for (i = 0; i < NR_C_MODES; i++)
8361 for (c = pool->constants[i]; c; c = c->next)
8363 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8364 rtx value = copy_rtx (c->value);
8365 if (GET_CODE (value) == CONST
8366 && GET_CODE (XEXP (value, 0)) == UNSPEC
8367 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8368 && XVECLEN (XEXP (value, 0), 0) == 1)
8369 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8371 insn = emit_label_after (c->label, insn);
8372 INSN_ADDRESSES_NEW (insn, -1);
8374 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8375 gen_rtvec (1, value),
8376 UNSPECV_POOL_ENTRY);
8377 insn = emit_insn_after (value, insn);
8378 INSN_ADDRESSES_NEW (insn, -1);
8381 /* Ensure minimum alignment for instructions. */
8382 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8383 INSN_ADDRESSES_NEW (insn, -1);
8385 /* Output in-pool execute template insns. */
8386 for (c = pool->execute; c; c = c->next)
8388 insn = emit_label_after (c->label, insn);
8389 INSN_ADDRESSES_NEW (insn, -1);
8391 insn = emit_insn_after (s390_execute_target (c->value), insn);
8392 INSN_ADDRESSES_NEW (insn, -1);
8395 /* Switch back to previous section. */
8396 if (TARGET_CPU_ZARCH)
8398 insn = emit_insn_after (gen_pool_section_end (), insn);
8399 INSN_ADDRESSES_NEW (insn, -1);
8402 insn = emit_barrier_after (insn);
8403 INSN_ADDRESSES_NEW (insn, -1);
8405 /* Remove placeholder insn. */
8406 remove_insn (pool->pool_insn);
8409 /* Free all memory used by POOL. */
8411 static void
8412 s390_free_pool (struct constant_pool *pool)
8414 struct constant *c, *next;
8415 int i;
8417 for (i = 0; i < NR_C_MODES; i++)
8418 for (c = pool->constants[i]; c; c = next)
8420 next = c->next;
8421 free (c);
8424 for (c = pool->execute; c; c = next)
8426 next = c->next;
8427 free (c);
8430 BITMAP_FREE (pool->insns);
8431 free (pool);
8435 /* Collect main literal pool. Return NULL on overflow. */
8437 static struct constant_pool *
8438 s390_mainpool_start (void)
8440 struct constant_pool *pool;
8441 rtx_insn *insn;
8443 pool = s390_alloc_pool ();
8445 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8447 if (NONJUMP_INSN_P (insn)
8448 && GET_CODE (PATTERN (insn)) == SET
8449 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8450 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8452 /* There might be two main_pool instructions if base_reg
8453 is call-clobbered; one for shrink-wrapped code and one
8454 for the rest. We want to keep the first. */
8455 if (pool->pool_insn)
8457 insn = PREV_INSN (insn);
8458 delete_insn (NEXT_INSN (insn));
8459 continue;
8461 pool->pool_insn = insn;
8464 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8466 s390_add_execute (pool, insn);
8468 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8470 rtx pool_ref = NULL_RTX;
8471 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8472 if (pool_ref)
8474 rtx constant = get_pool_constant (pool_ref);
8475 machine_mode mode = get_pool_mode (pool_ref);
8476 s390_add_constant (pool, constant, mode);
8480 /* If hot/cold partitioning is enabled we have to make sure that
8481 the literal pool is emitted in the same section where the
8482 initialization of the literal pool base pointer takes place.
8483 emit_pool_after is only used in the non-overflow case on non
8484 Z cpus where we can emit the literal pool at the end of the
8485 function body within the text section. */
8486 if (NOTE_P (insn)
8487 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8488 && !pool->emit_pool_after)
8489 pool->emit_pool_after = PREV_INSN (insn);
8492 gcc_assert (pool->pool_insn || pool->size == 0);
8494 if (pool->size >= 4096)
8496 /* We're going to chunkify the pool, so remove the main
8497 pool placeholder insn. */
8498 remove_insn (pool->pool_insn);
8500 s390_free_pool (pool);
8501 pool = NULL;
8504 /* If the functions ends with the section where the literal pool
8505 should be emitted set the marker to its end. */
8506 if (pool && !pool->emit_pool_after)
8507 pool->emit_pool_after = get_last_insn ();
8509 return pool;
8512 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8513 Modify the current function to output the pool constants as well as
8514 the pool register setup instruction. */
8516 static void
8517 s390_mainpool_finish (struct constant_pool *pool)
8519 rtx base_reg = cfun->machine->base_reg;
8521 /* If the pool is empty, we're done. */
8522 if (pool->size == 0)
8524 /* We don't actually need a base register after all. */
8525 cfun->machine->base_reg = NULL_RTX;
8527 if (pool->pool_insn)
8528 remove_insn (pool->pool_insn);
8529 s390_free_pool (pool);
8530 return;
8533 /* We need correct insn addresses. */
8534 shorten_branches (get_insns ());
8536 /* On zSeries, we use a LARL to load the pool register. The pool is
8537 located in the .rodata section, so we emit it after the function. */
8538 if (TARGET_CPU_ZARCH)
8540 rtx set = gen_main_base_64 (base_reg, pool->label);
8541 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8542 INSN_ADDRESSES_NEW (insn, -1);
8543 remove_insn (pool->pool_insn);
8545 insn = get_last_insn ();
8546 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8547 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8549 s390_dump_pool (pool, 0);
8552 /* On S/390, if the total size of the function's code plus literal pool
8553 does not exceed 4096 bytes, we use BASR to set up a function base
8554 pointer, and emit the literal pool at the end of the function. */
8555 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8556 + pool->size + 8 /* alignment slop */ < 4096)
8558 rtx set = gen_main_base_31_small (base_reg, pool->label);
8559 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8560 INSN_ADDRESSES_NEW (insn, -1);
8561 remove_insn (pool->pool_insn);
8563 insn = emit_label_after (pool->label, insn);
8564 INSN_ADDRESSES_NEW (insn, -1);
8566 /* emit_pool_after will be set by s390_mainpool_start to the
8567 last insn of the section where the literal pool should be
8568 emitted. */
8569 insn = pool->emit_pool_after;
8571 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8572 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8574 s390_dump_pool (pool, 1);
8577 /* Otherwise, we emit an inline literal pool and use BASR to branch
8578 over it, setting up the pool register at the same time. */
8579 else
8581 rtx_code_label *pool_end = gen_label_rtx ();
8583 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8584 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8585 JUMP_LABEL (insn) = pool_end;
8586 INSN_ADDRESSES_NEW (insn, -1);
8587 remove_insn (pool->pool_insn);
8589 insn = emit_label_after (pool->label, insn);
8590 INSN_ADDRESSES_NEW (insn, -1);
8592 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8593 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8595 insn = emit_label_after (pool_end, pool->pool_insn);
8596 INSN_ADDRESSES_NEW (insn, -1);
8598 s390_dump_pool (pool, 1);
8602 /* Replace all literal pool references. */
8604 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8606 if (INSN_P (insn))
8607 replace_ltrel_base (&PATTERN (insn));
8609 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8611 rtx addr, pool_ref = NULL_RTX;
8612 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8613 if (pool_ref)
8615 if (s390_execute_label (insn))
8616 addr = s390_find_execute (pool, insn);
8617 else
8618 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8619 get_pool_mode (pool_ref));
8621 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8622 INSN_CODE (insn) = -1;
8628 /* Free the pool. */
8629 s390_free_pool (pool);
8632 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8633 We have decided we cannot use this pool, so revert all changes
8634 to the current function that were done by s390_mainpool_start. */
8635 static void
8636 s390_mainpool_cancel (struct constant_pool *pool)
8638 /* We didn't actually change the instruction stream, so simply
8639 free the pool memory. */
8640 s390_free_pool (pool);
8644 /* Chunkify the literal pool. */
8646 #define S390_POOL_CHUNK_MIN 0xc00
8647 #define S390_POOL_CHUNK_MAX 0xe00
8649 static struct constant_pool *
8650 s390_chunkify_start (void)
8652 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8653 int extra_size = 0;
8654 bitmap far_labels;
8655 rtx pending_ltrel = NULL_RTX;
8656 rtx_insn *insn;
8658 rtx (*gen_reload_base) (rtx, rtx) =
8659 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8662 /* We need correct insn addresses. */
8664 shorten_branches (get_insns ());
8666 /* Scan all insns and move literals to pool chunks. */
8668 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8670 bool section_switch_p = false;
8672 /* Check for pending LTREL_BASE. */
8673 if (INSN_P (insn))
8675 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8676 if (ltrel_base)
8678 gcc_assert (ltrel_base == pending_ltrel);
8679 pending_ltrel = NULL_RTX;
8683 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8685 if (!curr_pool)
8686 curr_pool = s390_start_pool (&pool_list, insn);
8688 s390_add_execute (curr_pool, insn);
8689 s390_add_pool_insn (curr_pool, insn);
8691 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8693 rtx pool_ref = NULL_RTX;
8694 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8695 if (pool_ref)
8697 rtx constant = get_pool_constant (pool_ref);
8698 machine_mode mode = get_pool_mode (pool_ref);
8700 if (!curr_pool)
8701 curr_pool = s390_start_pool (&pool_list, insn);
8703 s390_add_constant (curr_pool, constant, mode);
8704 s390_add_pool_insn (curr_pool, insn);
8706 /* Don't split the pool chunk between a LTREL_OFFSET load
8707 and the corresponding LTREL_BASE. */
8708 if (GET_CODE (constant) == CONST
8709 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8710 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8712 gcc_assert (!pending_ltrel);
8713 pending_ltrel = pool_ref;
8718 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8720 if (curr_pool)
8721 s390_add_pool_insn (curr_pool, insn);
8722 /* An LTREL_BASE must follow within the same basic block. */
8723 gcc_assert (!pending_ltrel);
8726 if (NOTE_P (insn))
8727 switch (NOTE_KIND (insn))
8729 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8730 section_switch_p = true;
8731 break;
8732 case NOTE_INSN_VAR_LOCATION:
8733 case NOTE_INSN_CALL_ARG_LOCATION:
8734 continue;
8735 default:
8736 break;
8739 if (!curr_pool
8740 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8741 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8742 continue;
8744 if (TARGET_CPU_ZARCH)
8746 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8747 continue;
8749 s390_end_pool (curr_pool, NULL);
8750 curr_pool = NULL;
8752 else
8754 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8755 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8756 + extra_size;
8758 /* We will later have to insert base register reload insns.
8759 Those will have an effect on code size, which we need to
8760 consider here. This calculation makes rather pessimistic
8761 worst-case assumptions. */
8762 if (LABEL_P (insn))
8763 extra_size += 6;
8765 if (chunk_size < S390_POOL_CHUNK_MIN
8766 && curr_pool->size < S390_POOL_CHUNK_MIN
8767 && !section_switch_p)
8768 continue;
8770 /* Pool chunks can only be inserted after BARRIERs ... */
8771 if (BARRIER_P (insn))
8773 s390_end_pool (curr_pool, insn);
8774 curr_pool = NULL;
8775 extra_size = 0;
8778 /* ... so if we don't find one in time, create one. */
8779 else if (chunk_size > S390_POOL_CHUNK_MAX
8780 || curr_pool->size > S390_POOL_CHUNK_MAX
8781 || section_switch_p)
8783 rtx_insn *label, *jump, *barrier, *next, *prev;
8785 if (!section_switch_p)
8787 /* We can insert the barrier only after a 'real' insn. */
8788 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8789 continue;
8790 if (get_attr_length (insn) == 0)
8791 continue;
8792 /* Don't separate LTREL_BASE from the corresponding
8793 LTREL_OFFSET load. */
8794 if (pending_ltrel)
8795 continue;
8796 next = insn;
8799 insn = next;
8800 next = NEXT_INSN (insn);
8802 while (next
8803 && NOTE_P (next)
8804 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8805 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8807 else
8809 gcc_assert (!pending_ltrel);
8811 /* The old pool has to end before the section switch
8812 note in order to make it part of the current
8813 section. */
8814 insn = PREV_INSN (insn);
8817 label = gen_label_rtx ();
8818 prev = insn;
8819 if (prev && NOTE_P (prev))
8820 prev = prev_nonnote_insn (prev);
8821 if (prev)
8822 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8823 INSN_LOCATION (prev));
8824 else
8825 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8826 barrier = emit_barrier_after (jump);
8827 insn = emit_label_after (label, barrier);
8828 JUMP_LABEL (jump) = label;
8829 LABEL_NUSES (label) = 1;
8831 INSN_ADDRESSES_NEW (jump, -1);
8832 INSN_ADDRESSES_NEW (barrier, -1);
8833 INSN_ADDRESSES_NEW (insn, -1);
8835 s390_end_pool (curr_pool, barrier);
8836 curr_pool = NULL;
8837 extra_size = 0;
8842 if (curr_pool)
8843 s390_end_pool (curr_pool, NULL);
8844 gcc_assert (!pending_ltrel);
8846 /* Find all labels that are branched into
8847 from an insn belonging to a different chunk. */
8849 far_labels = BITMAP_ALLOC (NULL);
8851 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8853 rtx_jump_table_data *table;
8855 /* Labels marked with LABEL_PRESERVE_P can be target
8856 of non-local jumps, so we have to mark them.
8857 The same holds for named labels.
8859 Don't do that, however, if it is the label before
8860 a jump table. */
8862 if (LABEL_P (insn)
8863 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8865 rtx_insn *vec_insn = NEXT_INSN (insn);
8866 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8867 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8869 /* Check potential targets in a table jump (casesi_jump). */
8870 else if (tablejump_p (insn, NULL, &table))
8872 rtx vec_pat = PATTERN (table);
8873 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8875 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8877 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8879 if (s390_find_pool (pool_list, label)
8880 != s390_find_pool (pool_list, insn))
8881 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8884 /* If we have a direct jump (conditional or unconditional),
8885 check all potential targets. */
8886 else if (JUMP_P (insn))
8888 rtx pat = PATTERN (insn);
8890 if (GET_CODE (pat) == PARALLEL)
8891 pat = XVECEXP (pat, 0, 0);
8893 if (GET_CODE (pat) == SET)
8895 rtx label = JUMP_LABEL (insn);
8896 if (label && !ANY_RETURN_P (label))
8898 if (s390_find_pool (pool_list, label)
8899 != s390_find_pool (pool_list, insn))
8900 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8906 /* Insert base register reload insns before every pool. */
8908 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8910 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8911 curr_pool->label);
8912 rtx_insn *insn = curr_pool->first_insn;
8913 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8916 /* Insert base register reload insns at every far label. */
8918 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8919 if (LABEL_P (insn)
8920 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8922 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8923 if (pool)
8925 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8926 pool->label);
8927 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8932 BITMAP_FREE (far_labels);
8935 /* Recompute insn addresses. */
8937 init_insn_lengths ();
8938 shorten_branches (get_insns ());
8940 return pool_list;
8943 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8944 After we have decided to use this list, finish implementing
8945 all changes to the current function as required. */
8947 static void
8948 s390_chunkify_finish (struct constant_pool *pool_list)
8950 struct constant_pool *curr_pool = NULL;
8951 rtx_insn *insn;
8954 /* Replace all literal pool references. */
8956 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8958 if (INSN_P (insn))
8959 replace_ltrel_base (&PATTERN (insn));
8961 curr_pool = s390_find_pool (pool_list, insn);
8962 if (!curr_pool)
8963 continue;
8965 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8967 rtx addr, pool_ref = NULL_RTX;
8968 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8969 if (pool_ref)
8971 if (s390_execute_label (insn))
8972 addr = s390_find_execute (curr_pool, insn);
8973 else
8974 addr = s390_find_constant (curr_pool,
8975 get_pool_constant (pool_ref),
8976 get_pool_mode (pool_ref));
8978 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8979 INSN_CODE (insn) = -1;
8984 /* Dump out all literal pools. */
8986 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8987 s390_dump_pool (curr_pool, 0);
8989 /* Free pool list. */
8991 while (pool_list)
8993 struct constant_pool *next = pool_list->next;
8994 s390_free_pool (pool_list);
8995 pool_list = next;
8999 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9000 We have decided we cannot use this list, so revert all changes
9001 to the current function that were done by s390_chunkify_start. */
9003 static void
9004 s390_chunkify_cancel (struct constant_pool *pool_list)
9006 struct constant_pool *curr_pool = NULL;
9007 rtx_insn *insn;
9009 /* Remove all pool placeholder insns. */
9011 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9013 /* Did we insert an extra barrier? Remove it. */
9014 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9015 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9016 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9018 if (jump && JUMP_P (jump)
9019 && barrier && BARRIER_P (barrier)
9020 && label && LABEL_P (label)
9021 && GET_CODE (PATTERN (jump)) == SET
9022 && SET_DEST (PATTERN (jump)) == pc_rtx
9023 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9024 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9026 remove_insn (jump);
9027 remove_insn (barrier);
9028 remove_insn (label);
9031 remove_insn (curr_pool->pool_insn);
9034 /* Remove all base register reload insns. */
9036 for (insn = get_insns (); insn; )
9038 rtx_insn *next_insn = NEXT_INSN (insn);
9040 if (NONJUMP_INSN_P (insn)
9041 && GET_CODE (PATTERN (insn)) == SET
9042 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9043 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9044 remove_insn (insn);
9046 insn = next_insn;
9049 /* Free pool list. */
9051 while (pool_list)
9053 struct constant_pool *next = pool_list->next;
9054 s390_free_pool (pool_list);
9055 pool_list = next;
9059 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9061 void
9062 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9064 switch (GET_MODE_CLASS (mode))
9066 case MODE_FLOAT:
9067 case MODE_DECIMAL_FLOAT:
9068 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9070 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9071 break;
9073 case MODE_INT:
9074 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9075 mark_symbol_refs_as_used (exp);
9076 break;
9078 case MODE_VECTOR_INT:
9079 case MODE_VECTOR_FLOAT:
9081 int i;
9082 machine_mode inner_mode;
9083 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9085 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9086 for (i = 0; i < XVECLEN (exp, 0); i++)
9087 s390_output_pool_entry (XVECEXP (exp, 0, i),
9088 inner_mode,
9089 i == 0
9090 ? align
9091 : GET_MODE_BITSIZE (inner_mode));
9093 break;
9095 default:
9096 gcc_unreachable ();
9101 /* Return an RTL expression representing the value of the return address
9102 for the frame COUNT steps up from the current frame. FRAME is the
9103 frame pointer of that frame. */
9106 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9108 int offset;
9109 rtx addr;
9111 /* Without backchain, we fail for all but the current frame. */
9113 if (!TARGET_BACKCHAIN && count > 0)
9114 return NULL_RTX;
9116 /* For the current frame, we need to make sure the initial
9117 value of RETURN_REGNUM is actually saved. */
9119 if (count == 0)
9121 /* On non-z architectures branch splitting could overwrite r14. */
9122 if (TARGET_CPU_ZARCH)
9123 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9124 else
9126 cfun_frame_layout.save_return_addr_p = true;
9127 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9131 if (TARGET_PACKED_STACK)
9132 offset = -2 * UNITS_PER_LONG;
9133 else
9134 offset = RETURN_REGNUM * UNITS_PER_LONG;
9136 addr = plus_constant (Pmode, frame, offset);
9137 addr = memory_address (Pmode, addr);
9138 return gen_rtx_MEM (Pmode, addr);
9141 /* Return an RTL expression representing the back chain stored in
9142 the current stack frame. */
9145 s390_back_chain_rtx (void)
9147 rtx chain;
9149 gcc_assert (TARGET_BACKCHAIN);
9151 if (TARGET_PACKED_STACK)
9152 chain = plus_constant (Pmode, stack_pointer_rtx,
9153 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9154 else
9155 chain = stack_pointer_rtx;
9157 chain = gen_rtx_MEM (Pmode, chain);
9158 return chain;
9161 /* Find first call clobbered register unused in a function.
9162 This could be used as base register in a leaf function
9163 or for holding the return address before epilogue. */
9165 static int
9166 find_unused_clobbered_reg (void)
9168 int i;
9169 for (i = 0; i < 6; i++)
9170 if (!df_regs_ever_live_p (i))
9171 return i;
9172 return 0;
9176 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9177 clobbered hard regs in SETREG. */
9179 static void
9180 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9182 char *regs_ever_clobbered = (char *)data;
9183 unsigned int i, regno;
9184 machine_mode mode = GET_MODE (setreg);
9186 if (GET_CODE (setreg) == SUBREG)
9188 rtx inner = SUBREG_REG (setreg);
9189 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9190 return;
9191 regno = subreg_regno (setreg);
9193 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9194 regno = REGNO (setreg);
9195 else
9196 return;
9198 for (i = regno;
9199 i < regno + HARD_REGNO_NREGS (regno, mode);
9200 i++)
9201 regs_ever_clobbered[i] = 1;
9204 /* Walks through all basic blocks of the current function looking
9205 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9206 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9207 each of those regs. */
9209 static void
9210 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9212 basic_block cur_bb;
9213 rtx_insn *cur_insn;
9214 unsigned int i;
9216 memset (regs_ever_clobbered, 0, 32);
9218 /* For non-leaf functions we have to consider all call clobbered regs to be
9219 clobbered. */
9220 if (!crtl->is_leaf)
9222 for (i = 0; i < 32; i++)
9223 regs_ever_clobbered[i] = call_really_used_regs[i];
9226 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9227 this work is done by liveness analysis (mark_regs_live_at_end).
9228 Special care is needed for functions containing landing pads. Landing pads
9229 may use the eh registers, but the code which sets these registers is not
9230 contained in that function. Hence s390_regs_ever_clobbered is not able to
9231 deal with this automatically. */
9232 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9233 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9234 if (crtl->calls_eh_return
9235 || (cfun->machine->has_landing_pad_p
9236 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9237 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9239 /* For nonlocal gotos all call-saved registers have to be saved.
9240 This flag is also set for the unwinding code in libgcc.
9241 See expand_builtin_unwind_init. For regs_ever_live this is done by
9242 reload. */
9243 if (crtl->saves_all_registers)
9244 for (i = 0; i < 32; i++)
9245 if (!call_really_used_regs[i])
9246 regs_ever_clobbered[i] = 1;
9248 FOR_EACH_BB_FN (cur_bb, cfun)
9250 FOR_BB_INSNS (cur_bb, cur_insn)
9252 rtx pat;
9254 if (!INSN_P (cur_insn))
9255 continue;
9257 pat = PATTERN (cur_insn);
9259 /* Ignore GPR restore insns. */
9260 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9262 if (GET_CODE (pat) == SET
9263 && GENERAL_REG_P (SET_DEST (pat)))
9265 /* lgdr */
9266 if (GET_MODE (SET_SRC (pat)) == DImode
9267 && FP_REG_P (SET_SRC (pat)))
9268 continue;
9270 /* l / lg */
9271 if (GET_CODE (SET_SRC (pat)) == MEM)
9272 continue;
9275 /* lm / lmg */
9276 if (GET_CODE (pat) == PARALLEL
9277 && load_multiple_operation (pat, VOIDmode))
9278 continue;
9281 note_stores (pat,
9282 s390_reg_clobbered_rtx,
9283 regs_ever_clobbered);
9288 /* Determine the frame area which actually has to be accessed
9289 in the function epilogue. The values are stored at the
9290 given pointers AREA_BOTTOM (address of the lowest used stack
9291 address) and AREA_TOP (address of the first item which does
9292 not belong to the stack frame). */
9294 static void
9295 s390_frame_area (int *area_bottom, int *area_top)
9297 int b, t;
9299 b = INT_MAX;
9300 t = INT_MIN;
9302 if (cfun_frame_layout.first_restore_gpr != -1)
9304 b = (cfun_frame_layout.gprs_offset
9305 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9306 t = b + (cfun_frame_layout.last_restore_gpr
9307 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9310 if (TARGET_64BIT && cfun_save_high_fprs_p)
9312 b = MIN (b, cfun_frame_layout.f8_offset);
9313 t = MAX (t, (cfun_frame_layout.f8_offset
9314 + cfun_frame_layout.high_fprs * 8));
9317 if (!TARGET_64BIT)
9319 if (cfun_fpr_save_p (FPR4_REGNUM))
9321 b = MIN (b, cfun_frame_layout.f4_offset);
9322 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9324 if (cfun_fpr_save_p (FPR6_REGNUM))
9326 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9327 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9330 *area_bottom = b;
9331 *area_top = t;
9333 /* Update gpr_save_slots in the frame layout trying to make use of
9334 FPRs as GPR save slots.
9335 This is a helper routine of s390_register_info. */
9337 static void
9338 s390_register_info_gprtofpr ()
9340 int save_reg_slot = FPR0_REGNUM;
9341 int i, j;
9343 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9344 return;
9346 for (i = 15; i >= 6; i--)
9348 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9349 continue;
9351 /* Advance to the next FP register which can be used as a
9352 GPR save slot. */
9353 while ((!call_really_used_regs[save_reg_slot]
9354 || df_regs_ever_live_p (save_reg_slot)
9355 || cfun_fpr_save_p (save_reg_slot))
9356 && FP_REGNO_P (save_reg_slot))
9357 save_reg_slot++;
9358 if (!FP_REGNO_P (save_reg_slot))
9360 /* We only want to use ldgr/lgdr if we can get rid of
9361 stm/lm entirely. So undo the gpr slot allocation in
9362 case we ran out of FPR save slots. */
9363 for (j = 6; j <= 15; j++)
9364 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9365 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9366 break;
9368 cfun_gpr_save_slot (i) = save_reg_slot++;
9372 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9373 stdarg.
9374 This is a helper routine for s390_register_info. */
9376 static void
9377 s390_register_info_stdarg_fpr ()
9379 int i;
9380 int min_fpr;
9381 int max_fpr;
9383 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9384 f0-f4 for 64 bit. */
9385 if (!cfun->stdarg
9386 || !TARGET_HARD_FLOAT
9387 || !cfun->va_list_fpr_size
9388 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9389 return;
9391 min_fpr = crtl->args.info.fprs;
9392 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9393 if (max_fpr >= FP_ARG_NUM_REG)
9394 max_fpr = FP_ARG_NUM_REG - 1;
9396 /* FPR argument regs start at f0. */
9397 min_fpr += FPR0_REGNUM;
9398 max_fpr += FPR0_REGNUM;
9400 for (i = min_fpr; i <= max_fpr; i++)
9401 cfun_set_fpr_save (i);
9404 /* Reserve the GPR save slots for GPRs which need to be saved due to
9405 stdarg.
9406 This is a helper routine for s390_register_info. */
9408 static void
9409 s390_register_info_stdarg_gpr ()
9411 int i;
9412 int min_gpr;
9413 int max_gpr;
9415 if (!cfun->stdarg
9416 || !cfun->va_list_gpr_size
9417 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9418 return;
9420 min_gpr = crtl->args.info.gprs;
9421 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9422 if (max_gpr >= GP_ARG_NUM_REG)
9423 max_gpr = GP_ARG_NUM_REG - 1;
9425 /* GPR argument regs start at r2. */
9426 min_gpr += GPR2_REGNUM;
9427 max_gpr += GPR2_REGNUM;
9429 /* If r6 was supposed to be saved into an FPR and now needs to go to
9430 the stack for vararg we have to adjust the restore range to make
9431 sure that the restore is done from stack as well. */
9432 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9433 && min_gpr <= GPR6_REGNUM
9434 && max_gpr >= GPR6_REGNUM)
9436 if (cfun_frame_layout.first_restore_gpr == -1
9437 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9438 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9439 if (cfun_frame_layout.last_restore_gpr == -1
9440 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9441 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9444 if (cfun_frame_layout.first_save_gpr == -1
9445 || cfun_frame_layout.first_save_gpr > min_gpr)
9446 cfun_frame_layout.first_save_gpr = min_gpr;
9448 if (cfun_frame_layout.last_save_gpr == -1
9449 || cfun_frame_layout.last_save_gpr < max_gpr)
9450 cfun_frame_layout.last_save_gpr = max_gpr;
9452 for (i = min_gpr; i <= max_gpr; i++)
9453 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9456 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9457 prologue and epilogue. */
9459 static void
9460 s390_register_info_set_ranges ()
9462 int i, j;
9464 /* Find the first and the last save slot supposed to use the stack
9465 to set the restore range.
9466 Vararg regs might be marked as save to stack but only the
9467 call-saved regs really need restoring (i.e. r6). This code
9468 assumes that the vararg regs have not yet been recorded in
9469 cfun_gpr_save_slot. */
9470 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9471 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9472 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9473 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9474 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9475 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9478 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9479 for registers which need to be saved in function prologue.
9480 This function can be used until the insns emitted for save/restore
9481 of the regs are visible in the RTL stream. */
9483 static void
9484 s390_register_info ()
9486 int i;
9487 char clobbered_regs[32];
9489 gcc_assert (!epilogue_completed);
9491 if (reload_completed)
9492 /* After reload we rely on our own routine to determine which
9493 registers need saving. */
9494 s390_regs_ever_clobbered (clobbered_regs);
9495 else
9496 /* During reload we use regs_ever_live as a base since reload
9497 does changes in there which we otherwise would not be aware
9498 of. */
9499 for (i = 0; i < 32; i++)
9500 clobbered_regs[i] = df_regs_ever_live_p (i);
9502 for (i = 0; i < 32; i++)
9503 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9505 /* Mark the call-saved FPRs which need to be saved.
9506 This needs to be done before checking the special GPRs since the
9507 stack pointer usage depends on whether high FPRs have to be saved
9508 or not. */
9509 cfun_frame_layout.fpr_bitmap = 0;
9510 cfun_frame_layout.high_fprs = 0;
9511 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9512 if (clobbered_regs[i] && !call_really_used_regs[i])
9514 cfun_set_fpr_save (i);
9515 if (i >= FPR8_REGNUM)
9516 cfun_frame_layout.high_fprs++;
9519 /* Register 12 is used for GOT address, but also as temp in prologue
9520 for split-stack stdarg functions (unless r14 is available). */
9521 clobbered_regs[12]
9522 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9523 || (flag_split_stack && cfun->stdarg
9524 && (crtl->is_leaf || TARGET_TPF_PROFILING
9525 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9527 clobbered_regs[BASE_REGNUM]
9528 |= (cfun->machine->base_reg
9529 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9531 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9532 |= !!frame_pointer_needed;
9534 /* On pre z900 machines this might take until machine dependent
9535 reorg to decide.
9536 save_return_addr_p will only be set on non-zarch machines so
9537 there is no risk that r14 goes into an FPR instead of a stack
9538 slot. */
9539 clobbered_regs[RETURN_REGNUM]
9540 |= (!crtl->is_leaf
9541 || TARGET_TPF_PROFILING
9542 || cfun->machine->split_branches_pending_p
9543 || cfun_frame_layout.save_return_addr_p
9544 || crtl->calls_eh_return);
9546 clobbered_regs[STACK_POINTER_REGNUM]
9547 |= (!crtl->is_leaf
9548 || TARGET_TPF_PROFILING
9549 || cfun_save_high_fprs_p
9550 || get_frame_size () > 0
9551 || (reload_completed && cfun_frame_layout.frame_size > 0)
9552 || cfun->calls_alloca);
9554 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9556 for (i = 6; i < 16; i++)
9557 if (clobbered_regs[i])
9558 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9560 s390_register_info_stdarg_fpr ();
9561 s390_register_info_gprtofpr ();
9562 s390_register_info_set_ranges ();
9563 /* stdarg functions might need to save GPRs 2 to 6. This might
9564 override the GPR->FPR save decision made by
9565 s390_register_info_gprtofpr for r6 since vararg regs must go to
9566 the stack. */
9567 s390_register_info_stdarg_gpr ();
9570 /* This function is called by s390_optimize_prologue in order to get
9571 rid of unnecessary GPR save/restore instructions. The register info
9572 for the GPRs is re-computed and the ranges are re-calculated. */
9574 static void
9575 s390_optimize_register_info ()
9577 char clobbered_regs[32];
9578 int i;
9580 gcc_assert (epilogue_completed);
9581 gcc_assert (!cfun->machine->split_branches_pending_p);
9583 s390_regs_ever_clobbered (clobbered_regs);
9585 for (i = 0; i < 32; i++)
9586 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9588 /* There is still special treatment needed for cases invisible to
9589 s390_regs_ever_clobbered. */
9590 clobbered_regs[RETURN_REGNUM]
9591 |= (TARGET_TPF_PROFILING
9592 /* When expanding builtin_return_addr in ESA mode we do not
9593 know whether r14 will later be needed as scratch reg when
9594 doing branch splitting. So the builtin always accesses the
9595 r14 save slot and we need to stick to the save/restore
9596 decision for r14 even if it turns out that it didn't get
9597 clobbered. */
9598 || cfun_frame_layout.save_return_addr_p
9599 || crtl->calls_eh_return);
9601 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9603 for (i = 6; i < 16; i++)
9604 if (!clobbered_regs[i])
9605 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9607 s390_register_info_set_ranges ();
9608 s390_register_info_stdarg_gpr ();
9611 /* Fill cfun->machine with info about frame of current function. */
9613 static void
9614 s390_frame_info (void)
9616 HOST_WIDE_INT lowest_offset;
9618 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9619 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9621 /* The va_arg builtin uses a constant distance of 16 *
9622 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9623 pointer. So even if we are going to save the stack pointer in an
9624 FPR we need the stack space in order to keep the offsets
9625 correct. */
9626 if (cfun->stdarg && cfun_save_arg_fprs_p)
9628 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9630 if (cfun_frame_layout.first_save_gpr_slot == -1)
9631 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9634 cfun_frame_layout.frame_size = get_frame_size ();
9635 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9636 fatal_error (input_location,
9637 "total size of local variables exceeds architecture limit");
9639 if (!TARGET_PACKED_STACK)
9641 /* Fixed stack layout. */
9642 cfun_frame_layout.backchain_offset = 0;
9643 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9644 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9645 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9646 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9647 * UNITS_PER_LONG);
9649 else if (TARGET_BACKCHAIN)
9651 /* Kernel stack layout - packed stack, backchain, no float */
9652 gcc_assert (TARGET_SOFT_FLOAT);
9653 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9654 - UNITS_PER_LONG);
9656 /* The distance between the backchain and the return address
9657 save slot must not change. So we always need a slot for the
9658 stack pointer which resides in between. */
9659 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9661 cfun_frame_layout.gprs_offset
9662 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9664 /* FPRs will not be saved. Nevertheless pick sane values to
9665 keep area calculations valid. */
9666 cfun_frame_layout.f0_offset =
9667 cfun_frame_layout.f4_offset =
9668 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9670 else
9672 int num_fprs;
9674 /* Packed stack layout without backchain. */
9676 /* With stdarg FPRs need their dedicated slots. */
9677 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9678 : (cfun_fpr_save_p (FPR4_REGNUM) +
9679 cfun_fpr_save_p (FPR6_REGNUM)));
9680 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9682 num_fprs = (cfun->stdarg ? 2
9683 : (cfun_fpr_save_p (FPR0_REGNUM)
9684 + cfun_fpr_save_p (FPR2_REGNUM)));
9685 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9687 cfun_frame_layout.gprs_offset
9688 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9690 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9691 - cfun_frame_layout.high_fprs * 8);
9694 if (cfun_save_high_fprs_p)
9695 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9697 if (!crtl->is_leaf)
9698 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9700 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9701 sized area at the bottom of the stack. This is required also for
9702 leaf functions. When GCC generates a local stack reference it
9703 will always add STACK_POINTER_OFFSET to all these references. */
9704 if (crtl->is_leaf
9705 && !TARGET_TPF_PROFILING
9706 && cfun_frame_layout.frame_size == 0
9707 && !cfun->calls_alloca)
9708 return;
9710 /* Calculate the number of bytes we have used in our own register
9711 save area. With the packed stack layout we can re-use the
9712 remaining bytes for normal stack elements. */
9714 if (TARGET_PACKED_STACK)
9715 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9716 cfun_frame_layout.f4_offset),
9717 cfun_frame_layout.gprs_offset);
9718 else
9719 lowest_offset = 0;
9721 if (TARGET_BACKCHAIN)
9722 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9724 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9726 /* If under 31 bit an odd number of gprs has to be saved we have to
9727 adjust the frame size to sustain 8 byte alignment of stack
9728 frames. */
9729 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9730 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9731 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9734 /* Generate frame layout. Fills in register and frame data for the current
9735 function in cfun->machine. This routine can be called multiple times;
9736 it will re-do the complete frame layout every time. */
9738 static void
9739 s390_init_frame_layout (void)
9741 HOST_WIDE_INT frame_size;
9742 int base_used;
9744 /* After LRA the frame layout is supposed to be read-only and should
9745 not be re-computed. */
9746 if (reload_completed)
9747 return;
9749 /* On S/390 machines, we may need to perform branch splitting, which
9750 will require both base and return address register. We have no
9751 choice but to assume we're going to need them until right at the
9752 end of the machine dependent reorg phase. */
9753 if (!TARGET_CPU_ZARCH)
9754 cfun->machine->split_branches_pending_p = true;
9758 frame_size = cfun_frame_layout.frame_size;
9760 /* Try to predict whether we'll need the base register. */
9761 base_used = cfun->machine->split_branches_pending_p
9762 || crtl->uses_const_pool
9763 || (!DISP_IN_RANGE (frame_size)
9764 && !CONST_OK_FOR_K (frame_size));
9766 /* Decide which register to use as literal pool base. In small
9767 leaf functions, try to use an unused call-clobbered register
9768 as base register to avoid save/restore overhead. */
9769 if (!base_used)
9770 cfun->machine->base_reg = NULL_RTX;
9771 else
9773 int br = 0;
9775 if (crtl->is_leaf)
9776 /* Prefer r5 (most likely to be free). */
9777 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9779 cfun->machine->base_reg =
9780 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9783 s390_register_info ();
9784 s390_frame_info ();
9786 while (frame_size != cfun_frame_layout.frame_size);
9789 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9790 the TX is nonescaping. A transaction is considered escaping if
9791 there is at least one path from tbegin returning CC0 to the
9792 function exit block without an tend.
9794 The check so far has some limitations:
9795 - only single tbegin/tend BBs are supported
9796 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9797 - when CC is copied to a GPR and the CC0 check is done with the GPR
9798 this is not supported
9801 static void
9802 s390_optimize_nonescaping_tx (void)
9804 const unsigned int CC0 = 1 << 3;
9805 basic_block tbegin_bb = NULL;
9806 basic_block tend_bb = NULL;
9807 basic_block bb;
9808 rtx_insn *insn;
9809 bool result = true;
9810 int bb_index;
9811 rtx_insn *tbegin_insn = NULL;
9813 if (!cfun->machine->tbegin_p)
9814 return;
9816 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9818 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9820 if (!bb)
9821 continue;
9823 FOR_BB_INSNS (bb, insn)
9825 rtx ite, cc, pat, target;
9826 unsigned HOST_WIDE_INT mask;
9828 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9829 continue;
9831 pat = PATTERN (insn);
9833 if (GET_CODE (pat) == PARALLEL)
9834 pat = XVECEXP (pat, 0, 0);
9836 if (GET_CODE (pat) != SET
9837 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9838 continue;
9840 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9842 rtx_insn *tmp;
9844 tbegin_insn = insn;
9846 /* Just return if the tbegin doesn't have clobbers. */
9847 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9848 return;
9850 if (tbegin_bb != NULL)
9851 return;
9853 /* Find the next conditional jump. */
9854 for (tmp = NEXT_INSN (insn);
9855 tmp != NULL_RTX;
9856 tmp = NEXT_INSN (tmp))
9858 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9859 return;
9860 if (!JUMP_P (tmp))
9861 continue;
9863 ite = SET_SRC (PATTERN (tmp));
9864 if (GET_CODE (ite) != IF_THEN_ELSE)
9865 continue;
9867 cc = XEXP (XEXP (ite, 0), 0);
9868 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9869 || GET_MODE (cc) != CCRAWmode
9870 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9871 return;
9873 if (bb->succs->length () != 2)
9874 return;
9876 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9877 if (GET_CODE (XEXP (ite, 0)) == NE)
9878 mask ^= 0xf;
9880 if (mask == CC0)
9881 target = XEXP (ite, 1);
9882 else if (mask == (CC0 ^ 0xf))
9883 target = XEXP (ite, 2);
9884 else
9885 return;
9888 edge_iterator ei;
9889 edge e1, e2;
9891 ei = ei_start (bb->succs);
9892 e1 = ei_safe_edge (ei);
9893 ei_next (&ei);
9894 e2 = ei_safe_edge (ei);
9896 if (e2->flags & EDGE_FALLTHRU)
9898 e2 = e1;
9899 e1 = ei_safe_edge (ei);
9902 if (!(e1->flags & EDGE_FALLTHRU))
9903 return;
9905 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9907 if (tmp == BB_END (bb))
9908 break;
9912 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9914 if (tend_bb != NULL)
9915 return;
9916 tend_bb = bb;
9921 /* Either we successfully remove the FPR clobbers here or we are not
9922 able to do anything for this TX. Both cases don't qualify for
9923 another look. */
9924 cfun->machine->tbegin_p = false;
9926 if (tbegin_bb == NULL || tend_bb == NULL)
9927 return;
9929 calculate_dominance_info (CDI_POST_DOMINATORS);
9930 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9931 free_dominance_info (CDI_POST_DOMINATORS);
9933 if (!result)
9934 return;
9936 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9937 gen_rtvec (2,
9938 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9939 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9940 INSN_CODE (tbegin_insn) = -1;
9941 df_insn_rescan (tbegin_insn);
9943 return;
9946 /* Return true if it is legal to put a value with MODE into REGNO. */
9948 bool
9949 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9951 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9952 return false;
9954 switch (REGNO_REG_CLASS (regno))
9956 case VEC_REGS:
9957 return ((GET_MODE_CLASS (mode) == MODE_INT
9958 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9959 || mode == DFmode
9960 || s390_vector_mode_supported_p (mode));
9961 break;
9962 case FP_REGS:
9963 if (TARGET_VX
9964 && ((GET_MODE_CLASS (mode) == MODE_INT
9965 && s390_class_max_nregs (FP_REGS, mode) == 1)
9966 || mode == DFmode
9967 || s390_vector_mode_supported_p (mode)))
9968 return true;
9970 if (REGNO_PAIR_OK (regno, mode))
9972 if (mode == SImode || mode == DImode)
9973 return true;
9975 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9976 return true;
9978 break;
9979 case ADDR_REGS:
9980 if (FRAME_REGNO_P (regno) && mode == Pmode)
9981 return true;
9983 /* fallthrough */
9984 case GENERAL_REGS:
9985 if (REGNO_PAIR_OK (regno, mode))
9987 if (TARGET_ZARCH
9988 || (mode != TFmode && mode != TCmode && mode != TDmode))
9989 return true;
9991 break;
9992 case CC_REGS:
9993 if (GET_MODE_CLASS (mode) == MODE_CC)
9994 return true;
9995 break;
9996 case ACCESS_REGS:
9997 if (REGNO_PAIR_OK (regno, mode))
9999 if (mode == SImode || mode == Pmode)
10000 return true;
10002 break;
10003 default:
10004 return false;
10007 return false;
10010 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10012 bool
10013 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10015 /* Once we've decided upon a register to use as base register, it must
10016 no longer be used for any other purpose. */
10017 if (cfun->machine->base_reg)
10018 if (REGNO (cfun->machine->base_reg) == old_reg
10019 || REGNO (cfun->machine->base_reg) == new_reg)
10020 return false;
10022 /* Prevent regrename from using call-saved regs which haven't
10023 actually been saved. This is necessary since regrename assumes
10024 the backend save/restore decisions are based on
10025 df_regs_ever_live. Since we have our own routine we have to tell
10026 regrename manually about it. */
10027 if (GENERAL_REGNO_P (new_reg)
10028 && !call_really_used_regs[new_reg]
10029 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10030 return false;
10032 return true;
10035 /* Return nonzero if register REGNO can be used as a scratch register
10036 in peephole2. */
10038 static bool
10039 s390_hard_regno_scratch_ok (unsigned int regno)
10041 /* See s390_hard_regno_rename_ok. */
10042 if (GENERAL_REGNO_P (regno)
10043 && !call_really_used_regs[regno]
10044 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10045 return false;
10047 return true;
10050 /* Maximum number of registers to represent a value of mode MODE
10051 in a register of class RCLASS. */
10054 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10056 int reg_size;
10057 bool reg_pair_required_p = false;
10059 switch (rclass)
10061 case FP_REGS:
10062 case VEC_REGS:
10063 reg_size = TARGET_VX ? 16 : 8;
10065 /* TF and TD modes would fit into a VR but we put them into a
10066 register pair since we do not have 128bit FP instructions on
10067 full VRs. */
10068 if (TARGET_VX
10069 && SCALAR_FLOAT_MODE_P (mode)
10070 && GET_MODE_SIZE (mode) >= 16)
10071 reg_pair_required_p = true;
10073 /* Even if complex types would fit into a single FPR/VR we force
10074 them into a register pair to deal with the parts more easily.
10075 (FIXME: What about complex ints?) */
10076 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10077 reg_pair_required_p = true;
10078 break;
10079 case ACCESS_REGS:
10080 reg_size = 4;
10081 break;
10082 default:
10083 reg_size = UNITS_PER_WORD;
10084 break;
10087 if (reg_pair_required_p)
10088 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10090 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10093 /* Return TRUE if changing mode from FROM to TO should not be allowed
10094 for register class CLASS. */
10097 s390_cannot_change_mode_class (machine_mode from_mode,
10098 machine_mode to_mode,
10099 enum reg_class rclass)
10101 machine_mode small_mode;
10102 machine_mode big_mode;
10104 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10105 return 0;
10107 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10109 small_mode = from_mode;
10110 big_mode = to_mode;
10112 else
10114 small_mode = to_mode;
10115 big_mode = from_mode;
10118 /* Values residing in VRs are little-endian style. All modes are
10119 placed left-aligned in an VR. This means that we cannot allow
10120 switching between modes with differing sizes. Also if the vector
10121 facility is available we still place TFmode values in VR register
10122 pairs, since the only instructions we have operating on TFmodes
10123 only deal with register pairs. Therefore we have to allow DFmode
10124 subregs of TFmodes to enable the TFmode splitters. */
10125 if (reg_classes_intersect_p (VEC_REGS, rclass)
10126 && (GET_MODE_SIZE (small_mode) < 8
10127 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10128 return 1;
10130 /* Likewise for access registers, since they have only half the
10131 word size on 64-bit. */
10132 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10133 return 1;
10135 return 0;
10138 /* Return true if we use LRA instead of reload pass. */
10139 static bool
10140 s390_lra_p (void)
10142 return s390_lra_flag;
10145 /* Return true if register FROM can be eliminated via register TO. */
10147 static bool
10148 s390_can_eliminate (const int from, const int to)
10150 /* On zSeries machines, we have not marked the base register as fixed.
10151 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10152 If a function requires the base register, we say here that this
10153 elimination cannot be performed. This will cause reload to free
10154 up the base register (as if it were fixed). On the other hand,
10155 if the current function does *not* require the base register, we
10156 say here the elimination succeeds, which in turn allows reload
10157 to allocate the base register for any other purpose. */
10158 if (from == BASE_REGNUM && to == BASE_REGNUM)
10160 if (TARGET_CPU_ZARCH)
10162 s390_init_frame_layout ();
10163 return cfun->machine->base_reg == NULL_RTX;
10166 return false;
10169 /* Everything else must point into the stack frame. */
10170 gcc_assert (to == STACK_POINTER_REGNUM
10171 || to == HARD_FRAME_POINTER_REGNUM);
10173 gcc_assert (from == FRAME_POINTER_REGNUM
10174 || from == ARG_POINTER_REGNUM
10175 || from == RETURN_ADDRESS_POINTER_REGNUM);
10177 /* Make sure we actually saved the return address. */
10178 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10179 if (!crtl->calls_eh_return
10180 && !cfun->stdarg
10181 && !cfun_frame_layout.save_return_addr_p)
10182 return false;
10184 return true;
10187 /* Return offset between register FROM and TO initially after prolog. */
10189 HOST_WIDE_INT
10190 s390_initial_elimination_offset (int from, int to)
10192 HOST_WIDE_INT offset;
10194 /* ??? Why are we called for non-eliminable pairs? */
10195 if (!s390_can_eliminate (from, to))
10196 return 0;
10198 switch (from)
10200 case FRAME_POINTER_REGNUM:
10201 offset = (get_frame_size()
10202 + STACK_POINTER_OFFSET
10203 + crtl->outgoing_args_size);
10204 break;
10206 case ARG_POINTER_REGNUM:
10207 s390_init_frame_layout ();
10208 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10209 break;
10211 case RETURN_ADDRESS_POINTER_REGNUM:
10212 s390_init_frame_layout ();
10214 if (cfun_frame_layout.first_save_gpr_slot == -1)
10216 /* If it turns out that for stdarg nothing went into the reg
10217 save area we also do not need the return address
10218 pointer. */
10219 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10220 return 0;
10222 gcc_unreachable ();
10225 /* In order to make the following work it is not necessary for
10226 r14 to have a save slot. It is sufficient if one other GPR
10227 got one. Since the GPRs are always stored without gaps we
10228 are able to calculate where the r14 save slot would
10229 reside. */
10230 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10231 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10232 UNITS_PER_LONG);
10233 break;
10235 case BASE_REGNUM:
10236 offset = 0;
10237 break;
10239 default:
10240 gcc_unreachable ();
10243 return offset;
10246 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10247 to register BASE. Return generated insn. */
10249 static rtx
10250 save_fpr (rtx base, int offset, int regnum)
10252 rtx addr;
10253 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10255 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10256 set_mem_alias_set (addr, get_varargs_alias_set ());
10257 else
10258 set_mem_alias_set (addr, get_frame_alias_set ());
10260 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10263 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10264 to register BASE. Return generated insn. */
10266 static rtx
10267 restore_fpr (rtx base, int offset, int regnum)
10269 rtx addr;
10270 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10271 set_mem_alias_set (addr, get_frame_alias_set ());
10273 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10276 /* Return true if REGNO is a global register, but not one
10277 of the special ones that need to be saved/restored in anyway. */
10279 static inline bool
10280 global_not_special_regno_p (int regno)
10282 return (global_regs[regno]
10283 /* These registers are special and need to be
10284 restored in any case. */
10285 && !(regno == STACK_POINTER_REGNUM
10286 || regno == RETURN_REGNUM
10287 || regno == BASE_REGNUM
10288 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10291 /* Generate insn to save registers FIRST to LAST into
10292 the register save area located at offset OFFSET
10293 relative to register BASE. */
10295 static rtx
10296 save_gprs (rtx base, int offset, int first, int last)
10298 rtx addr, insn, note;
10299 int i;
10301 addr = plus_constant (Pmode, base, offset);
10302 addr = gen_rtx_MEM (Pmode, addr);
10304 set_mem_alias_set (addr, get_frame_alias_set ());
10306 /* Special-case single register. */
10307 if (first == last)
10309 if (TARGET_64BIT)
10310 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10311 else
10312 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10314 if (!global_not_special_regno_p (first))
10315 RTX_FRAME_RELATED_P (insn) = 1;
10316 return insn;
10320 insn = gen_store_multiple (addr,
10321 gen_rtx_REG (Pmode, first),
10322 GEN_INT (last - first + 1));
10324 if (first <= 6 && cfun->stdarg)
10325 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10327 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10329 if (first + i <= 6)
10330 set_mem_alias_set (mem, get_varargs_alias_set ());
10333 /* We need to set the FRAME_RELATED flag on all SETs
10334 inside the store-multiple pattern.
10336 However, we must not emit DWARF records for registers 2..5
10337 if they are stored for use by variable arguments ...
10339 ??? Unfortunately, it is not enough to simply not the
10340 FRAME_RELATED flags for those SETs, because the first SET
10341 of the PARALLEL is always treated as if it had the flag
10342 set, even if it does not. Therefore we emit a new pattern
10343 without those registers as REG_FRAME_RELATED_EXPR note. */
10345 if (first >= 6 && !global_not_special_regno_p (first))
10347 rtx pat = PATTERN (insn);
10349 for (i = 0; i < XVECLEN (pat, 0); i++)
10350 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10351 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10352 0, i)))))
10353 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10355 RTX_FRAME_RELATED_P (insn) = 1;
10357 else if (last >= 6)
10359 int start;
10361 for (start = first >= 6 ? first : 6; start <= last; start++)
10362 if (!global_not_special_regno_p (start))
10363 break;
10365 if (start > last)
10366 return insn;
10368 addr = plus_constant (Pmode, base,
10369 offset + (start - first) * UNITS_PER_LONG);
10371 if (start == last)
10373 if (TARGET_64BIT)
10374 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10375 gen_rtx_REG (Pmode, start));
10376 else
10377 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10378 gen_rtx_REG (Pmode, start));
10379 note = PATTERN (note);
10381 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10382 RTX_FRAME_RELATED_P (insn) = 1;
10384 return insn;
10387 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10388 gen_rtx_REG (Pmode, start),
10389 GEN_INT (last - start + 1));
10390 note = PATTERN (note);
10392 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10394 for (i = 0; i < XVECLEN (note, 0); i++)
10395 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10396 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10397 0, i)))))
10398 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10400 RTX_FRAME_RELATED_P (insn) = 1;
10403 return insn;
10406 /* Generate insn to restore registers FIRST to LAST from
10407 the register save area located at offset OFFSET
10408 relative to register BASE. */
10410 static rtx
10411 restore_gprs (rtx base, int offset, int first, int last)
10413 rtx addr, insn;
10415 addr = plus_constant (Pmode, base, offset);
10416 addr = gen_rtx_MEM (Pmode, addr);
10417 set_mem_alias_set (addr, get_frame_alias_set ());
10419 /* Special-case single register. */
10420 if (first == last)
10422 if (TARGET_64BIT)
10423 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10424 else
10425 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10427 RTX_FRAME_RELATED_P (insn) = 1;
10428 return insn;
10431 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10432 addr,
10433 GEN_INT (last - first + 1));
10434 RTX_FRAME_RELATED_P (insn) = 1;
10435 return insn;
10438 /* Return insn sequence to load the GOT register. */
10440 static GTY(()) rtx got_symbol;
10441 rtx_insn *
10442 s390_load_got (void)
10444 rtx_insn *insns;
10446 /* We cannot use pic_offset_table_rtx here since we use this
10447 function also for non-pic if __tls_get_offset is called and in
10448 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10449 aren't usable. */
10450 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10452 if (!got_symbol)
10454 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10455 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10458 start_sequence ();
10460 if (TARGET_CPU_ZARCH)
10462 emit_move_insn (got_rtx, got_symbol);
10464 else
10466 rtx offset;
10468 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10469 UNSPEC_LTREL_OFFSET);
10470 offset = gen_rtx_CONST (Pmode, offset);
10471 offset = force_const_mem (Pmode, offset);
10473 emit_move_insn (got_rtx, offset);
10475 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10476 UNSPEC_LTREL_BASE);
10477 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10479 emit_move_insn (got_rtx, offset);
10482 insns = get_insns ();
10483 end_sequence ();
10484 return insns;
10487 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10488 and the change to the stack pointer. */
10490 static void
10491 s390_emit_stack_tie (void)
10493 rtx mem = gen_frame_mem (BLKmode,
10494 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10496 emit_insn (gen_stack_tie (mem));
10499 /* Copy GPRS into FPR save slots. */
10501 static void
10502 s390_save_gprs_to_fprs (void)
10504 int i;
10506 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10507 return;
10509 for (i = 6; i < 16; i++)
10511 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10513 rtx_insn *insn =
10514 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10515 gen_rtx_REG (DImode, i));
10516 RTX_FRAME_RELATED_P (insn) = 1;
10517 /* This prevents dwarf2cfi from interpreting the set. Doing
10518 so it might emit def_cfa_register infos setting an FPR as
10519 new CFA. */
10520 add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10525 /* Restore GPRs from FPR save slots. */
10527 static void
10528 s390_restore_gprs_from_fprs (void)
10530 int i;
10532 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10533 return;
10535 for (i = 6; i < 16; i++)
10537 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10539 rtx_insn *insn =
10540 emit_move_insn (gen_rtx_REG (DImode, i),
10541 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10542 df_set_regs_ever_live (i, true);
10543 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10544 if (i == STACK_POINTER_REGNUM)
10545 add_reg_note (insn, REG_CFA_DEF_CFA,
10546 plus_constant (Pmode, stack_pointer_rtx,
10547 STACK_POINTER_OFFSET));
10548 RTX_FRAME_RELATED_P (insn) = 1;
10554 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10555 generation. */
10557 namespace {
10559 const pass_data pass_data_s390_early_mach =
10561 RTL_PASS, /* type */
10562 "early_mach", /* name */
10563 OPTGROUP_NONE, /* optinfo_flags */
10564 TV_MACH_DEP, /* tv_id */
10565 0, /* properties_required */
10566 0, /* properties_provided */
10567 0, /* properties_destroyed */
10568 0, /* todo_flags_start */
10569 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10572 class pass_s390_early_mach : public rtl_opt_pass
10574 public:
10575 pass_s390_early_mach (gcc::context *ctxt)
10576 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10579 /* opt_pass methods: */
10580 virtual unsigned int execute (function *);
10582 }; // class pass_s390_early_mach
10584 unsigned int
10585 pass_s390_early_mach::execute (function *fun)
10587 rtx_insn *insn;
10589 /* Try to get rid of the FPR clobbers. */
10590 s390_optimize_nonescaping_tx ();
10592 /* Re-compute register info. */
10593 s390_register_info ();
10595 /* If we're using a base register, ensure that it is always valid for
10596 the first non-prologue instruction. */
10597 if (fun->machine->base_reg)
10598 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10600 /* Annotate all constant pool references to let the scheduler know
10601 they implicitly use the base register. */
10602 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10603 if (INSN_P (insn))
10605 annotate_constant_pool_refs (&PATTERN (insn));
10606 df_insn_rescan (insn);
10608 return 0;
10611 } // anon namespace
10613 /* Expand the prologue into a bunch of separate insns. */
10615 void
10616 s390_emit_prologue (void)
10618 rtx insn, addr;
10619 rtx temp_reg;
10620 int i;
10621 int offset;
10622 int next_fpr = 0;
10624 /* Choose best register to use for temp use within prologue.
10625 TPF with profiling must avoid the register 14 - the tracing function
10626 needs the original contents of r14 to be preserved. */
10628 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10629 && !crtl->is_leaf
10630 && !TARGET_TPF_PROFILING)
10631 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10632 else if (flag_split_stack && cfun->stdarg)
10633 temp_reg = gen_rtx_REG (Pmode, 12);
10634 else
10635 temp_reg = gen_rtx_REG (Pmode, 1);
10637 s390_save_gprs_to_fprs ();
10639 /* Save call saved gprs. */
10640 if (cfun_frame_layout.first_save_gpr != -1)
10642 insn = save_gprs (stack_pointer_rtx,
10643 cfun_frame_layout.gprs_offset +
10644 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10645 - cfun_frame_layout.first_save_gpr_slot),
10646 cfun_frame_layout.first_save_gpr,
10647 cfun_frame_layout.last_save_gpr);
10648 emit_insn (insn);
10651 /* Dummy insn to mark literal pool slot. */
10653 if (cfun->machine->base_reg)
10654 emit_insn (gen_main_pool (cfun->machine->base_reg));
10656 offset = cfun_frame_layout.f0_offset;
10658 /* Save f0 and f2. */
10659 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10661 if (cfun_fpr_save_p (i))
10663 save_fpr (stack_pointer_rtx, offset, i);
10664 offset += 8;
10666 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10667 offset += 8;
10670 /* Save f4 and f6. */
10671 offset = cfun_frame_layout.f4_offset;
10672 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10674 if (cfun_fpr_save_p (i))
10676 insn = save_fpr (stack_pointer_rtx, offset, i);
10677 offset += 8;
10679 /* If f4 and f6 are call clobbered they are saved due to
10680 stdargs and therefore are not frame related. */
10681 if (!call_really_used_regs[i])
10682 RTX_FRAME_RELATED_P (insn) = 1;
10684 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10685 offset += 8;
10688 if (TARGET_PACKED_STACK
10689 && cfun_save_high_fprs_p
10690 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10692 offset = (cfun_frame_layout.f8_offset
10693 + (cfun_frame_layout.high_fprs - 1) * 8);
10695 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10696 if (cfun_fpr_save_p (i))
10698 insn = save_fpr (stack_pointer_rtx, offset, i);
10700 RTX_FRAME_RELATED_P (insn) = 1;
10701 offset -= 8;
10703 if (offset >= cfun_frame_layout.f8_offset)
10704 next_fpr = i;
10707 if (!TARGET_PACKED_STACK)
10708 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10710 if (flag_stack_usage_info)
10711 current_function_static_stack_size = cfun_frame_layout.frame_size;
10713 /* Decrement stack pointer. */
10715 if (cfun_frame_layout.frame_size > 0)
10717 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10718 rtx real_frame_off;
10720 if (s390_stack_size)
10722 HOST_WIDE_INT stack_guard;
10724 if (s390_stack_guard)
10725 stack_guard = s390_stack_guard;
10726 else
10728 /* If no value for stack guard is provided the smallest power of 2
10729 larger than the current frame size is chosen. */
10730 stack_guard = 1;
10731 while (stack_guard < cfun_frame_layout.frame_size)
10732 stack_guard <<= 1;
10735 if (cfun_frame_layout.frame_size >= s390_stack_size)
10737 warning (0, "frame size of function %qs is %wd"
10738 " bytes exceeding user provided stack limit of "
10739 "%d bytes. "
10740 "An unconditional trap is added.",
10741 current_function_name(), cfun_frame_layout.frame_size,
10742 s390_stack_size);
10743 emit_insn (gen_trap ());
10744 emit_barrier ();
10746 else
10748 /* stack_guard has to be smaller than s390_stack_size.
10749 Otherwise we would emit an AND with zero which would
10750 not match the test under mask pattern. */
10751 if (stack_guard >= s390_stack_size)
10753 warning (0, "frame size of function %qs is %wd"
10754 " bytes which is more than half the stack size. "
10755 "The dynamic check would not be reliable. "
10756 "No check emitted for this function.",
10757 current_function_name(),
10758 cfun_frame_layout.frame_size);
10760 else
10762 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10763 & ~(stack_guard - 1));
10765 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10766 GEN_INT (stack_check_mask));
10767 if (TARGET_64BIT)
10768 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10769 t, const0_rtx),
10770 t, const0_rtx, const0_rtx));
10771 else
10772 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10773 t, const0_rtx),
10774 t, const0_rtx, const0_rtx));
10779 if (s390_warn_framesize > 0
10780 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10781 warning (0, "frame size of %qs is %wd bytes",
10782 current_function_name (), cfun_frame_layout.frame_size);
10784 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10785 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10787 /* Save incoming stack pointer into temp reg. */
10788 if (TARGET_BACKCHAIN || next_fpr)
10789 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10791 /* Subtract frame size from stack pointer. */
10793 if (DISP_IN_RANGE (INTVAL (frame_off)))
10795 insn = gen_rtx_SET (stack_pointer_rtx,
10796 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10797 frame_off));
10798 insn = emit_insn (insn);
10800 else
10802 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10803 frame_off = force_const_mem (Pmode, frame_off);
10805 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10806 annotate_constant_pool_refs (&PATTERN (insn));
10809 RTX_FRAME_RELATED_P (insn) = 1;
10810 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10811 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10812 gen_rtx_SET (stack_pointer_rtx,
10813 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10814 real_frame_off)));
10816 /* Set backchain. */
10818 if (TARGET_BACKCHAIN)
10820 if (cfun_frame_layout.backchain_offset)
10821 addr = gen_rtx_MEM (Pmode,
10822 plus_constant (Pmode, stack_pointer_rtx,
10823 cfun_frame_layout.backchain_offset));
10824 else
10825 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10826 set_mem_alias_set (addr, get_frame_alias_set ());
10827 insn = emit_insn (gen_move_insn (addr, temp_reg));
10830 /* If we support non-call exceptions (e.g. for Java),
10831 we need to make sure the backchain pointer is set up
10832 before any possibly trapping memory access. */
10833 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10835 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10836 emit_clobber (addr);
10840 /* Save fprs 8 - 15 (64 bit ABI). */
10842 if (cfun_save_high_fprs_p && next_fpr)
10844 /* If the stack might be accessed through a different register
10845 we have to make sure that the stack pointer decrement is not
10846 moved below the use of the stack slots. */
10847 s390_emit_stack_tie ();
10849 insn = emit_insn (gen_add2_insn (temp_reg,
10850 GEN_INT (cfun_frame_layout.f8_offset)));
10852 offset = 0;
10854 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10855 if (cfun_fpr_save_p (i))
10857 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10858 cfun_frame_layout.frame_size
10859 + cfun_frame_layout.f8_offset
10860 + offset);
10862 insn = save_fpr (temp_reg, offset, i);
10863 offset += 8;
10864 RTX_FRAME_RELATED_P (insn) = 1;
10865 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10866 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10867 gen_rtx_REG (DFmode, i)));
10871 /* Set frame pointer, if needed. */
10873 if (frame_pointer_needed)
10875 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10876 RTX_FRAME_RELATED_P (insn) = 1;
10879 /* Set up got pointer, if needed. */
10881 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10883 rtx_insn *insns = s390_load_got ();
10885 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10886 annotate_constant_pool_refs (&PATTERN (insn));
10888 emit_insn (insns);
10891 if (TARGET_TPF_PROFILING)
10893 /* Generate a BAS instruction to serve as a function
10894 entry intercept to facilitate the use of tracing
10895 algorithms located at the branch target. */
10896 emit_insn (gen_prologue_tpf ());
10898 /* Emit a blockage here so that all code
10899 lies between the profiling mechanisms. */
10900 emit_insn (gen_blockage ());
10904 /* Expand the epilogue into a bunch of separate insns. */
10906 void
10907 s390_emit_epilogue (bool sibcall)
10909 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10910 int area_bottom, area_top, offset = 0;
10911 int next_offset;
10912 rtvec p;
10913 int i;
10915 if (TARGET_TPF_PROFILING)
10918 /* Generate a BAS instruction to serve as a function
10919 entry intercept to facilitate the use of tracing
10920 algorithms located at the branch target. */
10922 /* Emit a blockage here so that all code
10923 lies between the profiling mechanisms. */
10924 emit_insn (gen_blockage ());
10926 emit_insn (gen_epilogue_tpf ());
10929 /* Check whether to use frame or stack pointer for restore. */
10931 frame_pointer = (frame_pointer_needed
10932 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10934 s390_frame_area (&area_bottom, &area_top);
10936 /* Check whether we can access the register save area.
10937 If not, increment the frame pointer as required. */
10939 if (area_top <= area_bottom)
10941 /* Nothing to restore. */
10943 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10944 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10946 /* Area is in range. */
10947 offset = cfun_frame_layout.frame_size;
10949 else
10951 rtx insn, frame_off, cfa;
10953 offset = area_bottom < 0 ? -area_bottom : 0;
10954 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10956 cfa = gen_rtx_SET (frame_pointer,
10957 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10958 if (DISP_IN_RANGE (INTVAL (frame_off)))
10960 insn = gen_rtx_SET (frame_pointer,
10961 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10962 insn = emit_insn (insn);
10964 else
10966 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10967 frame_off = force_const_mem (Pmode, frame_off);
10969 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10970 annotate_constant_pool_refs (&PATTERN (insn));
10972 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10973 RTX_FRAME_RELATED_P (insn) = 1;
10976 /* Restore call saved fprs. */
10978 if (TARGET_64BIT)
10980 if (cfun_save_high_fprs_p)
10982 next_offset = cfun_frame_layout.f8_offset;
10983 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10985 if (cfun_fpr_save_p (i))
10987 restore_fpr (frame_pointer,
10988 offset + next_offset, i);
10989 cfa_restores
10990 = alloc_reg_note (REG_CFA_RESTORE,
10991 gen_rtx_REG (DFmode, i), cfa_restores);
10992 next_offset += 8;
10998 else
11000 next_offset = cfun_frame_layout.f4_offset;
11001 /* f4, f6 */
11002 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11004 if (cfun_fpr_save_p (i))
11006 restore_fpr (frame_pointer,
11007 offset + next_offset, i);
11008 cfa_restores
11009 = alloc_reg_note (REG_CFA_RESTORE,
11010 gen_rtx_REG (DFmode, i), cfa_restores);
11011 next_offset += 8;
11013 else if (!TARGET_PACKED_STACK)
11014 next_offset += 8;
11019 /* Return register. */
11021 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11023 /* Restore call saved gprs. */
11025 if (cfun_frame_layout.first_restore_gpr != -1)
11027 rtx insn, addr;
11028 int i;
11030 /* Check for global register and save them
11031 to stack location from where they get restored. */
11033 for (i = cfun_frame_layout.first_restore_gpr;
11034 i <= cfun_frame_layout.last_restore_gpr;
11035 i++)
11037 if (global_not_special_regno_p (i))
11039 addr = plus_constant (Pmode, frame_pointer,
11040 offset + cfun_frame_layout.gprs_offset
11041 + (i - cfun_frame_layout.first_save_gpr_slot)
11042 * UNITS_PER_LONG);
11043 addr = gen_rtx_MEM (Pmode, addr);
11044 set_mem_alias_set (addr, get_frame_alias_set ());
11045 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11047 else
11048 cfa_restores
11049 = alloc_reg_note (REG_CFA_RESTORE,
11050 gen_rtx_REG (Pmode, i), cfa_restores);
11053 if (! sibcall)
11055 /* Fetch return address from stack before load multiple,
11056 this will do good for scheduling.
11058 Only do this if we already decided that r14 needs to be
11059 saved to a stack slot. (And not just because r14 happens to
11060 be in between two GPRs which need saving.) Otherwise it
11061 would be difficult to take that decision back in
11062 s390_optimize_prologue. */
11063 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
11065 int return_regnum = find_unused_clobbered_reg();
11066 if (!return_regnum)
11067 return_regnum = 4;
11068 return_reg = gen_rtx_REG (Pmode, return_regnum);
11070 addr = plus_constant (Pmode, frame_pointer,
11071 offset + cfun_frame_layout.gprs_offset
11072 + (RETURN_REGNUM
11073 - cfun_frame_layout.first_save_gpr_slot)
11074 * UNITS_PER_LONG);
11075 addr = gen_rtx_MEM (Pmode, addr);
11076 set_mem_alias_set (addr, get_frame_alias_set ());
11077 emit_move_insn (return_reg, addr);
11079 /* Once we did that optimization we have to make sure
11080 s390_optimize_prologue does not try to remove the
11081 store of r14 since we will not be able to find the
11082 load issued here. */
11083 cfun_frame_layout.save_return_addr_p = true;
11087 insn = restore_gprs (frame_pointer,
11088 offset + cfun_frame_layout.gprs_offset
11089 + (cfun_frame_layout.first_restore_gpr
11090 - cfun_frame_layout.first_save_gpr_slot)
11091 * UNITS_PER_LONG,
11092 cfun_frame_layout.first_restore_gpr,
11093 cfun_frame_layout.last_restore_gpr);
11094 insn = emit_insn (insn);
11095 REG_NOTES (insn) = cfa_restores;
11096 add_reg_note (insn, REG_CFA_DEF_CFA,
11097 plus_constant (Pmode, stack_pointer_rtx,
11098 STACK_POINTER_OFFSET));
11099 RTX_FRAME_RELATED_P (insn) = 1;
11102 s390_restore_gprs_from_fprs ();
11104 if (! sibcall)
11107 /* Return to caller. */
11109 p = rtvec_alloc (2);
11111 RTVEC_ELT (p, 0) = ret_rtx;
11112 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11113 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11117 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11119 static void
11120 s300_set_up_by_prologue (hard_reg_set_container *regs)
11122 if (cfun->machine->base_reg
11123 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11124 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11127 /* -fsplit-stack support. */
11129 /* A SYMBOL_REF for __morestack. */
11130 static GTY(()) rtx morestack_ref;
11132 /* When using -fsplit-stack, the allocation routines set a field in
11133 the TCB to the bottom of the stack plus this much space, measured
11134 in bytes. */
11136 #define SPLIT_STACK_AVAILABLE 1024
11138 /* Emit -fsplit-stack prologue, which goes before the regular function
11139 prologue. */
11141 void
11142 s390_expand_split_stack_prologue (void)
11144 rtx r1, guard, cc = NULL;
11145 rtx_insn *insn;
11146 /* Offset from thread pointer to __private_ss. */
11147 int psso = TARGET_64BIT ? 0x38 : 0x20;
11148 /* Pointer size in bytes. */
11149 /* Frame size and argument size - the two parameters to __morestack. */
11150 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11151 /* Align argument size to 8 bytes - simplifies __morestack code. */
11152 HOST_WIDE_INT args_size = crtl->args.size >= 0
11153 ? ((crtl->args.size + 7) & ~7)
11154 : 0;
11155 /* Label to be called by __morestack. */
11156 rtx_code_label *call_done = NULL;
11157 rtx_code_label *parm_base = NULL;
11158 rtx tmp;
11160 gcc_assert (flag_split_stack && reload_completed);
11161 if (!TARGET_CPU_ZARCH)
11163 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11164 return;
11167 r1 = gen_rtx_REG (Pmode, 1);
11169 /* If no stack frame will be allocated, don't do anything. */
11170 if (!frame_size)
11172 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11174 /* If va_start is used, just use r15. */
11175 emit_move_insn (r1,
11176 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11177 GEN_INT (STACK_POINTER_OFFSET)));
11180 return;
11183 if (morestack_ref == NULL_RTX)
11185 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11186 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11187 | SYMBOL_FLAG_FUNCTION);
11190 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11192 /* If frame_size will fit in an add instruction, do a stack space
11193 check, and only call __morestack if there's not enough space. */
11195 /* Get thread pointer. r1 is the only register we can always destroy - r0
11196 could contain a static chain (and cannot be used to address memory
11197 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11198 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11199 /* Aim at __private_ss. */
11200 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11202 /* If less that 1kiB used, skip addition and compare directly with
11203 __private_ss. */
11204 if (frame_size > SPLIT_STACK_AVAILABLE)
11206 emit_move_insn (r1, guard);
11207 if (TARGET_64BIT)
11208 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11209 else
11210 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11211 guard = r1;
11214 /* Compare the (maybe adjusted) guard with the stack pointer. */
11215 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11218 call_done = gen_label_rtx ();
11219 parm_base = gen_label_rtx ();
11221 /* Emit the parameter block. */
11222 tmp = gen_split_stack_data (parm_base, call_done,
11223 GEN_INT (frame_size),
11224 GEN_INT (args_size));
11225 insn = emit_insn (tmp);
11226 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11227 LABEL_NUSES (call_done)++;
11228 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11229 LABEL_NUSES (parm_base)++;
11231 /* %r1 = litbase. */
11232 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11233 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11234 LABEL_NUSES (parm_base)++;
11236 /* Now, we need to call __morestack. It has very special calling
11237 conventions: it preserves param/return/static chain registers for
11238 calling main function body, and looks for its own parameters at %r1. */
11240 if (cc != NULL)
11242 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11244 insn = emit_jump_insn (tmp);
11245 JUMP_LABEL (insn) = call_done;
11246 LABEL_NUSES (call_done)++;
11248 /* Mark the jump as very unlikely to be taken. */
11249 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11251 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11253 /* If va_start is used, and __morestack was not called, just use
11254 r15. */
11255 emit_move_insn (r1,
11256 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11257 GEN_INT (STACK_POINTER_OFFSET)));
11260 else
11262 tmp = gen_split_stack_call (morestack_ref, call_done);
11263 insn = emit_jump_insn (tmp);
11264 JUMP_LABEL (insn) = call_done;
11265 LABEL_NUSES (call_done)++;
11266 emit_barrier ();
11269 /* __morestack will call us here. */
11271 emit_label (call_done);
11274 /* We may have to tell the dataflow pass that the split stack prologue
11275 is initializing a register. */
11277 static void
11278 s390_live_on_entry (bitmap regs)
11280 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11282 gcc_assert (flag_split_stack);
11283 bitmap_set_bit (regs, 1);
11287 /* Return true if the function can use simple_return to return outside
11288 of a shrink-wrapped region. At present shrink-wrapping is supported
11289 in all cases. */
11291 bool
11292 s390_can_use_simple_return_insn (void)
11294 return true;
11297 /* Return true if the epilogue is guaranteed to contain only a return
11298 instruction and if a direct return can therefore be used instead.
11299 One of the main advantages of using direct return instructions
11300 is that we can then use conditional returns. */
11302 bool
11303 s390_can_use_return_insn (void)
11305 int i;
11307 if (!reload_completed)
11308 return false;
11310 if (crtl->profile)
11311 return false;
11313 if (TARGET_TPF_PROFILING)
11314 return false;
11316 for (i = 0; i < 16; i++)
11317 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11318 return false;
11320 /* For 31 bit this is not covered by the frame_size check below
11321 since f4, f6 are saved in the register save area without needing
11322 additional stack space. */
11323 if (!TARGET_64BIT
11324 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11325 return false;
11327 if (cfun->machine->base_reg
11328 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11329 return false;
11331 return cfun_frame_layout.frame_size == 0;
11334 /* The VX ABI differs for vararg functions. Therefore we need the
11335 prototype of the callee to be available when passing vector type
11336 values. */
11337 static const char *
11338 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11340 return ((TARGET_VX_ABI
11341 && typelist == 0
11342 && VECTOR_TYPE_P (TREE_TYPE (val))
11343 && (funcdecl == NULL_TREE
11344 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11345 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11346 ? N_("Vector argument passed to unprototyped function")
11347 : NULL);
11351 /* Return the size in bytes of a function argument of
11352 type TYPE and/or mode MODE. At least one of TYPE or
11353 MODE must be specified. */
11355 static int
11356 s390_function_arg_size (machine_mode mode, const_tree type)
11358 if (type)
11359 return int_size_in_bytes (type);
11361 /* No type info available for some library calls ... */
11362 if (mode != BLKmode)
11363 return GET_MODE_SIZE (mode);
11365 /* If we have neither type nor mode, abort */
11366 gcc_unreachable ();
11369 /* Return true if a function argument of type TYPE and mode MODE
11370 is to be passed in a vector register, if available. */
11372 bool
11373 s390_function_arg_vector (machine_mode mode, const_tree type)
11375 if (!TARGET_VX_ABI)
11376 return false;
11378 if (s390_function_arg_size (mode, type) > 16)
11379 return false;
11381 /* No type info available for some library calls ... */
11382 if (!type)
11383 return VECTOR_MODE_P (mode);
11385 /* The ABI says that record types with a single member are treated
11386 just like that member would be. */
11387 while (TREE_CODE (type) == RECORD_TYPE)
11389 tree field, single = NULL_TREE;
11391 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11393 if (TREE_CODE (field) != FIELD_DECL)
11394 continue;
11396 if (single == NULL_TREE)
11397 single = TREE_TYPE (field);
11398 else
11399 return false;
11402 if (single == NULL_TREE)
11403 return false;
11404 else
11406 /* If the field declaration adds extra byte due to
11407 e.g. padding this is not accepted as vector type. */
11408 if (int_size_in_bytes (single) <= 0
11409 || int_size_in_bytes (single) != int_size_in_bytes (type))
11410 return false;
11411 type = single;
11415 return VECTOR_TYPE_P (type);
11418 /* Return true if a function argument of type TYPE and mode MODE
11419 is to be passed in a floating-point register, if available. */
11421 static bool
11422 s390_function_arg_float (machine_mode mode, const_tree type)
11424 if (s390_function_arg_size (mode, type) > 8)
11425 return false;
11427 /* Soft-float changes the ABI: no floating-point registers are used. */
11428 if (TARGET_SOFT_FLOAT)
11429 return false;
11431 /* No type info available for some library calls ... */
11432 if (!type)
11433 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11435 /* The ABI says that record types with a single member are treated
11436 just like that member would be. */
11437 while (TREE_CODE (type) == RECORD_TYPE)
11439 tree field, single = NULL_TREE;
11441 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11443 if (TREE_CODE (field) != FIELD_DECL)
11444 continue;
11446 if (single == NULL_TREE)
11447 single = TREE_TYPE (field);
11448 else
11449 return false;
11452 if (single == NULL_TREE)
11453 return false;
11454 else
11455 type = single;
11458 return TREE_CODE (type) == REAL_TYPE;
11461 /* Return true if a function argument of type TYPE and mode MODE
11462 is to be passed in an integer register, or a pair of integer
11463 registers, if available. */
11465 static bool
11466 s390_function_arg_integer (machine_mode mode, const_tree type)
11468 int size = s390_function_arg_size (mode, type);
11469 if (size > 8)
11470 return false;
11472 /* No type info available for some library calls ... */
11473 if (!type)
11474 return GET_MODE_CLASS (mode) == MODE_INT
11475 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11477 /* We accept small integral (and similar) types. */
11478 if (INTEGRAL_TYPE_P (type)
11479 || POINTER_TYPE_P (type)
11480 || TREE_CODE (type) == NULLPTR_TYPE
11481 || TREE_CODE (type) == OFFSET_TYPE
11482 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11483 return true;
11485 /* We also accept structs of size 1, 2, 4, 8 that are not
11486 passed in floating-point registers. */
11487 if (AGGREGATE_TYPE_P (type)
11488 && exact_log2 (size) >= 0
11489 && !s390_function_arg_float (mode, type))
11490 return true;
11492 return false;
11495 /* Return 1 if a function argument of type TYPE and mode MODE
11496 is to be passed by reference. The ABI specifies that only
11497 structures of size 1, 2, 4, or 8 bytes are passed by value,
11498 all other structures (and complex numbers) are passed by
11499 reference. */
11501 static bool
11502 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11503 machine_mode mode, const_tree type,
11504 bool named ATTRIBUTE_UNUSED)
11506 int size = s390_function_arg_size (mode, type);
11508 if (s390_function_arg_vector (mode, type))
11509 return false;
11511 if (size > 8)
11512 return true;
11514 if (type)
11516 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11517 return true;
11519 if (TREE_CODE (type) == COMPLEX_TYPE
11520 || TREE_CODE (type) == VECTOR_TYPE)
11521 return true;
11524 return false;
11527 /* Update the data in CUM to advance over an argument of mode MODE and
11528 data type TYPE. (TYPE is null for libcalls where that information
11529 may not be available.). The boolean NAMED specifies whether the
11530 argument is a named argument (as opposed to an unnamed argument
11531 matching an ellipsis). */
11533 static void
11534 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11535 const_tree type, bool named)
11537 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11539 if (s390_function_arg_vector (mode, type))
11541 /* We are called for unnamed vector stdarg arguments which are
11542 passed on the stack. In this case this hook does not have to
11543 do anything since stack arguments are tracked by common
11544 code. */
11545 if (!named)
11546 return;
11547 cum->vrs += 1;
11549 else if (s390_function_arg_float (mode, type))
11551 cum->fprs += 1;
11553 else if (s390_function_arg_integer (mode, type))
11555 int size = s390_function_arg_size (mode, type);
11556 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11558 else
11559 gcc_unreachable ();
11562 /* Define where to put the arguments to a function.
11563 Value is zero to push the argument on the stack,
11564 or a hard register in which to store the argument.
11566 MODE is the argument's machine mode.
11567 TYPE is the data type of the argument (as a tree).
11568 This is null for libcalls where that information may
11569 not be available.
11570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11571 the preceding args and about the function being called.
11572 NAMED is nonzero if this argument is a named parameter
11573 (otherwise it is an extra parameter matching an ellipsis).
11575 On S/390, we use general purpose registers 2 through 6 to
11576 pass integer, pointer, and certain structure arguments, and
11577 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11578 to pass floating point arguments. All remaining arguments
11579 are pushed to the stack. */
11581 static rtx
11582 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11583 const_tree type, bool named)
11585 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11587 if (!named)
11588 s390_check_type_for_vector_abi (type, true, false);
11590 if (s390_function_arg_vector (mode, type))
11592 /* Vector arguments being part of the ellipsis are passed on the
11593 stack. */
11594 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11595 return NULL_RTX;
11597 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11599 else if (s390_function_arg_float (mode, type))
11601 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11602 return NULL_RTX;
11603 else
11604 return gen_rtx_REG (mode, cum->fprs + 16);
11606 else if (s390_function_arg_integer (mode, type))
11608 int size = s390_function_arg_size (mode, type);
11609 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11611 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11612 return NULL_RTX;
11613 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11614 return gen_rtx_REG (mode, cum->gprs + 2);
11615 else if (n_gprs == 2)
11617 rtvec p = rtvec_alloc (2);
11619 RTVEC_ELT (p, 0)
11620 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11621 const0_rtx);
11622 RTVEC_ELT (p, 1)
11623 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11624 GEN_INT (4));
11626 return gen_rtx_PARALLEL (mode, p);
11630 /* After the real arguments, expand_call calls us once again
11631 with a void_type_node type. Whatever we return here is
11632 passed as operand 2 to the call expanders.
11634 We don't need this feature ... */
11635 else if (type == void_type_node)
11636 return const0_rtx;
11638 gcc_unreachable ();
11641 /* Return true if return values of type TYPE should be returned
11642 in a memory buffer whose address is passed by the caller as
11643 hidden first argument. */
11645 static bool
11646 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11648 /* We accept small integral (and similar) types. */
11649 if (INTEGRAL_TYPE_P (type)
11650 || POINTER_TYPE_P (type)
11651 || TREE_CODE (type) == OFFSET_TYPE
11652 || TREE_CODE (type) == REAL_TYPE)
11653 return int_size_in_bytes (type) > 8;
11655 /* vector types which fit into a VR. */
11656 if (TARGET_VX_ABI
11657 && VECTOR_TYPE_P (type)
11658 && int_size_in_bytes (type) <= 16)
11659 return false;
11661 /* Aggregates and similar constructs are always returned
11662 in memory. */
11663 if (AGGREGATE_TYPE_P (type)
11664 || TREE_CODE (type) == COMPLEX_TYPE
11665 || VECTOR_TYPE_P (type))
11666 return true;
11668 /* ??? We get called on all sorts of random stuff from
11669 aggregate_value_p. We can't abort, but it's not clear
11670 what's safe to return. Pretend it's a struct I guess. */
11671 return true;
11674 /* Function arguments and return values are promoted to word size. */
11676 static machine_mode
11677 s390_promote_function_mode (const_tree type, machine_mode mode,
11678 int *punsignedp,
11679 const_tree fntype ATTRIBUTE_UNUSED,
11680 int for_return ATTRIBUTE_UNUSED)
11682 if (INTEGRAL_MODE_P (mode)
11683 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11685 if (type != NULL_TREE && POINTER_TYPE_P (type))
11686 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11687 return Pmode;
11690 return mode;
11693 /* Define where to return a (scalar) value of type RET_TYPE.
11694 If RET_TYPE is null, define where to return a (scalar)
11695 value of mode MODE from a libcall. */
11697 static rtx
11698 s390_function_and_libcall_value (machine_mode mode,
11699 const_tree ret_type,
11700 const_tree fntype_or_decl,
11701 bool outgoing ATTRIBUTE_UNUSED)
11703 /* For vector return types it is important to use the RET_TYPE
11704 argument whenever available since the middle-end might have
11705 changed the mode to a scalar mode. */
11706 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11707 || (!ret_type && VECTOR_MODE_P (mode)));
11709 /* For normal functions perform the promotion as
11710 promote_function_mode would do. */
11711 if (ret_type)
11713 int unsignedp = TYPE_UNSIGNED (ret_type);
11714 mode = promote_function_mode (ret_type, mode, &unsignedp,
11715 fntype_or_decl, 1);
11718 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11719 || SCALAR_FLOAT_MODE_P (mode)
11720 || (TARGET_VX_ABI && vector_ret_type_p));
11721 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11723 if (TARGET_VX_ABI && vector_ret_type_p)
11724 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11725 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11726 return gen_rtx_REG (mode, 16);
11727 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11728 || UNITS_PER_LONG == UNITS_PER_WORD)
11729 return gen_rtx_REG (mode, 2);
11730 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11732 /* This case is triggered when returning a 64 bit value with
11733 -m31 -mzarch. Although the value would fit into a single
11734 register it has to be forced into a 32 bit register pair in
11735 order to match the ABI. */
11736 rtvec p = rtvec_alloc (2);
11738 RTVEC_ELT (p, 0)
11739 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11740 RTVEC_ELT (p, 1)
11741 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11743 return gen_rtx_PARALLEL (mode, p);
11746 gcc_unreachable ();
11749 /* Define where to return a scalar return value of type RET_TYPE. */
11751 static rtx
11752 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11753 bool outgoing)
11755 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11756 fn_decl_or_type, outgoing);
11759 /* Define where to return a scalar libcall return value of mode
11760 MODE. */
11762 static rtx
11763 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11765 return s390_function_and_libcall_value (mode, NULL_TREE,
11766 NULL_TREE, true);
11770 /* Create and return the va_list datatype.
11772 On S/390, va_list is an array type equivalent to
11774 typedef struct __va_list_tag
11776 long __gpr;
11777 long __fpr;
11778 void *__overflow_arg_area;
11779 void *__reg_save_area;
11780 } va_list[1];
11782 where __gpr and __fpr hold the number of general purpose
11783 or floating point arguments used up to now, respectively,
11784 __overflow_arg_area points to the stack location of the
11785 next argument passed on the stack, and __reg_save_area
11786 always points to the start of the register area in the
11787 call frame of the current function. The function prologue
11788 saves all registers used for argument passing into this
11789 area if the function uses variable arguments. */
11791 static tree
11792 s390_build_builtin_va_list (void)
11794 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11796 record = lang_hooks.types.make_type (RECORD_TYPE);
11798 type_decl =
11799 build_decl (BUILTINS_LOCATION,
11800 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11802 f_gpr = build_decl (BUILTINS_LOCATION,
11803 FIELD_DECL, get_identifier ("__gpr"),
11804 long_integer_type_node);
11805 f_fpr = build_decl (BUILTINS_LOCATION,
11806 FIELD_DECL, get_identifier ("__fpr"),
11807 long_integer_type_node);
11808 f_ovf = build_decl (BUILTINS_LOCATION,
11809 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11810 ptr_type_node);
11811 f_sav = build_decl (BUILTINS_LOCATION,
11812 FIELD_DECL, get_identifier ("__reg_save_area"),
11813 ptr_type_node);
11815 va_list_gpr_counter_field = f_gpr;
11816 va_list_fpr_counter_field = f_fpr;
11818 DECL_FIELD_CONTEXT (f_gpr) = record;
11819 DECL_FIELD_CONTEXT (f_fpr) = record;
11820 DECL_FIELD_CONTEXT (f_ovf) = record;
11821 DECL_FIELD_CONTEXT (f_sav) = record;
11823 TYPE_STUB_DECL (record) = type_decl;
11824 TYPE_NAME (record) = type_decl;
11825 TYPE_FIELDS (record) = f_gpr;
11826 DECL_CHAIN (f_gpr) = f_fpr;
11827 DECL_CHAIN (f_fpr) = f_ovf;
11828 DECL_CHAIN (f_ovf) = f_sav;
11830 layout_type (record);
11832 /* The correct type is an array type of one element. */
11833 return build_array_type (record, build_index_type (size_zero_node));
11836 /* Implement va_start by filling the va_list structure VALIST.
11837 STDARG_P is always true, and ignored.
11838 NEXTARG points to the first anonymous stack argument.
11840 The following global variables are used to initialize
11841 the va_list structure:
11843 crtl->args.info:
11844 holds number of gprs and fprs used for named arguments.
11845 crtl->args.arg_offset_rtx:
11846 holds the offset of the first anonymous stack argument
11847 (relative to the virtual arg pointer). */
11849 static void
11850 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11852 HOST_WIDE_INT n_gpr, n_fpr;
11853 int off;
11854 tree f_gpr, f_fpr, f_ovf, f_sav;
11855 tree gpr, fpr, ovf, sav, t;
11857 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11858 f_fpr = DECL_CHAIN (f_gpr);
11859 f_ovf = DECL_CHAIN (f_fpr);
11860 f_sav = DECL_CHAIN (f_ovf);
11862 valist = build_simple_mem_ref (valist);
11863 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11864 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11865 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11866 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11868 /* Count number of gp and fp argument registers used. */
11870 n_gpr = crtl->args.info.gprs;
11871 n_fpr = crtl->args.info.fprs;
11873 if (cfun->va_list_gpr_size)
11875 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11876 build_int_cst (NULL_TREE, n_gpr));
11877 TREE_SIDE_EFFECTS (t) = 1;
11878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11881 if (cfun->va_list_fpr_size)
11883 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11884 build_int_cst (NULL_TREE, n_fpr));
11885 TREE_SIDE_EFFECTS (t) = 1;
11886 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11889 if (flag_split_stack
11890 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
11891 == NULL)
11892 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11894 rtx reg;
11895 rtx_insn *seq;
11897 reg = gen_reg_rtx (Pmode);
11898 cfun->machine->split_stack_varargs_pointer = reg;
11900 start_sequence ();
11901 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
11902 seq = get_insns ();
11903 end_sequence ();
11905 push_topmost_sequence ();
11906 emit_insn_after (seq, entry_of_function ());
11907 pop_topmost_sequence ();
11910 /* Find the overflow area.
11911 FIXME: This currently is too pessimistic when the vector ABI is
11912 enabled. In that case we *always* set up the overflow area
11913 pointer. */
11914 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11915 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11916 || TARGET_VX_ABI)
11918 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11919 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11920 else
11921 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
11923 off = INTVAL (crtl->args.arg_offset_rtx);
11924 off = off < 0 ? 0 : off;
11925 if (TARGET_DEBUG_ARG)
11926 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11927 (int)n_gpr, (int)n_fpr, off);
11929 t = fold_build_pointer_plus_hwi (t, off);
11931 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11932 TREE_SIDE_EFFECTS (t) = 1;
11933 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11936 /* Find the register save area. */
11937 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11938 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11940 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11941 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11943 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11944 TREE_SIDE_EFFECTS (t) = 1;
11945 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11949 /* Implement va_arg by updating the va_list structure
11950 VALIST as required to retrieve an argument of type
11951 TYPE, and returning that argument.
11953 Generates code equivalent to:
11955 if (integral value) {
11956 if (size <= 4 && args.gpr < 5 ||
11957 size > 4 && args.gpr < 4 )
11958 ret = args.reg_save_area[args.gpr+8]
11959 else
11960 ret = *args.overflow_arg_area++;
11961 } else if (vector value) {
11962 ret = *args.overflow_arg_area;
11963 args.overflow_arg_area += size / 8;
11964 } else if (float value) {
11965 if (args.fgpr < 2)
11966 ret = args.reg_save_area[args.fpr+64]
11967 else
11968 ret = *args.overflow_arg_area++;
11969 } else if (aggregate value) {
11970 if (args.gpr < 5)
11971 ret = *args.reg_save_area[args.gpr]
11972 else
11973 ret = **args.overflow_arg_area++;
11974 } */
11976 static tree
11977 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11978 gimple_seq *post_p ATTRIBUTE_UNUSED)
11980 tree f_gpr, f_fpr, f_ovf, f_sav;
11981 tree gpr, fpr, ovf, sav, reg, t, u;
11982 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11983 tree lab_false, lab_over;
11984 tree addr = create_tmp_var (ptr_type_node, "addr");
11985 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
11986 a stack slot. */
11988 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11989 f_fpr = DECL_CHAIN (f_gpr);
11990 f_ovf = DECL_CHAIN (f_fpr);
11991 f_sav = DECL_CHAIN (f_ovf);
11993 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11994 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11995 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11997 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11998 both appear on a lhs. */
11999 valist = unshare_expr (valist);
12000 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12002 size = int_size_in_bytes (type);
12004 s390_check_type_for_vector_abi (type, true, false);
12006 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12008 if (TARGET_DEBUG_ARG)
12010 fprintf (stderr, "va_arg: aggregate type");
12011 debug_tree (type);
12014 /* Aggregates are passed by reference. */
12015 indirect_p = 1;
12016 reg = gpr;
12017 n_reg = 1;
12019 /* kernel stack layout on 31 bit: It is assumed here that no padding
12020 will be added by s390_frame_info because for va_args always an even
12021 number of gprs has to be saved r15-r2 = 14 regs. */
12022 sav_ofs = 2 * UNITS_PER_LONG;
12023 sav_scale = UNITS_PER_LONG;
12024 size = UNITS_PER_LONG;
12025 max_reg = GP_ARG_NUM_REG - n_reg;
12026 left_align_p = false;
12028 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12030 if (TARGET_DEBUG_ARG)
12032 fprintf (stderr, "va_arg: vector type");
12033 debug_tree (type);
12036 indirect_p = 0;
12037 reg = NULL_TREE;
12038 n_reg = 0;
12039 sav_ofs = 0;
12040 sav_scale = 8;
12041 max_reg = 0;
12042 left_align_p = true;
12044 else if (s390_function_arg_float (TYPE_MODE (type), type))
12046 if (TARGET_DEBUG_ARG)
12048 fprintf (stderr, "va_arg: float type");
12049 debug_tree (type);
12052 /* FP args go in FP registers, if present. */
12053 indirect_p = 0;
12054 reg = fpr;
12055 n_reg = 1;
12056 sav_ofs = 16 * UNITS_PER_LONG;
12057 sav_scale = 8;
12058 max_reg = FP_ARG_NUM_REG - n_reg;
12059 left_align_p = false;
12061 else
12063 if (TARGET_DEBUG_ARG)
12065 fprintf (stderr, "va_arg: other type");
12066 debug_tree (type);
12069 /* Otherwise into GP registers. */
12070 indirect_p = 0;
12071 reg = gpr;
12072 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12074 /* kernel stack layout on 31 bit: It is assumed here that no padding
12075 will be added by s390_frame_info because for va_args always an even
12076 number of gprs has to be saved r15-r2 = 14 regs. */
12077 sav_ofs = 2 * UNITS_PER_LONG;
12079 if (size < UNITS_PER_LONG)
12080 sav_ofs += UNITS_PER_LONG - size;
12082 sav_scale = UNITS_PER_LONG;
12083 max_reg = GP_ARG_NUM_REG - n_reg;
12084 left_align_p = false;
12087 /* Pull the value out of the saved registers ... */
12089 if (reg != NULL_TREE)
12092 if (reg > ((typeof (reg))max_reg))
12093 goto lab_false;
12095 addr = sav + sav_ofs + reg * save_scale;
12097 goto lab_over;
12099 lab_false:
12102 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12103 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12105 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12106 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12107 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12108 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12109 gimplify_and_add (t, pre_p);
12111 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12112 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12113 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12114 t = fold_build_pointer_plus (t, u);
12116 gimplify_assign (addr, t, pre_p);
12118 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12120 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12123 /* ... Otherwise out of the overflow area. */
12125 t = ovf;
12126 if (size < UNITS_PER_LONG && !left_align_p)
12127 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12129 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12131 gimplify_assign (addr, t, pre_p);
12133 if (size < UNITS_PER_LONG && left_align_p)
12134 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12135 else
12136 t = fold_build_pointer_plus_hwi (t, size);
12138 gimplify_assign (ovf, t, pre_p);
12140 if (reg != NULL_TREE)
12141 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12144 /* Increment register save count. */
12146 if (n_reg > 0)
12148 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12149 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12150 gimplify_and_add (u, pre_p);
12153 if (indirect_p)
12155 t = build_pointer_type_for_mode (build_pointer_type (type),
12156 ptr_mode, true);
12157 addr = fold_convert (t, addr);
12158 addr = build_va_arg_indirect_ref (addr);
12160 else
12162 t = build_pointer_type_for_mode (type, ptr_mode, true);
12163 addr = fold_convert (t, addr);
12166 return build_va_arg_indirect_ref (addr);
12169 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12170 expanders.
12171 DEST - Register location where CC will be stored.
12172 TDB - Pointer to a 256 byte area where to store the transaction.
12173 diagnostic block. NULL if TDB is not needed.
12174 RETRY - Retry count value. If non-NULL a retry loop for CC2
12175 is emitted
12176 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12177 of the tbegin instruction pattern. */
12179 void
12180 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12182 rtx retry_plus_two = gen_reg_rtx (SImode);
12183 rtx retry_reg = gen_reg_rtx (SImode);
12184 rtx_code_label *retry_label = NULL;
12186 if (retry != NULL_RTX)
12188 emit_move_insn (retry_reg, retry);
12189 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12190 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12191 retry_label = gen_label_rtx ();
12192 emit_label (retry_label);
12195 if (clobber_fprs_p)
12197 if (TARGET_VX)
12198 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12199 tdb));
12200 else
12201 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12202 tdb));
12204 else
12205 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12206 tdb));
12208 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12209 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12210 CC_REGNUM)),
12211 UNSPEC_CC_TO_INT));
12212 if (retry != NULL_RTX)
12214 const int CC0 = 1 << 3;
12215 const int CC1 = 1 << 2;
12216 const int CC3 = 1 << 0;
12217 rtx jump;
12218 rtx count = gen_reg_rtx (SImode);
12219 rtx_code_label *leave_label = gen_label_rtx ();
12221 /* Exit for success and permanent failures. */
12222 jump = s390_emit_jump (leave_label,
12223 gen_rtx_EQ (VOIDmode,
12224 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12225 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12226 LABEL_NUSES (leave_label) = 1;
12228 /* CC2 - transient failure. Perform retry with ppa. */
12229 emit_move_insn (count, retry_plus_two);
12230 emit_insn (gen_subsi3 (count, count, retry_reg));
12231 emit_insn (gen_tx_assist (count));
12232 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12233 retry_reg,
12234 retry_reg));
12235 JUMP_LABEL (jump) = retry_label;
12236 LABEL_NUSES (retry_label) = 1;
12237 emit_label (leave_label);
12242 /* Return the decl for the target specific builtin with the function
12243 code FCODE. */
12245 static tree
12246 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12248 if (fcode >= S390_BUILTIN_MAX)
12249 return error_mark_node;
12251 return s390_builtin_decls[fcode];
12254 /* We call mcount before the function prologue. So a profiled leaf
12255 function should stay a leaf function. */
12257 static bool
12258 s390_keep_leaf_when_profiled ()
12260 return true;
12263 /* Output assembly code for the trampoline template to
12264 stdio stream FILE.
12266 On S/390, we use gpr 1 internally in the trampoline code;
12267 gpr 0 is used to hold the static chain. */
12269 static void
12270 s390_asm_trampoline_template (FILE *file)
12272 rtx op[2];
12273 op[0] = gen_rtx_REG (Pmode, 0);
12274 op[1] = gen_rtx_REG (Pmode, 1);
12276 if (TARGET_64BIT)
12278 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12279 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12280 output_asm_insn ("br\t%1", op); /* 2 byte */
12281 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12283 else
12285 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12286 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12287 output_asm_insn ("br\t%1", op); /* 2 byte */
12288 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12292 /* Emit RTL insns to initialize the variable parts of a trampoline.
12293 FNADDR is an RTX for the address of the function's pure code.
12294 CXT is an RTX for the static chain value for the function. */
12296 static void
12297 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12299 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12300 rtx mem;
12302 emit_block_move (m_tramp, assemble_trampoline_template (),
12303 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12305 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12306 emit_move_insn (mem, cxt);
12307 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12308 emit_move_insn (mem, fnaddr);
12311 /* Output assembler code to FILE to increment profiler label # LABELNO
12312 for profiling a function entry. */
12314 void
12315 s390_function_profiler (FILE *file, int labelno)
12317 rtx op[7];
12319 char label[128];
12320 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12322 fprintf (file, "# function profiler \n");
12324 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12325 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12326 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12328 op[2] = gen_rtx_REG (Pmode, 1);
12329 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12330 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12332 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12333 if (flag_pic)
12335 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12336 op[4] = gen_rtx_CONST (Pmode, op[4]);
12339 if (TARGET_64BIT)
12341 output_asm_insn ("stg\t%0,%1", op);
12342 output_asm_insn ("larl\t%2,%3", op);
12343 output_asm_insn ("brasl\t%0,%4", op);
12344 output_asm_insn ("lg\t%0,%1", op);
12346 else if (TARGET_CPU_ZARCH)
12348 output_asm_insn ("st\t%0,%1", op);
12349 output_asm_insn ("larl\t%2,%3", op);
12350 output_asm_insn ("brasl\t%0,%4", op);
12351 output_asm_insn ("l\t%0,%1", op);
12353 else if (!flag_pic)
12355 op[6] = gen_label_rtx ();
12357 output_asm_insn ("st\t%0,%1", op);
12358 output_asm_insn ("bras\t%2,%l6", op);
12359 output_asm_insn (".long\t%4", op);
12360 output_asm_insn (".long\t%3", op);
12361 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12362 output_asm_insn ("l\t%0,0(%2)", op);
12363 output_asm_insn ("l\t%2,4(%2)", op);
12364 output_asm_insn ("basr\t%0,%0", op);
12365 output_asm_insn ("l\t%0,%1", op);
12367 else
12369 op[5] = gen_label_rtx ();
12370 op[6] = gen_label_rtx ();
12372 output_asm_insn ("st\t%0,%1", op);
12373 output_asm_insn ("bras\t%2,%l6", op);
12374 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12375 output_asm_insn (".long\t%4-%l5", op);
12376 output_asm_insn (".long\t%3-%l5", op);
12377 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12378 output_asm_insn ("lr\t%0,%2", op);
12379 output_asm_insn ("a\t%0,0(%2)", op);
12380 output_asm_insn ("a\t%2,4(%2)", op);
12381 output_asm_insn ("basr\t%0,%0", op);
12382 output_asm_insn ("l\t%0,%1", op);
12386 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12387 into its SYMBOL_REF_FLAGS. */
12389 static void
12390 s390_encode_section_info (tree decl, rtx rtl, int first)
12392 default_encode_section_info (decl, rtl, first);
12394 if (TREE_CODE (decl) == VAR_DECL)
12396 /* Store the alignment to be able to check if we can use
12397 a larl/load-relative instruction. We only handle the cases
12398 that can go wrong (i.e. no FUNC_DECLs). If a symref does
12399 not have any flag we assume it to be correctly aligned. */
12401 if (DECL_ALIGN (decl) % 64)
12402 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12404 if (DECL_ALIGN (decl) % 32)
12405 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12407 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12408 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12411 /* Literal pool references don't have a decl so they are handled
12412 differently here. We rely on the information in the MEM_ALIGN
12413 entry to decide upon the alignment. */
12414 if (MEM_P (rtl)
12415 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12416 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
12417 && MEM_ALIGN (rtl) != 0
12418 && GET_MODE_BITSIZE (GET_MODE (rtl)) != 0)
12420 if (MEM_ALIGN (rtl) % 64)
12421 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12423 if (MEM_ALIGN (rtl) % 32)
12424 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12426 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12427 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12431 /* Output thunk to FILE that implements a C++ virtual function call (with
12432 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12433 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12434 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12435 relative to the resulting this pointer. */
12437 static void
12438 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12439 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12440 tree function)
12442 rtx op[10];
12443 int nonlocal = 0;
12445 /* Make sure unwind info is emitted for the thunk if needed. */
12446 final_start_function (emit_barrier (), file, 1);
12448 /* Operand 0 is the target function. */
12449 op[0] = XEXP (DECL_RTL (function), 0);
12450 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12452 nonlocal = 1;
12453 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12454 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12455 op[0] = gen_rtx_CONST (Pmode, op[0]);
12458 /* Operand 1 is the 'this' pointer. */
12459 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12460 op[1] = gen_rtx_REG (Pmode, 3);
12461 else
12462 op[1] = gen_rtx_REG (Pmode, 2);
12464 /* Operand 2 is the delta. */
12465 op[2] = GEN_INT (delta);
12467 /* Operand 3 is the vcall_offset. */
12468 op[3] = GEN_INT (vcall_offset);
12470 /* Operand 4 is the temporary register. */
12471 op[4] = gen_rtx_REG (Pmode, 1);
12473 /* Operands 5 to 8 can be used as labels. */
12474 op[5] = NULL_RTX;
12475 op[6] = NULL_RTX;
12476 op[7] = NULL_RTX;
12477 op[8] = NULL_RTX;
12479 /* Operand 9 can be used for temporary register. */
12480 op[9] = NULL_RTX;
12482 /* Generate code. */
12483 if (TARGET_64BIT)
12485 /* Setup literal pool pointer if required. */
12486 if ((!DISP_IN_RANGE (delta)
12487 && !CONST_OK_FOR_K (delta)
12488 && !CONST_OK_FOR_Os (delta))
12489 || (!DISP_IN_RANGE (vcall_offset)
12490 && !CONST_OK_FOR_K (vcall_offset)
12491 && !CONST_OK_FOR_Os (vcall_offset)))
12493 op[5] = gen_label_rtx ();
12494 output_asm_insn ("larl\t%4,%5", op);
12497 /* Add DELTA to this pointer. */
12498 if (delta)
12500 if (CONST_OK_FOR_J (delta))
12501 output_asm_insn ("la\t%1,%2(%1)", op);
12502 else if (DISP_IN_RANGE (delta))
12503 output_asm_insn ("lay\t%1,%2(%1)", op);
12504 else if (CONST_OK_FOR_K (delta))
12505 output_asm_insn ("aghi\t%1,%2", op);
12506 else if (CONST_OK_FOR_Os (delta))
12507 output_asm_insn ("agfi\t%1,%2", op);
12508 else
12510 op[6] = gen_label_rtx ();
12511 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12515 /* Perform vcall adjustment. */
12516 if (vcall_offset)
12518 if (DISP_IN_RANGE (vcall_offset))
12520 output_asm_insn ("lg\t%4,0(%1)", op);
12521 output_asm_insn ("ag\t%1,%3(%4)", op);
12523 else if (CONST_OK_FOR_K (vcall_offset))
12525 output_asm_insn ("lghi\t%4,%3", op);
12526 output_asm_insn ("ag\t%4,0(%1)", op);
12527 output_asm_insn ("ag\t%1,0(%4)", op);
12529 else if (CONST_OK_FOR_Os (vcall_offset))
12531 output_asm_insn ("lgfi\t%4,%3", op);
12532 output_asm_insn ("ag\t%4,0(%1)", op);
12533 output_asm_insn ("ag\t%1,0(%4)", op);
12535 else
12537 op[7] = gen_label_rtx ();
12538 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12539 output_asm_insn ("ag\t%4,0(%1)", op);
12540 output_asm_insn ("ag\t%1,0(%4)", op);
12544 /* Jump to target. */
12545 output_asm_insn ("jg\t%0", op);
12547 /* Output literal pool if required. */
12548 if (op[5])
12550 output_asm_insn (".align\t4", op);
12551 targetm.asm_out.internal_label (file, "L",
12552 CODE_LABEL_NUMBER (op[5]));
12554 if (op[6])
12556 targetm.asm_out.internal_label (file, "L",
12557 CODE_LABEL_NUMBER (op[6]));
12558 output_asm_insn (".long\t%2", op);
12560 if (op[7])
12562 targetm.asm_out.internal_label (file, "L",
12563 CODE_LABEL_NUMBER (op[7]));
12564 output_asm_insn (".long\t%3", op);
12567 else
12569 /* Setup base pointer if required. */
12570 if (!vcall_offset
12571 || (!DISP_IN_RANGE (delta)
12572 && !CONST_OK_FOR_K (delta)
12573 && !CONST_OK_FOR_Os (delta))
12574 || (!DISP_IN_RANGE (delta)
12575 && !CONST_OK_FOR_K (vcall_offset)
12576 && !CONST_OK_FOR_Os (vcall_offset)))
12578 op[5] = gen_label_rtx ();
12579 output_asm_insn ("basr\t%4,0", op);
12580 targetm.asm_out.internal_label (file, "L",
12581 CODE_LABEL_NUMBER (op[5]));
12584 /* Add DELTA to this pointer. */
12585 if (delta)
12587 if (CONST_OK_FOR_J (delta))
12588 output_asm_insn ("la\t%1,%2(%1)", op);
12589 else if (DISP_IN_RANGE (delta))
12590 output_asm_insn ("lay\t%1,%2(%1)", op);
12591 else if (CONST_OK_FOR_K (delta))
12592 output_asm_insn ("ahi\t%1,%2", op);
12593 else if (CONST_OK_FOR_Os (delta))
12594 output_asm_insn ("afi\t%1,%2", op);
12595 else
12597 op[6] = gen_label_rtx ();
12598 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12602 /* Perform vcall adjustment. */
12603 if (vcall_offset)
12605 if (CONST_OK_FOR_J (vcall_offset))
12607 output_asm_insn ("l\t%4,0(%1)", op);
12608 output_asm_insn ("a\t%1,%3(%4)", op);
12610 else if (DISP_IN_RANGE (vcall_offset))
12612 output_asm_insn ("l\t%4,0(%1)", op);
12613 output_asm_insn ("ay\t%1,%3(%4)", op);
12615 else if (CONST_OK_FOR_K (vcall_offset))
12617 output_asm_insn ("lhi\t%4,%3", op);
12618 output_asm_insn ("a\t%4,0(%1)", op);
12619 output_asm_insn ("a\t%1,0(%4)", op);
12621 else if (CONST_OK_FOR_Os (vcall_offset))
12623 output_asm_insn ("iilf\t%4,%3", op);
12624 output_asm_insn ("a\t%4,0(%1)", op);
12625 output_asm_insn ("a\t%1,0(%4)", op);
12627 else
12629 op[7] = gen_label_rtx ();
12630 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12631 output_asm_insn ("a\t%4,0(%1)", op);
12632 output_asm_insn ("a\t%1,0(%4)", op);
12635 /* We had to clobber the base pointer register.
12636 Re-setup the base pointer (with a different base). */
12637 op[5] = gen_label_rtx ();
12638 output_asm_insn ("basr\t%4,0", op);
12639 targetm.asm_out.internal_label (file, "L",
12640 CODE_LABEL_NUMBER (op[5]));
12643 /* Jump to target. */
12644 op[8] = gen_label_rtx ();
12646 if (!flag_pic)
12647 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12648 else if (!nonlocal)
12649 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12650 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12651 else if (flag_pic == 1)
12653 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12654 output_asm_insn ("l\t%4,%0(%4)", op);
12656 else if (flag_pic == 2)
12658 op[9] = gen_rtx_REG (Pmode, 0);
12659 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12660 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12661 output_asm_insn ("ar\t%4,%9", op);
12662 output_asm_insn ("l\t%4,0(%4)", op);
12665 output_asm_insn ("br\t%4", op);
12667 /* Output literal pool. */
12668 output_asm_insn (".align\t4", op);
12670 if (nonlocal && flag_pic == 2)
12671 output_asm_insn (".long\t%0", op);
12672 if (nonlocal)
12674 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12675 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12678 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12679 if (!flag_pic)
12680 output_asm_insn (".long\t%0", op);
12681 else
12682 output_asm_insn (".long\t%0-%5", op);
12684 if (op[6])
12686 targetm.asm_out.internal_label (file, "L",
12687 CODE_LABEL_NUMBER (op[6]));
12688 output_asm_insn (".long\t%2", op);
12690 if (op[7])
12692 targetm.asm_out.internal_label (file, "L",
12693 CODE_LABEL_NUMBER (op[7]));
12694 output_asm_insn (".long\t%3", op);
12697 final_end_function ();
12700 static bool
12701 s390_valid_pointer_mode (machine_mode mode)
12703 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12706 /* Checks whether the given CALL_EXPR would use a caller
12707 saved register. This is used to decide whether sibling call
12708 optimization could be performed on the respective function
12709 call. */
12711 static bool
12712 s390_call_saved_register_used (tree call_expr)
12714 CUMULATIVE_ARGS cum_v;
12715 cumulative_args_t cum;
12716 tree parameter;
12717 machine_mode mode;
12718 tree type;
12719 rtx parm_rtx;
12720 int reg, i;
12722 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12723 cum = pack_cumulative_args (&cum_v);
12725 for (i = 0; i < call_expr_nargs (call_expr); i++)
12727 parameter = CALL_EXPR_ARG (call_expr, i);
12728 gcc_assert (parameter);
12730 /* For an undeclared variable passed as parameter we will get
12731 an ERROR_MARK node here. */
12732 if (TREE_CODE (parameter) == ERROR_MARK)
12733 return true;
12735 type = TREE_TYPE (parameter);
12736 gcc_assert (type);
12738 mode = TYPE_MODE (type);
12739 gcc_assert (mode);
12741 /* We assume that in the target function all parameters are
12742 named. This only has an impact on vector argument register
12743 usage none of which is call-saved. */
12744 if (pass_by_reference (&cum_v, mode, type, true))
12746 mode = Pmode;
12747 type = build_pointer_type (type);
12750 parm_rtx = s390_function_arg (cum, mode, type, true);
12752 s390_function_arg_advance (cum, mode, type, true);
12754 if (!parm_rtx)
12755 continue;
12757 if (REG_P (parm_rtx))
12759 for (reg = 0;
12760 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12761 reg++)
12762 if (!call_used_regs[reg + REGNO (parm_rtx)])
12763 return true;
12766 if (GET_CODE (parm_rtx) == PARALLEL)
12768 int i;
12770 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12772 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12774 gcc_assert (REG_P (r));
12776 for (reg = 0;
12777 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12778 reg++)
12779 if (!call_used_regs[reg + REGNO (r)])
12780 return true;
12785 return false;
12788 /* Return true if the given call expression can be
12789 turned into a sibling call.
12790 DECL holds the declaration of the function to be called whereas
12791 EXP is the call expression itself. */
12793 static bool
12794 s390_function_ok_for_sibcall (tree decl, tree exp)
12796 /* The TPF epilogue uses register 1. */
12797 if (TARGET_TPF_PROFILING)
12798 return false;
12800 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12801 which would have to be restored before the sibcall. */
12802 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12803 return false;
12805 /* Register 6 on s390 is available as an argument register but unfortunately
12806 "caller saved". This makes functions needing this register for arguments
12807 not suitable for sibcalls. */
12808 return !s390_call_saved_register_used (exp);
12811 /* Return the fixed registers used for condition codes. */
12813 static bool
12814 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12816 *p1 = CC_REGNUM;
12817 *p2 = INVALID_REGNUM;
12819 return true;
12822 /* This function is used by the call expanders of the machine description.
12823 It emits the call insn itself together with the necessary operations
12824 to adjust the target address and returns the emitted insn.
12825 ADDR_LOCATION is the target address rtx
12826 TLS_CALL the location of the thread-local symbol
12827 RESULT_REG the register where the result of the call should be stored
12828 RETADDR_REG the register where the return address should be stored
12829 If this parameter is NULL_RTX the call is considered
12830 to be a sibling call. */
12832 rtx_insn *
12833 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12834 rtx retaddr_reg)
12836 bool plt_call = false;
12837 rtx_insn *insn;
12838 rtx call;
12839 rtx clobber;
12840 rtvec vec;
12842 /* Direct function calls need special treatment. */
12843 if (GET_CODE (addr_location) == SYMBOL_REF)
12845 /* When calling a global routine in PIC mode, we must
12846 replace the symbol itself with the PLT stub. */
12847 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12849 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12851 addr_location = gen_rtx_UNSPEC (Pmode,
12852 gen_rtvec (1, addr_location),
12853 UNSPEC_PLT);
12854 addr_location = gen_rtx_CONST (Pmode, addr_location);
12855 plt_call = true;
12857 else
12858 /* For -fpic code the PLT entries might use r12 which is
12859 call-saved. Therefore we cannot do a sibcall when
12860 calling directly using a symbol ref. When reaching
12861 this point we decided (in s390_function_ok_for_sibcall)
12862 to do a sibcall for a function pointer but one of the
12863 optimizers was able to get rid of the function pointer
12864 by propagating the symbol ref into the call. This
12865 optimization is illegal for S/390 so we turn the direct
12866 call into a indirect call again. */
12867 addr_location = force_reg (Pmode, addr_location);
12870 /* Unless we can use the bras(l) insn, force the
12871 routine address into a register. */
12872 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12874 if (flag_pic)
12875 addr_location = legitimize_pic_address (addr_location, 0);
12876 else
12877 addr_location = force_reg (Pmode, addr_location);
12881 /* If it is already an indirect call or the code above moved the
12882 SYMBOL_REF to somewhere else make sure the address can be found in
12883 register 1. */
12884 if (retaddr_reg == NULL_RTX
12885 && GET_CODE (addr_location) != SYMBOL_REF
12886 && !plt_call)
12888 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12889 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12892 addr_location = gen_rtx_MEM (QImode, addr_location);
12893 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12895 if (result_reg != NULL_RTX)
12896 call = gen_rtx_SET (result_reg, call);
12898 if (retaddr_reg != NULL_RTX)
12900 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12902 if (tls_call != NULL_RTX)
12903 vec = gen_rtvec (3, call, clobber,
12904 gen_rtx_USE (VOIDmode, tls_call));
12905 else
12906 vec = gen_rtvec (2, call, clobber);
12908 call = gen_rtx_PARALLEL (VOIDmode, vec);
12911 insn = emit_call_insn (call);
12913 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12914 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12916 /* s390_function_ok_for_sibcall should
12917 have denied sibcalls in this case. */
12918 gcc_assert (retaddr_reg != NULL_RTX);
12919 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12921 return insn;
12924 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12926 static void
12927 s390_conditional_register_usage (void)
12929 int i;
12931 if (flag_pic)
12933 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12934 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12936 if (TARGET_CPU_ZARCH)
12938 fixed_regs[BASE_REGNUM] = 0;
12939 call_used_regs[BASE_REGNUM] = 0;
12940 fixed_regs[RETURN_REGNUM] = 0;
12941 call_used_regs[RETURN_REGNUM] = 0;
12943 if (TARGET_64BIT)
12945 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12946 call_used_regs[i] = call_really_used_regs[i] = 0;
12948 else
12950 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12951 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12954 if (TARGET_SOFT_FLOAT)
12956 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12957 call_used_regs[i] = fixed_regs[i] = 1;
12960 /* Disable v16 - v31 for non-vector target. */
12961 if (!TARGET_VX)
12963 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12964 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12968 /* Corresponding function to eh_return expander. */
12970 static GTY(()) rtx s390_tpf_eh_return_symbol;
12971 void
12972 s390_emit_tpf_eh_return (rtx target)
12974 rtx_insn *insn;
12975 rtx reg, orig_ra;
12977 if (!s390_tpf_eh_return_symbol)
12978 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12980 reg = gen_rtx_REG (Pmode, 2);
12981 orig_ra = gen_rtx_REG (Pmode, 3);
12983 emit_move_insn (reg, target);
12984 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12985 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12986 gen_rtx_REG (Pmode, RETURN_REGNUM));
12987 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12988 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12990 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12993 /* Rework the prologue/epilogue to avoid saving/restoring
12994 registers unnecessarily. */
12996 static void
12997 s390_optimize_prologue (void)
12999 rtx_insn *insn, *new_insn, *next_insn;
13001 /* Do a final recompute of the frame-related data. */
13002 s390_optimize_register_info ();
13004 /* If all special registers are in fact used, there's nothing we
13005 can do, so no point in walking the insn list. */
13007 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13008 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13009 && (TARGET_CPU_ZARCH
13010 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13011 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13012 return;
13014 /* Search for prologue/epilogue insns and replace them. */
13016 for (insn = get_insns (); insn; insn = next_insn)
13018 int first, last, off;
13019 rtx set, base, offset;
13020 rtx pat;
13022 next_insn = NEXT_INSN (insn);
13024 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13025 continue;
13027 pat = PATTERN (insn);
13029 /* Remove ldgr/lgdr instructions used for saving and restore
13030 GPRs if possible. */
13031 if (TARGET_Z10
13032 && GET_CODE (pat) == SET
13033 && GET_MODE (SET_SRC (pat)) == DImode
13034 && REG_P (SET_SRC (pat))
13035 && REG_P (SET_DEST (pat)))
13037 int src_regno = REGNO (SET_SRC (pat));
13038 int dest_regno = REGNO (SET_DEST (pat));
13039 int gpr_regno;
13040 int fpr_regno;
13042 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
13043 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
13044 continue;
13046 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13047 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13049 /* GPR must be call-saved, FPR must be call-clobbered. */
13050 if (!call_really_used_regs[fpr_regno]
13051 || call_really_used_regs[gpr_regno])
13052 continue;
13054 /* It must not happen that what we once saved in an FPR now
13055 needs a stack slot. */
13056 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13058 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13060 remove_insn (insn);
13061 continue;
13065 if (GET_CODE (pat) == PARALLEL
13066 && store_multiple_operation (pat, VOIDmode))
13068 set = XVECEXP (pat, 0, 0);
13069 first = REGNO (SET_SRC (set));
13070 last = first + XVECLEN (pat, 0) - 1;
13071 offset = const0_rtx;
13072 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13073 off = INTVAL (offset);
13075 if (GET_CODE (base) != REG || off < 0)
13076 continue;
13077 if (cfun_frame_layout.first_save_gpr != -1
13078 && (cfun_frame_layout.first_save_gpr < first
13079 || cfun_frame_layout.last_save_gpr > last))
13080 continue;
13081 if (REGNO (base) != STACK_POINTER_REGNUM
13082 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13083 continue;
13084 if (first > BASE_REGNUM || last < BASE_REGNUM)
13085 continue;
13087 if (cfun_frame_layout.first_save_gpr != -1)
13089 rtx s_pat = save_gprs (base,
13090 off + (cfun_frame_layout.first_save_gpr
13091 - first) * UNITS_PER_LONG,
13092 cfun_frame_layout.first_save_gpr,
13093 cfun_frame_layout.last_save_gpr);
13094 new_insn = emit_insn_before (s_pat, insn);
13095 INSN_ADDRESSES_NEW (new_insn, -1);
13098 remove_insn (insn);
13099 continue;
13102 if (cfun_frame_layout.first_save_gpr == -1
13103 && GET_CODE (pat) == SET
13104 && GENERAL_REG_P (SET_SRC (pat))
13105 && GET_CODE (SET_DEST (pat)) == MEM)
13107 set = pat;
13108 first = REGNO (SET_SRC (set));
13109 offset = const0_rtx;
13110 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13111 off = INTVAL (offset);
13113 if (GET_CODE (base) != REG || off < 0)
13114 continue;
13115 if (REGNO (base) != STACK_POINTER_REGNUM
13116 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13117 continue;
13119 remove_insn (insn);
13120 continue;
13123 if (GET_CODE (pat) == PARALLEL
13124 && load_multiple_operation (pat, VOIDmode))
13126 set = XVECEXP (pat, 0, 0);
13127 first = REGNO (SET_DEST (set));
13128 last = first + XVECLEN (pat, 0) - 1;
13129 offset = const0_rtx;
13130 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13131 off = INTVAL (offset);
13133 if (GET_CODE (base) != REG || off < 0)
13134 continue;
13136 if (cfun_frame_layout.first_restore_gpr != -1
13137 && (cfun_frame_layout.first_restore_gpr < first
13138 || cfun_frame_layout.last_restore_gpr > last))
13139 continue;
13140 if (REGNO (base) != STACK_POINTER_REGNUM
13141 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13142 continue;
13143 if (first > BASE_REGNUM || last < BASE_REGNUM)
13144 continue;
13146 if (cfun_frame_layout.first_restore_gpr != -1)
13148 rtx rpat = restore_gprs (base,
13149 off + (cfun_frame_layout.first_restore_gpr
13150 - first) * UNITS_PER_LONG,
13151 cfun_frame_layout.first_restore_gpr,
13152 cfun_frame_layout.last_restore_gpr);
13154 /* Remove REG_CFA_RESTOREs for registers that we no
13155 longer need to save. */
13156 REG_NOTES (rpat) = REG_NOTES (insn);
13157 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13158 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13159 && ((int) REGNO (XEXP (*ptr, 0))
13160 < cfun_frame_layout.first_restore_gpr))
13161 *ptr = XEXP (*ptr, 1);
13162 else
13163 ptr = &XEXP (*ptr, 1);
13164 new_insn = emit_insn_before (rpat, insn);
13165 RTX_FRAME_RELATED_P (new_insn) = 1;
13166 INSN_ADDRESSES_NEW (new_insn, -1);
13169 remove_insn (insn);
13170 continue;
13173 if (cfun_frame_layout.first_restore_gpr == -1
13174 && GET_CODE (pat) == SET
13175 && GENERAL_REG_P (SET_DEST (pat))
13176 && GET_CODE (SET_SRC (pat)) == MEM)
13178 set = pat;
13179 first = REGNO (SET_DEST (set));
13180 offset = const0_rtx;
13181 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13182 off = INTVAL (offset);
13184 if (GET_CODE (base) != REG || off < 0)
13185 continue;
13187 if (REGNO (base) != STACK_POINTER_REGNUM
13188 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13189 continue;
13191 remove_insn (insn);
13192 continue;
13197 /* On z10 and later the dynamic branch prediction must see the
13198 backward jump within a certain windows. If not it falls back to
13199 the static prediction. This function rearranges the loop backward
13200 branch in a way which makes the static prediction always correct.
13201 The function returns true if it added an instruction. */
13202 static bool
13203 s390_fix_long_loop_prediction (rtx_insn *insn)
13205 rtx set = single_set (insn);
13206 rtx code_label, label_ref, new_label;
13207 rtx_insn *uncond_jump;
13208 rtx_insn *cur_insn;
13209 rtx tmp;
13210 int distance;
13212 /* This will exclude branch on count and branch on index patterns
13213 since these are correctly statically predicted. */
13214 if (!set
13215 || SET_DEST (set) != pc_rtx
13216 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13217 return false;
13219 /* Skip conditional returns. */
13220 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13221 && XEXP (SET_SRC (set), 2) == pc_rtx)
13222 return false;
13224 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13225 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13227 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13229 code_label = XEXP (label_ref, 0);
13231 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13232 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13233 || (INSN_ADDRESSES (INSN_UID (insn))
13234 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13235 return false;
13237 for (distance = 0, cur_insn = PREV_INSN (insn);
13238 distance < PREDICT_DISTANCE - 6;
13239 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13240 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13241 return false;
13243 new_label = gen_label_rtx ();
13244 uncond_jump = emit_jump_insn_after (
13245 gen_rtx_SET (pc_rtx,
13246 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13247 insn);
13248 emit_label_after (new_label, uncond_jump);
13250 tmp = XEXP (SET_SRC (set), 1);
13251 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13252 XEXP (SET_SRC (set), 2) = tmp;
13253 INSN_CODE (insn) = -1;
13255 XEXP (label_ref, 0) = new_label;
13256 JUMP_LABEL (insn) = new_label;
13257 JUMP_LABEL (uncond_jump) = code_label;
13259 return true;
13262 /* Returns 1 if INSN reads the value of REG for purposes not related
13263 to addressing of memory, and 0 otherwise. */
13264 static int
13265 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13267 return reg_referenced_p (reg, PATTERN (insn))
13268 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13271 /* Starting from INSN find_cond_jump looks downwards in the insn
13272 stream for a single jump insn which is the last user of the
13273 condition code set in INSN. */
13274 static rtx_insn *
13275 find_cond_jump (rtx_insn *insn)
13277 for (; insn; insn = NEXT_INSN (insn))
13279 rtx ite, cc;
13281 if (LABEL_P (insn))
13282 break;
13284 if (!JUMP_P (insn))
13286 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13287 break;
13288 continue;
13291 /* This will be triggered by a return. */
13292 if (GET_CODE (PATTERN (insn)) != SET)
13293 break;
13295 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13296 ite = SET_SRC (PATTERN (insn));
13298 if (GET_CODE (ite) != IF_THEN_ELSE)
13299 break;
13301 cc = XEXP (XEXP (ite, 0), 0);
13302 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13303 break;
13305 if (find_reg_note (insn, REG_DEAD, cc))
13306 return insn;
13307 break;
13310 return NULL;
13313 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13314 the semantics does not change. If NULL_RTX is passed as COND the
13315 function tries to find the conditional jump starting with INSN. */
13316 static void
13317 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13319 rtx tmp = *op0;
13321 if (cond == NULL_RTX)
13323 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13324 rtx set = jump ? single_set (jump) : NULL_RTX;
13326 if (set == NULL_RTX)
13327 return;
13329 cond = XEXP (SET_SRC (set), 0);
13332 *op0 = *op1;
13333 *op1 = tmp;
13334 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13337 /* On z10, instructions of the compare-and-branch family have the
13338 property to access the register occurring as second operand with
13339 its bits complemented. If such a compare is grouped with a second
13340 instruction that accesses the same register non-complemented, and
13341 if that register's value is delivered via a bypass, then the
13342 pipeline recycles, thereby causing significant performance decline.
13343 This function locates such situations and exchanges the two
13344 operands of the compare. The function return true whenever it
13345 added an insn. */
13346 static bool
13347 s390_z10_optimize_cmp (rtx_insn *insn)
13349 rtx_insn *prev_insn, *next_insn;
13350 bool insn_added_p = false;
13351 rtx cond, *op0, *op1;
13353 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13355 /* Handle compare and branch and branch on count
13356 instructions. */
13357 rtx pattern = single_set (insn);
13359 if (!pattern
13360 || SET_DEST (pattern) != pc_rtx
13361 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13362 return false;
13364 cond = XEXP (SET_SRC (pattern), 0);
13365 op0 = &XEXP (cond, 0);
13366 op1 = &XEXP (cond, 1);
13368 else if (GET_CODE (PATTERN (insn)) == SET)
13370 rtx src, dest;
13372 /* Handle normal compare instructions. */
13373 src = SET_SRC (PATTERN (insn));
13374 dest = SET_DEST (PATTERN (insn));
13376 if (!REG_P (dest)
13377 || !CC_REGNO_P (REGNO (dest))
13378 || GET_CODE (src) != COMPARE)
13379 return false;
13381 /* s390_swap_cmp will try to find the conditional
13382 jump when passing NULL_RTX as condition. */
13383 cond = NULL_RTX;
13384 op0 = &XEXP (src, 0);
13385 op1 = &XEXP (src, 1);
13387 else
13388 return false;
13390 if (!REG_P (*op0) || !REG_P (*op1))
13391 return false;
13393 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13394 return false;
13396 /* Swap the COMPARE arguments and its mask if there is a
13397 conflicting access in the previous insn. */
13398 prev_insn = prev_active_insn (insn);
13399 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13400 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13401 s390_swap_cmp (cond, op0, op1, insn);
13403 /* Check if there is a conflict with the next insn. If there
13404 was no conflict with the previous insn, then swap the
13405 COMPARE arguments and its mask. If we already swapped
13406 the operands, or if swapping them would cause a conflict
13407 with the previous insn, issue a NOP after the COMPARE in
13408 order to separate the two instuctions. */
13409 next_insn = next_active_insn (insn);
13410 if (next_insn != NULL_RTX && INSN_P (next_insn)
13411 && s390_non_addr_reg_read_p (*op1, next_insn))
13413 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13414 && s390_non_addr_reg_read_p (*op0, prev_insn))
13416 if (REGNO (*op1) == 0)
13417 emit_insn_after (gen_nop1 (), insn);
13418 else
13419 emit_insn_after (gen_nop (), insn);
13420 insn_added_p = true;
13422 else
13423 s390_swap_cmp (cond, op0, op1, insn);
13425 return insn_added_p;
13428 /* Perform machine-dependent processing. */
13430 static void
13431 s390_reorg (void)
13433 bool pool_overflow = false;
13434 int hw_before, hw_after;
13436 /* Make sure all splits have been performed; splits after
13437 machine_dependent_reorg might confuse insn length counts. */
13438 split_all_insns_noflow ();
13440 /* Install the main literal pool and the associated base
13441 register load insns.
13443 In addition, there are two problematic situations we need
13444 to correct:
13446 - the literal pool might be > 4096 bytes in size, so that
13447 some of its elements cannot be directly accessed
13449 - a branch target might be > 64K away from the branch, so that
13450 it is not possible to use a PC-relative instruction.
13452 To fix those, we split the single literal pool into multiple
13453 pool chunks, reloading the pool base register at various
13454 points throughout the function to ensure it always points to
13455 the pool chunk the following code expects, and / or replace
13456 PC-relative branches by absolute branches.
13458 However, the two problems are interdependent: splitting the
13459 literal pool can move a branch further away from its target,
13460 causing the 64K limit to overflow, and on the other hand,
13461 replacing a PC-relative branch by an absolute branch means
13462 we need to put the branch target address into the literal
13463 pool, possibly causing it to overflow.
13465 So, we loop trying to fix up both problems until we manage
13466 to satisfy both conditions at the same time. Note that the
13467 loop is guaranteed to terminate as every pass of the loop
13468 strictly decreases the total number of PC-relative branches
13469 in the function. (This is not completely true as there
13470 might be branch-over-pool insns introduced by chunkify_start.
13471 Those never need to be split however.) */
13473 for (;;)
13475 struct constant_pool *pool = NULL;
13477 /* Collect the literal pool. */
13478 if (!pool_overflow)
13480 pool = s390_mainpool_start ();
13481 if (!pool)
13482 pool_overflow = true;
13485 /* If literal pool overflowed, start to chunkify it. */
13486 if (pool_overflow)
13487 pool = s390_chunkify_start ();
13489 /* Split out-of-range branches. If this has created new
13490 literal pool entries, cancel current chunk list and
13491 recompute it. zSeries machines have large branch
13492 instructions, so we never need to split a branch. */
13493 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13495 if (pool_overflow)
13496 s390_chunkify_cancel (pool);
13497 else
13498 s390_mainpool_cancel (pool);
13500 continue;
13503 /* If we made it up to here, both conditions are satisfied.
13504 Finish up literal pool related changes. */
13505 if (pool_overflow)
13506 s390_chunkify_finish (pool);
13507 else
13508 s390_mainpool_finish (pool);
13510 /* We're done splitting branches. */
13511 cfun->machine->split_branches_pending_p = false;
13512 break;
13515 /* Generate out-of-pool execute target insns. */
13516 if (TARGET_CPU_ZARCH)
13518 rtx_insn *insn, *target;
13519 rtx label;
13521 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13523 label = s390_execute_label (insn);
13524 if (!label)
13525 continue;
13527 gcc_assert (label != const0_rtx);
13529 target = emit_label (XEXP (label, 0));
13530 INSN_ADDRESSES_NEW (target, -1);
13532 target = emit_insn (s390_execute_target (insn));
13533 INSN_ADDRESSES_NEW (target, -1);
13537 /* Try to optimize prologue and epilogue further. */
13538 s390_optimize_prologue ();
13540 /* Walk over the insns and do some >=z10 specific changes. */
13541 if (s390_tune >= PROCESSOR_2097_Z10)
13543 rtx_insn *insn;
13544 bool insn_added_p = false;
13546 /* The insn lengths and addresses have to be up to date for the
13547 following manipulations. */
13548 shorten_branches (get_insns ());
13550 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13552 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13553 continue;
13555 if (JUMP_P (insn))
13556 insn_added_p |= s390_fix_long_loop_prediction (insn);
13558 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13559 || GET_CODE (PATTERN (insn)) == SET)
13560 && s390_tune == PROCESSOR_2097_Z10)
13561 insn_added_p |= s390_z10_optimize_cmp (insn);
13564 /* Adjust branches if we added new instructions. */
13565 if (insn_added_p)
13566 shorten_branches (get_insns ());
13569 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13570 if (hw_after > 0)
13572 rtx_insn *insn;
13574 /* Insert NOPs for hotpatching. */
13575 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13576 /* Emit NOPs
13577 1. inside the area covered by debug information to allow setting
13578 breakpoints at the NOPs,
13579 2. before any insn which results in an asm instruction,
13580 3. before in-function labels to avoid jumping to the NOPs, for
13581 example as part of a loop,
13582 4. before any barrier in case the function is completely empty
13583 (__builtin_unreachable ()) and has neither internal labels nor
13584 active insns.
13586 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13587 break;
13588 /* Output a series of NOPs before the first active insn. */
13589 while (insn && hw_after > 0)
13591 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13593 emit_insn_before (gen_nop_6_byte (), insn);
13594 hw_after -= 3;
13596 else if (hw_after >= 2)
13598 emit_insn_before (gen_nop_4_byte (), insn);
13599 hw_after -= 2;
13601 else
13603 emit_insn_before (gen_nop_2_byte (), insn);
13604 hw_after -= 1;
13610 /* Return true if INSN is a fp load insn writing register REGNO. */
13611 static inline bool
13612 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13614 rtx set;
13615 enum attr_type flag = s390_safe_attr_type (insn);
13617 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13618 return false;
13620 set = single_set (insn);
13622 if (set == NULL_RTX)
13623 return false;
13625 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13626 return false;
13628 if (REGNO (SET_DEST (set)) != regno)
13629 return false;
13631 return true;
13634 /* This value describes the distance to be avoided between an
13635 aritmetic fp instruction and an fp load writing the same register.
13636 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
13637 fine but the exact value has to be avoided. Otherwise the FP
13638 pipeline will throw an exception causing a major penalty. */
13639 #define Z10_EARLYLOAD_DISTANCE 7
13641 /* Rearrange the ready list in order to avoid the situation described
13642 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13643 moved to the very end of the ready list. */
13644 static void
13645 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13647 unsigned int regno;
13648 int nready = *nready_p;
13649 rtx_insn *tmp;
13650 int i;
13651 rtx_insn *insn;
13652 rtx set;
13653 enum attr_type flag;
13654 int distance;
13656 /* Skip DISTANCE - 1 active insns. */
13657 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13658 distance > 0 && insn != NULL_RTX;
13659 distance--, insn = prev_active_insn (insn))
13660 if (CALL_P (insn) || JUMP_P (insn))
13661 return;
13663 if (insn == NULL_RTX)
13664 return;
13666 set = single_set (insn);
13668 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13669 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13670 return;
13672 flag = s390_safe_attr_type (insn);
13674 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13675 return;
13677 regno = REGNO (SET_DEST (set));
13678 i = nready - 1;
13680 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13681 i--;
13683 if (!i)
13684 return;
13686 tmp = ready[i];
13687 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13688 ready[0] = tmp;
13692 /* The s390_sched_state variable tracks the state of the current or
13693 the last instruction group.
13695 0,1,2 number of instructions scheduled in the current group
13696 3 the last group is complete - normal insns
13697 4 the last group was a cracked/expanded insn */
13699 static int s390_sched_state;
13701 #define S390_SCHED_STATE_NORMAL 3
13702 #define S390_SCHED_STATE_CRACKED 4
13704 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
13705 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
13706 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
13707 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
13709 static unsigned int
13710 s390_get_sched_attrmask (rtx_insn *insn)
13712 unsigned int mask = 0;
13714 switch (s390_tune)
13716 case PROCESSOR_2827_ZEC12:
13717 if (get_attr_zEC12_cracked (insn))
13718 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13719 if (get_attr_zEC12_expanded (insn))
13720 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13721 if (get_attr_zEC12_endgroup (insn))
13722 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13723 if (get_attr_zEC12_groupalone (insn))
13724 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13725 break;
13726 case PROCESSOR_2964_Z13:
13727 if (get_attr_z13_cracked (insn))
13728 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13729 if (get_attr_z13_expanded (insn))
13730 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13731 if (get_attr_z13_endgroup (insn))
13732 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13733 if (get_attr_z13_groupalone (insn))
13734 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13735 break;
13736 default:
13737 gcc_unreachable ();
13739 return mask;
13742 static unsigned int
13743 s390_get_unit_mask (rtx_insn *insn, int *units)
13745 unsigned int mask = 0;
13747 switch (s390_tune)
13749 case PROCESSOR_2964_Z13:
13750 *units = 3;
13751 if (get_attr_z13_unit_lsu (insn))
13752 mask |= 1 << 0;
13753 if (get_attr_z13_unit_fxu (insn))
13754 mask |= 1 << 1;
13755 if (get_attr_z13_unit_vfu (insn))
13756 mask |= 1 << 2;
13757 break;
13758 default:
13759 gcc_unreachable ();
13761 return mask;
13764 /* Return the scheduling score for INSN. The higher the score the
13765 better. The score is calculated from the OOO scheduling attributes
13766 of INSN and the scheduling state s390_sched_state. */
13767 static int
13768 s390_sched_score (rtx_insn *insn)
13770 unsigned int mask = s390_get_sched_attrmask (insn);
13771 int score = 0;
13773 switch (s390_sched_state)
13775 case 0:
13776 /* Try to put insns into the first slot which would otherwise
13777 break a group. */
13778 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13779 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13780 score += 5;
13781 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13782 score += 10;
13783 case 1:
13784 /* Prefer not cracked insns while trying to put together a
13785 group. */
13786 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13787 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13788 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13789 score += 10;
13790 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
13791 score += 5;
13792 break;
13793 case 2:
13794 /* Prefer not cracked insns while trying to put together a
13795 group. */
13796 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13797 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13798 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13799 score += 10;
13800 /* Prefer endgroup insns in the last slot. */
13801 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
13802 score += 10;
13803 break;
13804 case S390_SCHED_STATE_NORMAL:
13805 /* Prefer not cracked insns if the last was not cracked. */
13806 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13807 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
13808 score += 5;
13809 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13810 score += 10;
13811 break;
13812 case S390_SCHED_STATE_CRACKED:
13813 /* Try to keep cracked insns together to prevent them from
13814 interrupting groups. */
13815 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13816 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13817 score += 5;
13818 break;
13821 if (s390_tune == PROCESSOR_2964_Z13)
13823 int units, i;
13824 unsigned unit_mask, m = 1;
13826 unit_mask = s390_get_unit_mask (insn, &units);
13827 gcc_assert (units <= MAX_SCHED_UNITS);
13829 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
13830 ago the last insn of this unit type got scheduled. This is
13831 supposed to help providing a proper instruction mix to the
13832 CPU. */
13833 for (i = 0; i < units; i++, m <<= 1)
13834 if (m & unit_mask)
13835 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
13836 MAX_SCHED_MIX_DISTANCE);
13838 return score;
13841 /* This function is called via hook TARGET_SCHED_REORDER before
13842 issuing one insn from list READY which contains *NREADYP entries.
13843 For target z10 it reorders load instructions to avoid early load
13844 conflicts in the floating point pipeline */
13845 static int
13846 s390_sched_reorder (FILE *file, int verbose,
13847 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13849 if (s390_tune == PROCESSOR_2097_Z10
13850 && reload_completed
13851 && *nreadyp > 1)
13852 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13854 if (s390_tune >= PROCESSOR_2827_ZEC12
13855 && reload_completed
13856 && *nreadyp > 1)
13858 int i;
13859 int last_index = *nreadyp - 1;
13860 int max_index = -1;
13861 int max_score = -1;
13862 rtx_insn *tmp;
13864 /* Just move the insn with the highest score to the top (the
13865 end) of the list. A full sort is not needed since a conflict
13866 in the hazard recognition cannot happen. So the top insn in
13867 the ready list will always be taken. */
13868 for (i = last_index; i >= 0; i--)
13870 int score;
13872 if (recog_memoized (ready[i]) < 0)
13873 continue;
13875 score = s390_sched_score (ready[i]);
13876 if (score > max_score)
13878 max_score = score;
13879 max_index = i;
13883 if (max_index != -1)
13885 if (max_index != last_index)
13887 tmp = ready[max_index];
13888 ready[max_index] = ready[last_index];
13889 ready[last_index] = tmp;
13891 if (verbose > 5)
13892 fprintf (file,
13893 ";;\t\tBACKEND: move insn %d to the top of list\n",
13894 INSN_UID (ready[last_index]));
13896 else if (verbose > 5)
13897 fprintf (file,
13898 ";;\t\tBACKEND: best insn %d already on top\n",
13899 INSN_UID (ready[last_index]));
13902 if (verbose > 5)
13904 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13905 s390_sched_state);
13907 for (i = last_index; i >= 0; i--)
13909 unsigned int sched_mask;
13910 rtx_insn *insn = ready[i];
13912 if (recog_memoized (insn) < 0)
13913 continue;
13915 sched_mask = s390_get_sched_attrmask (insn);
13916 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
13917 INSN_UID (insn),
13918 s390_sched_score (insn));
13919 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
13920 ((M) & sched_mask) ? #ATTR : "");
13921 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
13922 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
13923 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
13924 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
13925 #undef PRINT_SCHED_ATTR
13926 if (s390_tune == PROCESSOR_2964_Z13)
13928 unsigned int unit_mask, m = 1;
13929 int units, j;
13931 unit_mask = s390_get_unit_mask (insn, &units);
13932 fprintf (file, "(units:");
13933 for (j = 0; j < units; j++, m <<= 1)
13934 if (m & unit_mask)
13935 fprintf (file, " u%d", j);
13936 fprintf (file, ")");
13938 fprintf (file, "\n");
13943 return s390_issue_rate ();
13947 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13948 the scheduler has issued INSN. It stores the last issued insn into
13949 last_scheduled_insn in order to make it available for
13950 s390_sched_reorder. */
13951 static int
13952 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13954 last_scheduled_insn = insn;
13956 if (s390_tune >= PROCESSOR_2827_ZEC12
13957 && reload_completed
13958 && recog_memoized (insn) >= 0)
13960 unsigned int mask = s390_get_sched_attrmask (insn);
13962 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13963 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13964 s390_sched_state = S390_SCHED_STATE_CRACKED;
13965 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
13966 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13967 s390_sched_state = S390_SCHED_STATE_NORMAL;
13968 else
13970 /* Only normal insns are left (mask == 0). */
13971 switch (s390_sched_state)
13973 case 0:
13974 case 1:
13975 case 2:
13976 case S390_SCHED_STATE_NORMAL:
13977 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
13978 s390_sched_state = 1;
13979 else
13980 s390_sched_state++;
13982 break;
13983 case S390_SCHED_STATE_CRACKED:
13984 s390_sched_state = S390_SCHED_STATE_NORMAL;
13985 break;
13989 if (s390_tune == PROCESSOR_2964_Z13)
13991 int units, i;
13992 unsigned unit_mask, m = 1;
13994 unit_mask = s390_get_unit_mask (insn, &units);
13995 gcc_assert (units <= MAX_SCHED_UNITS);
13997 for (i = 0; i < units; i++, m <<= 1)
13998 if (m & unit_mask)
13999 last_scheduled_unit_distance[i] = 0;
14000 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14001 last_scheduled_unit_distance[i]++;
14004 if (verbose > 5)
14006 unsigned int sched_mask;
14008 sched_mask = s390_get_sched_attrmask (insn);
14010 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14011 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14012 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14013 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14014 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14015 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14016 #undef PRINT_SCHED_ATTR
14018 if (s390_tune == PROCESSOR_2964_Z13)
14020 unsigned int unit_mask, m = 1;
14021 int units, j;
14023 unit_mask = s390_get_unit_mask (insn, &units);
14024 fprintf (file, "(units:");
14025 for (j = 0; j < units; j++, m <<= 1)
14026 if (m & unit_mask)
14027 fprintf (file, " %d", j);
14028 fprintf (file, ")");
14030 fprintf (file, " sched state: %d\n", s390_sched_state);
14032 if (s390_tune == PROCESSOR_2964_Z13)
14034 int units, j;
14036 s390_get_unit_mask (insn, &units);
14038 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14039 for (j = 0; j < units; j++)
14040 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14041 fprintf (file, "\n");
14046 if (GET_CODE (PATTERN (insn)) != USE
14047 && GET_CODE (PATTERN (insn)) != CLOBBER)
14048 return more - 1;
14049 else
14050 return more;
14053 static void
14054 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14055 int verbose ATTRIBUTE_UNUSED,
14056 int max_ready ATTRIBUTE_UNUSED)
14058 last_scheduled_insn = NULL;
14059 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14060 s390_sched_state = 0;
14063 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14064 a new number struct loop *loop should be unrolled if tuned for cpus with
14065 a built-in stride prefetcher.
14066 The loop is analyzed for memory accesses by calling check_dpu for
14067 each rtx of the loop. Depending on the loop_depth and the amount of
14068 memory accesses a new number <=nunroll is returned to improve the
14069 behavior of the hardware prefetch unit. */
14070 static unsigned
14071 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14073 basic_block *bbs;
14074 rtx_insn *insn;
14075 unsigned i;
14076 unsigned mem_count = 0;
14078 if (s390_tune < PROCESSOR_2097_Z10)
14079 return nunroll;
14081 /* Count the number of memory references within the loop body. */
14082 bbs = get_loop_body (loop);
14083 subrtx_iterator::array_type array;
14084 for (i = 0; i < loop->num_nodes; i++)
14085 FOR_BB_INSNS (bbs[i], insn)
14086 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14087 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14088 if (MEM_P (*iter))
14089 mem_count += 1;
14090 free (bbs);
14092 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
14093 if (mem_count == 0)
14094 return nunroll;
14096 switch (loop_depth(loop))
14098 case 1:
14099 return MIN (nunroll, 28 / mem_count);
14100 case 2:
14101 return MIN (nunroll, 22 / mem_count);
14102 default:
14103 return MIN (nunroll, 16 / mem_count);
14107 /* Restore the current options. This is a hook function and also called
14108 internally. */
14110 static void
14111 s390_function_specific_restore (struct gcc_options *opts,
14112 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14114 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14117 static void
14118 s390_option_override_internal (bool main_args_p,
14119 struct gcc_options *opts,
14120 const struct gcc_options *opts_set)
14122 const char *prefix;
14123 const char *suffix;
14125 /* Set up prefix/suffix so the error messages refer to either the command
14126 line argument, or the attribute(target). */
14127 if (main_args_p)
14129 prefix = "-m";
14130 suffix = "";
14132 else
14134 prefix = "option(\"";
14135 suffix = "\")";
14139 /* Architecture mode defaults according to ABI. */
14140 if (!(opts_set->x_target_flags & MASK_ZARCH))
14142 if (TARGET_64BIT)
14143 opts->x_target_flags |= MASK_ZARCH;
14144 else
14145 opts->x_target_flags &= ~MASK_ZARCH;
14148 /* Set the march default in case it hasn't been specified on cmdline. */
14149 if (!opts_set->x_s390_arch)
14150 opts->x_s390_arch = PROCESSOR_2064_Z900;
14151 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14152 || opts->x_s390_arch == PROCESSOR_9672_G6)
14153 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14154 "in future releases; use at least %sarch=z900%s",
14155 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14156 suffix, prefix, suffix);
14158 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14160 /* Determine processor to tune for. */
14161 if (!opts_set->x_s390_tune)
14162 opts->x_s390_tune = opts->x_s390_arch;
14163 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14164 || opts->x_s390_tune == PROCESSOR_9672_G6)
14165 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14166 "in future releases; use at least %stune=z900%s",
14167 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14168 suffix, prefix, suffix);
14170 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14172 /* Sanity checks. */
14173 if (opts->x_s390_arch == PROCESSOR_NATIVE
14174 || opts->x_s390_tune == PROCESSOR_NATIVE)
14175 gcc_unreachable ();
14176 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14177 error ("z/Architecture mode not supported on %s",
14178 processor_table[(int)opts->x_s390_arch].name);
14179 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14180 error ("64-bit ABI not supported in ESA/390 mode");
14182 /* Enable hardware transactions if available and not explicitly
14183 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14184 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14186 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14187 opts->x_target_flags |= MASK_OPT_HTM;
14188 else
14189 opts->x_target_flags &= ~MASK_OPT_HTM;
14192 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14194 if (TARGET_OPT_VX_P (opts->x_target_flags))
14196 if (!TARGET_CPU_VX_P (opts))
14197 error ("hardware vector support not available on %s",
14198 processor_table[(int)opts->x_s390_arch].name);
14199 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14200 error ("hardware vector support not available with -msoft-float");
14203 else
14205 if (TARGET_CPU_VX_P (opts))
14206 /* Enable vector support if available and not explicitly disabled
14207 by user. E.g. with -m31 -march=z13 -mzarch */
14208 opts->x_target_flags |= MASK_OPT_VX;
14209 else
14210 opts->x_target_flags &= ~MASK_OPT_VX;
14213 /* Use hardware DFP if available and not explicitly disabled by
14214 user. E.g. with -m31 -march=z10 -mzarch */
14215 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14217 if (TARGET_DFP_P (opts))
14218 opts->x_target_flags |= MASK_HARD_DFP;
14219 else
14220 opts->x_target_flags &= ~MASK_HARD_DFP;
14223 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14225 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14227 if (!TARGET_CPU_DFP_P (opts))
14228 error ("hardware decimal floating point instructions"
14229 " not available on %s",
14230 processor_table[(int)opts->x_s390_arch].name);
14231 if (!TARGET_ZARCH_P (opts->x_target_flags))
14232 error ("hardware decimal floating point instructions"
14233 " not available in ESA/390 mode");
14235 else
14236 opts->x_target_flags &= ~MASK_HARD_DFP;
14239 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14240 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14242 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14243 && TARGET_HARD_DFP_P (opts->x_target_flags))
14244 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14246 opts->x_target_flags &= ~MASK_HARD_DFP;
14249 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14250 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14251 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14252 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14253 "in combination");
14255 if (opts->x_s390_stack_size)
14257 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14258 error ("stack size must be greater than the stack guard value");
14259 else if (opts->x_s390_stack_size > 1 << 16)
14260 error ("stack size must not be greater than 64k");
14262 else if (opts->x_s390_stack_guard)
14263 error ("-mstack-guard implies use of -mstack-size");
14265 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14266 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14267 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14268 #endif
14270 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14272 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14273 opts->x_param_values,
14274 opts_set->x_param_values);
14275 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14276 opts->x_param_values,
14277 opts_set->x_param_values);
14278 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14279 opts->x_param_values,
14280 opts_set->x_param_values);
14281 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14282 opts->x_param_values,
14283 opts_set->x_param_values);
14286 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14287 opts->x_param_values,
14288 opts_set->x_param_values);
14289 /* values for loop prefetching */
14290 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14291 opts->x_param_values,
14292 opts_set->x_param_values);
14293 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14294 opts->x_param_values,
14295 opts_set->x_param_values);
14296 /* s390 has more than 2 levels and the size is much larger. Since
14297 we are always running virtualized assume that we only get a small
14298 part of the caches above l1. */
14299 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14300 opts->x_param_values,
14301 opts_set->x_param_values);
14302 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14303 opts->x_param_values,
14304 opts_set->x_param_values);
14305 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14306 opts->x_param_values,
14307 opts_set->x_param_values);
14309 /* Use the alternative scheduling-pressure algorithm by default. */
14310 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14311 opts->x_param_values,
14312 opts_set->x_param_values);
14314 /* Call target specific restore function to do post-init work. At the moment,
14315 this just sets opts->x_s390_cost_pointer. */
14316 s390_function_specific_restore (opts, NULL);
14319 static void
14320 s390_option_override (void)
14322 unsigned int i;
14323 cl_deferred_option *opt;
14324 vec<cl_deferred_option> *v =
14325 (vec<cl_deferred_option> *) s390_deferred_options;
14327 if (v)
14328 FOR_EACH_VEC_ELT (*v, i, opt)
14330 switch (opt->opt_index)
14332 case OPT_mhotpatch_:
14334 int val1;
14335 int val2;
14336 char s[256];
14337 char *t;
14339 strncpy (s, opt->arg, 256);
14340 s[255] = 0;
14341 t = strchr (s, ',');
14342 if (t != NULL)
14344 *t = 0;
14345 t++;
14346 val1 = integral_argument (s);
14347 val2 = integral_argument (t);
14349 else
14351 val1 = -1;
14352 val2 = -1;
14354 if (val1 == -1 || val2 == -1)
14356 /* argument is not a plain number */
14357 error ("arguments to %qs should be non-negative integers",
14358 "-mhotpatch=n,m");
14359 break;
14361 else if (val1 > s390_hotpatch_hw_max
14362 || val2 > s390_hotpatch_hw_max)
14364 error ("argument to %qs is too large (max. %d)",
14365 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14366 break;
14368 s390_hotpatch_hw_before_label = val1;
14369 s390_hotpatch_hw_after_label = val2;
14370 break;
14372 default:
14373 gcc_unreachable ();
14377 /* Set up function hooks. */
14378 init_machine_status = s390_init_machine_status;
14380 s390_option_override_internal (true, &global_options, &global_options_set);
14382 /* Save the initial options in case the user does function specific
14383 options. */
14384 target_option_default_node = build_target_option_node (&global_options);
14385 target_option_current_node = target_option_default_node;
14387 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14388 requires the arch flags to be evaluated already. Since prefetching
14389 is beneficial on s390, we enable it if available. */
14390 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14391 flag_prefetch_loop_arrays = 1;
14393 if (TARGET_TPF)
14395 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14396 debuggers do not yet support DWARF 3/4. */
14397 if (!global_options_set.x_dwarf_strict)
14398 dwarf_strict = 1;
14399 if (!global_options_set.x_dwarf_version)
14400 dwarf_version = 2;
14403 /* Register a target-specific optimization-and-lowering pass
14404 to run immediately before prologue and epilogue generation.
14406 Registering the pass must be done at start up. It's
14407 convenient to do it here. */
14408 opt_pass *new_pass = new pass_s390_early_mach (g);
14409 struct register_pass_info insert_pass_s390_early_mach =
14411 new_pass, /* pass */
14412 "pro_and_epilogue", /* reference_pass_name */
14413 1, /* ref_pass_instance_number */
14414 PASS_POS_INSERT_BEFORE /* po_op */
14416 register_pass (&insert_pass_s390_early_mach);
14419 #if S390_USE_TARGET_ATTRIBUTE
14420 /* Inner function to process the attribute((target(...))), take an argument and
14421 set the current options from the argument. If we have a list, recursively go
14422 over the list. */
14424 static bool
14425 s390_valid_target_attribute_inner_p (tree args,
14426 struct gcc_options *opts,
14427 struct gcc_options *new_opts_set,
14428 bool force_pragma)
14430 char *next_optstr;
14431 bool ret = true;
14433 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14434 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14435 static const struct
14437 const char *string;
14438 size_t len;
14439 int opt;
14440 int has_arg;
14441 int only_as_pragma;
14442 } attrs[] = {
14443 /* enum options */
14444 S390_ATTRIB ("arch=", OPT_march_, 1),
14445 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14446 /* uinteger options */
14447 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14448 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14449 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14450 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14451 /* flag options */
14452 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14453 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14454 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14455 S390_ATTRIB ("htm", OPT_mhtm, 0),
14456 S390_ATTRIB ("vx", OPT_mvx, 0),
14457 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14458 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14459 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14460 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14461 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14462 /* boolean options */
14463 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14465 #undef S390_ATTRIB
14466 #undef S390_PRAGMA
14468 /* If this is a list, recurse to get the options. */
14469 if (TREE_CODE (args) == TREE_LIST)
14471 bool ret = true;
14472 int num_pragma_values;
14473 int i;
14475 /* Note: attribs.c:decl_attributes prepends the values from
14476 current_target_pragma to the list of target attributes. To determine
14477 whether we're looking at a value of the attribute or the pragma we
14478 assume that the first [list_length (current_target_pragma)] values in
14479 the list are the values from the pragma. */
14480 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14481 ? list_length (current_target_pragma) : 0;
14482 for (i = 0; args; args = TREE_CHAIN (args), i++)
14484 bool is_pragma;
14486 is_pragma = (force_pragma || i < num_pragma_values);
14487 if (TREE_VALUE (args)
14488 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14489 opts, new_opts_set,
14490 is_pragma))
14492 ret = false;
14495 return ret;
14498 else if (TREE_CODE (args) != STRING_CST)
14500 error ("attribute %<target%> argument not a string");
14501 return false;
14504 /* Handle multiple arguments separated by commas. */
14505 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14507 while (next_optstr && *next_optstr != '\0')
14509 char *p = next_optstr;
14510 char *orig_p = p;
14511 char *comma = strchr (next_optstr, ',');
14512 size_t len, opt_len;
14513 int opt;
14514 bool opt_set_p;
14515 char ch;
14516 unsigned i;
14517 int mask = 0;
14518 enum cl_var_type var_type;
14519 bool found;
14521 if (comma)
14523 *comma = '\0';
14524 len = comma - next_optstr;
14525 next_optstr = comma + 1;
14527 else
14529 len = strlen (p);
14530 next_optstr = NULL;
14533 /* Recognize no-xxx. */
14534 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14536 opt_set_p = false;
14537 p += 3;
14538 len -= 3;
14540 else
14541 opt_set_p = true;
14543 /* Find the option. */
14544 ch = *p;
14545 found = false;
14546 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14548 opt_len = attrs[i].len;
14549 if (ch == attrs[i].string[0]
14550 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14551 && memcmp (p, attrs[i].string, opt_len) == 0)
14553 opt = attrs[i].opt;
14554 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14555 continue;
14556 mask = cl_options[opt].var_value;
14557 var_type = cl_options[opt].var_type;
14558 found = true;
14559 break;
14563 /* Process the option. */
14564 if (!found)
14566 error ("attribute(target(\"%s\")) is unknown", orig_p);
14567 return false;
14569 else if (attrs[i].only_as_pragma && !force_pragma)
14571 /* Value is not allowed for the target attribute. */
14572 error ("Value %qs is not supported by attribute %<target%>",
14573 attrs[i].string);
14574 return false;
14577 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14579 if (var_type == CLVC_BIT_CLEAR)
14580 opt_set_p = !opt_set_p;
14582 if (opt_set_p)
14583 opts->x_target_flags |= mask;
14584 else
14585 opts->x_target_flags &= ~mask;
14586 new_opts_set->x_target_flags |= mask;
14589 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14591 int value;
14593 if (cl_options[opt].cl_uinteger)
14595 /* Unsigned integer argument. Code based on the function
14596 decode_cmdline_option () in opts-common.c. */
14597 value = integral_argument (p + opt_len);
14599 else
14600 value = (opt_set_p) ? 1 : 0;
14602 if (value != -1)
14604 struct cl_decoded_option decoded;
14606 /* Value range check; only implemented for numeric and boolean
14607 options at the moment. */
14608 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14609 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14610 set_option (opts, new_opts_set, opt, value,
14611 p + opt_len, DK_UNSPECIFIED, input_location,
14612 global_dc);
14614 else
14616 error ("attribute(target(\"%s\")) is unknown", orig_p);
14617 ret = false;
14621 else if (cl_options[opt].var_type == CLVC_ENUM)
14623 bool arg_ok;
14624 int value;
14626 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14627 if (arg_ok)
14628 set_option (opts, new_opts_set, opt, value,
14629 p + opt_len, DK_UNSPECIFIED, input_location,
14630 global_dc);
14631 else
14633 error ("attribute(target(\"%s\")) is unknown", orig_p);
14634 ret = false;
14638 else
14639 gcc_unreachable ();
14641 return ret;
14644 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14646 tree
14647 s390_valid_target_attribute_tree (tree args,
14648 struct gcc_options *opts,
14649 const struct gcc_options *opts_set,
14650 bool force_pragma)
14652 tree t = NULL_TREE;
14653 struct gcc_options new_opts_set;
14655 memset (&new_opts_set, 0, sizeof (new_opts_set));
14657 /* Process each of the options on the chain. */
14658 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14659 force_pragma))
14660 return error_mark_node;
14662 /* If some option was set (even if it has not changed), rerun
14663 s390_option_override_internal, and then save the options away. */
14664 if (new_opts_set.x_target_flags
14665 || new_opts_set.x_s390_arch
14666 || new_opts_set.x_s390_tune
14667 || new_opts_set.x_s390_stack_guard
14668 || new_opts_set.x_s390_stack_size
14669 || new_opts_set.x_s390_branch_cost
14670 || new_opts_set.x_s390_warn_framesize
14671 || new_opts_set.x_s390_warn_dynamicstack_p)
14673 const unsigned char *src = (const unsigned char *)opts_set;
14674 unsigned char *dest = (unsigned char *)&new_opts_set;
14675 unsigned int i;
14677 /* Merge the original option flags into the new ones. */
14678 for (i = 0; i < sizeof(*opts_set); i++)
14679 dest[i] |= src[i];
14681 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
14682 s390_option_override_internal (false, opts, &new_opts_set);
14683 /* Save the current options unless we are validating options for
14684 #pragma. */
14685 t = build_target_option_node (opts);
14687 return t;
14690 /* Hook to validate attribute((target("string"))). */
14692 static bool
14693 s390_valid_target_attribute_p (tree fndecl,
14694 tree ARG_UNUSED (name),
14695 tree args,
14696 int ARG_UNUSED (flags))
14698 struct gcc_options func_options;
14699 tree new_target, new_optimize;
14700 bool ret = true;
14702 /* attribute((target("default"))) does nothing, beyond
14703 affecting multi-versioning. */
14704 if (TREE_VALUE (args)
14705 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
14706 && TREE_CHAIN (args) == NULL_TREE
14707 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
14708 return true;
14710 tree old_optimize = build_optimization_node (&global_options);
14712 /* Get the optimization options of the current function. */
14713 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14715 if (!func_optimize)
14716 func_optimize = old_optimize;
14718 /* Init func_options. */
14719 memset (&func_options, 0, sizeof (func_options));
14720 init_options_struct (&func_options, NULL);
14721 lang_hooks.init_options_struct (&func_options);
14723 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
14725 /* Initialize func_options to the default before its target options can
14726 be set. */
14727 cl_target_option_restore (&func_options,
14728 TREE_TARGET_OPTION (target_option_default_node));
14730 new_target = s390_valid_target_attribute_tree (args, &func_options,
14731 &global_options_set,
14732 (args ==
14733 current_target_pragma));
14734 new_optimize = build_optimization_node (&func_options);
14735 if (new_target == error_mark_node)
14736 ret = false;
14737 else if (fndecl && new_target)
14739 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
14740 if (old_optimize != new_optimize)
14741 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
14743 return ret;
14746 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
14747 cache. */
14749 void
14750 s390_activate_target_options (tree new_tree)
14752 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
14753 if (TREE_TARGET_GLOBALS (new_tree))
14754 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
14755 else if (new_tree == target_option_default_node)
14756 restore_target_globals (&default_target_globals);
14757 else
14758 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
14759 s390_previous_fndecl = NULL_TREE;
14762 /* Establish appropriate back-end context for processing the function
14763 FNDECL. The argument might be NULL to indicate processing at top
14764 level, outside of any function scope. */
14765 static void
14766 s390_set_current_function (tree fndecl)
14768 /* Only change the context if the function changes. This hook is called
14769 several times in the course of compiling a function, and we don't want to
14770 slow things down too much or call target_reinit when it isn't safe. */
14771 if (fndecl == s390_previous_fndecl)
14772 return;
14774 tree old_tree;
14775 if (s390_previous_fndecl == NULL_TREE)
14776 old_tree = target_option_current_node;
14777 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
14778 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
14779 else
14780 old_tree = target_option_default_node;
14782 if (fndecl == NULL_TREE)
14784 if (old_tree != target_option_current_node)
14785 s390_activate_target_options (target_option_current_node);
14786 return;
14789 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14790 if (new_tree == NULL_TREE)
14791 new_tree = target_option_default_node;
14793 if (old_tree != new_tree)
14794 s390_activate_target_options (new_tree);
14795 s390_previous_fndecl = fndecl;
14797 #endif
14799 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14801 static bool
14802 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14803 unsigned int align ATTRIBUTE_UNUSED,
14804 enum by_pieces_operation op ATTRIBUTE_UNUSED,
14805 bool speed_p ATTRIBUTE_UNUSED)
14807 return (size == 1 || size == 2
14808 || size == 4 || (TARGET_ZARCH && size == 8));
14811 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
14813 static void
14814 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
14816 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
14817 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
14818 tree call_efpc = build_call_expr (efpc, 0);
14819 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
14821 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
14822 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
14823 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
14824 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
14825 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
14826 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
14828 /* Generates the equivalent of feholdexcept (&fenv_var)
14830 fenv_var = __builtin_s390_efpc ();
14831 __builtin_s390_sfpc (fenv_var & mask) */
14832 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
14833 tree new_fpc =
14834 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
14835 build_int_cst (unsigned_type_node,
14836 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
14837 FPC_EXCEPTION_MASK)));
14838 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
14839 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
14841 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
14843 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
14844 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
14845 build_int_cst (unsigned_type_node,
14846 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
14847 *clear = build_call_expr (sfpc, 1, new_fpc);
14849 /* Generates the equivalent of feupdateenv (fenv_var)
14851 old_fpc = __builtin_s390_efpc ();
14852 __builtin_s390_sfpc (fenv_var);
14853 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
14855 old_fpc = create_tmp_var_raw (unsigned_type_node);
14856 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
14857 old_fpc, call_efpc);
14859 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
14861 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
14862 build_int_cst (unsigned_type_node,
14863 FPC_FLAGS_MASK));
14864 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
14865 build_int_cst (unsigned_type_node,
14866 FPC_FLAGS_SHIFT));
14867 tree atomic_feraiseexcept
14868 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
14869 raise_old_except = build_call_expr (atomic_feraiseexcept,
14870 1, raise_old_except);
14872 *update = build2 (COMPOUND_EXPR, void_type_node,
14873 build2 (COMPOUND_EXPR, void_type_node,
14874 store_old_fpc, set_new_fpc),
14875 raise_old_except);
14877 #undef FPC_EXCEPTION_MASK
14878 #undef FPC_FLAGS_MASK
14879 #undef FPC_DXC_MASK
14880 #undef FPC_EXCEPTION_MASK_SHIFT
14881 #undef FPC_FLAGS_SHIFT
14882 #undef FPC_DXC_SHIFT
14885 /* Return the vector mode to be used for inner mode MODE when doing
14886 vectorization. */
14887 static machine_mode
14888 s390_preferred_simd_mode (machine_mode mode)
14890 if (TARGET_VX)
14891 switch (mode)
14893 case DFmode:
14894 return V2DFmode;
14895 case DImode:
14896 return V2DImode;
14897 case SImode:
14898 return V4SImode;
14899 case HImode:
14900 return V8HImode;
14901 case QImode:
14902 return V16QImode;
14903 default:;
14905 return word_mode;
14908 /* Our hardware does not require vectors to be strictly aligned. */
14909 static bool
14910 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
14911 const_tree type ATTRIBUTE_UNUSED,
14912 int misalignment ATTRIBUTE_UNUSED,
14913 bool is_packed ATTRIBUTE_UNUSED)
14915 if (TARGET_VX)
14916 return true;
14918 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14919 is_packed);
14922 /* The vector ABI requires vector types to be aligned on an 8 byte
14923 boundary (our stack alignment). However, we allow this to be
14924 overriden by the user, while this definitely breaks the ABI. */
14925 static HOST_WIDE_INT
14926 s390_vector_alignment (const_tree type)
14928 if (!TARGET_VX_ABI)
14929 return default_vector_alignment (type);
14931 if (TYPE_USER_ALIGN (type))
14932 return TYPE_ALIGN (type);
14934 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
14937 #ifdef HAVE_AS_MACHINE_MACHINEMODE
14938 /* Implement TARGET_ASM_FILE_START. */
14939 static void
14940 s390_asm_file_start (void)
14942 s390_asm_output_machine_for_arch (asm_out_file);
14944 #endif
14946 /* Implement TARGET_ASM_FILE_END. */
14947 static void
14948 s390_asm_file_end (void)
14950 #ifdef HAVE_AS_GNU_ATTRIBUTE
14951 varpool_node *vnode;
14952 cgraph_node *cnode;
14954 FOR_EACH_VARIABLE (vnode)
14955 if (TREE_PUBLIC (vnode->decl))
14956 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
14958 FOR_EACH_FUNCTION (cnode)
14959 if (TREE_PUBLIC (cnode->decl))
14960 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
14963 if (s390_vector_abi != 0)
14964 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
14965 s390_vector_abi);
14966 #endif
14967 file_end_indicate_exec_stack ();
14969 if (flag_split_stack)
14970 file_end_indicate_split_stack ();
14973 /* Return true if TYPE is a vector bool type. */
14974 static inline bool
14975 s390_vector_bool_type_p (const_tree type)
14977 return TYPE_VECTOR_OPAQUE (type);
14980 /* Return the diagnostic message string if the binary operation OP is
14981 not permitted on TYPE1 and TYPE2, NULL otherwise. */
14982 static const char*
14983 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
14985 bool bool1_p, bool2_p;
14986 bool plusminus_p;
14987 bool muldiv_p;
14988 bool compare_p;
14989 machine_mode mode1, mode2;
14991 if (!TARGET_ZVECTOR)
14992 return NULL;
14994 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
14995 return NULL;
14997 bool1_p = s390_vector_bool_type_p (type1);
14998 bool2_p = s390_vector_bool_type_p (type2);
15000 /* Mixing signed and unsigned types is forbidden for all
15001 operators. */
15002 if (!bool1_p && !bool2_p
15003 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15004 return N_("types differ in signess");
15006 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15007 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15008 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15009 || op == ROUND_DIV_EXPR);
15010 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15011 || op == EQ_EXPR || op == NE_EXPR);
15013 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15014 return N_("binary operator does not support two vector bool operands");
15016 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15017 return N_("binary operator does not support vector bool operand");
15019 mode1 = TYPE_MODE (type1);
15020 mode2 = TYPE_MODE (type2);
15022 if (bool1_p != bool2_p && plusminus_p
15023 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15024 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15025 return N_("binary operator does not support mixing vector "
15026 "bool with floating point vector operands");
15028 return NULL;
15031 /* Initialize GCC target structure. */
15033 #undef TARGET_ASM_ALIGNED_HI_OP
15034 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15035 #undef TARGET_ASM_ALIGNED_DI_OP
15036 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15037 #undef TARGET_ASM_INTEGER
15038 #define TARGET_ASM_INTEGER s390_assemble_integer
15040 #undef TARGET_ASM_OPEN_PAREN
15041 #define TARGET_ASM_OPEN_PAREN ""
15043 #undef TARGET_ASM_CLOSE_PAREN
15044 #define TARGET_ASM_CLOSE_PAREN ""
15046 #undef TARGET_OPTION_OVERRIDE
15047 #define TARGET_OPTION_OVERRIDE s390_option_override
15049 #undef TARGET_ENCODE_SECTION_INFO
15050 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15052 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15053 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15055 #ifdef HAVE_AS_TLS
15056 #undef TARGET_HAVE_TLS
15057 #define TARGET_HAVE_TLS true
15058 #endif
15059 #undef TARGET_CANNOT_FORCE_CONST_MEM
15060 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15062 #undef TARGET_DELEGITIMIZE_ADDRESS
15063 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15065 #undef TARGET_LEGITIMIZE_ADDRESS
15066 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15068 #undef TARGET_RETURN_IN_MEMORY
15069 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15071 #undef TARGET_INIT_BUILTINS
15072 #define TARGET_INIT_BUILTINS s390_init_builtins
15073 #undef TARGET_EXPAND_BUILTIN
15074 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15075 #undef TARGET_BUILTIN_DECL
15076 #define TARGET_BUILTIN_DECL s390_builtin_decl
15078 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15079 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15081 #undef TARGET_ASM_OUTPUT_MI_THUNK
15082 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15083 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15084 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15086 #undef TARGET_SCHED_ADJUST_PRIORITY
15087 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15088 #undef TARGET_SCHED_ISSUE_RATE
15089 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15090 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15091 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15093 #undef TARGET_SCHED_VARIABLE_ISSUE
15094 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15095 #undef TARGET_SCHED_REORDER
15096 #define TARGET_SCHED_REORDER s390_sched_reorder
15097 #undef TARGET_SCHED_INIT
15098 #define TARGET_SCHED_INIT s390_sched_init
15100 #undef TARGET_CANNOT_COPY_INSN_P
15101 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15102 #undef TARGET_RTX_COSTS
15103 #define TARGET_RTX_COSTS s390_rtx_costs
15104 #undef TARGET_ADDRESS_COST
15105 #define TARGET_ADDRESS_COST s390_address_cost
15106 #undef TARGET_REGISTER_MOVE_COST
15107 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15108 #undef TARGET_MEMORY_MOVE_COST
15109 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15111 #undef TARGET_MACHINE_DEPENDENT_REORG
15112 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15114 #undef TARGET_VALID_POINTER_MODE
15115 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15117 #undef TARGET_BUILD_BUILTIN_VA_LIST
15118 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15119 #undef TARGET_EXPAND_BUILTIN_VA_START
15120 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15121 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15122 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15124 #undef TARGET_PROMOTE_FUNCTION_MODE
15125 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15126 #undef TARGET_PASS_BY_REFERENCE
15127 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15129 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15130 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15131 #undef TARGET_FUNCTION_ARG
15132 #define TARGET_FUNCTION_ARG s390_function_arg
15133 #undef TARGET_FUNCTION_ARG_ADVANCE
15134 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15135 #undef TARGET_FUNCTION_VALUE
15136 #define TARGET_FUNCTION_VALUE s390_function_value
15137 #undef TARGET_LIBCALL_VALUE
15138 #define TARGET_LIBCALL_VALUE s390_libcall_value
15139 #undef TARGET_STRICT_ARGUMENT_NAMING
15140 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15142 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15143 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15145 #undef TARGET_FIXED_CONDITION_CODE_REGS
15146 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15148 #undef TARGET_CC_MODES_COMPATIBLE
15149 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15151 #undef TARGET_INVALID_WITHIN_DOLOOP
15152 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15154 #ifdef HAVE_AS_TLS
15155 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15156 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15157 #endif
15159 #undef TARGET_DWARF_FRAME_REG_MODE
15160 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15162 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15163 #undef TARGET_MANGLE_TYPE
15164 #define TARGET_MANGLE_TYPE s390_mangle_type
15165 #endif
15167 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15168 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15170 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15171 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15173 #undef TARGET_PREFERRED_RELOAD_CLASS
15174 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15176 #undef TARGET_SECONDARY_RELOAD
15177 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15179 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15180 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15182 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15183 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15185 #undef TARGET_LEGITIMATE_ADDRESS_P
15186 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15188 #undef TARGET_LEGITIMATE_CONSTANT_P
15189 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15191 #undef TARGET_LRA_P
15192 #define TARGET_LRA_P s390_lra_p
15194 #undef TARGET_CAN_ELIMINATE
15195 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15197 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15198 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15200 #undef TARGET_LOOP_UNROLL_ADJUST
15201 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15203 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15204 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15205 #undef TARGET_TRAMPOLINE_INIT
15206 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15208 #undef TARGET_UNWIND_WORD_MODE
15209 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15211 #undef TARGET_CANONICALIZE_COMPARISON
15212 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15214 #undef TARGET_HARD_REGNO_SCRATCH_OK
15215 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15217 #undef TARGET_ATTRIBUTE_TABLE
15218 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15220 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15221 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15223 #undef TARGET_SET_UP_BY_PROLOGUE
15224 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15226 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15227 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15229 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15230 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15231 s390_use_by_pieces_infrastructure_p
15233 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15234 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15236 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15237 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15239 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15240 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15242 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15243 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15245 #undef TARGET_VECTOR_ALIGNMENT
15246 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15248 #undef TARGET_INVALID_BINARY_OP
15249 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15251 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15252 #undef TARGET_ASM_FILE_START
15253 #define TARGET_ASM_FILE_START s390_asm_file_start
15254 #endif
15256 #undef TARGET_ASM_FILE_END
15257 #define TARGET_ASM_FILE_END s390_asm_file_end
15259 #if S390_USE_TARGET_ATTRIBUTE
15260 #undef TARGET_SET_CURRENT_FUNCTION
15261 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15263 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15264 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15265 #endif
15267 #undef TARGET_OPTION_RESTORE
15268 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15270 struct gcc_target targetm = TARGET_INITIALIZER;
15272 #include "gt-s390.h"