gcc/config/s390/s390.cc
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2024 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-iterator.h"
74 #include "gimple-fold.h"
75 #include "tree-eh.h"
76 #include "gimplify.h"
77 #include "opts.h"
78 #include "tree-pass.h"
79 #include "context.h"
80 #include "builtins.h"
81 #include "rtl-iter.h"
82 #include "intl.h"
83 #include "tm-constrs.h"
84 #include "tree-vrp.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
90 /* This file should be included last. */
91 #include "target-def.h"
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
95 /* Remember the last target of s390_set_current_function. */
96 static GTY(()) tree s390_previous_fndecl;
98 /* Define the specific costs for a given cpu. */
100 struct processor_costs
102 /* multiplication */
103 const int m; /* cost of an M instruction. */
104 const int mghi; /* cost of an MGHI instruction. */
105 const int mh; /* cost of an MH instruction. */
106 const int mhi; /* cost of an MHI instruction. */
107 const int ml; /* cost of an ML instruction. */
108 const int mr; /* cost of an MR instruction. */
109 const int ms; /* cost of an MS instruction. */
110 const int msg; /* cost of an MSG instruction. */
111 const int msgf; /* cost of an MSGF instruction. */
112 const int msgfr; /* cost of an MSGFR instruction. */
113 const int msgr; /* cost of an MSGR instruction. */
114 const int msr; /* cost of an MSR instruction. */
115 const int mult_df; /* cost of multiplication in DFmode. */
116 const int mxbr;
117 /* square root */
118 const int sqxbr; /* cost of square root in TFmode. */
119 const int sqdbr; /* cost of square root in DFmode. */
120 const int sqebr; /* cost of square root in SFmode. */
121 /* multiply and add */
122 const int madbr; /* cost of multiply and add in DFmode. */
123 const int maebr; /* cost of multiply and add in SFmode. */
124 /* division */
125 const int dxbr;
126 const int ddbr;
127 const int debr;
128 const int dlgr;
129 const int dlr;
130 const int dr;
131 const int dsgfr;
132 const int dsgr;
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
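/* Note: s390_cost_pointer is assumed to be set up by the option handling
   code to point at the cost table of the CPU selected with -mtune (see
   processor_table below).  */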
137 static const
138 struct processor_costs z900_cost =
140 COSTS_N_INSNS (5), /* M */
141 COSTS_N_INSNS (10), /* MGHI */
142 COSTS_N_INSNS (5), /* MH */
143 COSTS_N_INSNS (4), /* MHI */
144 COSTS_N_INSNS (5), /* ML */
145 COSTS_N_INSNS (5), /* MR */
146 COSTS_N_INSNS (4), /* MS */
147 COSTS_N_INSNS (15), /* MSG */
148 COSTS_N_INSNS (7), /* MSGF */
149 COSTS_N_INSNS (7), /* MSGFR */
150 COSTS_N_INSNS (10), /* MSGR */
151 COSTS_N_INSNS (4), /* MSR */
152 COSTS_N_INSNS (7), /* multiplication in DFmode */
153 COSTS_N_INSNS (13), /* MXBR */
154 COSTS_N_INSNS (136), /* SQXBR */
155 COSTS_N_INSNS (44), /* SQDBR */
156 COSTS_N_INSNS (35), /* SQEBR */
157 COSTS_N_INSNS (18), /* MADBR */
158 COSTS_N_INSNS (13), /* MAEBR */
159 COSTS_N_INSNS (134), /* DXBR */
160 COSTS_N_INSNS (30), /* DDBR */
161 COSTS_N_INSNS (27), /* DEBR */
162 COSTS_N_INSNS (220), /* DLGR */
163 COSTS_N_INSNS (34), /* DLR */
164 COSTS_N_INSNS (34), /* DR */
165 COSTS_N_INSNS (32), /* DSGFR */
166 COSTS_N_INSNS (32), /* DSGR */
169 static const
170 struct processor_costs z990_cost =
172 COSTS_N_INSNS (4), /* M */
173 COSTS_N_INSNS (2), /* MGHI */
174 COSTS_N_INSNS (2), /* MH */
175 COSTS_N_INSNS (2), /* MHI */
176 COSTS_N_INSNS (4), /* ML */
177 COSTS_N_INSNS (4), /* MR */
178 COSTS_N_INSNS (5), /* MS */
179 COSTS_N_INSNS (6), /* MSG */
180 COSTS_N_INSNS (4), /* MSGF */
181 COSTS_N_INSNS (4), /* MSGFR */
182 COSTS_N_INSNS (4), /* MSGR */
183 COSTS_N_INSNS (4), /* MSR */
184 COSTS_N_INSNS (1), /* multiplication in DFmode */
185 COSTS_N_INSNS (28), /* MXBR */
186 COSTS_N_INSNS (130), /* SQXBR */
187 COSTS_N_INSNS (66), /* SQDBR */
188 COSTS_N_INSNS (38), /* SQEBR */
189 COSTS_N_INSNS (1), /* MADBR */
190 COSTS_N_INSNS (1), /* MAEBR */
191 COSTS_N_INSNS (60), /* DXBR */
192 COSTS_N_INSNS (40), /* DDBR */
193 COSTS_N_INSNS (26), /* DEBR */
194 COSTS_N_INSNS (176), /* DLGR */
195 COSTS_N_INSNS (31), /* DLR */
196 COSTS_N_INSNS (31), /* DR */
197 COSTS_N_INSNS (31), /* DSGFR */
198 COSTS_N_INSNS (31), /* DSGR */
201 static const
202 struct processor_costs z9_109_cost =
204 COSTS_N_INSNS (4), /* M */
205 COSTS_N_INSNS (2), /* MGHI */
206 COSTS_N_INSNS (2), /* MH */
207 COSTS_N_INSNS (2), /* MHI */
208 COSTS_N_INSNS (4), /* ML */
209 COSTS_N_INSNS (4), /* MR */
210 COSTS_N_INSNS (5), /* MS */
211 COSTS_N_INSNS (6), /* MSG */
212 COSTS_N_INSNS (4), /* MSGF */
213 COSTS_N_INSNS (4), /* MSGFR */
214 COSTS_N_INSNS (4), /* MSGR */
215 COSTS_N_INSNS (4), /* MSR */
216 COSTS_N_INSNS (1), /* multiplication in DFmode */
217 COSTS_N_INSNS (28), /* MXBR */
218 COSTS_N_INSNS (130), /* SQXBR */
219 COSTS_N_INSNS (66), /* SQDBR */
220 COSTS_N_INSNS (38), /* SQEBR */
221 COSTS_N_INSNS (1), /* MADBR */
222 COSTS_N_INSNS (1), /* MAEBR */
223 COSTS_N_INSNS (60), /* DXBR */
224 COSTS_N_INSNS (40), /* DDBR */
225 COSTS_N_INSNS (26), /* DEBR */
226 COSTS_N_INSNS (30), /* DLGR */
227 COSTS_N_INSNS (23), /* DLR */
228 COSTS_N_INSNS (23), /* DR */
229 COSTS_N_INSNS (24), /* DSGFR */
230 COSTS_N_INSNS (24), /* DSGR */
233 static const
234 struct processor_costs z10_cost =
236 COSTS_N_INSNS (10), /* M */
237 COSTS_N_INSNS (10), /* MGHI */
238 COSTS_N_INSNS (10), /* MH */
239 COSTS_N_INSNS (10), /* MHI */
240 COSTS_N_INSNS (10), /* ML */
241 COSTS_N_INSNS (10), /* MR */
242 COSTS_N_INSNS (10), /* MS */
243 COSTS_N_INSNS (10), /* MSG */
244 COSTS_N_INSNS (10), /* MSGF */
245 COSTS_N_INSNS (10), /* MSGFR */
246 COSTS_N_INSNS (10), /* MSGR */
247 COSTS_N_INSNS (10), /* MSR */
248 COSTS_N_INSNS (1), /* multiplication in DFmode */
249 COSTS_N_INSNS (50), /* MXBR */
250 COSTS_N_INSNS (120), /* SQXBR */
251 COSTS_N_INSNS (52), /* SQDBR */
252 COSTS_N_INSNS (38), /* SQEBR */
253 COSTS_N_INSNS (1), /* MADBR */
254 COSTS_N_INSNS (1), /* MAEBR */
255 COSTS_N_INSNS (111), /* DXBR */
256 COSTS_N_INSNS (39), /* DDBR */
257 COSTS_N_INSNS (32), /* DEBR */
258 COSTS_N_INSNS (160), /* DLGR */
259 COSTS_N_INSNS (71), /* DLR */
260 COSTS_N_INSNS (71), /* DR */
261 COSTS_N_INSNS (71), /* DSGFR */
262 COSTS_N_INSNS (71), /* DSGR */
265 static const
266 struct processor_costs z196_cost =
268 COSTS_N_INSNS (7), /* M */
269 COSTS_N_INSNS (5), /* MGHI */
270 COSTS_N_INSNS (5), /* MH */
271 COSTS_N_INSNS (5), /* MHI */
272 COSTS_N_INSNS (7), /* ML */
273 COSTS_N_INSNS (7), /* MR */
274 COSTS_N_INSNS (6), /* MS */
275 COSTS_N_INSNS (8), /* MSG */
276 COSTS_N_INSNS (6), /* MSGF */
277 COSTS_N_INSNS (6), /* MSGFR */
278 COSTS_N_INSNS (8), /* MSGR */
279 COSTS_N_INSNS (6), /* MSR */
280 COSTS_N_INSNS (1), /* multiplication in DFmode */
281 COSTS_N_INSNS (40), /* MXBR B+40 */
282 COSTS_N_INSNS (100), /* SQXBR B+100 */
283 COSTS_N_INSNS (42), /* SQDBR B+42 */
284 COSTS_N_INSNS (28), /* SQEBR B+28 */
285 COSTS_N_INSNS (1), /* MADBR B */
286 COSTS_N_INSNS (1), /* MAEBR B */
287 COSTS_N_INSNS (101), /* DXBR B+101 */
288 COSTS_N_INSNS (29), /* DDBR */
289 COSTS_N_INSNS (22), /* DEBR */
290 COSTS_N_INSNS (160), /* DLGR cracked */
291 COSTS_N_INSNS (160), /* DLR cracked */
292 COSTS_N_INSNS (160), /* DR expanded */
293 COSTS_N_INSNS (160), /* DSGFR cracked */
294 COSTS_N_INSNS (160), /* DSGR cracked */
297 static const
298 struct processor_costs zEC12_cost =
300 COSTS_N_INSNS (7), /* M */
301 COSTS_N_INSNS (5), /* MGHI */
302 COSTS_N_INSNS (5), /* MH */
303 COSTS_N_INSNS (5), /* MHI */
304 COSTS_N_INSNS (7), /* ML */
305 COSTS_N_INSNS (7), /* MR */
306 COSTS_N_INSNS (6), /* MS */
307 COSTS_N_INSNS (8), /* MSG */
308 COSTS_N_INSNS (6), /* MSGF */
309 COSTS_N_INSNS (6), /* MSGFR */
310 COSTS_N_INSNS (8), /* MSGR */
311 COSTS_N_INSNS (6), /* MSR */
312 COSTS_N_INSNS (1), /* multiplication in DFmode */
313 COSTS_N_INSNS (40), /* MXBR B+40 */
314 COSTS_N_INSNS (100), /* SQXBR B+100 */
315 COSTS_N_INSNS (42), /* SQDBR B+42 */
316 COSTS_N_INSNS (28), /* SQEBR B+28 */
317 COSTS_N_INSNS (1), /* MADBR B */
318 COSTS_N_INSNS (1), /* MAEBR B */
319 COSTS_N_INSNS (131), /* DXBR B+131 */
320 COSTS_N_INSNS (29), /* DDBR */
321 COSTS_N_INSNS (22), /* DEBR */
322 COSTS_N_INSNS (160), /* DLGR cracked */
323 COSTS_N_INSNS (160), /* DLR cracked */
324 COSTS_N_INSNS (160), /* DR expanded */
325 COSTS_N_INSNS (160), /* DSGFR cracked */
326 COSTS_N_INSNS (160), /* DSGR cracked */
329 const struct s390_processor processor_table[] =
331 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
332 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
333 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
334 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
335 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
336 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
337 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
338 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
339 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
340 { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
341 { "z16", "arch14", PROCESSOR_3931_Z16, &zEC12_cost, 14 },
342 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
345 extern int reload_completed;
347 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
348 static rtx_insn *last_scheduled_insn;
349 #define NUM_SIDES 2
351 #define MAX_SCHED_UNITS 4
352 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
354 /* Estimate of number of cycles a long-running insn occupies an
355 execution unit. */
356 static int fxd_longrunning[NUM_SIDES];
357 static int fpd_longrunning[NUM_SIDES];
359 /* The maximum score added for an instruction whose unit hasn't been
360 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
361 give instruction mix scheduling more priority over instruction
362 grouping. */
363 #define MAX_SCHED_MIX_SCORE 2
365 /* The maximum distance up to which individual scores will be
366 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
367 Increase this with the OOO window size of the machine. */
368 #define MAX_SCHED_MIX_DISTANCE 70
370 /* Structure used to hold the components of a S/390 memory
371 address. A legitimate address on S/390 is of the general
372 form
373 base + index + displacement
374 where any of the components is optional.
376 base and index are registers of the class ADDR_REGS,
377 displacement is an unsigned 12-bit immediate constant. */
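/* In assembler syntax such an address is written D(X,B); e.g. the memory
   operand 8(%r3,%r11) uses displacement 8, index register %r3 and base
   register %r11.  */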
379 /* The max number of insns of backend generated memset/memcpy/memcmp
380 loops. This value is used in the unroll adjust hook to detect such
381 loops. Current max is 9 coming from the memcmp loop. */
382 #define BLOCK_MEM_OPS_LOOP_INSNS 9
384 struct s390_address
386 rtx base;
387 rtx indx;
388 rtx disp;
389 bool pointer;
390 bool literal_pool;
393 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
395 #define cfun_frame_layout (cfun->machine->frame_layout)
396 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
397 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
398 ? cfun_frame_layout.fpr_bitmap & 0x0f \
399 : cfun_frame_layout.fpr_bitmap & 0x03))
400 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
401 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
402 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
403 (1 << (REGNO - FPR0_REGNUM)))
404 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
405 (1 << (REGNO - FPR0_REGNUM))))
406 #define cfun_gpr_save_slot(REGNO) \
407 cfun->machine->frame_layout.gpr_save_slots[REGNO]
409 /* Number of GPRs and FPRs used for argument passing. */
410 #define GP_ARG_NUM_REG 5
411 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
412 #define VEC_ARG_NUM_REG 8
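/* Per the zSeries ELF ABI these correspond to GPRs %r2-%r6, FPRs
   %f0/%f2/%f4/%f6 (only %f0/%f2 in 31-bit mode) and VRs %v24-%v31.  */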
414 /* Return TRUE if GPR REGNO is supposed to be restored in the function
415 epilogue. */
416 static inline bool
417 s390_restore_gpr_p (int regno)
419 return (cfun_frame_layout.first_restore_gpr != -1
420 && regno >= cfun_frame_layout.first_restore_gpr
421 && regno <= cfun_frame_layout.last_restore_gpr);
424 /* Return TRUE if any of the registers in range [FIRST, LAST] is saved
425 because of -mpreserve-args. */
426 static inline bool
427 s390_preserve_gpr_arg_in_range_p (int first, int last)
429 int num_arg_regs = MIN (crtl->args.info.gprs + cfun->va_list_gpr_size,
430 GP_ARG_NUM_REG);
431 return (num_arg_regs
432 && s390_preserve_args_p
433 && first <= GPR2_REGNUM + num_arg_regs - 1
434 && last >= GPR2_REGNUM);
437 static inline bool
438 s390_preserve_gpr_arg_p (int regno)
440 return s390_preserve_gpr_arg_in_range_p (regno, regno);
443 static inline bool
444 s390_preserve_fpr_arg_p (int regno)
446 int num_arg_regs = MIN (crtl->args.info.fprs + cfun->va_list_fpr_size,
447 FP_ARG_NUM_REG);
448 return (s390_preserve_args_p
449 && regno <= FPR0_REGNUM + num_arg_regs - 1
450 && regno >= FPR0_REGNUM);
453 #undef TARGET_ATOMIC_ALIGN_FOR_MODE
454 #define TARGET_ATOMIC_ALIGN_FOR_MODE s390_atomic_align_for_mode
455 static unsigned int
456 s390_atomic_align_for_mode (machine_mode mode)
458 return GET_MODE_BITSIZE (mode);
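/* I.e. atomic objects must be aligned to their full size; e.g. a 16-byte
   atomic operand requires 16-byte alignment.  */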
461 /* A couple of shortcuts. */
462 #define CONST_OK_FOR_J(x) \
463 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
464 #define CONST_OK_FOR_K(x) \
465 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
466 #define CONST_OK_FOR_Os(x) \
467 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
468 #define CONST_OK_FOR_Op(x) \
469 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
470 #define CONST_OK_FOR_On(x) \
471 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
473 #define REGNO_PAIR_OK(REGNO, MODE) \
474 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
476 /* That's the read ahead of the dynamic branch prediction unit in
477 bytes on a z10 (or higher) CPU. */
478 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
480 static int
481 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
482 addr_space_t as ATTRIBUTE_UNUSED,
483 bool speed ATTRIBUTE_UNUSED);
485 static unsigned int
486 s390_hard_regno_nregs (unsigned int regno, machine_mode mode);
488 /* Masks per jump target register indicating which thunks need to be
489 generated. */
490 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
491 static GTY(()) int indirect_branch_z10thunk_mask = 0;
493 #define INDIRECT_BRANCH_NUM_OPTIONS 4
495 enum s390_indirect_branch_option
497 s390_opt_indirect_branch_jump = 0,
498 s390_opt_indirect_branch_call,
499 s390_opt_function_return_reg,
500 s390_opt_function_return_mem
503 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
504 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
505 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
506 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
507 { ".s390_indirect_jump", ".s390_indirect_call",
508 ".s390_return_reg", ".s390_return_mem" };
510 bool
511 s390_return_addr_from_memory ()
513 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
516 /* Generate a SUBREG for the MODE lowpart of EXPR.
518 In contrast to gen_lowpart it will always return a SUBREG
519 expression. This is useful to generate STRICT_LOW_PART
520 expressions. */
522 s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
524 rtx lowpart = gen_lowpart (mode, expr);
526 /* There might be no SUBREG in case it could be applied to the hard
527 REG rtx or it could be folded with a paradoxical subreg. Bring
528 it back. */
529 if (!SUBREG_P (lowpart))
531 machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
532 gcc_assert (REG_P (lowpart));
533 lowpart = gen_lowpart_SUBREG (mode,
534 gen_rtx_REG (reg_mode,
535 REGNO (lowpart)));
538 return lowpart;
541 /* Return nonzero if it's OK to use fused multiply-add for MODE. */
542 bool
543 s390_fma_allowed_p (machine_mode mode)
545 if (TARGET_VXE && mode == TFmode)
546 return flag_vx_long_double_fma;
548 return true;
551 /* Indicate which ABI has been used for passing vector args.
552 0 - no vector type arguments have been passed where the ABI is relevant
553 1 - the old ABI has been used
554 2 - a vector type argument has been passed either in a vector register
555 or on the stack by value */
556 static int s390_vector_abi = 0;
558 /* Set the vector ABI marker if TYPE is subject to the vector ABI
559 switch. The vector ABI affects only vector data types. There are
560 two aspects of the vector ABI relevant here:
562 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
563 ABI and natural alignment with the old.
565 2. vectors <= 16 bytes are passed in VRs or by value on the stack
566 with the new ABI but by reference on the stack with the old.
568 If ARG_P is true TYPE is used for a function argument or return
569 value. The ABI marker then is set for all vector data types. If
570 ARG_P is false only type 1 vectors are being checked. */
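/* Example for aspect 1: a 32-byte vector (say 4 x double) is aligned to 32
   bytes under the old ABI but only to 8 bytes under the new one, which can
   change the layout of a struct containing such a member.  */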
572 static void
573 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
575 static hash_set<const_tree> visited_types_hash;
577 if (s390_vector_abi)
578 return;
580 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
581 return;
583 if (visited_types_hash.contains (type))
584 return;
586 visited_types_hash.add (type);
588 if (VECTOR_TYPE_P (type))
590 int type_size = int_size_in_bytes (type);
592 /* Outside arguments only the alignment is changing and this
593 only happens for vector types >= 16 bytes. */
594 if (!arg_p && type_size < 16)
595 return;
597 /* In arguments vector types > 16 bytes are passed as before (GCC
598 never enforced the bigger alignment for arguments which was
599 required by the old vector ABI). However, it might still be
600 ABI relevant due to the changed alignment if it is a struct
601 member. */
602 if (arg_p && type_size > 16 && !in_struct_p)
603 return;
605 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
607 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
609 /* ARRAY_TYPE: Since neither of the ABIs requires more than
610 natural alignment there will never be ABI-dependent padding
611 in an array type. That's why we do not set in_struct_p to
612 true here. */
613 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
615 else if (FUNC_OR_METHOD_TYPE_P (type))
617 tree arg_chain;
619 /* Check the return type. */
620 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
622 for (arg_chain = TYPE_ARG_TYPES (type);
623 arg_chain;
624 arg_chain = TREE_CHAIN (arg_chain))
625 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
627 else if (RECORD_OR_UNION_TYPE_P (type))
629 tree field;
631 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
633 if (TREE_CODE (field) != FIELD_DECL)
634 continue;
636 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
642 /* System z builtins. */
644 #include "s390-builtins.h"
646 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
648 #undef B_DEF
649 #undef OB_DEF
650 #undef OB_DEF_VAR
651 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
652 #define OB_DEF(...)
653 #define OB_DEF_VAR(...)
654 #include "s390-builtins.def"
658 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
660 #undef B_DEF
661 #undef OB_DEF
662 #undef OB_DEF_VAR
663 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
664 #define OB_DEF(...)
665 #define OB_DEF_VAR(...)
666 #include "s390-builtins.def"
670 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
672 #undef B_DEF
673 #undef OB_DEF
674 #undef OB_DEF_VAR
675 #define B_DEF(...)
676 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
677 #define OB_DEF_VAR(...)
678 #include "s390-builtins.def"
682 const unsigned int
683 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
685 #undef B_DEF
686 #undef OB_DEF
687 #undef OB_DEF_VAR
688 #define B_DEF(...)
689 #define OB_DEF(...)
690 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
691 #include "s390-builtins.def"
695 const unsigned int
696 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
698 #undef B_DEF
699 #undef OB_DEF
700 #undef OB_DEF_VAR
701 #define B_DEF(...)
702 #define OB_DEF(...)
703 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
704 #include "s390-builtins.def"
708 tree s390_builtin_types[BT_MAX];
709 tree s390_builtin_fn_types[BT_FN_MAX];
710 tree s390_builtin_decls[S390_BUILTIN_MAX +
711 S390_OVERLOADED_BUILTIN_MAX +
712 S390_OVERLOADED_BUILTIN_VAR_MAX];
714 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
715 #undef B_DEF
716 #undef OB_DEF
717 #undef OB_DEF_VAR
718 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
719 #define OB_DEF(...)
720 #define OB_DEF_VAR(...)
722 #include "s390-builtins.def"
723 CODE_FOR_nothing
726 static void
727 s390_init_builtins (void)
729 /* These definitions are being used in s390-builtins.def. */
730 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
731 NULL, NULL);
732 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
733 tree c_uint64_type_node;
735 /* The uint64_type_node from tree.cc is not compatible with the C99
736 uint64_t data type. What we want is c_uint64_type_node from
737 c-common.cc. But since backend code is not supposed to interface
738 with the frontend we recreate it here. */
739 if (TARGET_64BIT)
740 c_uint64_type_node = long_unsigned_type_node;
741 else
742 c_uint64_type_node = long_long_unsigned_type_node;
744 #undef DEF_TYPE
745 #define DEF_TYPE(INDEX, NODE, CONST_P) \
746 if (s390_builtin_types[INDEX] == NULL) \
747 s390_builtin_types[INDEX] = (!CONST_P) ? \
748 (NODE) : build_type_variant ((NODE), 1, 0);
750 #undef DEF_POINTER_TYPE
751 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
752 if (s390_builtin_types[INDEX] == NULL) \
753 s390_builtin_types[INDEX] = \
754 build_pointer_type (s390_builtin_types[INDEX_BASE]);
756 #undef DEF_DISTINCT_TYPE
757 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
758 if (s390_builtin_types[INDEX] == NULL) \
759 s390_builtin_types[INDEX] = \
760 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
762 #undef DEF_VECTOR_TYPE
763 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
764 if (s390_builtin_types[INDEX] == NULL) \
765 s390_builtin_types[INDEX] = \
766 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
768 #undef DEF_OPAQUE_VECTOR_TYPE
769 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
770 if (s390_builtin_types[INDEX] == NULL) \
771 s390_builtin_types[INDEX] = \
772 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
774 #undef DEF_FN_TYPE
775 #define DEF_FN_TYPE(INDEX, args...) \
776 if (s390_builtin_fn_types[INDEX] == NULL) \
777 s390_builtin_fn_types[INDEX] = \
778 build_function_type_list (args, NULL_TREE);
779 #undef DEF_OV_TYPE
780 #define DEF_OV_TYPE(...)
781 #include "s390-builtin-types.def"
783 #undef B_DEF
784 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
785 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
786 s390_builtin_decls[S390_BUILTIN_##NAME] = \
787 add_builtin_function ("__builtin_" #NAME, \
788 s390_builtin_fn_types[FNTYPE], \
789 S390_BUILTIN_##NAME, \
790 BUILT_IN_MD, \
791 NULL, \
792 ATTRS);
793 #undef OB_DEF
794 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
795 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
796 == NULL) \
797 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
798 add_builtin_function ("__builtin_" #NAME, \
799 s390_builtin_fn_types[FNTYPE], \
800 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
801 BUILT_IN_MD, \
802 NULL, \
804 #undef OB_DEF_VAR
805 #define OB_DEF_VAR(...)
806 #include "s390-builtins.def"
810 /* Return true if ARG is appropriate as argument number ARGNUM of
811 builtin DECL. The operand flags from s390-builtins.def have to
812 be passed as OP_FLAGS. */
813 bool
814 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
816 if (O_UIMM_P (op_flags))
818 unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
819 unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
820 unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
821 unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
823 gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1));
824 gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
826 if (!tree_fits_uhwi_p (arg)
827 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
828 || (bitmask && tree_to_uhwi (arg) & ~bitmask))
830 if (bitmask)
832 gcc_assert (bitmask < 16);
833 char values[120] = "";
835 for (unsigned HOST_WIDE_INT i = 0; i <= bitmask; i++)
837 char buf[5];
838 if (i & ~bitmask)
839 continue;
840 int ret = snprintf (buf, 5, HOST_WIDE_INT_PRINT_UNSIGNED, i & bitmask);
841 gcc_assert (ret < 5);
842 strcat (values, buf);
843 if (i < bitmask)
844 strcat (values, ", ");
846 error ("constant argument %d for builtin %qF is invalid (%s)",
847 argnum, decl, values);
849 else
850 error ("constant argument %d for builtin %qF is out of range (0-%wu)",
851 argnum, decl, (HOST_WIDE_INT_1U << bitwidth) - 1);
853 return false;
857 if (O_SIMM_P (op_flags))
859 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
860 int bitwidth = bitwidths[op_flags - O_S2];
862 if (!tree_fits_shwi_p (arg)
863 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
864 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
866 error ("constant argument %d for builtin %qF is out of range "
867 "(%wd-%wd)", argnum, decl,
868 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
869 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
870 return false;
873 return true;
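/* For example, an operand with a 4-bit unsigned flag must be an integer
   constant in the range 0..15, and one with an 8-bit signed flag must lie
   in -128..127; otherwise an error is emitted and false is returned.  */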
876 /* Expand an expression EXP that calls a built-in function,
877 with result going to TARGET if that's convenient
878 (and in mode MODE if that's convenient).
879 SUBTARGET may be used as the target for computing one of EXP's operands.
880 IGNORE is nonzero if the value is to be ignored. */
882 static rtx
883 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
884 machine_mode mode ATTRIBUTE_UNUSED,
885 int ignore ATTRIBUTE_UNUSED)
887 #define MAX_ARGS 6
889 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
890 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
891 enum insn_code icode;
892 rtx op[MAX_ARGS], pat;
893 int arity;
894 bool nonvoid;
895 tree arg;
896 call_expr_arg_iterator iter;
897 unsigned int all_op_flags = opflags_for_builtin (fcode);
898 machine_mode last_vec_mode = VOIDmode;
900 if (TARGET_DEBUG_ARG)
902 fprintf (stderr,
903 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
904 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
905 bflags_for_builtin (fcode));
908 if (S390_USE_TARGET_ATTRIBUTE)
910 unsigned int bflags;
912 bflags = bflags_for_builtin (fcode);
913 if ((bflags & B_HTM) && !TARGET_HTM)
915 error ("builtin %qF is not supported without %<-mhtm%> "
916 "(default with %<-march=zEC12%> and higher)", fndecl);
917 return const0_rtx;
919 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
921 error ("builtin %qF requires %<-mvx%> "
922 "(default with %<-march=z13%> and higher)", fndecl);
923 return const0_rtx;
926 if ((bflags & B_VXE) && !TARGET_VXE)
928 error ("Builtin %qF requires z14 or higher", fndecl);
929 return const0_rtx;
932 if ((bflags & B_VXE2) && !TARGET_VXE2)
934 error ("Builtin %qF requires z15 or higher", fndecl);
935 return const0_rtx;
938 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
939 && fcode < S390_ALL_BUILTIN_MAX)
941 gcc_unreachable ();
943 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
945 icode = code_for_builtin[fcode];
946 /* Set a flag in the machine specific cfun part in order to support
947 saving/restoring of FPRs. */
948 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
949 cfun->machine->tbegin_p = true;
951 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
953 error ("unresolved overloaded builtin");
954 return const0_rtx;
956 else
957 internal_error ("bad builtin fcode");
959 if (icode == 0)
960 internal_error ("bad builtin icode");
962 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
964 if (nonvoid)
966 machine_mode tmode = insn_data[icode].operand[0].mode;
967 if (!target
968 || GET_MODE (target) != tmode
969 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
970 target = gen_reg_rtx (tmode);
972 /* There are builtins (e.g. vec_promote) with no vector
973 arguments but an element selector. So we have to also look
974 at the vector return type when emitting the modulo
975 operation. */
976 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
977 last_vec_mode = insn_data[icode].operand[0].mode;
980 arity = 0;
981 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
983 rtx tmp_rtx;
984 const struct insn_operand_data *insn_op;
985 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
987 all_op_flags = all_op_flags >> O_SHIFT;
989 if (arg == error_mark_node)
990 return NULL_RTX;
991 if (arity >= MAX_ARGS)
992 return NULL_RTX;
994 if (O_IMM_P (op_flags)
995 && TREE_CODE (arg) != INTEGER_CST)
997 error ("constant value required for builtin %qF argument %d",
998 fndecl, arity + 1);
999 return const0_rtx;
1002 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
1003 return const0_rtx;
1005 insn_op = &insn_data[icode].operand[arity + nonvoid];
1006 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
1008 /* expand_expr truncates constants to the target mode only if it
1009 is "convenient". However, our checks below rely on this
1010 being done. */
1011 if (CONST_INT_P (op[arity])
1012 && SCALAR_INT_MODE_P (insn_op->mode)
1013 && GET_MODE (op[arity]) != insn_op->mode)
1014 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
1015 insn_op->mode));
1017 /* Wrap the expanded RTX for pointer types into a MEM expr with
1018 the proper mode. This allows us to use e.g. (match_operand
1019 "memory_operand"..) in the insn patterns instead of (mem
1020 (match_operand "address_operand)). This is helpful for
1021 patterns not just accepting MEMs. */
1022 if (POINTER_TYPE_P (TREE_TYPE (arg))
1023 && insn_op->predicate != address_operand)
1024 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
1026 /* Expand the modulo operation required on element selectors. */
1027 if (op_flags == O_ELEM)
1029 gcc_assert (last_vec_mode != VOIDmode);
1030 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
1031 op[arity],
1032 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
1033 NULL_RTX, 1, OPTAB_DIRECT);
1036 /* Record the vector mode used for an element selector. This assumes:
1037 1. There is no builtin with two different vector modes and an element selector
1038 2. The element selector comes after the vector type it is referring to.
1039 This is currently true for all the builtins but FIXME we
1040 should better check for that. */
1041 if (VECTOR_MODE_P (insn_op->mode))
1042 last_vec_mode = insn_op->mode;
1044 if (insn_op->predicate (op[arity], insn_op->mode))
1046 arity++;
1047 continue;
1050 /* A memory operand is rejected by the memory_operand predicate.
1051 Try making the address legal by copying it into a register. */
1052 if (MEM_P (op[arity])
1053 && insn_op->predicate == memory_operand
1054 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
1055 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
1057 op[arity] = replace_equiv_address (op[arity],
1058 copy_to_mode_reg (Pmode,
1059 XEXP (op[arity], 0)));
1061 /* Some of the builtins require different modes/types than the
1062 pattern in order to implement a specific API. Instead of
1063 adding many expanders which do the mode change we do it here.
1064 E.g. s390_vec_add_u128, which is required to have vector unsigned char
1065 arguments, is mapped to addti3.
1066 else if (insn_op->mode != VOIDmode
1067 && GET_MODE (op[arity]) != VOIDmode
1068 && GET_MODE (op[arity]) != insn_op->mode
1069 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
1070 GET_MODE (op[arity]), 0))
1071 != NULL_RTX))
1073 op[arity] = tmp_rtx;
1076 /* The predicate rejects the operand although the mode is fine.
1077 Copy the operand to a register. */
1078 if (!insn_op->predicate (op[arity], insn_op->mode)
1079 && (GET_MODE (op[arity]) == insn_op->mode
1080 || GET_MODE (op[arity]) == VOIDmode
1081 || (insn_op->predicate == address_operand
1082 && GET_MODE (op[arity]) == Pmode)))
1084 /* An address_operand usually has VOIDmode in the expander
1085 so we cannot use this. */
1086 machine_mode target_mode =
1087 (insn_op->predicate == address_operand
1088 ? (machine_mode) Pmode : insn_op->mode);
1089 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1092 if (!insn_op->predicate (op[arity], insn_op->mode))
1094 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1095 return const0_rtx;
1097 arity++;
1100 switch (arity)
1102 case 0:
1103 pat = GEN_FCN (icode) (target);
1104 break;
1105 case 1:
1106 if (nonvoid)
1107 pat = GEN_FCN (icode) (target, op[0]);
1108 else
1109 pat = GEN_FCN (icode) (op[0]);
1110 break;
1111 case 2:
1112 if (nonvoid)
1113 pat = GEN_FCN (icode) (target, op[0], op[1]);
1114 else
1115 pat = GEN_FCN (icode) (op[0], op[1]);
1116 break;
1117 case 3:
1118 if (nonvoid)
1119 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1120 else
1121 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1122 break;
1123 case 4:
1124 if (nonvoid)
1125 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1126 else
1127 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1128 break;
1129 case 5:
1130 if (nonvoid)
1131 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1132 else
1133 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1134 break;
1135 case 6:
1136 if (nonvoid)
1137 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1138 else
1139 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1140 break;
1141 default:
1142 gcc_unreachable ();
1144 if (!pat)
1145 return NULL_RTX;
1146 emit_insn (pat);
1148 if (nonvoid)
1149 return target;
1150 else
1151 return const0_rtx;
1155 static const int s390_hotpatch_hw_max = 1000000;
1156 static int s390_hotpatch_hw_before_label = 0;
1157 static int s390_hotpatch_hw_after_label = 0;
1159 /* Check whether the hotpatch attribute is applied to a function and, if it has
1160 an argument, the argument is valid. */
1162 static tree
1163 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1164 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1166 tree expr;
1167 tree expr2;
1168 int err;
1170 if (TREE_CODE (*node) != FUNCTION_DECL)
1172 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1173 name);
1174 *no_add_attrs = true;
1176 if (args != NULL && TREE_CHAIN (args) != NULL)
1178 expr = TREE_VALUE (args);
1179 expr2 = TREE_VALUE (TREE_CHAIN (args));
1181 if (args == NULL || TREE_CHAIN (args) == NULL)
1182 err = 1;
1183 else if (TREE_CODE (expr) != INTEGER_CST
1184 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1185 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1186 err = 1;
1187 else if (TREE_CODE (expr2) != INTEGER_CST
1188 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1189 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1190 err = 1;
1191 else
1192 err = 0;
1193 if (err)
1195 error ("requested %qE attribute is not a comma separated pair of"
1196 " non-negative integer constants or too large (max. %d)", name,
1197 s390_hotpatch_hw_max);
1198 *no_add_attrs = true;
1201 return NULL_TREE;
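/* Usage sketch: a function declared with
     __attribute__ ((hotpatch (n_before, n_after)))
   reserves n_before halfwords before and n_after halfwords after its label
   and overrides the -mhotpatch= command line option for that function.  */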
1204 /* Expand the s390_vector_bool type attribute. */
1206 static tree
1207 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1208 tree args ATTRIBUTE_UNUSED,
1209 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1211 tree type = *node, result = NULL_TREE;
1212 machine_mode mode;
1214 while (POINTER_TYPE_P (type)
1215 || TREE_CODE (type) == FUNCTION_TYPE
1216 || TREE_CODE (type) == METHOD_TYPE
1217 || TREE_CODE (type) == ARRAY_TYPE)
1218 type = TREE_TYPE (type);
1220 mode = TYPE_MODE (type);
1221 switch (mode)
1223 case E_DImode: case E_V2DImode:
1224 result = s390_builtin_types[BT_BV2DI];
1225 break;
1226 case E_SImode: case E_V4SImode:
1227 result = s390_builtin_types[BT_BV4SI];
1228 break;
1229 case E_HImode: case E_V8HImode:
1230 result = s390_builtin_types[BT_BV8HI];
1231 break;
1232 case E_QImode: case E_V16QImode:
1233 result = s390_builtin_types[BT_BV16QI];
1234 break;
1235 default:
1236 break;
1239 *no_add_attrs = true; /* No need to hang on to the attribute. */
1241 if (result)
1242 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1244 return NULL_TREE;
1247 /* Check syntax of function decl attributes having a string type value. */
1249 static tree
1250 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1251 tree args ATTRIBUTE_UNUSED,
1252 int flags ATTRIBUTE_UNUSED,
1253 bool *no_add_attrs)
1255 tree cst;
1257 if (TREE_CODE (*node) != FUNCTION_DECL)
1259 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1260 name);
1261 *no_add_attrs = true;
1264 cst = TREE_VALUE (args);
1266 if (TREE_CODE (cst) != STRING_CST)
1268 warning (OPT_Wattributes,
1269 "%qE attribute requires a string constant argument",
1270 name);
1271 *no_add_attrs = true;
1274 if (is_attribute_p ("indirect_branch", name)
1275 || is_attribute_p ("indirect_branch_call", name)
1276 || is_attribute_p ("function_return", name)
1277 || is_attribute_p ("function_return_reg", name)
1278 || is_attribute_p ("function_return_mem", name))
1280 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1281 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1282 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1284 warning (OPT_Wattributes,
1285 "argument to %qE attribute is not "
1286 "(keep|thunk|thunk-extern)", name);
1287 *no_add_attrs = true;
1291 if (is_attribute_p ("indirect_branch_jump", name)
1292 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1293 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1294 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1295 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1297 warning (OPT_Wattributes,
1298 "argument to %qE attribute is not "
1299 "(keep|thunk|thunk-inline|thunk-extern)", name);
1300 *no_add_attrs = true;
1303 return NULL_TREE;
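/* These string attributes are the per-function counterparts of the
   -mindirect-branch* and -mfunction-return* families of options; e.g.
     __attribute__ ((indirect_branch ("thunk")))
   accepts the values checked above ("keep", "thunk", "thunk-extern", plus
   "thunk-inline" for the jump variant).  */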
1306 TARGET_GNU_ATTRIBUTES (s390_attribute_table, {
1307 { "hotpatch", 2, 2, true, false, false, false,
1308 s390_handle_hotpatch_attribute, NULL },
1309 { "s390_vector_bool", 0, 0, false, true, false, true,
1310 s390_handle_vectorbool_attribute, NULL },
1311 { "indirect_branch", 1, 1, true, false, false, false,
1312 s390_handle_string_attribute, NULL },
1313 { "indirect_branch_jump", 1, 1, true, false, false, false,
1314 s390_handle_string_attribute, NULL },
1315 { "indirect_branch_call", 1, 1, true, false, false, false,
1316 s390_handle_string_attribute, NULL },
1317 { "function_return", 1, 1, true, false, false, false,
1318 s390_handle_string_attribute, NULL },
1319 { "function_return_reg", 1, 1, true, false, false, false,
1320 s390_handle_string_attribute, NULL },
1321 { "function_return_mem", 1, 1, true, false, false, false,
1322 s390_handle_string_attribute, NULL }
1325 /* Return the alignment for LABEL. We default to the -falign-labels
1326 value except for the literal pool base label. */
1328 s390_label_align (rtx_insn *label)
1330 rtx_insn *prev_insn = prev_active_insn (label);
1331 rtx set, src;
1333 if (prev_insn == NULL_RTX)
1334 goto old;
1336 set = single_set (prev_insn);
1338 if (set == NULL_RTX)
1339 goto old;
1341 src = SET_SRC (set);
1343 /* Don't align literal pool base labels. */
1344 if (GET_CODE (src) == UNSPEC
1345 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1346 return 0;
1348 old:
1349 return align_labels.levels[0].log;
1352 static GTY(()) rtx got_symbol;
1354 /* Return the GOT table symbol. The symbol will be created when the
1355 function is invoked for the first time. */
1357 static rtx
1358 s390_got_symbol (void)
1360 if (!got_symbol)
1362 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1363 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1366 return got_symbol;
1369 static scalar_int_mode
1370 s390_libgcc_cmp_return_mode (void)
1372 return TARGET_64BIT ? DImode : SImode;
1375 static scalar_int_mode
1376 s390_libgcc_shift_count_mode (void)
1378 return TARGET_64BIT ? DImode : SImode;
1381 static scalar_int_mode
1382 s390_unwind_word_mode (void)
1384 return TARGET_64BIT ? DImode : SImode;
1387 /* Return true if the back end supports mode MODE. */
1388 static bool
1389 s390_scalar_mode_supported_p (scalar_mode mode)
1391 /* In contrast to the default implementation reject TImode constants on 31bit
1392 TARGET_ZARCH for ABI compliance. */
1393 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1394 return false;
1396 if (DECIMAL_FLOAT_MODE_P (mode))
1397 return default_decimal_float_supported_p ();
1399 return default_scalar_mode_supported_p (mode);
1402 /* Return true if the back end supports vector mode MODE. */
1403 static bool
1404 s390_vector_mode_supported_p (machine_mode mode)
1406 machine_mode inner;
1408 if (!VECTOR_MODE_P (mode)
1409 || !TARGET_VX
1410 || GET_MODE_SIZE (mode) > 16)
1411 return false;
1413 inner = GET_MODE_INNER (mode);
1415 switch (inner)
1417 case E_QImode:
1418 case E_HImode:
1419 case E_SImode:
1420 case E_DImode:
1421 case E_TImode:
1422 case E_SFmode:
1423 case E_DFmode:
1424 case E_TFmode:
1425 return true;
1426 default:
1427 return false;
1431 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1433 void
1434 s390_set_has_landing_pad_p (bool value)
1436 cfun->machine->has_landing_pad_p = value;
1439 /* If two condition code modes are compatible, return a condition code
1440 mode which is compatible with both. Otherwise, return
1441 VOIDmode. */
1443 static machine_mode
1444 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1446 if (m1 == m2)
1447 return m1;
1449 switch (m1)
1451 case E_CCZmode:
1452 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1453 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1454 return m2;
1455 return VOIDmode;
1457 case E_CCSmode:
1458 case E_CCUmode:
1459 case E_CCTmode:
1460 case E_CCSRmode:
1461 case E_CCURmode:
1462 case E_CCZ1mode:
1463 if (m2 == CCZmode)
1464 return m1;
1466 return VOIDmode;
1468 default:
1469 return VOIDmode;
1471 return VOIDmode;
1474 /* Return true if SET either doesn't set the CC register, or else
1475 the source and destination have matching CC modes and that
1476 CC mode is at least as constrained as REQ_MODE. */
1478 static bool
1479 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1481 machine_mode set_mode;
1483 gcc_assert (GET_CODE (set) == SET);
1485 /* These modes are supposed to be used only in CC consumer
1486 patterns. */
1487 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1488 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1490 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1491 return 1;
1493 set_mode = GET_MODE (SET_DEST (set));
1494 switch (set_mode)
1496 case E_CCZ1mode:
1497 case E_CCSmode:
1498 case E_CCSRmode:
1499 case E_CCSFPSmode:
1500 case E_CCUmode:
1501 case E_CCURmode:
1502 case E_CCOmode:
1503 case E_CCLmode:
1504 case E_CCL1mode:
1505 case E_CCL2mode:
1506 case E_CCL3mode:
1507 case E_CCT1mode:
1508 case E_CCT2mode:
1509 case E_CCT3mode:
1510 case E_CCVEQmode:
1511 case E_CCVIHmode:
1512 case E_CCVIHUmode:
1513 case E_CCVFHmode:
1514 case E_CCVFHEmode:
1515 if (req_mode != set_mode)
1516 return 0;
1517 break;
1519 case E_CCZmode:
1520 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1521 && req_mode != CCSRmode && req_mode != CCURmode
1522 && req_mode != CCZ1mode)
1523 return 0;
1524 break;
1526 case E_CCAPmode:
1527 case E_CCANmode:
1528 if (req_mode != CCAmode)
1529 return 0;
1530 break;
1532 default:
1533 gcc_unreachable ();
1536 return (GET_MODE (SET_SRC (set)) == set_mode);
1539 /* Return true if every SET in INSN that sets the CC register
1540 has source and destination with matching CC modes and that
1541 CC mode is at least as constrained as REQ_MODE.
1542 If REQ_MODE is VOIDmode, always return false. */
1544 bool
1545 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1547 int i;
1549 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1550 if (req_mode == VOIDmode)
1551 return false;
1553 if (GET_CODE (PATTERN (insn)) == SET)
1554 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1556 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1557 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1559 rtx set = XVECEXP (PATTERN (insn), 0, i);
1560 if (GET_CODE (set) == SET)
1561 if (!s390_match_ccmode_set (set, req_mode))
1562 return false;
1565 return true;
1568 /* If a test-under-mask instruction can be used to implement
1569 (compare (and ... OP1) OP2), return the CC mode required
1570 to do that. Otherwise, return VOIDmode.
1571 MIXED is true if the instruction can distinguish between
1572 CC1 and CC2 for mixed selected bits (TMxx), it is false
1573 if the instruction cannot (TM). */
1575 machine_mode
1576 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1578 int bit0, bit1;
1580 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1581 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1582 return VOIDmode;
1584 /* Selected bits all zero: CC0.
1585 e.g.: int a; if ((a & (16 + 128)) == 0) */
1586 if (INTVAL (op2) == 0)
1587 return CCTmode;
1589 /* Selected bits all one: CC3.
1590 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1591 if (INTVAL (op2) == INTVAL (op1))
1592 return CCT3mode;
1594 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1595 int a;
1596 if ((a & (16 + 128)) == 16) -> CCT1
1597 if ((a & (16 + 128)) == 128) -> CCT2 */
1598 if (mixed)
1600 bit1 = exact_log2 (INTVAL (op2));
1601 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1602 if (bit0 != -1 && bit1 != -1)
1603 return bit0 > bit1 ? CCT1mode : CCT2mode;
1606 return VOIDmode;
1609 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1610 OP0 and OP1 of a COMPARE, return the mode to be used for the
1611 comparison. */
1613 machine_mode
1614 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1616 switch (code)
1618 case EQ:
1619 case NE:
1620 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1621 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1622 return CCAPmode;
1623 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1624 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1625 return CCAPmode;
1626 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1627 || GET_CODE (op1) == NEG)
1628 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1629 return CCLmode;
1631 if (GET_CODE (op0) == AND)
1633 /* Check whether we can potentially do it via TM. */
1634 machine_mode ccmode;
1635 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1636 if (ccmode != VOIDmode)
1638 /* Relax CCTmode to CCZmode to allow fall-back to AND
1639 if that turns out to be beneficial. */
1640 return ccmode == CCTmode ? CCZmode : ccmode;
1644 if (register_operand (op0, HImode)
1645 && GET_CODE (op1) == CONST_INT
1646 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1647 return CCT3mode;
1648 if (register_operand (op0, QImode)
1649 && GET_CODE (op1) == CONST_INT
1650 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1651 return CCT3mode;
1653 return CCZmode;
1655 case LE:
1656 case LT:
1657 case GE:
1658 case GT:
1659 /* The only overflow condition of NEG and ABS happens when
1660 INT_MIN is used as parameter: its negation/absolute value is not
1661 representable and the result wraps back to a negative value.
1662 Using CCAP mode the resulting cc can be used for comparisons. */
1663 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1664 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1665 return CCAPmode;
1667 /* If constants are involved in an add instruction it is possible to use
1668 the resulting cc for comparisons with zero. Knowing the sign of the
1669 constant the overflow behavior gets predictable. e.g.:
1670 int a, b; if ((b = a + c) > 0)
1671 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1672 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1673 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1674 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1675 /* Avoid INT32_MIN on 32 bit. */
1676 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1678 if (INTVAL (XEXP((op0), 1)) < 0)
1679 return CCANmode;
1680 else
1681 return CCAPmode;
1684 /* Fall through. */
1685 case LTGT:
1686 if (HONOR_NANS (op0) || HONOR_NANS (op1))
1687 return CCSFPSmode;
1689 /* Fall through. */
1690 case UNORDERED:
1691 case ORDERED:
1692 case UNEQ:
1693 case UNLE:
1694 case UNLT:
1695 case UNGE:
1696 case UNGT:
1697 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1698 && GET_CODE (op1) != CONST_INT)
1699 return CCSRmode;
1700 return CCSmode;
1702 case LTU:
1703 case GEU:
1704 if (GET_CODE (op0) == PLUS
1705 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1706 return CCL1mode;
1708 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1709 && GET_CODE (op1) != CONST_INT)
1710 return CCURmode;
1711 return CCUmode;
1713 case LEU:
1714 case GTU:
1715 if (GET_CODE (op0) == MINUS
1716 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1717 return CCL2mode;
1719 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1720 && GET_CODE (op1) != CONST_INT)
1721 return CCURmode;
1722 return CCUmode;
1724 default:
1725 gcc_unreachable ();
1729 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1730 that we can implement more efficiently. */
1732 static void
1733 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1734 bool op0_preserve_value)
1736 if (op0_preserve_value)
1737 return;
1739 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1740 if ((*code == EQ || *code == NE)
1741 && *op1 == const0_rtx
1742 && GET_CODE (*op0) == ZERO_EXTRACT
1743 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1744 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1745 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1747 rtx inner = XEXP (*op0, 0);
1748 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1749 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1750 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1752 if (len > 0 && len < modesize
1753 && pos >= 0 && pos + len <= modesize
1754 && modesize <= HOST_BITS_PER_WIDE_INT)
1756 unsigned HOST_WIDE_INT block;
1757 block = (HOST_WIDE_INT_1U << len) - 1;
1758 block <<= modesize - pos - len;
1760 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1761 gen_int_mode (block, GET_MODE (inner)));
1765 /* Narrow AND of memory against immediate to enable TM. */
1766 if ((*code == EQ || *code == NE)
1767 && *op1 == const0_rtx
1768 && GET_CODE (*op0) == AND
1769 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1770 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1772 rtx inner = XEXP (*op0, 0);
1773 rtx mask = XEXP (*op0, 1);
1775 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1776 if (GET_CODE (inner) == SUBREG
1777 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1778 && (GET_MODE_SIZE (GET_MODE (inner))
1779 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1780 && ((INTVAL (mask)
1781 & GET_MODE_MASK (GET_MODE (inner))
1782 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1783 == 0))
1784 inner = SUBREG_REG (inner);
1786 /* Do not change volatile MEMs. */
1787 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1789 int part = s390_single_part (XEXP (*op0, 1),
1790 GET_MODE (inner), QImode, 0);
1791 if (part >= 0)
1793 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1794 inner = adjust_address_nv (inner, QImode, part);
1795 *op0 = gen_rtx_AND (QImode, inner, mask);
1800 /* Narrow comparisons against 0xffff to HImode if possible. */
1801 if ((*code == EQ || *code == NE)
1802 && GET_CODE (*op1) == CONST_INT
1803 && INTVAL (*op1) == 0xffff
1804 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1805 && (nonzero_bits (*op0, GET_MODE (*op0))
1806 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1808 *op0 = gen_lowpart (HImode, *op0);
1809 *op1 = constm1_rtx;
1812 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1813 if (GET_CODE (*op0) == UNSPEC
1814 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1815 && XVECLEN (*op0, 0) == 1
1816 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1817 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1818 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1819 && *op1 == const0_rtx)
1821 enum rtx_code new_code = UNKNOWN;
1822 switch (*code)
1824 case EQ: new_code = EQ; break;
1825 case NE: new_code = NE; break;
1826 case LT: new_code = GTU; break;
1827 case GT: new_code = LTU; break;
1828 case LE: new_code = GEU; break;
1829 case GE: new_code = LEU; break;
1830 default: break;
1833 if (new_code != UNKNOWN)
1835 *op0 = XVECEXP (*op0, 0, 0);
1836 *code = new_code;
1840 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1841 if (GET_CODE (*op0) == UNSPEC
1842 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1843 && XVECLEN (*op0, 0) == 1
1844 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1845 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1846 && CONST_INT_P (*op1))
1848 enum rtx_code new_code = UNKNOWN;
1849 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1851 case E_CCZmode:
1852 case E_CCRAWmode:
1853 switch (*code)
1855 case EQ: new_code = EQ; break;
1856 case NE: new_code = NE; break;
1857 default: break;
1859 break;
1860 default: break;
1863 if (new_code != UNKNOWN)
1865 /* For CCRAWmode put the required cc mask into the second
1866 operand. */
1867 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1868 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1869 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1870 *op0 = XVECEXP (*op0, 0, 0);
1871 *code = new_code;
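/* In such a CCRAW mask bit 3 stands for CC 0, bit 2 for CC 1, bit 1 for
   CC 2 and bit 0 for CC 3 (hence the 1 << (3 - cc) above); the masks used
   below, e.g. 0xa for "cc == 0 || cc == 2", follow the same encoding.  */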
1874 /* Remove UNSPEC_CC_TO_INT from connectives. This happens for
1875 checks against multiple condition codes. */
1876 if (GET_CODE (*op0) == AND
1877 && GET_CODE (XEXP (*op0, 0)) == UNSPEC
1878 && XINT (XEXP (*op0, 0), 1) == UNSPEC_CC_TO_INT
1879 && XVECLEN (XEXP (*op0, 0), 0) == 1
1880 && REGNO (XVECEXP (XEXP (*op0, 0), 0, 0)) == CC_REGNUM
1881 && CONST_INT_P (XEXP (*op0, 1))
1882 && CONST_INT_P (*op1)
1883 && INTVAL (XEXP (*op0, 1)) == -3
1884 && *code == EQ)
1886 if (INTVAL (*op1) == 0)
1888 /* case cc == 0 || cc == 2 => mask = 0xa */
1889 *op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
1890 *op1 = gen_rtx_CONST_INT (VOIDmode, 0xa);
1892 else if (INTVAL (*op1) == 1)
1894 /* case cc == 1 || cc == 3 => mask = 0x5 */
1895 *op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
1896 *op1 = gen_rtx_CONST_INT (VOIDmode, 0x5);
1899 if (GET_CODE (*op0) == PLUS
1900 && GET_CODE (XEXP (*op0, 0)) == UNSPEC
1901 && XINT (XEXP (*op0, 0), 1) == UNSPEC_CC_TO_INT
1902 && XVECLEN (XEXP (*op0, 0), 0) == 1
1903 && REGNO (XVECEXP (XEXP (*op0, 0), 0, 0)) == CC_REGNUM
1904 && CONST_INT_P (XEXP (*op0, 1))
1905 && CONST_INT_P (*op1)
1906 && (*code == LEU || *code == GTU))
1908 if (INTVAL (*op1) == 1)
1910 if (INTVAL (XEXP (*op0, 1)) == -1)
1912 /* case cc == 1 || cc == 2 => mask = 0x6 */
1913 *op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
1914 *op1 = gen_rtx_CONST_INT (VOIDmode, 0x6);
1915 *code = *code == GTU ? NE : EQ;
1917 else if (INTVAL (XEXP (*op0, 1)) == -2)
1919 /* case cc == 2 || cc == 3 => mask = 0x3 */
1920 *op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
1921 *op1 = gen_rtx_CONST_INT (VOIDmode, 0x3);
1922 *code = *code == GTU ? NE : EQ;
1925 else if (INTVAL (*op1) == 2
1926 && INTVAL (XEXP (*op0, 1)) == -1)
1928 /* case cc == 1 || cc == 2 || cc == 3 => mask = 0x7 */
1929 *op0 = XVECEXP (XEXP (*op0, 0), 0, 0);
1930 *op1 = gen_rtx_CONST_INT (VOIDmode, 0x7);
1931 *code = *code == GTU ? NE : EQ;
1934 else if (*code == LEU || *code == GTU)
1936 if (GET_CODE (*op0) == UNSPEC
1937 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1938 && XVECLEN (*op0, 0) == 1
1939 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1940 && CONST_INT_P (*op1))
1942 if (INTVAL (*op1) == 1)
1944 /* case cc == 0 || cc == 1 => mask = 0xc */
1945 *op0 = XVECEXP (*op0, 0, 0);
1946 *op1 = gen_rtx_CONST_INT (VOIDmode, 0xc);
1947 *code = *code == GTU ? NE : EQ;
1949 else if (INTVAL (*op1) == 2)
1951 /* case cc == 0 || cc == 1 || cc == 2 => mask = 0xd */
1952 *op0 = XVECEXP (*op0, 0, 0);
1953 *op1 = gen_rtx_CONST_INT (VOIDmode, 0xd);
1954 *code = *code == GTU ? NE : EQ;
1956 else if (INTVAL (*op1) == 3)
1958 /* always true */
1959 *op0 = const0_rtx;
1960 *op1 = const0_rtx;
1961 *code = *code == GTU ? NE : EQ;
1966 /* Simplify cascaded EQ, NE with const0_rtx. */
1967 if ((*code == NE || *code == EQ)
1968 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1969 && GET_MODE (*op0) == SImode
1970 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1971 && REG_P (XEXP (*op0, 0))
1972 && XEXP (*op0, 1) == const0_rtx
1973 && *op1 == const0_rtx)
1975 if ((*code == EQ && GET_CODE (*op0) == NE)
1976 || (*code == NE && GET_CODE (*op0) == EQ))
1977 *code = EQ;
1978 else
1979 *code = NE;
1980 *op0 = XEXP (*op0, 0);
1983 /* Prefer register over memory as first operand. */
1984 if (MEM_P (*op0) && REG_P (*op1))
1986 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1987 *code = (int)swap_condition ((enum rtx_code)*code);
1990 /* A comparison result is compared against zero. Replace it with
1991 the (perhaps inverted) original comparison.
1992 This probably should be done by simplify_relational_operation. */
1993 if ((*code == EQ || *code == NE)
1994 && *op1 == const0_rtx
1995 && COMPARISON_P (*op0)
1996 && CC_REG_P (XEXP (*op0, 0)))
1998 enum rtx_code new_code;
2000 if (*code == EQ)
2001 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
2002 XEXP (*op0, 0),
2003 XEXP (*op0, 1), NULL);
2004 else
2005 new_code = GET_CODE (*op0);
2007 if (new_code != UNKNOWN)
2009 *code = new_code;
2010 *op1 = XEXP (*op0, 1);
2011 *op0 = XEXP (*op0, 0);
2015 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
2016 if (TARGET_Z15
2017 && (*code == EQ || *code == NE)
2018 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
2019 && GET_CODE (*op0) == NOT)
2021 machine_mode mode = GET_MODE (*op0);
2022 *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
2023 *op0 = gen_rtx_NOT (mode, *op0);
2024 *op1 = const0_rtx;
2027 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
2028 if (TARGET_Z15
2029 && (*code == EQ || *code == NE)
2030 && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
2031 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
2032 && CONST_INT_P (*op1)
2033 && *op1 == constm1_rtx)
2035 machine_mode mode = GET_MODE (*op0);
2036 rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
2037 rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
2039 if (GET_CODE (*op0) == AND)
2040 *op0 = gen_rtx_IOR (mode, op00, op01);
2041 else
2042 *op0 = gen_rtx_AND (mode, op00, op01);
2044 *op1 = const0_rtx;
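/* Illustrative note (not from the original sources): on z15 the rewrites
   above turn, e.g., (eq (not:DI a) b) into
   (eq (not:DI (xor:DI a b)) (const_int 0)) and
   (eq (and:DI a b) (const_int -1)) into
   (eq (ior:DI (not:DI a) (not:DI b)) (const_int 0)), shapes that map to
   the combined not-xor and nand style patterns (cf. the nxrk/nxgrk and
   nnrk/nngrk cases in s390_rtx_costs below).  */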
2049 /* Emit a compare instruction suitable to implement the comparison
2050 OP0 CODE OP1. Return the correct condition RTL to be placed in
2051 the IF_THEN_ELSE of the conditional branch testing the result. */
2054 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
2056 machine_mode mode = s390_select_ccmode (code, op0, op1);
2057 rtx cc;
2059 /* Force OP1 into register in order to satisfy VXE TFmode patterns. */
2060 if (TARGET_VXE && GET_MODE (op1) == TFmode)
2061 op1 = force_reg (TFmode, op1);
2063 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
2065 /* Do not output a redundant compare instruction if a
2066 compare_and_swap pattern already computed the result and the
2067 machine modes are compatible. */
2068 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
2069 == GET_MODE (op0));
2070 cc = op0;
2072 else
2074 cc = gen_rtx_REG (mode, CC_REGNUM);
2075 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
2078 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
2081 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
2082 MEM, whose address is a pseudo containing the original MEM's address. */
2084 static rtx
2085 s390_legitimize_cs_operand (rtx mem)
2087 rtx tmp;
2089 if (!contains_symbol_ref_p (mem))
2090 return mem;
2091 tmp = gen_reg_rtx (Pmode);
2092 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
2093 return change_address (mem, VOIDmode, tmp);
2096 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
2097 matches CMP.
2098 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
2099 conditional branch testing the result. */
2101 static rtx
2102 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
2103 rtx cmp, rtx new_rtx, machine_mode ccmode)
2105 rtx cc;
2107 mem = s390_legitimize_cs_operand (mem);
2108 cc = gen_rtx_REG (ccmode, CC_REGNUM);
2109 switch (GET_MODE (mem))
2111 case E_SImode:
2112 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
2113 new_rtx, cc));
2114 break;
2115 case E_DImode:
2116 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
2117 new_rtx, cc));
2118 break;
2119 case E_TImode:
2120 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
2121 new_rtx, cc));
2122 break;
2123 case E_QImode:
2124 case E_HImode:
2125 default:
2126 gcc_unreachable ();
2128 return s390_emit_compare (code, cc, const0_rtx);
2131 /* Emit a jump instruction to TARGET and return it. If COND is
2132 NULL_RTX, emit an unconditional jump, else a conditional jump under
2133 condition COND. */
2135 rtx_insn *
2136 s390_emit_jump (rtx target, rtx cond)
2138 rtx insn;
2140 target = gen_rtx_LABEL_REF (VOIDmode, target);
2141 if (cond)
2142 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
2144 insn = gen_rtx_SET (pc_rtx, target);
2145 return emit_jump_insn (insn);
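/* Illustrative usage sketch (hypothetical operands and label): a
   conditional branch on OP0 < OP1 would typically be emitted as

     rtx cond = s390_emit_compare (LT, op0, op1);
     s390_emit_jump (label, cond);

   where s390_emit_compare emits the compare setting the CC register and
   returns a condition such as (lt (reg:CCS 33) (const_int 0)), which
   s390_emit_jump then wraps into an IF_THEN_ELSE jump.  */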
2148 /* Return branch condition mask to implement a branch
2149 specified by CODE. Return -1 for invalid comparisons. */
2152 s390_branch_condition_mask (rtx code)
2154 const int CC0 = 1 << 3;
2155 const int CC1 = 1 << 2;
2156 const int CC2 = 1 << 1;
2157 const int CC3 = 1 << 0;
2159 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
2160 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
2161 gcc_assert (XEXP (code, 1) == const0_rtx
2162 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2163 && CONST_INT_P (XEXP (code, 1))));
2166 switch (GET_MODE (XEXP (code, 0)))
2168 case E_CCZmode:
2169 case E_CCZ1mode:
2170 switch (GET_CODE (code))
2172 case EQ: return CC0;
2173 case NE: return CC1 | CC2 | CC3;
2174 default: return -1;
2176 break;
2178 case E_CCT1mode:
2179 switch (GET_CODE (code))
2181 case EQ: return CC1;
2182 case NE: return CC0 | CC2 | CC3;
2183 default: return -1;
2185 break;
2187 case E_CCT2mode:
2188 switch (GET_CODE (code))
2190 case EQ: return CC2;
2191 case NE: return CC0 | CC1 | CC3;
2192 default: return -1;
2194 break;
2196 case E_CCT3mode:
2197 switch (GET_CODE (code))
2199 case EQ: return CC3;
2200 case NE: return CC0 | CC1 | CC2;
2201 default: return -1;
2203 break;
2205 case E_CCLmode:
2206 switch (GET_CODE (code))
2208 case EQ: return CC0 | CC2;
2209 case NE: return CC1 | CC3;
2210 default: return -1;
2212 break;
2214 case E_CCL1mode:
2215 switch (GET_CODE (code))
2217 case LTU: return CC2 | CC3; /* carry */
2218 case GEU: return CC0 | CC1; /* no carry */
2219 default: return -1;
2221 break;
2223 case E_CCL2mode:
2224 switch (GET_CODE (code))
2226 case GTU: return CC0 | CC1; /* borrow */
2227 case LEU: return CC2 | CC3; /* no borrow */
2228 default: return -1;
2230 break;
2232 case E_CCL3mode:
2233 switch (GET_CODE (code))
2235 case EQ: return CC0 | CC2;
2236 case NE: return CC1 | CC3;
2237 case LTU: return CC1;
2238 case GTU: return CC3;
2239 case LEU: return CC1 | CC2;
2240 case GEU: return CC2 | CC3;
2241 default: return -1;
2244 case E_CCUmode:
2245 switch (GET_CODE (code))
2247 case EQ: return CC0;
2248 case NE: return CC1 | CC2 | CC3;
2249 case LTU: return CC1;
2250 case GTU: return CC2;
2251 case LEU: return CC0 | CC1;
2252 case GEU: return CC0 | CC2;
2253 default: return -1;
2255 break;
2257 case E_CCURmode:
2258 switch (GET_CODE (code))
2260 case EQ: return CC0;
2261 case NE: return CC2 | CC1 | CC3;
2262 case LTU: return CC2;
2263 case GTU: return CC1;
2264 case LEU: return CC0 | CC2;
2265 case GEU: return CC0 | CC1;
2266 default: return -1;
2268 break;
2270 case E_CCAPmode:
2271 switch (GET_CODE (code))
2273 case EQ: return CC0;
2274 case NE: return CC1 | CC2 | CC3;
2275 case LT: return CC1 | CC3;
2276 case GT: return CC2;
2277 case LE: return CC0 | CC1 | CC3;
2278 case GE: return CC0 | CC2;
2279 default: return -1;
2281 break;
2283 case E_CCANmode:
2284 switch (GET_CODE (code))
2286 case EQ: return CC0;
2287 case NE: return CC1 | CC2 | CC3;
2288 case LT: return CC1;
2289 case GT: return CC2 | CC3;
2290 case LE: return CC0 | CC1;
2291 case GE: return CC0 | CC2 | CC3;
2292 default: return -1;
2294 break;
2296 case E_CCOmode:
2297 switch (GET_CODE (code))
2299 case EQ: return CC0 | CC1 | CC2;
2300 case NE: return CC3;
2301 default: return -1;
2303 break;
2305 case E_CCSmode:
2306 case E_CCSFPSmode:
2307 switch (GET_CODE (code))
2309 case EQ: return CC0;
2310 case NE: return CC1 | CC2 | CC3;
2311 case LT: return CC1;
2312 case GT: return CC2;
2313 case LE: return CC0 | CC1;
2314 case GE: return CC0 | CC2;
2315 case UNORDERED: return CC3;
2316 case ORDERED: return CC0 | CC1 | CC2;
2317 case UNEQ: return CC0 | CC3;
2318 case UNLT: return CC1 | CC3;
2319 case UNGT: return CC2 | CC3;
2320 case UNLE: return CC0 | CC1 | CC3;
2321 case UNGE: return CC0 | CC2 | CC3;
2322 case LTGT: return CC1 | CC2;
2323 default: return -1;
2325 break;
2327 case E_CCSRmode:
2328 switch (GET_CODE (code))
2330 case EQ: return CC0;
2331 case NE: return CC2 | CC1 | CC3;
2332 case LT: return CC2;
2333 case GT: return CC1;
2334 case LE: return CC0 | CC2;
2335 case GE: return CC0 | CC1;
2336 case UNORDERED: return CC3;
2337 case ORDERED: return CC0 | CC2 | CC1;
2338 case UNEQ: return CC0 | CC3;
2339 case UNLT: return CC2 | CC3;
2340 case UNGT: return CC1 | CC3;
2341 case UNLE: return CC0 | CC2 | CC3;
2342 case UNGE: return CC0 | CC1 | CC3;
2343 case LTGT: return CC2 | CC1;
2344 default: return -1;
2346 break;
2348 /* Vector comparison modes. */
2349 /* CC2 will never be set. It however is part of the negated
2350 masks. */
2351 case E_CCVIALLmode:
2352 switch (GET_CODE (code))
2354 case EQ:
2355 case GTU:
2356 case GT:
2357 case GE: return CC0;
2358 /* The inverted modes are in fact *any* modes. */
2359 case NE:
2360 case LEU:
2361 case LE:
2362 case LT: return CC3 | CC1 | CC2;
2363 default: return -1;
2366 case E_CCVIANYmode:
2367 switch (GET_CODE (code))
2369 case EQ:
2370 case GTU:
2371 case GT:
2372 case GE: return CC0 | CC1;
2373 /* The inverted modes are in fact *all* modes. */
2374 case NE:
2375 case LEU:
2376 case LE:
2377 case LT: return CC3 | CC2;
2378 default: return -1;
2380 case E_CCVFALLmode:
2381 switch (GET_CODE (code))
2383 case EQ:
2384 case GT:
2385 case GE: return CC0;
2386 /* The inverted modes are in fact *any* modes. */
2387 case NE:
2388 case UNLE:
2389 case UNLT: return CC3 | CC1 | CC2;
2390 default: return -1;
2393 case E_CCVFANYmode:
2394 switch (GET_CODE (code))
2396 case EQ:
2397 case GT:
2398 case GE: return CC0 | CC1;
2399 /* The inverted modes are in fact *all* modes. */
2400 case NE:
2401 case UNLE:
2402 case UNLT: return CC3 | CC2;
2403 default: return -1;
2406 case E_CCRAWmode:
2407 switch (GET_CODE (code))
2409 case EQ:
2410 return INTVAL (XEXP (code, 1));
2411 case NE:
2412 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2413 default:
2414 gcc_unreachable ();
2417 default:
2418 return -1;
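/* Worked example (illustrative): for the CCUmode condition
   (leu (reg:CCU 33) (const_int 0)) the function returns
   CC0 | CC1 = 8 + 4 = 12, i.e. the branch mask selecting condition
   codes 0 and 1.  */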
2423 /* Return branch condition mask to implement a compare and branch
2424 specified by CODE. Return -1 for invalid comparisons. */
2427 s390_compare_and_branch_condition_mask (rtx code)
2429 const int CC0 = 1 << 3;
2430 const int CC1 = 1 << 2;
2431 const int CC2 = 1 << 1;
2433 switch (GET_CODE (code))
2435 case EQ:
2436 return CC0;
2437 case NE:
2438 return CC1 | CC2;
2439 case LT:
2440 case LTU:
2441 return CC1;
2442 case GT:
2443 case GTU:
2444 return CC2;
2445 case LE:
2446 case LEU:
2447 return CC0 | CC1;
2448 case GE:
2449 case GEU:
2450 return CC0 | CC2;
2451 default:
2452 gcc_unreachable ();
2454 return -1;
2457 /* If INV is false, return assembler mnemonic string to implement
2458 a branch specified by CODE. If INV is true, return mnemonic
2459 for the corresponding inverted branch. */
2461 static const char *
2462 s390_branch_condition_mnemonic (rtx code, int inv)
2464 int mask;
2466 static const char *const mnemonic[16] =
2468 NULL, "o", "h", "nle",
2469 "l", "nhe", "lh", "ne",
2470 "e", "nlh", "he", "nl",
2471 "le", "nh", "no", NULL
2474 if (GET_CODE (XEXP (code, 0)) == REG
2475 && REGNO (XEXP (code, 0)) == CC_REGNUM
2476 && (XEXP (code, 1) == const0_rtx
2477 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2478 && CONST_INT_P (XEXP (code, 1)))))
2479 mask = s390_branch_condition_mask (code);
2480 else
2481 mask = s390_compare_and_branch_condition_mask (code);
2483 gcc_assert (mask >= 0);
2485 if (inv)
2486 mask ^= 15;
2488 gcc_assert (mask >= 1 && mask <= 14);
2490 return mnemonic[mask];
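/* Continuing the illustrative CCUmode LEU example from above: mask 12
   selects "le" from the table; with INV set the mask becomes
   12 ^ 15 = 3 and the mnemonic "nle" is returned instead.  */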
2493 /* Return the part of OP which has a value different from DEF.
2494 The size of the part is determined by MODE.
2495 Use this function only if you already know that OP really
2496 contains such a part. */
2498 unsigned HOST_WIDE_INT
2499 s390_extract_part (rtx op, machine_mode mode, int def)
2501 unsigned HOST_WIDE_INT value = 0;
2502 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2503 int part_bits = GET_MODE_BITSIZE (mode);
2504 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2505 int i;
2507 for (i = 0; i < max_parts; i++)
2509 if (i == 0)
2510 value = UINTVAL (op);
2511 else
2512 value >>= part_bits;
2514 if ((value & part_mask) != (def & part_mask))
2515 return value & part_mask;
2518 gcc_unreachable ();
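/* Illustrative example: for op = (const_int 0xff00), mode = QImode and
   def = 0 the loop walks the eight byte-sized parts starting at the
   least significant one; part 0 equals DEF, part 1 is 0xff, so 0xff is
   returned.  */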
2521 /* If OP is an integer constant of mode MODE with exactly one
2522 part of mode PART_MODE unequal to DEF, return the number of that
2523 part. Otherwise, return -1. */
2526 s390_single_part (rtx op,
2527 machine_mode mode,
2528 machine_mode part_mode,
2529 int def)
2531 unsigned HOST_WIDE_INT value = 0;
2532 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2533 unsigned HOST_WIDE_INT part_mask
2534 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2535 int i, part = -1;
2537 if (GET_CODE (op) != CONST_INT)
2538 return -1;
2540 for (i = 0; i < n_parts; i++)
2542 if (i == 0)
2543 value = UINTVAL (op);
2544 else
2545 value >>= GET_MODE_BITSIZE (part_mode);
2547 if ((value & part_mask) != (def & part_mask))
2549 if (part != -1)
2550 return -1;
2551 else
2552 part = i;
2555 return part == -1 ? -1 : n_parts - 1 - part;
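/* Illustrative example: for op = (const_int 0x00ff0000), mode = SImode,
   part_mode = QImode and def = 0 exactly one byte differs from DEF (the
   loop finds it at i = 2), so 4 - 1 - 2 = 1 is returned, i.e. the part
   number counted from the most significant end.  */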
2558 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2559 bits and no other bits are set in (the lower SIZE bits of) IN.
2561 PSTART and PEND can be used to obtain the start and end
2562 position (inclusive) of the bitfield relative to 64
2563 bits. *PSTART / *PEND gives the position of the first/last bit
2564 of the bitfield counting from the highest order bit starting
2565 with zero. */
2567 bool
2568 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2569 int *pstart, int *pend)
2571 int start;
2572 int end = -1;
2573 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2574 int highbit = HOST_BITS_PER_WIDE_INT - size;
2575 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2577 gcc_assert (!!pstart == !!pend);
2578 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2579 if (end == -1)
2581 /* Look for the rightmost bit of a contiguous range of ones. */
2582 if (bitmask & in)
2583 /* Found it. */
2584 end = start;
2586 else
2588 /* Look for the first zero bit after the range of ones. */
2589 if (! (bitmask & in))
2590 /* Found it. */
2591 break;
2593 /* We're one past the last one-bit. */
2594 start++;
2596 if (end == -1)
2597 /* No one bits found. */
2598 return false;
2600 if (start > highbit)
2602 unsigned HOST_WIDE_INT mask;
2604 /* Calculate a mask for all bits beyond the contiguous bits. */
2605 mask = ((~HOST_WIDE_INT_0U >> highbit)
2606 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2607 if (mask & in)
2608 /* There are more bits set beyond the first range of one bits. */
2609 return false;
2612 if (pstart)
2614 *pstart = start;
2615 *pend = end;
2618 return true;
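/* Illustrative example: for in = 0xff00 and size = 32 the one bits
   occupy positions 48..55 counted from the most significant bit of a
   64-bit value, so *PSTART = 48, *PEND = 55 and true is returned; for
   in = 0xf0f0 the function returns false because the set bits are not
   contiguous.  */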
2621 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2622 if ~IN contains a contiguous bitfield. In that case, *END is <
2623 *START.
2625 If WRAP_P is true, a bitmask that wraps around is also tested.
2626 When a wraparound occurs *START is greater than *END (in
2627 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2628 part of the range. If WRAP_P is false, no wraparound is
2629 tested. */
2631 bool
2632 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2633 int size, int *start, int *end)
2635 int bs = HOST_BITS_PER_WIDE_INT;
2636 bool b;
2638 gcc_assert (!!start == !!end);
2639 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2640 /* This cannot be expressed as a contiguous bitmask. Exit early because
2641 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2642 a valid bitmask. */
2643 return false;
2644 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2645 if (b)
2646 return true;
2647 if (! wrap_p)
2648 return false;
2649 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2650 if (b && start)
2652 int s = *start;
2653 int e = *end;
2655 gcc_assert (s >= 1);
2656 *start = ((e + 1) & (bs - 1));
2657 *end = ((s - 1 + bs) & (bs - 1));
2660 return b;
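/* Illustrative example: for in = 0x8000000000000001, size = 64 and
   wrap_p = true the non-wrapping test fails, but ~IN is contiguous, so
   true is returned with *START = 63 and *END = 0: the field consists of
   the lowest and the highest bit and wraps around, hence *START > *END.  */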
2663 /* Return true if OP contains the same contiguous bitfield in *all*
2664 its elements. START and END can be used to obtain the start and
2665 end position of the bitfield.
2667 START/END give the position of the first/last bit of the bitfield
2668 counting from the lowest order bit starting with zero. In order to
2669 use these values for S/390 instructions this has to be converted to
2670 "bits big endian" style. */
2672 bool
2673 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2675 unsigned HOST_WIDE_INT mask;
2676 int size;
2677 rtx elt;
2678 bool b;
2680 /* Handle floats by bitcasting them to ints. */
2681 op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
2683 gcc_assert (!!start == !!end);
2684 if (!const_vec_duplicate_p (op, &elt)
2685 || !CONST_INT_P (elt))
2686 return false;
2688 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2690 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2691 if (size > 64)
2692 return false;
2694 mask = UINTVAL (elt);
2696 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2697 if (b)
2699 if (start)
2701 *start -= (HOST_BITS_PER_WIDE_INT - size);
2702 *end -= (HOST_BITS_PER_WIDE_INT - size);
2704 return true;
2706 else
2707 return false;
2710 /* Return true if OP consists only of byte chunks that are either 0 or
2711 0xff. If MASK is != NULL, a byte mask is generated which is
2712 appropriate for the vector generate byte mask instruction. */
2714 bool
2715 s390_bytemask_vector_p (rtx op, unsigned *mask)
2717 int i;
2718 unsigned tmp_mask = 0;
2719 int nunit, unit_size;
2721 if (!VECTOR_MODE_P (GET_MODE (op))
2722 || GET_CODE (op) != CONST_VECTOR
2723 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2724 return false;
2726 nunit = GET_MODE_NUNITS (GET_MODE (op));
2727 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2729 for (i = 0; i < nunit; i++)
2731 unsigned HOST_WIDE_INT c;
2732 int j;
2734 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2735 return false;
2737 c = UINTVAL (XVECEXP (op, 0, i));
2738 for (j = 0; j < unit_size; j++)
2740 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2741 return false;
2742 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2743 c = c >> BITS_PER_UNIT;
2747 if (mask != NULL)
2748 *mask = tmp_mask;
2750 return true;
2753 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2754 equivalent to a shift followed by the AND. In particular, CONTIG
2755 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2756 for ROTL indicate a rotate to the right. */
2758 bool
2759 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2761 int start, end;
2762 bool ok;
2764 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2765 gcc_assert (ok);
2767 if (rotl >= 0)
2768 return (64 - end >= rotl);
2769 else
2771 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2772 DImode. */
2773 rotl = -rotl + (64 - bitsize);
2774 return (start >= rotl);
2778 /* Check whether we can (and want to) split a double-word
2779 move in mode MODE from SRC to DST into two single-word
2780 moves, moving the subword FIRST_SUBWORD first. */
2782 bool
2783 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2785 /* Floating point and vector registers cannot be split. */
2786 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2787 return false;
2789 /* Non-offsettable memory references cannot be split. */
2790 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2791 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2792 return false;
2794 /* Moving the first subword must not clobber a register
2795 needed to move the second subword. */
2796 if (register_operand (dst, mode))
2798 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2799 if (reg_overlap_mentioned_p (subreg, src))
2800 return false;
2803 return true;
2806 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2807 and [MEM2, MEM2 + SIZE] do overlap and false
2808 otherwise. */
2810 bool
2811 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2813 rtx addr1, addr2, addr_delta;
2814 HOST_WIDE_INT delta;
2816 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2817 return true;
2819 if (size == 0)
2820 return false;
2822 addr1 = XEXP (mem1, 0);
2823 addr2 = XEXP (mem2, 0);
2825 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2827 /* This overlapping check is used by peepholes merging memory block operations.
2828 Overlapping operations would otherwise be recognized by the S/390 hardware
2829 and would fall back to a slower implementation. Allowing overlapping
2830 operations would lead to slow code but not to wrong code. Therefore we are
2831 somewhat optimistic if we cannot prove that the memory blocks are
2832 overlapping.
2833 That's why we return false here although this may accept operations on
2834 overlapping memory areas. */
2835 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2836 return false;
2838 delta = INTVAL (addr_delta);
2840 if (delta == 0
2841 || (delta > 0 && delta < size)
2842 || (delta < 0 && -delta < size))
2843 return true;
2845 return false;
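/* Illustrative example (hypothetical registers): with MEM1 at
   (plus (reg R) (const_int 0)), MEM2 at (plus (reg R) (const_int 4))
   and SIZE = 8 the address delta is 4 < SIZE, so the blocks provably
   overlap and true is returned.  If the address difference is not a
   compile-time constant, false is returned optimistically, as explained
   above.  */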
2848 /* Check whether the address of memory reference MEM2 equals exactly
2849 the address of memory reference MEM1 plus DELTA. Return true if
2850 we can prove this to be the case, false otherwise. */
2852 bool
2853 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2855 rtx addr1, addr2, addr_delta;
2857 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2858 return false;
2860 addr1 = XEXP (mem1, 0);
2861 addr2 = XEXP (mem2, 0);
2863 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2864 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2865 return false;
2867 return true;
2870 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2872 void
2873 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2874 rtx *operands)
2876 machine_mode wmode = mode;
2877 rtx dst = operands[0];
2878 rtx src1 = operands[1];
2879 rtx src2 = operands[2];
2880 rtx op, clob, tem;
2882 /* If we cannot handle the operation directly, use a temp register. */
2883 if (!s390_logical_operator_ok_p (operands))
2884 dst = gen_reg_rtx (mode);
2886 /* QImode and HImode patterns make sense only if we have a destination
2887 in memory. Otherwise perform the operation in SImode. */
2888 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2889 wmode = SImode;
2891 /* Widen operands if required. */
2892 if (mode != wmode)
2894 if (GET_CODE (dst) == SUBREG
2895 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2896 dst = tem;
2897 else if (REG_P (dst))
2898 dst = gen_rtx_SUBREG (wmode, dst, 0);
2899 else
2900 dst = gen_reg_rtx (wmode);
2902 if (GET_CODE (src1) == SUBREG
2903 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2904 src1 = tem;
2905 else if (GET_MODE (src1) != VOIDmode)
2906 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2908 if (GET_CODE (src2) == SUBREG
2909 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2910 src2 = tem;
2911 else if (GET_MODE (src2) != VOIDmode)
2912 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2915 /* Emit the instruction. */
2916 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2917 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2918 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2920 /* Fix up the destination if needed. */
2921 if (dst != operands[0])
2922 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2925 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2927 bool
2928 s390_logical_operator_ok_p (rtx *operands)
2930 /* If the destination operand is in memory, it needs to coincide
2931 with one of the source operands. After reload, it has to be
2932 the first source operand. */
2933 if (GET_CODE (operands[0]) == MEM)
2934 return rtx_equal_p (operands[0], operands[1])
2935 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2937 return true;
2940 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2941 operand IMMOP to switch from SS to SI type instructions. */
2943 void
2944 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2946 int def = code == AND ? -1 : 0;
2947 HOST_WIDE_INT mask;
2948 int part;
2950 gcc_assert (GET_CODE (*memop) == MEM);
2951 gcc_assert (!MEM_VOLATILE_P (*memop));
2953 mask = s390_extract_part (*immop, QImode, def);
2954 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2955 gcc_assert (part >= 0);
2957 *memop = adjust_address (*memop, QImode, part);
2958 *immop = gen_int_mode (mask, QImode);
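/* Illustrative example (hypothetical operands): narrowing
   (and (mem:SI X) (const_int -241)), i.e. an AND with 0xffffff0f,
   yields PART = 3 and MASK = 0x0f, so the operation is rewritten as a
   QImode AND of the least significant byte of the word (byte offset 3
   on this big-endian target), suitable for an and-immediate (NI-style)
   instruction.  */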
2962 /* How to allocate a 'struct machine_function'. */
2964 static struct machine_function *
2965 s390_init_machine_status (void)
2967 return ggc_cleared_alloc<machine_function> ();
2970 /* Map for smallest class containing reg regno. */
2972 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2973 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2974 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2975 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2976 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2977 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2978 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2979 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2980 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2981 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2982 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2983 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2984 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2985 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2986 VEC_REGS, VEC_REGS /* 52 */
2989 /* Return attribute type of insn. */
2991 static enum attr_type
2992 s390_safe_attr_type (rtx_insn *insn)
2994 if (recog_memoized (insn) >= 0)
2995 return get_attr_type (insn);
2996 else
2997 return TYPE_NONE;
3000 /* Return attribute relative_long of insn. */
3002 static bool
3003 s390_safe_relative_long_p (rtx_insn *insn)
3005 if (recog_memoized (insn) >= 0)
3006 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
3007 else
3008 return false;
3011 /* Return true if DISP is a valid short displacement. */
3013 static bool
3014 s390_short_displacement (rtx disp)
3016 /* No displacement is OK. */
3017 if (!disp)
3018 return true;
3020 /* Without the long displacement facility we don't need to
3021 distinguish between long and short displacement. */
3022 if (!TARGET_LONG_DISPLACEMENT)
3023 return true;
3025 /* Integer displacement in range. */
3026 if (GET_CODE (disp) == CONST_INT)
3027 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
3029 /* GOT offset is not OK, the GOT can be large. */
3030 if (GET_CODE (disp) == CONST
3031 && GET_CODE (XEXP (disp, 0)) == UNSPEC
3032 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
3033 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
3034 return false;
3036 /* All other symbolic constants are literal pool references,
3037 which are OK as the literal pool must be small. */
3038 if (GET_CODE (disp) == CONST)
3039 return true;
3041 return false;
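/* Illustrative examples: (const_int 4095) is a valid short
   displacement, while (const_int 4096) and negative values are not;
   without the long-displacement facility the distinction does not
   matter and true is returned for any displacement.  */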
3044 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
3045 If successful, also determines the
3046 following characteristics of `ref': `is_ptr' - whether it can be an
3047 LA argument, `is_base_ptr' - whether the resulting base is a well-known
3048 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
3049 considered a literal pool pointer for purposes of avoiding two different
3050 literal pool pointers per insn during or after reload (`B' constraint). */
3051 static bool
3052 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
3053 bool *is_base_ptr, bool *is_pool_ptr)
3055 if (!*ref)
3056 return true;
3058 if (GET_CODE (*ref) == UNSPEC)
3059 switch (XINT (*ref, 1))
3061 case UNSPEC_LTREF:
3062 if (!*disp)
3063 *disp = gen_rtx_UNSPEC (Pmode,
3064 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
3065 UNSPEC_LTREL_OFFSET);
3066 else
3067 return false;
3069 *ref = XVECEXP (*ref, 0, 1);
3070 break;
3072 default:
3073 return false;
3076 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
3077 return false;
3079 if (REGNO (*ref) == STACK_POINTER_REGNUM
3080 || REGNO (*ref) == FRAME_POINTER_REGNUM
3081 || ((reload_completed || reload_in_progress)
3082 && frame_pointer_needed
3083 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
3084 || REGNO (*ref) == ARG_POINTER_REGNUM
3085 || (flag_pic
3086 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
3087 *is_ptr = *is_base_ptr = true;
3089 if ((reload_completed || reload_in_progress)
3090 && *ref == cfun->machine->base_reg)
3091 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
3093 return true;
3096 /* Decompose a RTL expression ADDR for a memory address into
3097 its components, returned in OUT.
3099 Returns false if ADDR is not a valid memory address, true
3100 otherwise. If OUT is NULL, don't return the components,
3101 but check for validity only.
3103 Note: Only addresses in canonical form are recognized.
3104 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
3105 canonical form so that they will be recognized. */
3107 static int
3108 s390_decompose_address (rtx addr, struct s390_address *out)
3110 HOST_WIDE_INT offset = 0;
3111 rtx base = NULL_RTX;
3112 rtx indx = NULL_RTX;
3113 rtx disp = NULL_RTX;
3114 rtx orig_disp;
3115 bool pointer = false;
3116 bool base_ptr = false;
3117 bool indx_ptr = false;
3118 bool literal_pool = false;
3120 /* We may need to substitute the literal pool base register into the address
3121 below. However, at this point we do not know which register is going to
3122 be used as base, so we substitute the arg pointer register. This is going
3123 to be treated as holding a pointer below -- it shouldn't be used for any
3124 other purpose. */
3125 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
3127 /* Decompose address into base + index + displacement. */
3129 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
3130 base = addr;
3132 else if (GET_CODE (addr) == PLUS)
3134 rtx op0 = XEXP (addr, 0);
3135 rtx op1 = XEXP (addr, 1);
3136 enum rtx_code code0 = GET_CODE (op0);
3137 enum rtx_code code1 = GET_CODE (op1);
3139 if (code0 == REG || code0 == UNSPEC)
3141 if (code1 == REG || code1 == UNSPEC)
3143 indx = op0; /* index + base */
3144 base = op1;
3147 else
3149 base = op0; /* base + displacement */
3150 disp = op1;
3154 else if (code0 == PLUS)
3156 indx = XEXP (op0, 0); /* index + base + disp */
3157 base = XEXP (op0, 1);
3158 disp = op1;
3161 else
3163 return false;
3167 else
3168 disp = addr; /* displacement */
3170 /* Extract integer part of displacement. */
3171 orig_disp = disp;
3172 if (disp)
3174 if (GET_CODE (disp) == CONST_INT)
3176 offset = INTVAL (disp);
3177 disp = NULL_RTX;
3179 else if (GET_CODE (disp) == CONST
3180 && GET_CODE (XEXP (disp, 0)) == PLUS
3181 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3183 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
3184 disp = XEXP (XEXP (disp, 0), 0);
3188 /* Strip off CONST here to avoid special case tests later. */
3189 if (disp && GET_CODE (disp) == CONST)
3190 disp = XEXP (disp, 0);
3192 /* We can convert literal pool addresses to
3193 displacements by basing them off the base register. */
3194 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
3196 if (base || indx)
3197 return false;
3199 base = fake_pool_base, literal_pool = true;
3201 /* Mark up the displacement. */
3202 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
3203 UNSPEC_LTREL_OFFSET);
3206 /* Validate base register. */
3207 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
3208 &literal_pool))
3209 return false;
3211 /* Validate index register. */
3212 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3213 &literal_pool))
3214 return false;
3216 /* Prefer to use pointer as base, not index. */
3217 if (base && indx && !base_ptr
3218 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3220 rtx tmp = base;
3221 base = indx;
3222 indx = tmp;
3225 /* Validate displacement. */
3226 if (!disp)
3228 /* If virtual registers are involved, the displacement will change later
3229 anyway as the virtual registers get eliminated. This could make a
3230 valid displacement invalid, but it is more likely to make an invalid
3231 displacement valid, because we sometimes access the register save area
3232 via negative offsets to one of those registers.
3233 Thus we don't check the displacement for validity here. If after
3234 elimination the displacement turns out to be invalid after all,
3235 this is fixed up by reload in any case. */
3236 /* LRA always keeps displacements up to date, and we need to know
3237 that the displacement is right throughout LRA, not only at the
3238 final elimination. */
3239 if (lra_in_progress
3240 || (base != arg_pointer_rtx
3241 && indx != arg_pointer_rtx
3242 && base != return_address_pointer_rtx
3243 && indx != return_address_pointer_rtx
3244 && base != frame_pointer_rtx
3245 && indx != frame_pointer_rtx
3246 && base != virtual_stack_vars_rtx
3247 && indx != virtual_stack_vars_rtx))
3248 if (!DISP_IN_RANGE (offset))
3249 return false;
3251 else
3253 /* All the special cases are pointers. */
3254 pointer = true;
3256 /* In the small-PIC case, the linker converts @GOT
3257 and @GOTNTPOFF offsets to possible displacements. */
3258 if (GET_CODE (disp) == UNSPEC
3259 && (XINT (disp, 1) == UNSPEC_GOT
3260 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3261 && flag_pic == 1)
3266 /* Accept pool label offsets. */
3267 else if (GET_CODE (disp) == UNSPEC
3268 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3271 /* Accept literal pool references. */
3272 else if (GET_CODE (disp) == UNSPEC
3273 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3275 /* In case CSE pulled a non-literal-pool reference out of
3276 the pool, we have to reject the address. This is
3277 especially important when loading the GOT pointer on
3278 non-zarch CPUs. In this case the literal pool contains an
3279 lt relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3280 will most likely exceed the displacement range. */
3281 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3282 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3283 return false;
3285 orig_disp = gen_rtx_CONST (Pmode, disp);
3286 if (offset)
3288 /* If we have an offset, make sure it does not
3289 exceed the size of the constant pool entry.
3290 Otherwise we might generate an out-of-range
3291 displacement for the base register form. */
3292 rtx sym = XVECEXP (disp, 0, 0);
3293 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3294 return false;
3296 orig_disp = plus_constant (Pmode, orig_disp, offset);
3300 else
3301 return false;
3304 if (!base && !indx)
3305 pointer = true;
3307 if (out)
3309 out->base = base;
3310 out->indx = indx;
3311 out->disp = orig_disp;
3312 out->pointer = pointer;
3313 out->literal_pool = literal_pool;
3316 return true;
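/* Illustrative example (hypothetical registers): decomposing
   (plus (plus (reg:DI 2) (reg:DI 3)) (const_int 8)) yields an index
   register, a base register and out->disp = (const_int 8); a plain
   (reg:DI 2) decomposes into just a base register with no displacement
   part.  */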
3319 /* Decompose a RTL expression OP for an address style operand into its
3320 components, and return the base register in BASE and the offset in
3321 OFFSET. While OP looks like an address, it is never supposed to be
3322 used as such.
3324 Return true if OP is a valid address operand, false if not. */
3326 bool
3327 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3328 HOST_WIDE_INT *offset)
3330 rtx off = NULL_RTX;
3332 /* We can have an integer constant, an address register,
3333 or a sum of the two. */
3334 if (CONST_SCALAR_INT_P (op))
3336 off = op;
3337 op = NULL_RTX;
3339 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3341 off = XEXP (op, 1);
3342 op = XEXP (op, 0);
3344 while (op && GET_CODE (op) == SUBREG)
3345 op = SUBREG_REG (op);
3347 if (op && GET_CODE (op) != REG)
3348 return false;
3350 if (offset)
3352 if (off == NULL_RTX)
3353 *offset = 0;
3354 else if (CONST_INT_P (off))
3355 *offset = INTVAL (off);
3356 else if (CONST_WIDE_INT_P (off))
3357 /* The offset will be cut down to 12 bits anyway, so take just
3358 the lowest-order chunk of the wide int. */
3359 *offset = CONST_WIDE_INT_ELT (off, 0);
3360 else
3361 gcc_unreachable ();
3363 if (base)
3364 *base = op;
3366 return true;
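/* Illustrative example: for op = (plus (reg:DI 1) (const_int 7)) the
   function returns true with *BASE = (reg:DI 1) and *OFFSET = 7; a bare
   (const_int 7) yields a NULL base and offset 7.  */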
3369 /* Check that OP is a valid shift count operand.
3370 It should be of the following structure:
3371 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3372 where subreg, and, and plus are optional.
3374 If IMPLICIT_MASK is > 0 and OP contains and
3375 (AND ... immediate)
3376 it is checked whether IMPLICIT_MASK and the immediate match.
3377 Otherwise, no checking is performed. */
3379 bool
3380 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3382 /* Strip subreg. */
3383 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3384 op = XEXP (op, 0);
3386 /* Check for an and with proper constant. */
3387 if (GET_CODE (op) == AND)
3389 rtx op1 = XEXP (op, 0);
3390 rtx imm = XEXP (op, 1);
3392 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3393 op1 = XEXP (op1, 0);
3395 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3396 return false;
3398 if (!immediate_operand (imm, GET_MODE (imm)))
3399 return false;
3401 HOST_WIDE_INT val = INTVAL (imm);
3402 if (implicit_mask > 0
3403 && (val & implicit_mask) != implicit_mask)
3404 return false;
3406 op = op1;
3409 /* Check the rest. */
3410 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
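/* Illustrative example (hypothetical operands): a shift count of the
   form (and:SI (plus:SI (reg:SI n) (const_int 7)) (const_int 63)) is
   accepted with IMPLICIT_MASK = 63, since the AND immediate covers the
   implicit mask and the remaining (plus (reg) (const_int)) decomposes
   as an address-style operand with base (reg:SI n) and offset 7.  */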
3413 /* Return true if CODE is a valid address without index. */
3415 bool
3416 s390_legitimate_address_without_index_p (rtx op)
3418 struct s390_address addr;
3420 if (!s390_decompose_address (XEXP (op, 0), &addr))
3421 return false;
3422 if (addr.indx)
3423 return false;
3425 return true;
3429 /* Return TRUE if ADDR is an operand valid for a load/store relative
3430 instruction. Be aware that the alignment of the operand needs to
3431 be checked separately.
3432 Valid addresses are single references or a sum of a reference and a
3433 constant integer. Return these parts in SYMREF and ADDEND. You can
3434 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3435 values. */
3437 static bool
3438 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3440 HOST_WIDE_INT tmpaddend = 0;
3442 if (GET_CODE (addr) == CONST)
3443 addr = XEXP (addr, 0);
3445 if (GET_CODE (addr) == PLUS)
3447 if (!CONST_INT_P (XEXP (addr, 1)))
3448 return false;
3450 tmpaddend = INTVAL (XEXP (addr, 1));
3451 addr = XEXP (addr, 0);
3454 if (GET_CODE (addr) == SYMBOL_REF
3455 || (GET_CODE (addr) == UNSPEC
3456 && (XINT (addr, 1) == UNSPEC_GOTENT
3457 || XINT (addr, 1) == UNSPEC_PLT31)))
3459 if (symref)
3460 *symref = addr;
3461 if (addend)
3462 *addend = tmpaddend;
3464 return true;
3466 return false;
3469 /* Return true if the address in OP is valid for constraint letter C
3470 if wrapped in a MEM rtx. Pass LIT_POOL_OK as true if literal
3471 pool MEMs should be accepted. Only the Q, R, S, T constraint
3472 letters are allowed for C. */
3474 static int
3475 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3477 rtx symref;
3478 struct s390_address addr;
3479 bool decomposed = false;
3481 if (!address_operand (op, GET_MODE (op)))
3482 return 0;
3484 /* This check makes sure that no symbolic addresses (except literal
3485 pool references) are accepted by the R or T constraints. */
3486 if (s390_loadrelative_operand_p (op, &symref, NULL)
3487 && (!lit_pool_ok
3488 || !SYMBOL_REF_P (symref)
3489 || !CONSTANT_POOL_ADDRESS_P (symref)))
3490 return 0;
3492 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3493 if (!lit_pool_ok)
3495 if (!s390_decompose_address (op, &addr))
3496 return 0;
3497 if (addr.literal_pool)
3498 return 0;
3499 decomposed = true;
3502 /* With reload, we sometimes get intermediate address forms that are
3503 actually invalid as-is, but we need to accept them in the most
3504 generic cases below ('R' or 'T'), since reload will in fact fix
3505 them up. LRA behaves differently here; we never see such forms,
3506 but on the other hand, we need to strictly reject every invalid
3507 address form. After both reload and LRA invalid address forms
3508 must be rejected, because nothing will fix them up later. Perform
3509 this check right up front. */
3510 if (lra_in_progress || reload_completed)
3512 if (!decomposed && !s390_decompose_address (op, &addr))
3513 return 0;
3514 decomposed = true;
3517 switch (c)
3519 case 'Q': /* no index short displacement */
3520 if (!decomposed && !s390_decompose_address (op, &addr))
3521 return 0;
3522 if (addr.indx)
3523 return 0;
3524 if (!s390_short_displacement (addr.disp))
3525 return 0;
3526 break;
3528 case 'R': /* with index short displacement */
3529 if (TARGET_LONG_DISPLACEMENT)
3531 if (!decomposed && !s390_decompose_address (op, &addr))
3532 return 0;
3533 if (!s390_short_displacement (addr.disp))
3534 return 0;
3536 /* Any invalid address here will be fixed up by reload,
3537 so accept it for the most generic constraint. */
3538 break;
3540 case 'S': /* no index long displacement */
3541 if (!decomposed && !s390_decompose_address (op, &addr))
3542 return 0;
3543 if (addr.indx)
3544 return 0;
3545 break;
3547 case 'T': /* with index long displacement */
3548 /* Any invalid address here will be fixed up by reload,
3549 so accept it for the most generic constraint. */
3550 break;
3552 default:
3553 return 0;
3555 return 1;
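/* Illustrative examples (assuming the long-displacement facility):
   (plus (reg) (const_int 5000)) satisfies 'S' and 'T' but neither 'Q'
   nor 'R', because 5000 is not a short displacement; adding an index
   register, as in (plus (plus (reg) (reg)) (const_int 40)), rules out
   'Q' and 'S' and leaves 'R' and 'T'.  */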
3559 /* Evaluates constraint strings described by the regular expression
3560 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3561 the constraint given in STR, and 0 otherwise. */
3564 s390_mem_constraint (const char *str, rtx op)
3566 char c = str[0];
3568 switch (c)
3570 case 'A':
3571 /* Check for offsettable variants of memory constraints. */
3572 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3573 return 0;
3574 if ((reload_completed || reload_in_progress)
3575 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3576 return 0;
3577 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3578 case 'B':
3579 /* Check for non-literal-pool variants of memory constraints. */
3580 if (!MEM_P (op))
3581 return 0;
3582 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3583 case 'Q':
3584 case 'R':
3585 case 'S':
3586 case 'T':
3587 if (GET_CODE (op) != MEM)
3588 return 0;
3589 return s390_check_qrst_address (c, XEXP (op, 0), true);
3590 case 'Y':
3591 /* Simply check for the basic form of a shift count. Reload will
3592 take care of making sure we have a proper base register. */
3593 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3594 return 0;
3595 break;
3596 case 'Z':
3597 return s390_check_qrst_address (str[1], op, true);
3598 default:
3599 return 0;
3601 return 1;
3605 /* Evaluates constraint strings starting with letter O. Input
3606 parameter C is the letter following the "O" in the constraint string
3607 (its second letter). Returns 1 if VALUE meets the respective constraint and 0
3608 otherwise. */
3611 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3613 if (!TARGET_EXTIMM)
3614 return 0;
3616 switch (c)
3618 case 's':
3619 return trunc_int_for_mode (value, SImode) == value;
3621 case 'p':
3622 return value == 0
3623 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3625 case 'n':
3626 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3628 default:
3629 gcc_unreachable ();
3634 /* Evaluates constraint strings starting with letter N. Parameter STR
3635 contains the letters following letter "N" in the constraint string.
3636 Returns true if VALUE matches the constraint. */
3639 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3641 machine_mode mode, part_mode;
3642 int def;
3643 int part, part_goal;
3646 if (str[0] == 'x')
3647 part_goal = -1;
3648 else
3649 part_goal = str[0] - '0';
3651 switch (str[1])
3653 case 'Q':
3654 part_mode = QImode;
3655 break;
3656 case 'H':
3657 part_mode = HImode;
3658 break;
3659 case 'S':
3660 part_mode = SImode;
3661 break;
3662 default:
3663 return 0;
3666 switch (str[2])
3668 case 'H':
3669 mode = HImode;
3670 break;
3671 case 'S':
3672 mode = SImode;
3673 break;
3674 case 'D':
3675 mode = DImode;
3676 break;
3677 default:
3678 return 0;
3681 switch (str[3])
3683 case '0':
3684 def = 0;
3685 break;
3686 case 'F':
3687 def = -1;
3688 break;
3689 default:
3690 return 0;
3693 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3694 return 0;
3696 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3697 if (part < 0)
3698 return 0;
3699 if (part_goal != -1 && part_goal != part)
3700 return 0;
3702 return 1;
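/* Illustrative example (hypothetical constraint suffix): for the
   string "xQD0" -- any part, QImode parts, DImode value, default 0 --
   the value 0xff00 matches because exactly one byte of the DImode
   constant differs from zero, whereas 0xff00ff does not, since two
   bytes differ.  */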
3706 /* Returns true if the input parameter VALUE is a float zero. */
3709 s390_float_const_zero_p (rtx value)
3711 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3712 && value == CONST0_RTX (GET_MODE (value)));
3715 /* Implement TARGET_REGISTER_MOVE_COST. */
3717 static int
3718 s390_register_move_cost (machine_mode mode,
3719 reg_class_t from, reg_class_t to)
3721 /* On s390, copying between FPRs and GPRs is expensive. */
3723 /* It becomes somewhat faster having ldgr/lgdr. */
3724 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3726 /* ldgr is single cycle. */
3727 if (reg_classes_intersect_p (from, GENERAL_REGS)
3728 && reg_classes_intersect_p (to, FP_REGS))
3729 return 1;
3730 /* lgdr needs 3 cycles. */
3731 if (reg_classes_intersect_p (to, GENERAL_REGS)
3732 && reg_classes_intersect_p (from, FP_REGS))
3733 return 3;
3736 /* Otherwise copying is done via memory. */
3737 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3738 && reg_classes_intersect_p (to, FP_REGS))
3739 || (reg_classes_intersect_p (from, FP_REGS)
3740 && reg_classes_intersect_p (to, GENERAL_REGS)))
3741 return 10;
3743 /* We usually do not want to copy via CC. */
3744 if (reg_classes_intersect_p (from, CC_REGS)
3745 || reg_classes_intersect_p (to, CC_REGS))
3746 return 5;
3748 return 1;
3751 /* Implement TARGET_MEMORY_MOVE_COST. */
3753 static int
3754 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3755 reg_class_t rclass ATTRIBUTE_UNUSED,
3756 bool in ATTRIBUTE_UNUSED)
3758 return 2;
3761 /* Compute a (partial) cost for rtx X. Return true if the complete
3762 cost has been computed, and false if subexpressions should be
3763 scanned. In either case, *TOTAL contains the cost result. The
3764 initial value of *TOTAL is the default value computed by
3765 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3766 code of the superexpression of x. */
3768 static bool
3769 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3770 int opno ATTRIBUTE_UNUSED,
3771 int *total, bool speed ATTRIBUTE_UNUSED)
3773 int code = GET_CODE (x);
3774 switch (code)
3776 case CONST:
3777 case CONST_INT:
3778 case LABEL_REF:
3779 case SYMBOL_REF:
3780 case CONST_DOUBLE:
3781 case CONST_WIDE_INT:
3782 case MEM:
3783 *total = 0;
3784 return true;
3785 case SET: {
3786 rtx dst = SET_DEST (x);
3787 rtx src = SET_SRC (x);
3789 switch (GET_CODE (src))
3791 case IF_THEN_ELSE: {
3792 /* Without this a conditional move instruction would be
3793 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3794 comparison operator). That's a bit pessimistic. */
3796 if (!TARGET_Z196)
3797 return false;
3799 rtx cond = XEXP (src, 0);
3800 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3801 return false;
3803 /* It is going to be a load/store on condition. Make it
3804 slightly more expensive than a normal load. */
3805 *total = COSTS_N_INSNS (1) + 2;
3807 rtx then = XEXP (src, 1);
3808 rtx els = XEXP (src, 2);
3810 /* It is a real IF-THEN-ELSE. An additional move will be
3811 needed to implement that. */
3812 if (!TARGET_Z15 && reload_completed && !rtx_equal_p (dst, then)
3813 && !rtx_equal_p (dst, els))
3814 *total += COSTS_N_INSNS (1) / 2;
3816 /* A minor penalty for constants we cannot directly handle. */
3817 if ((CONST_INT_P (then) || CONST_INT_P (els))
3818 && (!TARGET_Z13 || MEM_P (dst)
3819 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3820 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3821 *total += COSTS_N_INSNS (1) / 2;
3823 /* A store on condition can only handle register src operands. */
3824 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3825 *total += COSTS_N_INSNS (1) / 2;
3827 return true;
3829 default:
3830 break;
3833 switch (GET_CODE (dst))
3835 case SUBREG:
3836 if (!REG_P (SUBREG_REG (dst)))
3837 *total += rtx_cost (SUBREG_REG (src), VOIDmode, SET, 0, speed);
3838 /* fallthrough */
3839 case REG:
3840 /* If this is a VR -> VR copy, count the number of
3841 registers. */
3842 if (VECTOR_MODE_P (GET_MODE (dst)) && REG_P (src))
3844 int nregs = s390_hard_regno_nregs (VR0_REGNUM, GET_MODE (dst));
3845 *total = COSTS_N_INSNS (nregs);
3847 /* Same for GPRs. */
3848 else if (REG_P (src))
3850 int nregs
3851 = s390_hard_regno_nregs (GPR0_REGNUM, GET_MODE (dst));
3852 *total = COSTS_N_INSNS (nregs);
3854 else
3855 /* Otherwise just cost the src. */
3856 *total += rtx_cost (src, mode, SET, 1, speed);
3857 return true;
3858 case MEM: {
3859 rtx address = XEXP (dst, 0);
3860 rtx tmp;
3861 HOST_WIDE_INT tmp2;
3862 if (s390_loadrelative_operand_p (address, &tmp, &tmp2))
3863 *total = COSTS_N_INSNS (1);
3864 else
3865 *total = s390_address_cost (address, mode, 0, speed);
3866 return true;
3868 default:
3869 /* Not handled for now, assume default costs. */
3870 *total = COSTS_N_INSNS (1);
3871 return false;
3874 return false;
3876 case IOR:
3878 /* nnrk, nngrk */
3879 if (TARGET_Z15
3880 && (mode == SImode || mode == DImode)
3881 && GET_CODE (XEXP (x, 0)) == NOT
3882 && GET_CODE (XEXP (x, 1)) == NOT)
3884 *total = COSTS_N_INSNS (1);
3885 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3886 *total += 1;
3887 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3888 *total += 1;
3889 return true;
3892 /* risbg */
3893 if (GET_CODE (XEXP (x, 0)) == AND
3894 && GET_CODE (XEXP (x, 1)) == ASHIFT
3895 && REG_P (XEXP (XEXP (x, 0), 0))
3896 && REG_P (XEXP (XEXP (x, 1), 0))
3897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3898 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3899 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3900 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3902 *total = COSTS_N_INSNS (2);
3903 return true;
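/* Illustrative match for the risbg case above (hypothetical operands):
   (ior (and (reg a) (const_int 15)) (ashift (reg b) (const_int 4)))
   merges the low four bits of A with B shifted left by four; the AND
   mask 15 equals (1 << 4) - 1, so the whole expression is costed as
   COSTS_N_INSNS (2) here.  */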
3906 /* ~AND on a 128 bit mode. This can be done using a vector
3907 instruction. */
3908 if (TARGET_VXE
3909 && GET_CODE (XEXP (x, 0)) == NOT
3910 && GET_CODE (XEXP (x, 1)) == NOT
3911 && REG_P (XEXP (XEXP (x, 0), 0))
3912 && REG_P (XEXP (XEXP (x, 1), 0))
3913 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3914 && s390_hard_regno_mode_ok (VR0_REGNUM,
3915 GET_MODE (XEXP (XEXP (x, 0), 0))))
3917 *total = COSTS_N_INSNS (1);
3918 return true;
3921 *total = COSTS_N_INSNS (1);
3922 return false;
3924 case AND:
3925 /* nork, nogrk */
3926 if (TARGET_Z15
3927 && (mode == SImode || mode == DImode)
3928 && GET_CODE (XEXP (x, 0)) == NOT
3929 && GET_CODE (XEXP (x, 1)) == NOT)
3931 *total = COSTS_N_INSNS (1);
3932 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3933 *total += 1;
3934 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3935 *total += 1;
3936 return true;
3938 /* fallthrough */
3939 case ASHIFT:
3940 case ASHIFTRT:
3941 case LSHIFTRT:
3942 case ROTATE:
3943 case ROTATERT:
3944 case XOR:
3945 case NEG:
3946 case NOT:
3947 case PLUS:
3948 case MINUS:
3949 *total = COSTS_N_INSNS (1);
3950 return false;
3952 case MULT:
3953 switch (mode)
3955 case E_SImode:
3957 rtx left = XEXP (x, 0);
3958 rtx right = XEXP (x, 1);
3959 if (GET_CODE (right) == CONST_INT
3960 && CONST_OK_FOR_K (INTVAL (right)))
3961 *total = s390_cost->mhi;
3962 else if (GET_CODE (left) == SIGN_EXTEND)
3963 *total = s390_cost->mh;
3964 else
3965 *total = s390_cost->ms; /* msr, ms, msy */
3966 break;
3968 case E_DImode:
3970 rtx left = XEXP (x, 0);
3971 rtx right = XEXP (x, 1);
3972 if (TARGET_ZARCH)
3974 if (GET_CODE (right) == CONST_INT
3975 && CONST_OK_FOR_K (INTVAL (right)))
3976 *total = s390_cost->mghi;
3977 else if (GET_CODE (left) == SIGN_EXTEND)
3978 *total = s390_cost->msgf;
3979 else
3980 *total = s390_cost->msg; /* msgr, msg */
3982 else /* TARGET_31BIT */
3984 if (GET_CODE (left) == SIGN_EXTEND
3985 && GET_CODE (right) == SIGN_EXTEND)
3986 /* mulsidi case: mr, m */
3987 *total = s390_cost->m;
3988 else if (GET_CODE (left) == ZERO_EXTEND
3989 && GET_CODE (right) == ZERO_EXTEND)
3990 /* umulsidi case: ml, mlr */
3991 *total = s390_cost->ml;
3992 else
3993 /* Complex calculation is required. */
3994 *total = COSTS_N_INSNS (40);
3996 break;
3998 case E_SFmode:
3999 case E_DFmode:
4000 *total = s390_cost->mult_df;
4001 break;
4002 case E_TFmode:
4003 *total = s390_cost->mxbr;
4004 break;
4005 default:
4006 return false;
4008 return false;
4010 case FMA:
4011 switch (mode)
4013 case E_DFmode:
4014 *total = s390_cost->madbr;
4015 break;
4016 case E_SFmode:
4017 *total = s390_cost->maebr;
4018 break;
4019 default:
4020 return false;
4022 /* Negate in the third argument is free: FMSUB. */
4023 if (GET_CODE (XEXP (x, 2)) == NEG)
4025 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
4026 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
4027 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
4028 return true;
4030 return false;
4032 case UDIV:
4033 case UMOD:
4034 if (mode == TImode) /* 128 bit division */
4035 *total = s390_cost->dlgr;
4036 else if (mode == DImode)
4038 rtx right = XEXP (x, 1);
4039 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
4040 *total = s390_cost->dlr;
4041 else /* 64 by 64 bit division */
4042 *total = s390_cost->dlgr;
4044 else if (mode == SImode) /* 32 bit division */
4045 *total = s390_cost->dlr;
4046 return false;
4048 case DIV:
4049 case MOD:
4050 if (mode == DImode)
4052 rtx right = XEXP (x, 1);
4053 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
4054 if (TARGET_ZARCH)
4055 *total = s390_cost->dsgfr;
4056 else
4057 *total = s390_cost->dr;
4058 else /* 64 by 64 bit division */
4059 *total = s390_cost->dsgr;
4061 else if (mode == SImode) /* 32 bit division */
4062 *total = s390_cost->dlr;
4063 else if (mode == SFmode)
4065 *total = s390_cost->debr;
4067 else if (mode == DFmode)
4069 *total = s390_cost->ddbr;
4071 else if (mode == TFmode)
4073 *total = s390_cost->dxbr;
4075 return false;
4077 case SQRT:
4078 if (mode == SFmode)
4079 *total = s390_cost->sqebr;
4080 else if (mode == DFmode)
4081 *total = s390_cost->sqdbr;
4082 else /* TFmode */
4083 *total = s390_cost->sqxbr;
4084 return false;
4086 case SIGN_EXTEND:
4087 case ZERO_EXTEND:
4088 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
4089 || outer_code == PLUS || outer_code == MINUS
4090 || outer_code == COMPARE)
4091 *total = 0;
4092 return false;
4094 case COMPARE:
4095 *total = COSTS_N_INSNS (1);
4097 /* nxrk, nxgrk ~(a^b)==0 */
4098 if (TARGET_Z15
4099 && GET_CODE (XEXP (x, 0)) == NOT
4100 && XEXP (x, 1) == const0_rtx
4101 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
4102 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
4103 && mode == CCZmode)
4105 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
4106 *total += 1;
4107 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4108 *total += 1;
4109 return true;
4112 /* nnrk, nngrk, nork, nogrk */
4113 if (TARGET_Z15
4114 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
4115 && XEXP (x, 1) == const0_rtx
4116 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
4117 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
4118 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
4119 && mode == CCZmode)
4121 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
4122 *total += 1;
4123 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
4124 *total += 1;
4125 return true;
4128 if (GET_CODE (XEXP (x, 0)) == AND
4129 && GET_CODE (XEXP (x, 1)) == CONST_INT
4130 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
4132 rtx op0 = XEXP (XEXP (x, 0), 0);
4133 rtx op1 = XEXP (XEXP (x, 0), 1);
4134 rtx op2 = XEXP (x, 1);
4136 if (memory_operand (op0, GET_MODE (op0))
4137 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
4138 return true;
4139 if (register_operand (op0, GET_MODE (op0))
4140 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
4141 return true;
4143 return false;
4145 default:
4146 return false;
4150 /* Return the cost of an address rtx ADDR. */
4152 static int
4153 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
4154 addr_space_t as ATTRIBUTE_UNUSED,
4155 bool speed ATTRIBUTE_UNUSED)
4157 struct s390_address ad;
4158 if (!s390_decompose_address (addr, &ad))
4159 return 1000;
4161 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
4164 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4165 static int
4166 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4167 tree vectype,
4168 int misalign ATTRIBUTE_UNUSED)
4170 switch (type_of_cost)
4172 case scalar_stmt:
4173 case scalar_load:
4174 case scalar_store:
4175 case vector_stmt:
4176 case vector_load:
4177 case vector_store:
4178 case vector_gather_load:
4179 case vector_scatter_store:
4180 case vec_to_scalar:
4181 case scalar_to_vec:
4182 case cond_branch_not_taken:
4183 case vec_perm:
4184 case vec_promote_demote:
4185 case unaligned_load:
4186 case unaligned_store:
4187 return 1;
4189 case cond_branch_taken:
4190 return 3;
4192 case vec_construct:
4193 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
4195 default:
4196 gcc_unreachable ();
4200 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
4201 otherwise return 0. */
4203 int
4204 tls_symbolic_operand (rtx op)
4206 if (GET_CODE (op) != SYMBOL_REF)
4207 return 0;
4208 return SYMBOL_REF_TLS_MODEL (op);
4211 /* Split DImode access register reference REG (on 64-bit) into its constituent
4212 low and high parts, and store them into LO and HI. Note that gen_lowpart/
4213 gen_highpart cannot be used as they assume all registers are word-sized,
4214 while our access registers have only half that size. */
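/* For example, for a DImode value held in the access-register pair starting
   at an even register number N, HI receives the SImode register N and LO the
   SImode register N + 1, i.e. the high word lives in the lower-numbered
   register of the pair (illustrative; N stands for any valid pair).  */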
4216 void
4217 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
4219 gcc_assert (TARGET_64BIT);
4220 gcc_assert (ACCESS_REG_P (reg));
4221 gcc_assert (GET_MODE (reg) == DImode);
4222 gcc_assert (!(REGNO (reg) & 1));
4224 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
4225 *hi = gen_rtx_REG (SImode, REGNO (reg));
4228 /* Return true if OP contains a symbol reference */
4230 bool
4231 symbolic_reference_mentioned_p (rtx op)
4233 const char *fmt;
4234 int i;
4236 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4237 return 1;
4239 fmt = GET_RTX_FORMAT (GET_CODE (op));
4240 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4242 if (fmt[i] == 'E')
4244 int j;
4246 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4247 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4248 return 1;
4251 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4252 return 1;
4255 return 0;
4258 /* Return true if OP contains a reference to a thread-local symbol. */
4260 bool
4261 tls_symbolic_reference_mentioned_p (rtx op)
4263 const char *fmt;
4264 int i;
4266 if (GET_CODE (op) == SYMBOL_REF)
4267 return tls_symbolic_operand (op);
4269 fmt = GET_RTX_FORMAT (GET_CODE (op));
4270 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4272 if (fmt[i] == 'E')
4274 int j;
4276 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4277 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4278 return true;
4281 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4282 return true;
4285 return false;
4289 /* Return true if OP is a legitimate general operand when
4290 generating PIC code. It is given that flag_pic is on
4291 and that OP satisfies CONSTANT_P. */
4293 int
4294 legitimate_pic_operand_p (rtx op)
4296 /* Accept all non-symbolic constants. */
4297 if (!SYMBOLIC_CONST (op))
4298 return 1;
4300 /* Accept addresses that can be expressed relative to (pc). */
4301 if (larl_operand (op, VOIDmode))
4302 return 1;
4304 /* Reject everything else; must be handled
4305 via emit_symbolic_move. */
4306 return 0;
4309 /* Returns true if the constant value OP is a legitimate general operand.
4310 It is given that OP satisfies CONSTANT_P. */
4312 static bool
4313 s390_legitimate_constant_p (machine_mode mode, rtx op)
4315 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4317 if (GET_MODE_SIZE (mode) != 16)
4318 return 0;
4320 if (!satisfies_constraint_j00 (op)
4321 && !satisfies_constraint_jm1 (op)
4322 && !satisfies_constraint_jKK (op)
4323 && !satisfies_constraint_jxx (op)
4324 && !satisfies_constraint_jyy (op))
4325 return 0;
4328 /* Accept all non-symbolic constants. */
4329 if (!SYMBOLIC_CONST (op))
4330 return 1;
4332 /* Accept immediate LARL operands. */
4333 if (larl_operand (op, mode))
4334 return 1;
4336 /* Thread-local symbols are never legal constants. This is
4337 so that emit_call knows that computing such addresses
4338 might require a function call. */
4339 if (TLS_SYMBOLIC_CONST (op))
4340 return 0;
4342 /* In the PIC case, symbolic constants must *not* be
4343 forced into the literal pool. We accept them here,
4344 so that they will be handled by emit_symbolic_move. */
4345 if (flag_pic)
4346 return 1;
4348 /* All remaining non-PIC symbolic constants are
4349 forced into the literal pool. */
4350 return 0;
4353 /* Determine if it's legal to put X into the constant pool. This
4354 is not possible if X contains the address of a symbol that is
4355 not constant (TLS) or not known at final link time (PIC). */
4357 static bool
4358 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4360 switch (GET_CODE (x))
4362 case CONST_INT:
4363 case CONST_DOUBLE:
4364 case CONST_WIDE_INT:
4365 case CONST_VECTOR:
4366 /* Accept all non-symbolic constants. */
4367 return false;
4369 case NEG:
4370 /* Accept a unary '-' only on scalar numeric constants.  */
4371 switch (GET_CODE (XEXP (x, 0)))
4373 case CONST_INT:
4374 case CONST_DOUBLE:
4375 case CONST_WIDE_INT:
4376 return false;
4377 default:
4378 return true;
4381 case LABEL_REF:
4382 /* Labels are OK iff we are non-PIC. */
4383 return flag_pic != 0;
4385 case SYMBOL_REF:
4386 /* 'Naked' TLS symbol references are never OK,
4387 non-TLS symbols are OK iff we are non-PIC. */
4388 if (tls_symbolic_operand (x))
4389 return true;
4390 else
4391 return flag_pic != 0;
4393 case CONST:
4394 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4395 case PLUS:
4396 case MINUS:
4397 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4398 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4400 case UNSPEC:
4401 switch (XINT (x, 1))
4403 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4404 case UNSPEC_LTREL_OFFSET:
4405 case UNSPEC_GOT:
4406 case UNSPEC_GOTOFF:
4407 case UNSPEC_PLTOFF:
4408 case UNSPEC_TLSGD:
4409 case UNSPEC_TLSLDM:
4410 case UNSPEC_NTPOFF:
4411 case UNSPEC_DTPOFF:
4412 case UNSPEC_GOTNTPOFF:
4413 case UNSPEC_INDNTPOFF:
4414 return false;
4416 /* If the literal pool shares the code section, execute template
4417 placeholders must be put into the pool as well.  */
4418 case UNSPEC_INSN:
4419 default:
4420 return true;
4422 break;
4424 default:
4425 gcc_unreachable ();
4429 /* Returns true if the constant value OP is a legitimate general
4430 operand during and after reload.  The difference from
4431 legitimate_constant_p is that this function will not accept
4432 a constant that would need to be forced to the literal pool
4433 before it can be used as an operand.
4434 This function accepts all constants which can be loaded directly
4435 into a GPR. */
4437 bool
4438 legitimate_reload_constant_p (rtx op)
4440 /* Accept la(y) operands. */
4441 if (GET_CODE (op) == CONST_INT
4442 && DISP_IN_RANGE (INTVAL (op)))
4443 return true;
4445 /* Accept l(g)hi/l(g)fi operands. */
4446 if (GET_CODE (op) == CONST_INT
4447 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4448 return true;
4450 /* Accept lliXX operands. */
4451 if (TARGET_ZARCH
4452 && GET_CODE (op) == CONST_INT
4453 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4454 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4455 return true;
4457 if (TARGET_EXTIMM
4458 && GET_CODE (op) == CONST_INT
4459 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4460 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4461 return true;
4463 /* Accept larl operands. */
4464 if (larl_operand (op, VOIDmode))
4465 return true;
4467 /* Accept floating-point zero operands that fit into a single GPR. */
4468 if (GET_CODE (op) == CONST_DOUBLE
4469 && s390_float_const_zero_p (op)
4470 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4471 return true;
4473 /* Accept double-word operands that can be split. */
4474 if (GET_CODE (op) == CONST_WIDE_INT
4475 || (GET_CODE (op) == CONST_INT
4476 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4478 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4479 rtx hi = operand_subword (op, 0, 0, dword_mode);
4480 rtx lo = operand_subword (op, 1, 0, dword_mode);
4481 return legitimate_reload_constant_p (hi)
4482 && legitimate_reload_constant_p (lo);
4485 /* Everything else cannot be handled without reload. */
4486 return false;
4489 /* Returns true if the constant value OP is a legitimate fp operand
4490 during and after reload.
4491 This function accepts all constants which can be loaded directly
4492 into an FPR. */
4494 static bool
4495 legitimate_reload_fp_constant_p (rtx op)
4497 /* Accept floating-point zero operands if the load zero instruction
4498 can be used. Prior to z196 the load fp zero instruction caused a
4499 performance penalty if the result is used as a BFP number.  */
4500 if (TARGET_Z196
4501 && GET_CODE (op) == CONST_DOUBLE
4502 && s390_float_const_zero_p (op))
4503 return true;
4505 return false;
4508 /* Returns true if the constant value OP is a legitimate vector operand
4509 during and after reload.
4510 This function accepts all constants which can be loaded directly
4511 into a VR.  */
4513 static bool
4514 legitimate_reload_vector_constant_p (rtx op)
4516 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4517 && (satisfies_constraint_j00 (op)
4518 || satisfies_constraint_jm1 (op)
4519 || satisfies_constraint_jKK (op)
4520 || satisfies_constraint_jxx (op)
4521 || satisfies_constraint_jyy (op)))
4522 return true;
4524 return false;
4527 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4528 return the class of reg to actually use. */
4530 static reg_class_t
4531 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4533 switch (GET_CODE (op))
4535 /* Constants we cannot reload into general registers
4536 must be forced into the literal pool. */
4537 case CONST_VECTOR:
4538 case CONST_DOUBLE:
4539 case CONST_INT:
4540 case CONST_WIDE_INT:
4541 if (reg_class_subset_p (GENERAL_REGS, rclass)
4542 && legitimate_reload_constant_p (op))
4543 return GENERAL_REGS;
4544 else if (reg_class_subset_p (ADDR_REGS, rclass)
4545 && legitimate_reload_constant_p (op))
4546 return ADDR_REGS;
4547 else if (reg_class_subset_p (FP_REGS, rclass)
4548 && legitimate_reload_fp_constant_p (op))
4549 return FP_REGS;
4550 else if (reg_class_subset_p (VEC_REGS, rclass)
4551 && legitimate_reload_vector_constant_p (op))
4552 return VEC_REGS;
4554 return NO_REGS;
4556 /* If a symbolic constant or a PLUS is reloaded,
4557 it is most likely being used as an address, so
4558 prefer ADDR_REGS. If 'class' is not a superset
4559 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4560 case CONST:
4561 /* Symrefs cannot be pushed into the literal pool with -fPIC
4562 so we *MUST NOT* return NO_REGS for these cases
4563 (s390_cannot_force_const_mem will return true).
4565 On the other hand we MUST return NO_REGS for symrefs with
4566 invalid addend which might have been pushed to the literal
4567 pool (no -fPIC). Usually we would expect them to be
4568 handled via secondary reload but this does not happen if
4569 they are used as literal pool slot replacement in reload
4570 inheritance (see emit_input_reload_insns). */
4571 if (GET_CODE (XEXP (op, 0)) == PLUS
4572 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4573 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4575 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4576 return ADDR_REGS;
4577 else
4578 return NO_REGS;
4580 /* fallthrough */
4581 case LABEL_REF:
4582 case SYMBOL_REF:
4583 if (!legitimate_reload_constant_p (op))
4584 return NO_REGS;
4585 /* fallthrough */
4586 case PLUS:
4587 /* load address will be used. */
4588 if (reg_class_subset_p (ADDR_REGS, rclass))
4589 return ADDR_REGS;
4590 else
4591 return NO_REGS;
4593 default:
4594 break;
4597 return rclass;
4600 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4601 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4602 aligned. */
4604 bool
4605 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4607 HOST_WIDE_INT addend;
4608 rtx symref;
4610 /* The "required alignment" might be 0 (e.g. for certain structs
4611 accessed via BLKmode). Early abort in this case, as well as when
4612 an alignment > 8 is required. */
4613 if (alignment < 2 || alignment > 8)
4614 return false;
4616 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4617 return false;
4619 if (addend & (alignment - 1))
4620 return false;
4622 if (GET_CODE (symref) == SYMBOL_REF)
4624 /* s390_encode_section_info is not called for anchors, since they don't
4625 have corresponding VAR_DECLs. Therefore, we cannot rely on
4626 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4627 if (SYMBOL_REF_ANCHOR_P (symref))
4629 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4630 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4631 / BITS_PER_UNIT);
4633 gcc_assert (block_offset >= 0);
4634 return ((block_offset & (alignment - 1)) == 0
4635 && block_alignment >= alignment);
4638 /* We have load-relative instructions for 2-byte, 4-byte, and
4639 8-byte alignment so allow only these. */
4640 switch (alignment)
4642 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4643 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4644 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4645 default: return false;
4649 if (GET_CODE (symref) == UNSPEC
4650 && alignment <= UNITS_PER_LONG)
4651 return true;
4653 return false;
4656 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4657 operand, SCRATCH is used to load the even part of the address, which
4658 is then incremented by one.  */
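/* For example, reloading the address sym + 5 (an odd addend) is emitted
   roughly as
     larl  scratch, sym+4
     la    reg, 1(scratch)
   since larl accepts only even offsets and la does not clobber the
   condition code (the addend 5 is illustrative).  */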
4660 void
4661 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4663 HOST_WIDE_INT addend;
4664 rtx symref;
4666 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4667 gcc_unreachable ();
4669 if (!(addend & 1))
4670 /* Easy case. The addend is even so larl will do fine. */
4671 emit_move_insn (reg, addr);
4672 else
4674 /* We can leave the scratch register untouched if the target
4675 register is a valid base register. */
4676 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4677 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4678 scratch = reg;
4680 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4681 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4683 if (addend != 1)
4684 emit_move_insn (scratch,
4685 gen_rtx_CONST (Pmode,
4686 gen_rtx_PLUS (Pmode, symref,
4687 GEN_INT (addend - 1))));
4688 else
4689 emit_move_insn (scratch, symref);
4691 /* Increment the address using la in order to avoid clobbering cc. */
4692 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4696 /* Generate what is necessary to move between REG and MEM using
4697 SCRATCH. The direction is given by TOMEM. */
4699 void
4700 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4702 /* Reload might have pulled a constant out of the literal pool.
4703 Force it back in. */
4704 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4705 || GET_CODE (mem) == CONST_WIDE_INT
4706 || GET_CODE (mem) == CONST_VECTOR
4707 || GET_CODE (mem) == CONST)
4708 mem = force_const_mem (GET_MODE (reg), mem);
4710 gcc_assert (MEM_P (mem));
4712 /* For a load from memory we can leave the scratch register
4713 untouched if the target register is a valid base register. */
4714 if (!tomem
4715 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4716 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4717 && GET_MODE (reg) == GET_MODE (scratch))
4718 scratch = reg;
4720 /* Load address into scratch register. Since we can't have a
4721 secondary reload for a secondary reload we have to cover the case
4722 where larl would need a secondary reload here as well. */
4723 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4725 /* Now we can use a standard load/store to do the move. */
4726 if (tomem)
4727 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4728 else
4729 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4732 /* Inform reload about cases where moving X with a mode MODE to a register in
4733 RCLASS requires an extra scratch or immediate register. Return the class
4734 needed for the immediate register. */
4736 static reg_class_t
4737 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4738 machine_mode mode, secondary_reload_info *sri)
4740 enum reg_class rclass = (enum reg_class) rclass_i;
4742 /* Intermediate register needed. */
4743 if (reg_classes_intersect_p (CC_REGS, rclass))
4744 return GENERAL_REGS;
4746 if (TARGET_VX)
4748 /* The vst/vl vector move instructions allow only short
4749 displacements. */
4750 if (MEM_P (x)
4751 && GET_CODE (XEXP (x, 0)) == PLUS
4752 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4753 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4754 && reg_class_subset_p (rclass, VEC_REGS)
4755 && (!reg_class_subset_p (rclass, FP_REGS)
4756 || (GET_MODE_SIZE (mode) > 8
4757 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4759 if (in_p)
4760 sri->icode = (TARGET_64BIT ?
4761 CODE_FOR_reloaddi_la_in :
4762 CODE_FOR_reloadsi_la_in);
4763 else
4764 sri->icode = (TARGET_64BIT ?
4765 CODE_FOR_reloaddi_la_out :
4766 CODE_FOR_reloadsi_la_out);
4770 if (TARGET_Z10)
4772 HOST_WIDE_INT offset;
4773 rtx symref;
4775 /* On z10 several optimizer steps may generate larl operands with
4776 an odd addend. */
4777 if (in_p
4778 && s390_loadrelative_operand_p (x, &symref, &offset)
4779 && mode == Pmode
4780 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4781 && (offset & 1) == 1)
4782 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4783 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4785 /* Handle all the (mem (symref)) accesses we cannot use the z10
4786 instructions for. */
4787 if (MEM_P (x)
4788 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4789 && (mode == QImode
4790 || !reg_class_subset_p (rclass, GENERAL_REGS)
4791 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4792 || !s390_check_symref_alignment (XEXP (x, 0),
4793 GET_MODE_SIZE (mode))))
4795 #define __SECONDARY_RELOAD_CASE(M,m) \
4796 case E_##M##mode: \
4797 if (TARGET_64BIT) \
4798 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4799 CODE_FOR_reload##m##di_tomem_z10; \
4800 else \
4801 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4802 CODE_FOR_reload##m##si_tomem_z10; \
4803 break;
4805 switch (GET_MODE (x))
4807 __SECONDARY_RELOAD_CASE (QI, qi);
4808 __SECONDARY_RELOAD_CASE (HI, hi);
4809 __SECONDARY_RELOAD_CASE (SI, si);
4810 __SECONDARY_RELOAD_CASE (DI, di);
4811 __SECONDARY_RELOAD_CASE (TI, ti);
4812 __SECONDARY_RELOAD_CASE (SF, sf);
4813 __SECONDARY_RELOAD_CASE (DF, df);
4814 __SECONDARY_RELOAD_CASE (TF, tf);
4815 __SECONDARY_RELOAD_CASE (SD, sd);
4816 __SECONDARY_RELOAD_CASE (DD, dd);
4817 __SECONDARY_RELOAD_CASE (TD, td);
4818 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4819 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4820 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4821 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4822 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4823 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4824 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4825 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4826 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4827 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4828 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4829 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4830 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4831 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4832 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4833 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4834 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4835 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4836 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4837 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4838 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4839 default:
4840 gcc_unreachable ();
4842 #undef __SECONDARY_RELOAD_CASE
4846 /* We need a scratch register when loading a PLUS expression which
4847 is not a legitimate operand of the LOAD ADDRESS instruction. */
4848 /* LRA can deal with transformation of plus op very well -- so we
4849 don't need to prompt LRA in this case. */
4850 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4851 sri->icode = (TARGET_64BIT ?
4852 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4854 /* When performing a multiword move from or to memory, we have to make sure the
4855 second chunk in memory is addressable without causing a displacement
4856 overflow. If that would be the case we calculate the address in
4857 a scratch register. */
4858 if (MEM_P (x)
4859 && GET_CODE (XEXP (x, 0)) == PLUS
4860 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4861 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4862 + GET_MODE_SIZE (mode) - 1))
4864 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4865 in an s_operand address since we may fall back to lm/stm.  So we only
4866 have to care about overflows in the b+i+d case. */
4867 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4868 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4869 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4870 /* For FP_REGS no lm/stm is available so this check is triggered
4871 for displacement overflows in b+i+d and b+d like addresses. */
4872 || (reg_classes_intersect_p (FP_REGS, rclass)
4873 && s390_class_max_nregs (FP_REGS, mode) > 1))
4875 if (in_p)
4876 sri->icode = (TARGET_64BIT ?
4877 CODE_FOR_reloaddi_la_in :
4878 CODE_FOR_reloadsi_la_in);
4879 else
4880 sri->icode = (TARGET_64BIT ?
4881 CODE_FOR_reloaddi_la_out :
4882 CODE_FOR_reloadsi_la_out);
4886 /* A scratch address register is needed when a symbolic constant is
4887 copied to r0 when compiling with -fPIC.  In other cases the target
4888 register might be used as temporary (see legitimize_pic_address). */
4889 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4890 sri->icode = (TARGET_64BIT ?
4891 CODE_FOR_reloaddi_PIC_addr :
4892 CODE_FOR_reloadsi_PIC_addr);
4894 /* Either scratch or no register needed. */
4895 return NO_REGS;
4898 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4900 We need secondary memory to move data between GPRs and FPRs.
4902 - With DFP the ldgr lgdr instructions are available. Due to the
4903 different alignment we cannot use them for SFmode. For 31 bit a
4904 64 bit value in GPR would be a register pair so here we still
4905 need to go via memory.
4907 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4908 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4909 in full VRs so as before also on z13 we do these moves via
4910 memory.
4912 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4914 static bool
4915 s390_secondary_memory_needed (machine_mode mode,
4916 reg_class_t class1, reg_class_t class2)
4918 return (((reg_classes_intersect_p (class1, VEC_REGS)
4919 && reg_classes_intersect_p (class2, GENERAL_REGS))
4920 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4921 && reg_classes_intersect_p (class2, VEC_REGS)))
4922 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4923 || GET_MODE_SIZE (mode) != 8)
4924 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4925 && GET_MODE_SIZE (mode) > 8)));
4928 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4930 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4931 because the movsi and movsf patterns don't handle r/f moves. */
4933 static machine_mode
4934 s390_secondary_memory_needed_mode (machine_mode mode)
4936 if (GET_MODE_BITSIZE (mode) < 32)
4937 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4938 return mode;
4941 /* Generate code to load SRC, which is PLUS that is not a
4942 legitimate operand for the LA instruction, into TARGET.
4943 SCRATCH may be used as scratch register. */
4945 void
4946 s390_expand_plus_operand (rtx target, rtx src,
4947 rtx scratch)
4949 rtx sum1, sum2;
4950 struct s390_address ad;
4952 /* src must be a PLUS; get its two operands. */
4953 gcc_assert (GET_CODE (src) == PLUS);
4954 gcc_assert (GET_MODE (src) == Pmode);
4956 /* Check whether either of the two operands is already scheduled
4957 for replacement by reload. This can happen e.g. when
4958 float registers occur in an address. */
4959 sum1 = find_replacement (&XEXP (src, 0));
4960 sum2 = find_replacement (&XEXP (src, 1));
4961 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4963 /* If the address is already strictly valid, there's nothing to do. */
4964 if (!s390_decompose_address (src, &ad)
4965 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4966 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4968 /* Otherwise, one of the operands cannot be an address register;
4969 we reload its value into the scratch register. */
4970 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4972 emit_move_insn (scratch, sum1);
4973 sum1 = scratch;
4975 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4977 emit_move_insn (scratch, sum2);
4978 sum2 = scratch;
4981 /* According to the way these invalid addresses are generated
4982 in reload.cc, it should never happen (at least on s390) that
4983 *neither* of the PLUS components, after find_replacements
4984 was applied, is an address register. */
4985 if (sum1 == scratch && sum2 == scratch)
4987 debug_rtx (src);
4988 gcc_unreachable ();
4991 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4994 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4995 is only ever performed on addresses, so we can mark the
4996 sum as legitimate for LA in any case. */
4997 s390_load_address (target, src);
5001 /* Return true if ADDR is a valid memory address.
5002 STRICT specifies whether strict register checking applies. */
5004 static bool
5005 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict,
5006 code_helper = ERROR_MARK)
5008 struct s390_address ad;
5010 if (TARGET_Z10
5011 && larl_operand (addr, VOIDmode)
5012 && (mode == VOIDmode
5013 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
5014 return true;
5016 if (!s390_decompose_address (addr, &ad))
5017 return false;
5019 /* The vector memory instructions only support short displacements.
5020 Reject invalid displacements early to prevent plenty of lay
5021 instructions from being generated later which then cannot be merged
5022 properly. */
5023 if (TARGET_VX
5024 && VECTOR_MODE_P (mode)
5025 && ad.disp != NULL_RTX
5026 && CONST_INT_P (ad.disp)
5027 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
5028 return false;
5030 if (strict)
5032 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5033 return false;
5035 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
5036 return false;
5038 else
5040 if (ad.base
5041 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
5042 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
5043 return false;
5045 if (ad.indx
5046 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
5047 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
5048 return false;
5050 return true;
5053 /* Return true if OP is a valid operand for the LA instruction.
5054 In 31-bit, we need to prove that the result is used as an
5055 address, as LA performs only a 31-bit addition. */
5057 bool
5058 legitimate_la_operand_p (rtx op)
5060 struct s390_address addr;
5061 if (!s390_decompose_address (op, &addr))
5062 return false;
5064 return (TARGET_64BIT || addr.pointer);
5067 /* Return true if it is valid *and* preferable to use LA to
5068 compute the sum of OP1 and OP2. */
5070 bool
5071 preferred_la_operand_p (rtx op1, rtx op2)
5073 struct s390_address addr;
5075 if (op2 != const0_rtx)
5076 op1 = gen_rtx_PLUS (Pmode, op1, op2);
5078 if (!s390_decompose_address (op1, &addr))
5079 return false;
5080 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
5081 return false;
5082 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
5083 return false;
5085 /* Avoid LA instructions with index (and base) register on z196 or
5086 later; it is preferable to use regular add instructions when
5087 possible. Starting with zEC12 the la with index register is
5088 "uncracked" again but still slower than a regular add. */
5089 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
5090 return false;
5092 if (!TARGET_64BIT && !addr.pointer)
5093 return false;
5095 if (addr.pointer)
5096 return true;
5098 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
5099 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
5100 return true;
5102 return false;
5105 /* Emit a forced load-address operation to load SRC into DST.
5106 This will use the LOAD ADDRESS instruction even in situations
5107 where legitimate_la_operand_p (SRC) returns false. */
5109 void
5110 s390_load_address (rtx dst, rtx src)
5112 if (TARGET_64BIT)
5113 emit_move_insn (dst, src);
5114 else
5115 emit_insn (gen_force_la_31 (dst, src));
5118 /* Return true if it is OK to use SYMBOL_REF in a relative address.  */
5120 bool
5121 s390_rel_address_ok_p (rtx symbol_ref)
5123 tree decl;
5125 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
5126 return true;
5128 decl = SYMBOL_REF_DECL (symbol_ref);
5130 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
5131 return (s390_pic_data_is_text_relative
5132 || (decl
5133 && TREE_CODE (decl) == FUNCTION_DECL));
5135 return false;
5138 /* Return a legitimate reference for ORIG (an address) using the
5139 register REG. If REG is 0, a new pseudo is generated.
5141 There are two types of references that must be handled:
5143 1. Global data references must load the address from the GOT, via
5144 the PIC reg. An insn is emitted to do this load, and the reg is
5145 returned.
5147 2. Static data references, constant pool addresses, and code labels
5148 compute the address as an offset from the GOT, whose base is in
5149 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5150 differentiate them from global data objects. The returned
5151 address is the PIC reg + an unspec constant.
5153 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
5154 reg also appears in the address. */
5156 rtx
5157 legitimize_pic_address (rtx orig, rtx reg)
5159 rtx addr = orig;
5160 rtx addend = const0_rtx;
5161 rtx new_rtx = orig;
5163 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
5165 if (GET_CODE (addr) == CONST)
5166 addr = XEXP (addr, 0);
5168 if (GET_CODE (addr) == PLUS)
5170 addend = XEXP (addr, 1);
5171 addr = XEXP (addr, 0);
5174 if ((GET_CODE (addr) == LABEL_REF
5175 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
5176 || (GET_CODE (addr) == UNSPEC &&
5177 (XINT (addr, 1) == UNSPEC_GOTENT
5178 || XINT (addr, 1) == UNSPEC_PLT31)))
5179 && GET_CODE (addend) == CONST_INT)
5181 /* This can be locally addressed. */
5183 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
5184 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
5185 gen_rtx_CONST (Pmode, addr) : addr);
5187 if (larl_operand (const_addr, VOIDmode)
5188 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
5189 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
5191 if (INTVAL (addend) & 1)
5193 /* LARL can't handle odd offsets, so emit a pair of LARL
5194 and LA. */
5195 rtx temp = reg? reg : gen_reg_rtx (Pmode);
5197 if (!DISP_IN_RANGE (INTVAL (addend)))
5199 HOST_WIDE_INT even = INTVAL (addend) - 1;
5200 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
5201 addr = gen_rtx_CONST (Pmode, addr);
5202 addend = const1_rtx;
5205 emit_move_insn (temp, addr);
5206 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
5208 if (reg != 0)
5210 s390_load_address (reg, new_rtx);
5211 new_rtx = reg;
5214 else
5216 /* If the offset is even, we can just use LARL. This
5217 will happen automatically. */
5220 else
5222 /* No larl - Access local symbols relative to the GOT. */
5224 rtx temp = reg? reg : gen_reg_rtx (Pmode);
5226 if (reload_in_progress || reload_completed)
5227 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5229 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5230 if (addend != const0_rtx)
5231 addr = gen_rtx_PLUS (Pmode, addr, addend);
5232 addr = gen_rtx_CONST (Pmode, addr);
5233 addr = force_const_mem (Pmode, addr);
5234 emit_move_insn (temp, addr);
5236 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5237 if (reg != 0)
5239 s390_load_address (reg, new_rtx);
5240 new_rtx = reg;
5244 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
5246 /* A non-local symbol reference without addend.
5248 The symbol ref is wrapped into an UNSPEC to make sure the
5249 proper operand modifier (@GOT or @GOTENT) will be emitted.
5250 This will tell the linker to put the symbol into the GOT.
5252 Additionally the code dereferencing the GOT slot is emitted here.
5254 An addend to the symref needs to be added afterwards.
5255 legitimize_pic_address calls itself recursively to handle
5256 that case. So no need to do it here. */
5258 if (reg == 0)
5259 reg = gen_reg_rtx (Pmode);
5261 if (TARGET_Z10)
5263 /* Use load relative if possible.
5264 lgrl <target>, sym@GOTENT */
5265 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5266 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5267 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
5269 emit_move_insn (reg, new_rtx);
5270 new_rtx = reg;
5272 else if (flag_pic == 1)
5274 /* Assume GOT offset is a valid displacement operand (< 4k
5275 or < 512k with z990). This is handled the same way in
5276 both 31- and 64-bit code (@GOT).
5277 lg <target>, sym@GOT(r12) */
5279 if (reload_in_progress || reload_completed)
5280 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5282 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5283 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5284 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5285 new_rtx = gen_const_mem (Pmode, new_rtx);
5286 emit_move_insn (reg, new_rtx);
5287 new_rtx = reg;
5289 else
5291 /* If the GOT offset might be >= 4k, we determine the position
5292 of the GOT entry via a PC-relative LARL (@GOTENT).
5293 larl temp, sym@GOTENT
5294 lg <target>, 0(temp) */
5296 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5298 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5299 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5301 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5302 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5303 emit_move_insn (temp, new_rtx);
5304 new_rtx = gen_const_mem (Pmode, temp);
5305 emit_move_insn (reg, new_rtx);
5307 new_rtx = reg;
5310 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5312 gcc_assert (XVECLEN (addr, 0) == 1);
5313 switch (XINT (addr, 1))
5315 /* These address symbols (or PLT slots) relative to the GOT
5316 (not GOT slots!). In general this will exceed the
5317 displacement range so these values belong in the literal
5318 pool. */
5319 case UNSPEC_GOTOFF:
5320 case UNSPEC_PLTOFF:
5321 new_rtx = force_const_mem (Pmode, orig);
5322 break;
5324 /* For -fPIC the GOT size might exceed the displacement
5325 range so make sure the value is in the literal pool. */
5326 case UNSPEC_GOT:
5327 if (flag_pic == 2)
5328 new_rtx = force_const_mem (Pmode, orig);
5329 break;
5331 /* For @GOTENT larl is used. This is handled like local
5332 symbol refs. */
5333 case UNSPEC_GOTENT:
5334 gcc_unreachable ();
5335 break;
5337 /* For @PLT larl is used. This is handled like local
5338 symbol refs. */
5339 case UNSPEC_PLT31:
5340 gcc_unreachable ();
5341 break;
5343 /* Everything else cannot happen. */
5344 default:
5345 gcc_unreachable ();
5348 else if (addend != const0_rtx)
5350 /* Otherwise, compute the sum. */
5352 rtx base = legitimize_pic_address (addr, reg);
5353 new_rtx = legitimize_pic_address (addend,
5354 base == reg ? NULL_RTX : reg);
5355 if (GET_CODE (new_rtx) == CONST_INT)
5356 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5357 else
5359 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5361 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5362 new_rtx = XEXP (new_rtx, 1);
5364 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5367 if (GET_CODE (new_rtx) == CONST)
5368 new_rtx = XEXP (new_rtx, 0);
5369 new_rtx = force_operand (new_rtx, 0);
5372 return new_rtx;
5375 /* Load the thread pointer into a register. */
5377 rtx
5378 s390_get_thread_pointer (void)
5380 rtx tp = gen_reg_rtx (Pmode);
5382 emit_insn (gen_get_thread_pointer (Pmode, tp));
5384 mark_reg_pointer (tp, BITS_PER_WORD);
5386 return tp;
5389 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5390 in s390_tls_symbol which always refers to __tls_get_offset.
5391 The returned offset is written to RESULT_REG and a USE rtx is
5392 generated for TLS_CALL. */
5394 static GTY(()) rtx s390_tls_symbol;
5396 static void
5397 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5399 rtx insn;
5401 if (!flag_pic)
5402 emit_insn (s390_load_got ());
5404 if (!s390_tls_symbol)
5406 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5407 SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION;
5410 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5411 gen_rtx_REG (Pmode, RETURN_REGNUM));
5413 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5414 RTL_CONST_CALL_P (insn) = 1;
5417 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5418 this (thread-local) address. REG may be used as temporary. */
5420 static rtx
5421 legitimize_tls_address (rtx addr, rtx reg)
5423 rtx new_rtx, tls_call, temp, base, r2;
5424 rtx_insn *insn;
5426 if (GET_CODE (addr) == SYMBOL_REF)
5427 switch (tls_symbolic_operand (addr))
5429 case TLS_MODEL_GLOBAL_DYNAMIC:
5430 start_sequence ();
5431 r2 = gen_rtx_REG (Pmode, 2);
5432 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5433 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5434 new_rtx = force_const_mem (Pmode, new_rtx);
5435 emit_move_insn (r2, new_rtx);
5436 s390_emit_tls_call_insn (r2, tls_call);
5437 insn = get_insns ();
5438 end_sequence ();
5440 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5441 temp = gen_reg_rtx (Pmode);
5442 emit_libcall_block (insn, temp, r2, new_rtx);
5444 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5445 if (reg != 0)
5447 s390_load_address (reg, new_rtx);
5448 new_rtx = reg;
5450 break;
5452 case TLS_MODEL_LOCAL_DYNAMIC:
5453 start_sequence ();
5454 r2 = gen_rtx_REG (Pmode, 2);
5455 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5456 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5457 new_rtx = force_const_mem (Pmode, new_rtx);
5458 emit_move_insn (r2, new_rtx);
5459 s390_emit_tls_call_insn (r2, tls_call);
5460 insn = get_insns ();
5461 end_sequence ();
5463 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5464 temp = gen_reg_rtx (Pmode);
5465 emit_libcall_block (insn, temp, r2, new_rtx);
5467 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5468 base = gen_reg_rtx (Pmode);
5469 s390_load_address (base, new_rtx);
5471 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5472 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5473 new_rtx = force_const_mem (Pmode, new_rtx);
5474 temp = gen_reg_rtx (Pmode);
5475 emit_move_insn (temp, new_rtx);
5477 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5478 if (reg != 0)
5480 s390_load_address (reg, new_rtx);
5481 new_rtx = reg;
5483 break;
5485 case TLS_MODEL_INITIAL_EXEC:
5486 if (flag_pic == 1)
5488 /* Assume GOT offset < 4k. This is handled the same way
5489 in both 31- and 64-bit code. */
5491 if (reload_in_progress || reload_completed)
5492 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5494 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5495 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5496 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5497 new_rtx = gen_const_mem (Pmode, new_rtx);
5498 temp = gen_reg_rtx (Pmode);
5499 emit_move_insn (temp, new_rtx);
5501 else
5503 /* If the GOT offset might be >= 4k, we determine the position
5504 of the GOT entry via a PC-relative LARL. */
5506 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5507 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5508 temp = gen_reg_rtx (Pmode);
5509 emit_move_insn (temp, new_rtx);
5511 new_rtx = gen_const_mem (Pmode, temp);
5512 temp = gen_reg_rtx (Pmode);
5513 emit_move_insn (temp, new_rtx);
5516 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5517 if (reg != 0)
5519 s390_load_address (reg, new_rtx);
5520 new_rtx = reg;
5522 break;
5524 case TLS_MODEL_LOCAL_EXEC:
5525 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5526 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5527 new_rtx = force_const_mem (Pmode, new_rtx);
5528 temp = gen_reg_rtx (Pmode);
5529 emit_move_insn (temp, new_rtx);
5531 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5532 if (reg != 0)
5534 s390_load_address (reg, new_rtx);
5535 new_rtx = reg;
5537 break;
5539 default:
5540 gcc_unreachable ();
5543 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5545 switch (XINT (XEXP (addr, 0), 1))
5547 case UNSPEC_NTPOFF:
5548 case UNSPEC_INDNTPOFF:
5549 new_rtx = addr;
5550 break;
5552 default:
5553 gcc_unreachable ();
5557 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5558 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5560 new_rtx = XEXP (XEXP (addr, 0), 0);
5561 if (GET_CODE (new_rtx) != SYMBOL_REF)
5562 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5564 new_rtx = legitimize_tls_address (new_rtx, reg);
5565 new_rtx = plus_constant (Pmode, new_rtx,
5566 INTVAL (XEXP (XEXP (addr, 0), 1)));
5567 new_rtx = force_operand (new_rtx, 0);
5570 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5571 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
5573 new_rtx = XEXP (XEXP (addr, 0), 0);
5574 if (GET_CODE (new_rtx) != SYMBOL_REF)
5575 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5577 new_rtx = legitimize_tls_address (new_rtx, reg);
5578 new_rtx = gen_rtx_NEG (Pmode, new_rtx);
5579 new_rtx = force_operand (new_rtx, 0);
5582 else
5583 gcc_unreachable (); /* for now ... */
5585 return new_rtx;
5588 /* Emit insns making the address in operands[1] valid for a standard
5589 move to operands[0]. operands[1] is replaced by an address which
5590 should be used instead of the former RTX to emit the move
5591 pattern. */
5593 void
5594 emit_symbolic_move (rtx *operands)
5596 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5598 if (GET_CODE (operands[0]) == MEM)
5599 operands[1] = force_reg (Pmode, operands[1]);
5600 else if (TLS_SYMBOLIC_CONST (operands[1]))
5601 operands[1] = legitimize_tls_address (operands[1], temp);
5602 else if (flag_pic)
5603 operands[1] = legitimize_pic_address (operands[1], temp);
5606 /* Try machine-dependent ways of modifying an illegitimate address X
5607 to be legitimate. If we find one, return the new, valid address.
5609 OLDX is the address as it was before break_out_memory_refs was called.
5610 In some cases it is useful to look at this to decide what needs to be done.
5612 MODE is the mode of the operand pointed to by X.
5614 When -fpic is used, special handling is needed for symbolic references.
5615 See comments by legitimize_pic_address for details. */
5617 static rtx
5618 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5619 machine_mode mode ATTRIBUTE_UNUSED)
5621 rtx constant_term = const0_rtx;
5623 if (TLS_SYMBOLIC_CONST (x))
5625 x = legitimize_tls_address (x, 0);
5627 if (s390_legitimate_address_p (mode, x, FALSE))
5628 return x;
5630 else if (GET_CODE (x) == PLUS
5631 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5632 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5634 return x;
5636 else if (flag_pic)
5638 if (SYMBOLIC_CONST (x)
5639 || (GET_CODE (x) == PLUS
5640 && (SYMBOLIC_CONST (XEXP (x, 0))
5641 || SYMBOLIC_CONST (XEXP (x, 1)))))
5642 x = legitimize_pic_address (x, 0);
5644 if (s390_legitimate_address_p (mode, x, FALSE))
5645 return x;
5648 x = eliminate_constant_term (x, &constant_term);
5650 /* Optimize loading of large displacements by splitting them
5651 into the multiple of 4K and the rest; this allows the
5652 former to be CSE'd if possible.
5654 Don't do this if the displacement is added to a register
5655 pointing into the stack frame, as the offsets will
5656 change later anyway. */
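/* For example, a displacement of 0x12345 is split into
     lower = 0x12345 & 0xfff = 0x345
     upper = 0x12345 ^ 0x345 = 0x12000
   so the address becomes (x + 0x12000) + 0x345, and the 4K-aligned part
   can be loaded into a register once and reused by other accesses
   (the displacement value is illustrative).  */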
5658 if (GET_CODE (constant_term) == CONST_INT
5659 && !TARGET_LONG_DISPLACEMENT
5660 && !DISP_IN_RANGE (INTVAL (constant_term))
5661 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5663 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5664 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5666 rtx temp = gen_reg_rtx (Pmode);
5667 rtx val = force_operand (GEN_INT (upper), temp);
5668 if (val != temp)
5669 emit_move_insn (temp, val);
5671 x = gen_rtx_PLUS (Pmode, x, temp);
5672 constant_term = GEN_INT (lower);
5675 if (GET_CODE (x) == PLUS)
5677 if (GET_CODE (XEXP (x, 0)) == REG)
5679 rtx temp = gen_reg_rtx (Pmode);
5680 rtx val = force_operand (XEXP (x, 1), temp);
5681 if (val != temp)
5682 emit_move_insn (temp, val);
5684 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5687 else if (GET_CODE (XEXP (x, 1)) == REG)
5689 rtx temp = gen_reg_rtx (Pmode);
5690 rtx val = force_operand (XEXP (x, 0), temp);
5691 if (val != temp)
5692 emit_move_insn (temp, val);
5694 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5698 if (constant_term != const0_rtx)
5699 x = gen_rtx_PLUS (Pmode, x, constant_term);
5701 return x;
5704 /* Try a machine-dependent way of reloading an illegitimate address AD
5705 operand. If we find one, push the reload and return the new address.
5707 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5708 and TYPE is the reload type of the current reload. */
5710 rtx
5711 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5712 int opnum, int type)
5714 if (!optimize || TARGET_LONG_DISPLACEMENT)
5715 return NULL_RTX;
5717 if (GET_CODE (ad) == PLUS)
5719 rtx tem = simplify_binary_operation (PLUS, Pmode,
5720 XEXP (ad, 0), XEXP (ad, 1));
5721 if (tem)
5722 ad = tem;
5725 if (GET_CODE (ad) == PLUS
5726 && GET_CODE (XEXP (ad, 0)) == REG
5727 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5728 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5730 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5731 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5732 rtx cst, tem, new_rtx;
5734 cst = GEN_INT (upper);
5735 if (!legitimate_reload_constant_p (cst))
5736 cst = force_const_mem (Pmode, cst);
5738 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5739 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5741 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5742 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5743 opnum, (enum reload_type) type);
5744 return new_rtx;
5747 return NULL_RTX;
5750 /* Emit code to move LEN bytes from SRC to DST. */
5752 bool
5753 s390_expand_cpymem (rtx dst, rtx src, rtx len, rtx min_len_rtx, rtx max_len_rtx)
5755 /* Exit early in case nothing has to be done. */
5756 if (CONST_INT_P (len) && UINTVAL (len) == 0)
5757 return true;
5759 unsigned HOST_WIDE_INT min_len = UINTVAL (min_len_rtx);
5760 unsigned HOST_WIDE_INT max_len
5761 = max_len_rtx ? UINTVAL (max_len_rtx) : HOST_WIDE_INT_M1U;
5763 /* Expand memcpy for constant length operands without a loop if it
5764 is shorter that way.
5766 With a constant length argument a
5767 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
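/* For example, a constant copy of 600 bytes is expanded into three mvc
   instructions covering 256 + 256 + 88 bytes (18 bytes of code), which is
   shorter than the memcpy loop (the length 600 is illustrative).  */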
5768 if (CONST_INT_P (len)
5769 && UINTVAL (len) <= 6 * 256
5770 && (!TARGET_MVCLE || UINTVAL (len) <= 256))
5772 HOST_WIDE_INT o, l;
5774 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5776 rtx newdst = adjust_address (dst, BLKmode, o);
5777 rtx newsrc = adjust_address (src, BLKmode, o);
5778 emit_insn (gen_cpymem_short (newdst, newsrc,
5779 GEN_INT (l > 256 ? 255 : l - 1)));
5782 return true;
5785 else if (TARGET_MVCLE
5786 && (s390_tune < PROCESSOR_2097_Z10
5787 || (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16))))
5789 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5790 return true;
5793 /* Non-constant length and no loop required. */
5794 else if (!CONST_INT_P (len) && max_len <= 256)
5796 rtx_code_label *end_label;
5798 if (min_len == 0)
5800 end_label = gen_label_rtx ();
5801 emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX,
5802 GET_MODE (len), 1, end_label,
5803 profile_probability::very_unlikely ());
5806 rtx lenm1 = expand_binop (GET_MODE (len), add_optab, len, constm1_rtx,
5807 NULL_RTX, 1, OPTAB_DIRECT);
5809 /* Prefer a vectorized implementation over one which makes use of an
5810 execute instruction since it is faster (although it increases register
5811 pressure). */
5812 if (max_len <= 16 && TARGET_VX)
5814 rtx tmp = gen_reg_rtx (V16QImode);
5815 lenm1 = convert_to_mode (SImode, lenm1, 1);
5816 emit_insn (gen_vllv16qi (tmp, lenm1, src));
5817 emit_insn (gen_vstlv16qi (tmp, lenm1, dst));
5819 else if (TARGET_Z15)
5820 emit_insn (gen_mvcrl (dst, src, convert_to_mode (SImode, lenm1, 1)));
5821 else
5822 emit_insn (
5823 gen_cpymem_short (dst, src, convert_to_mode (Pmode, lenm1, 1)));
5825 if (min_len == 0)
5826 emit_label (end_label);
5828 return true;
5831 else if (s390_tune < PROCESSOR_2097_Z10 || (CONST_INT_P (len) && UINTVAL (len) <= (1 << 16)))
5833 rtx dst_addr, src_addr, count, blocks, temp;
5834 rtx_code_label *loop_start_label = gen_label_rtx ();
5835 rtx_code_label *loop_end_label = gen_label_rtx ();
5836 rtx_code_label *end_label = gen_label_rtx ();
5837 machine_mode mode;
5839 mode = GET_MODE (len);
5840 if (mode == VOIDmode)
5841 mode = Pmode;
5843 dst_addr = gen_reg_rtx (Pmode);
5844 src_addr = gen_reg_rtx (Pmode);
5845 count = gen_reg_rtx (mode);
5846 blocks = gen_reg_rtx (mode);
5848 convert_move (count, len, 1);
5849 if (min_len == 0)
5850 emit_cmp_and_jump_insns (count, const0_rtx, EQ, NULL_RTX, mode, 1,
5851 end_label);
5853 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5854 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5855 dst = change_address (dst, VOIDmode, dst_addr);
5856 src = change_address (src, VOIDmode, src_addr);
5858 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5859 OPTAB_DIRECT);
5860 if (temp != count)
5861 emit_move_insn (count, temp);
5863 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5864 OPTAB_DIRECT);
5865 if (temp != blocks)
5866 emit_move_insn (blocks, temp);
5868 emit_cmp_and_jump_insns (blocks, const0_rtx,
5869 EQ, NULL_RTX, mode, 1, loop_end_label);
5871 emit_label (loop_start_label);
5873 if (TARGET_Z10
5874 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5876 rtx prefetch;
5878 /* Issue a read prefetch for the +3 cache line. */
5879 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5880 const0_rtx, const0_rtx);
5881 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5882 emit_insn (prefetch);
5884 /* Issue a write prefetch for the +3 cache line. */
5885 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5886 const1_rtx, const0_rtx);
5887 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5888 emit_insn (prefetch);
5891 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5892 s390_load_address (dst_addr,
5893 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5894 s390_load_address (src_addr,
5895 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5897 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5898 OPTAB_DIRECT);
5899 if (temp != blocks)
5900 emit_move_insn (blocks, temp);
5902 emit_cmp_and_jump_insns (blocks, const0_rtx,
5903 EQ, NULL_RTX, mode, 1, loop_end_label);
5905 emit_jump (loop_start_label);
5906 emit_label (loop_end_label);
5908 emit_insn (gen_cpymem_short (dst, src,
5909 convert_to_mode (Pmode, count, 1)));
5910 emit_label (end_label);
5912 return true;
5915 return false;
5918 bool
5919 s390_expand_movmem (rtx dst, rtx src, rtx len, rtx min_len_rtx, rtx max_len_rtx)
5921 /* Exit early in case nothing has to be done. */
5922 if (CONST_INT_P (len) && UINTVAL (len) == 0)
5923 return true;
5924 /* Exit early in case length is not upper bounded. */
5925 else if (max_len_rtx == NULL)
5926 return false;
5928 unsigned HOST_WIDE_INT min_len = UINTVAL (min_len_rtx);
5929 unsigned HOST_WIDE_INT max_len = UINTVAL (max_len_rtx);
5931 /* At most 16 bytes. */
5932 if (max_len <= 16 && TARGET_VX)
5934 rtx_code_label *end_label;
5936 if (min_len == 0)
5938 end_label = gen_label_rtx ();
5939 emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX,
5940 GET_MODE (len), 1, end_label,
5941 profile_probability::very_unlikely ());
5944 rtx lenm1;
5945 if (CONST_INT_P (len))
5947 lenm1 = gen_reg_rtx (SImode);
5948 emit_move_insn (lenm1, GEN_INT (UINTVAL (len) - 1));
5950 else
5951 lenm1
5952 = expand_binop (SImode, add_optab, convert_to_mode (SImode, len, 1),
5953 constm1_rtx, NULL_RTX, 1, OPTAB_DIRECT);
5955 rtx tmp = gen_reg_rtx (V16QImode);
5956 emit_insn (gen_vllv16qi (tmp, lenm1, src));
5957 emit_insn (gen_vstlv16qi (tmp, lenm1, dst));
5959 if (min_len == 0)
5960 emit_label (end_label);
5962 return true;
5965 /* At most 256 bytes. */
5966 else if (max_len <= 256 && TARGET_Z15)
5968 rtx_code_label *end_label = gen_label_rtx ();
5970 if (min_len == 0)
5971 emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX, GET_MODE (len),
5972 1, end_label,
5973 profile_probability::very_unlikely ());
5975 rtx dst_addr = gen_reg_rtx (Pmode);
5976 rtx src_addr = gen_reg_rtx (Pmode);
5977 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5978 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5980 rtx lenm1 = CONST_INT_P (len)
5981 ? GEN_INT (UINTVAL (len) - 1)
5982 : expand_binop (GET_MODE (len), add_optab, len, constm1_rtx,
5983 NULL_RTX, 1, OPTAB_DIRECT);
5985 rtx_code_label *right_to_left_label = gen_label_rtx ();
5986 emit_cmp_and_jump_insns (src_addr, dst_addr, LT, NULL_RTX, GET_MODE (len),
5987 1, right_to_left_label);
5989 // MVC
5990 emit_insn (
5991 gen_cpymem_short (dst, src, convert_to_mode (Pmode, lenm1, 1)));
5992 emit_jump (end_label);
5994 // MVCRL
5995 emit_label (right_to_left_label);
5996 emit_insn (gen_mvcrl (dst, src, convert_to_mode (SImode, lenm1, 1)));
5998 emit_label (end_label);
6000 return true;
6003 return false;
6006 /* Emit code to set LEN bytes at DST to VAL.
6007 Make use of clrmem if VAL is zero. */
6009 void
6010 s390_expand_setmem (rtx dst, rtx len, rtx val, rtx min_len_rtx, rtx max_len_rtx)
6012 /* Exit early in case nothing has to be done. */
6013 if (CONST_INT_P (len) && UINTVAL (len) == 0)
6014 return;
6016 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
6018 unsigned HOST_WIDE_INT min_len = UINTVAL (min_len_rtx);
6019 unsigned HOST_WIDE_INT max_len
6020 = max_len_rtx ? UINTVAL (max_len_rtx) : HOST_WIDE_INT_M1U;
6022 /* Vectorize memset with a constant length
6023 - if 0 < LEN < 16, then emit a vstl based solution;
6024 - if 16 <= LEN <= 64, then emit a vst based solution
6025 where the last two vector stores may overlap in case LEN%16!=0. The
6026 price paid for the overlap is negligible compared to the extra GPR which
6027 would be required for vstl. */
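/* For example, LEN == 40 emits two full 16 byte vector stores at offsets
   0 and 16 plus a third store at offset 24 which overlaps the second one
   by 8 bytes.  */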
6028 if (CONST_INT_P (len) && UINTVAL (len) <= 64 && val != const0_rtx
6029 && TARGET_VX)
6031 rtx val_vec = gen_reg_rtx (V16QImode);
6032 emit_move_insn (val_vec, gen_rtx_VEC_DUPLICATE (V16QImode, val));
6034 if (UINTVAL (len) < 16)
6036 rtx len_reg = gen_reg_rtx (SImode);
6037 emit_move_insn (len_reg, GEN_INT (UINTVAL (len) - 1));
6038 emit_insn (gen_vstlv16qi (val_vec, len_reg, dst));
6040 else
6042 unsigned HOST_WIDE_INT l = UINTVAL (len) / 16;
6043 unsigned HOST_WIDE_INT r = UINTVAL (len) % 16;
6044 unsigned HOST_WIDE_INT o = 0;
6045 for (unsigned HOST_WIDE_INT i = 0; i < l; ++i)
6047 rtx newdst = adjust_address (dst, V16QImode, o);
6048 emit_move_insn (newdst, val_vec);
6049 o += 16;
6051 if (r != 0)
6053 rtx newdst = adjust_address (dst, V16QImode, (o - 16) + r);
6054 emit_move_insn (newdst, val_vec);
6059 /* Expand setmem/clrmem for a constant length operand without a
6060 loop if it will be shorter that way.
6061 clrmem loop (with PFD) is 30 bytes -> 5 * xc
6062 clrmem loop (without PFD) is 24 bytes -> 4 * xc
6063 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
6064 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
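/* E.g. clearing 700 bytes emits three XCs covering 256, 256, and 188
   bytes, i.e. 18 bytes of code, which is still shorter than the loop
   variant.  */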
6065 else if (GET_CODE (len) == CONST_INT
6066 && ((val == const0_rtx
6067 && (INTVAL (len) <= 256 * 4
6068 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
6069 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
6070 && (!TARGET_MVCLE || INTVAL (len) <= 256))
6072 HOST_WIDE_INT o, l;
6074 if (val == const0_rtx)
6075 /* clrmem: emit 256 byte blockwise XCs. */
6076 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
6078 rtx newdst = adjust_address (dst, BLKmode, o);
6079 emit_insn (gen_clrmem_short (newdst,
6080 GEN_INT (l > 256 ? 255 : l - 1)));
6082 else
6083 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
6084 setting first byte to val and using a 256 byte mvc with one
6085 byte overlap to propagate the byte. */
6086 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
6088 rtx newdst = adjust_address (dst, BLKmode, o);
6089 emit_move_insn (adjust_address (dst, QImode, o), val);
6090 if (l > 1)
6092 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
6093 emit_insn (gen_cpymem_short (newdstp1, newdst,
6094 GEN_INT (l > 257 ? 255 : l - 2)));
6099 else if (TARGET_MVCLE)
6101 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
6102 if (TARGET_64BIT)
6103 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
6104 val));
6105 else
6106 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
6107 val));
6110 /* Non-constant length and no loop required. */
6111 else if (!CONST_INT_P (len) && max_len <= 256)
6113 rtx_code_label *end_label;
6115 if (min_len == 0)
6117 end_label = gen_label_rtx ();
6118 emit_cmp_and_jump_insns (len, const0_rtx, EQ, NULL_RTX,
6119 GET_MODE (len), 1, end_label,
6120 profile_probability::very_unlikely ());
6123 rtx lenm1 = expand_binop (GET_MODE (len), add_optab, len, constm1_rtx,
6124 NULL_RTX, 1, OPTAB_DIRECT);
6126 /* Prefer a vectorized implementation over one which makes use of an
6127 execute instruction since it is faster (although it increases register
6128 pressure). */
6129 if (max_len <= 16 && TARGET_VX)
6131 rtx val_vec = gen_reg_rtx (V16QImode);
6132 if (val == const0_rtx)
6133 emit_move_insn (val_vec, CONST0_RTX (V16QImode));
6134 else
6135 emit_move_insn (val_vec, gen_rtx_VEC_DUPLICATE (V16QImode, val));
6137 lenm1 = convert_to_mode (SImode, lenm1, 1);
6138 emit_insn (gen_vstlv16qi (val_vec, lenm1, dst));
6140 else
6142 if (val == const0_rtx)
6143 emit_insn (
6144 gen_clrmem_short (dst, convert_to_mode (Pmode, lenm1, 1)));
6145 else
6147 emit_move_insn (adjust_address (dst, QImode, 0), val);
6149 rtx_code_label *onebyte_end_label;
6150 if (min_len <= 1)
6152 onebyte_end_label = gen_label_rtx ();
6153 emit_cmp_and_jump_insns (
6154 len, const1_rtx, EQ, NULL_RTX, GET_MODE (len), 1,
6155 onebyte_end_label, profile_probability::very_unlikely ());
6158 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
6159 rtx lenm2
6160 = expand_binop (GET_MODE (len), add_optab, len, GEN_INT (-2),
6161 NULL_RTX, 1, OPTAB_DIRECT);
6162 lenm2 = convert_to_mode (Pmode, lenm2, 1);
6163 emit_insn (gen_cpymem_short (dstp1, dst, lenm2));
6165 if (min_len <= 1)
6166 emit_label (onebyte_end_label);
6170 if (min_len == 0)
6171 emit_label (end_label);
6174 else
6176 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
6177 rtx_code_label *loop_start_label = gen_label_rtx ();
6178 rtx_code_label *onebyte_end_label = gen_label_rtx ();
6179 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
6180 rtx_code_label *restbyte_end_label = gen_label_rtx ();
6181 machine_mode mode;
6183 mode = GET_MODE (len);
6184 if (mode == VOIDmode)
6185 mode = Pmode;
6187 dst_addr = gen_reg_rtx (Pmode);
6188 count = gen_reg_rtx (mode);
6189 blocks = gen_reg_rtx (mode);
6191 convert_move (count, len, 1);
6192 if (min_len == 0)
6193 emit_cmp_and_jump_insns (count, const0_rtx, EQ, NULL_RTX, mode, 1,
6194 zerobyte_end_label,
6195 profile_probability::very_unlikely ());
6197 /* We need to make a copy of the target address since memset is
6198 supposed to return it unmodified. We have to make it here
6199 already since the new reg is used at onebyte_end_label. */
6200 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
6201 dst = change_address (dst, VOIDmode, dst_addr);
6203 if (val != const0_rtx)
6205 /* When using the overlapping mvc the original target
6206 address is only accessed as a single byte entity (even by
6207 the mvc reading this value). */
6208 set_mem_size (dst, 1);
6209 dstp1 = adjust_address (dst, VOIDmode, 1);
6210 if (min_len <= 1)
6211 emit_cmp_and_jump_insns (count, const1_rtx, EQ, NULL_RTX, mode, 1,
6212 onebyte_end_label,
6213 profile_probability::very_unlikely ());
6216 /* There is one unconditional (mvi+mvc)/xc after the loop
6217 dealing with the rest of the bytes; subtracting two (mvi+mvc)
6218 or one (xc) here leaves this number of bytes to be handled by
6219 it. */
6220 temp = expand_binop (mode, add_optab, count,
6221 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
6222 count, 1, OPTAB_DIRECT);
6223 if (temp != count)
6224 emit_move_insn (count, temp);
6226 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
6227 OPTAB_DIRECT);
6228 if (temp != blocks)
6229 emit_move_insn (blocks, temp);
6231 emit_cmp_and_jump_insns (blocks, const0_rtx,
6232 EQ, NULL_RTX, mode, 1, restbyte_end_label);
6234 emit_jump (loop_start_label);
6236 if (val != const0_rtx && min_len <= 1)
6238 /* The 1 byte != 0 special case. Not handled efficiently
6239 since we require two jumps for that. However, this
6240 should be very rare. */
6241 emit_label (onebyte_end_label);
6242 emit_move_insn (adjust_address (dst, QImode, 0), val);
6243 emit_jump (zerobyte_end_label);
6246 emit_label (loop_start_label);
6248 if (TARGET_SETMEM_PFD (val, len))
6250 /* Issue a write prefetch. */
6251 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
6252 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
6253 const1_rtx, const0_rtx);
6254 emit_insn (prefetch);
6255 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
6258 if (val == const0_rtx)
6259 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
6260 else
6262 /* Set the first byte in the block to the value and use an
6263 overlapping mvc for the block. */
6264 emit_move_insn (adjust_address (dst, QImode, 0), val);
6265 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
6267 s390_load_address (dst_addr,
6268 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
6270 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
6271 OPTAB_DIRECT);
6272 if (temp != blocks)
6273 emit_move_insn (blocks, temp);
6275 emit_cmp_and_jump_insns (blocks, const0_rtx,
6276 NE, NULL_RTX, mode, 1, loop_start_label);
6278 emit_label (restbyte_end_label);
6280 if (val == const0_rtx)
6281 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
6282 else
6284 /* Set the first byte in the block to the value and use an
6285 overlapping mvc for the block. */
6286 emit_move_insn (adjust_address (dst, QImode, 0), val);
6287 /* execute only uses the lowest 8 bits of count, which is
6288 exactly what we need here. */
6289 emit_insn (gen_cpymem_short (dstp1, dst,
6290 convert_to_mode (Pmode, count, 1)));
6293 emit_label (zerobyte_end_label);
6297 /* Emit code to compare LEN bytes at OP0 with those at OP1,
6298 and return the result in TARGET. */
6300 bool
6301 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
6303 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
6304 rtx tmp;
6306 /* When tuning for z10 or higher we rely on the Glibc functions to
6307 do the right thing. Only for constant lengths below 64k do we
6308 generate inline code. */
6309 if (s390_tune >= PROCESSOR_2097_Z10
6310 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
6311 return false;
6313 /* As the result of CMPINT is inverted compared to what we need,
6314 we have to swap the operands. */
6315 tmp = op0; op0 = op1; op1 = tmp;
6317 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
6319 if (INTVAL (len) > 0)
6321 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
6322 emit_insn (gen_cmpint (target, ccreg));
6324 else
6325 emit_move_insn (target, const0_rtx);
6327 else if (TARGET_MVCLE)
6329 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
6330 emit_insn (gen_cmpint (target, ccreg));
6332 else
6334 rtx addr0, addr1, count, blocks, temp;
6335 rtx_code_label *loop_start_label = gen_label_rtx ();
6336 rtx_code_label *loop_end_label = gen_label_rtx ();
6337 rtx_code_label *end_label = gen_label_rtx ();
6338 machine_mode mode;
6340 mode = GET_MODE (len);
6341 if (mode == VOIDmode)
6342 mode = Pmode;
6344 addr0 = gen_reg_rtx (Pmode);
6345 addr1 = gen_reg_rtx (Pmode);
6346 count = gen_reg_rtx (mode);
6347 blocks = gen_reg_rtx (mode);
6349 convert_move (count, len, 1);
6350 emit_cmp_and_jump_insns (count, const0_rtx,
6351 EQ, NULL_RTX, mode, 1, end_label);
6353 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
6354 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
6355 op0 = change_address (op0, VOIDmode, addr0);
6356 op1 = change_address (op1, VOIDmode, addr1);
6358 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
6359 OPTAB_DIRECT);
6360 if (temp != count)
6361 emit_move_insn (count, temp);
6363 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
6364 OPTAB_DIRECT);
6365 if (temp != blocks)
6366 emit_move_insn (blocks, temp);
6368 emit_cmp_and_jump_insns (blocks, const0_rtx,
6369 EQ, NULL_RTX, mode, 1, loop_end_label);
6371 emit_label (loop_start_label);
6373 if (TARGET_Z10
6374 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
6376 rtx prefetch;
6378 /* Issue a read prefetch for the +2 cache line of operand 1. */
6379 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
6380 const0_rtx, const0_rtx);
6381 emit_insn (prefetch);
6382 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
6384 /* Issue a read prefetch for the +2 cache line of operand 2. */
6385 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
6386 const0_rtx, const0_rtx);
6387 emit_insn (prefetch);
6388 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
6391 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
6392 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
6393 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
6394 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
6395 temp = gen_rtx_SET (pc_rtx, temp);
6396 emit_jump_insn (temp);
6398 s390_load_address (addr0,
6399 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
6400 s390_load_address (addr1,
6401 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
6403 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
6404 OPTAB_DIRECT);
6405 if (temp != blocks)
6406 emit_move_insn (blocks, temp);
6408 emit_cmp_and_jump_insns (blocks, const0_rtx,
6409 EQ, NULL_RTX, mode, 1, loop_end_label);
6411 emit_jump (loop_start_label);
6412 emit_label (loop_end_label);
6414 emit_insn (gen_cmpmem_short (op0, op1,
6415 convert_to_mode (Pmode, count, 1)));
6416 emit_label (end_label);
6418 emit_insn (gen_cmpint (target, ccreg));
6420 return true;
6423 /* Emit a conditional jump to LABEL for condition code mask MASK using
6424 comparison operator COMPARISON. Return the emitted jump insn. */
6426 static rtx_insn *
6427 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
6429 rtx temp;
6431 gcc_assert (comparison == EQ || comparison == NE);
6432 gcc_assert (mask > 0 && mask < 15);
6434 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
6435 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
6436 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
6437 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
6438 temp = gen_rtx_SET (pc_rtx, temp);
6439 return emit_jump_insn (temp);
6442 /* Emit the instructions to implement strlen of STRING and store the
6443 result in TARGET. The string has the known ALIGNMENT. This
6444 version uses vector instructions and is therefore not appropriate
6445 for targets prior to z13. */
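/* Roughly, the generated code performs a possibly partial first load via
   vll (up to the next 16 byte boundary), then loops over aligned 16 byte
   chunks using vfene with zero search to locate the terminating zero
   byte; the result is the chunk offset plus the byte position reported
   by vfene.  */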
6447 void
6448 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
6450 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
6451 rtx str_reg = gen_reg_rtx (V16QImode);
6452 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
6453 rtx str_idx_reg = gen_reg_rtx (Pmode);
6454 rtx result_reg = gen_reg_rtx (V16QImode);
6455 rtx is_aligned_label = gen_label_rtx ();
6456 rtx into_loop_label = NULL_RTX;
6457 rtx loop_start_label = gen_label_rtx ();
6458 rtx temp;
6459 rtx len = gen_reg_rtx (QImode);
6460 rtx cond;
6461 rtx mem;
6463 s390_load_address (str_addr_base_reg, XEXP (string, 0));
6464 emit_move_insn (str_idx_reg, const0_rtx);
6466 if (INTVAL (alignment) < 16)
6468 /* Check whether the address happens to be aligned properly and, if so,
6469 jump directly to the aligned loop. */
6470 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
6471 str_addr_base_reg, GEN_INT (15)),
6472 const0_rtx, EQ, NULL_RTX,
6473 Pmode, 1, is_aligned_label);
6475 temp = gen_reg_rtx (Pmode);
6476 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
6477 GEN_INT (15), temp, 1, OPTAB_DIRECT);
6478 gcc_assert (REG_P (temp));
6479 highest_index_to_load_reg =
6480 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
6481 highest_index_to_load_reg, 1, OPTAB_DIRECT);
6482 gcc_assert (REG_P (highest_index_to_load_reg));
6483 emit_insn (gen_vllv16qi (str_reg,
6484 convert_to_mode (SImode, highest_index_to_load_reg, 1),
6485 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
6487 into_loop_label = gen_label_rtx ();
6488 s390_emit_jump (into_loop_label, NULL_RTX);
6489 emit_barrier ();
6492 emit_label (is_aligned_label);
6493 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
6495 /* Reaching this point we are only performing 16 byte aligned
6496 loads. */
6497 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
6499 emit_label (loop_start_label);
6500 LABEL_NUSES (loop_start_label) = 1;
6502 /* Load 16 bytes of the string into VR. */
6503 mem = gen_rtx_MEM (V16QImode,
6504 gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
6505 set_mem_align (mem, 128);
6506 emit_move_insn (str_reg, mem);
6507 if (into_loop_label != NULL_RTX)
6509 emit_label (into_loop_label);
6510 LABEL_NUSES (into_loop_label) = 1;
6513 /* Increment string index by 16 bytes. */
6514 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
6515 str_idx_reg, 1, OPTAB_DIRECT);
6517 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
6518 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6520 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
6521 REG_BR_PROB,
6522 profile_probability::very_likely ().to_reg_br_prob_note ());
6523 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
6525 /* If the string pointer wasn't aligned we have loaded less than 16
6526 bytes and the remaining bytes got filled with zeros (by vll).
6527 Now we have to check whether the resulting index lies within the
6528 bytes actually part of the string. */
6530 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
6531 highest_index_to_load_reg);
6532 s390_load_address (highest_index_to_load_reg,
6533 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6534 const1_rtx));
6535 if (TARGET_64BIT)
6536 emit_insn (gen_movdicc (str_idx_reg, cond,
6537 highest_index_to_load_reg, str_idx_reg));
6538 else
6539 emit_insn (gen_movsicc (str_idx_reg, cond,
6540 highest_index_to_load_reg, str_idx_reg));
6542 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6543 profile_probability::very_unlikely ());
6545 expand_binop (Pmode, add_optab, str_idx_reg,
6546 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6547 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6548 here. */
6549 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6550 convert_to_mode (Pmode, len, 1),
6551 target, 1, OPTAB_DIRECT);
6552 if (temp != target)
6553 emit_move_insn (target, temp);
6556 void
6557 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6559 rtx temp = gen_reg_rtx (Pmode);
6560 rtx src_addr = XEXP (src, 0);
6561 rtx dst_addr = XEXP (dst, 0);
6562 rtx src_addr_reg = gen_reg_rtx (Pmode);
6563 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6564 rtx offset = gen_reg_rtx (Pmode);
6565 rtx vsrc = gen_reg_rtx (V16QImode);
6566 rtx vpos = gen_reg_rtx (V16QImode);
6567 rtx loadlen = gen_reg_rtx (SImode);
6568 rtx gpos_qi = gen_reg_rtx(QImode);
6569 rtx gpos = gen_reg_rtx (SImode);
6570 rtx done_label = gen_label_rtx ();
6571 rtx loop_label = gen_label_rtx ();
6572 rtx exit_label = gen_label_rtx ();
6573 rtx full_label = gen_label_rtx ();
6575 /* Perform a quick check whether the string ends within the first (up
6576 to) 16 bytes and exit early if so. */
6578 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6579 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6580 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6581 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6582 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6583 /* gpos is the byte index if a zero was found and 16 otherwise.
6584 So if it is lower than the loaded bytes we have a hit. */
6585 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6586 full_label);
6587 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6589 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6590 1, OPTAB_DIRECT);
6591 emit_jump (exit_label);
6592 emit_barrier ();
6594 emit_label (full_label);
6595 LABEL_NUSES (full_label) = 1;
6597 /* Calculate `offset' so that src + offset points to the last byte
6598 before 16 byte alignment. */
6600 /* temp = src_addr & 0xf */
6601 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6602 1, OPTAB_DIRECT);
6604 /* offset = 0xf - temp */
6605 emit_move_insn (offset, GEN_INT (15));
6606 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6607 1, OPTAB_DIRECT);
6609 /* Store `offset' bytes in the destination string. The quick check
6610 has loaded at least `offset' bytes into vsrc. */
6612 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6614 /* Advance to the next byte to be loaded. */
6615 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6616 1, OPTAB_DIRECT);
6618 /* Make sure the addresses are single regs which can be used as a
6619 base. */
6620 emit_move_insn (src_addr_reg, src_addr);
6621 emit_move_insn (dst_addr_reg, dst_addr);
6623 /* MAIN LOOP */
6625 emit_label (loop_label);
6626 LABEL_NUSES (loop_label) = 1;
6628 emit_move_insn (vsrc,
6629 gen_rtx_MEM (V16QImode,
6630 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6632 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6633 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6634 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6635 REG_BR_PROB, profile_probability::very_unlikely ()
6636 .to_reg_br_prob_note ());
6638 emit_move_insn (gen_rtx_MEM (V16QImode,
6639 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6640 vsrc);
6641 /* offset += 16 */
6642 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6643 offset, 1, OPTAB_DIRECT);
6645 emit_jump (loop_label);
6646 emit_barrier ();
6648 /* REGULAR EXIT */
6650 /* We are done. Add the offset of the zero character to the dst_addr
6651 pointer to get the result. */
6653 emit_label (done_label);
6654 LABEL_NUSES (done_label) = 1;
6656 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6657 1, OPTAB_DIRECT);
6659 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6660 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6662 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6664 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6665 1, OPTAB_DIRECT);
6667 /* EARLY EXIT */
6669 emit_label (exit_label);
6670 LABEL_NUSES (exit_label) = 1;
6674 /* Expand conditional increment or decrement using alc/slb instructions.
6675 Should generate code setting DST to either SRC or SRC + INCREMENT,
6676 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6677 Returns true if successful, false otherwise.
6679 That makes it possible to implement some if-constructs without jumps e.g.:
6680 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6681 unsigned int a, b, c;
6682 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6683 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6684 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6685 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6687 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6688 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6689 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6690 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6691 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6693 bool
6694 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6695 rtx dst, rtx src, rtx increment)
6697 machine_mode cmp_mode;
6698 machine_mode cc_mode;
6699 rtx op_res;
6700 rtx insn;
6701 rtvec p;
6702 int ret;
6704 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6705 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6706 cmp_mode = SImode;
6707 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6708 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6709 cmp_mode = DImode;
6710 else
6711 return false;
6713 /* Try ADD LOGICAL WITH CARRY. */
6714 if (increment == const1_rtx)
6716 /* Determine CC mode to use. */
6717 if (cmp_code == EQ || cmp_code == NE)
6719 if (cmp_op1 != const0_rtx)
6721 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6722 NULL_RTX, 0, OPTAB_WIDEN);
6723 cmp_op1 = const0_rtx;
6726 cmp_code = cmp_code == EQ ? LEU : GTU;
6729 if (cmp_code == LTU || cmp_code == LEU)
6731 rtx tem = cmp_op0;
6732 cmp_op0 = cmp_op1;
6733 cmp_op1 = tem;
6734 cmp_code = swap_condition (cmp_code);
6737 switch (cmp_code)
6739 case GTU:
6740 cc_mode = CCUmode;
6741 break;
6743 case GEU:
6744 cc_mode = CCL3mode;
6745 break;
6747 default:
6748 return false;
6751 /* Emit comparison instruction pattern. */
6752 if (!register_operand (cmp_op0, cmp_mode))
6753 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6755 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6756 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6757 /* We use insn_invalid_p here to add clobbers if required. */
6758 ret = insn_invalid_p (emit_insn (insn), false);
6759 gcc_assert (!ret);
6761 /* Emit ALC instruction pattern. */
6762 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6763 gen_rtx_REG (cc_mode, CC_REGNUM),
6764 const0_rtx);
6766 if (src != const0_rtx)
6768 if (!register_operand (src, GET_MODE (dst)))
6769 src = force_reg (GET_MODE (dst), src);
6771 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6772 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6775 p = rtvec_alloc (2);
6776 RTVEC_ELT (p, 0) =
6777 gen_rtx_SET (dst, op_res);
6778 RTVEC_ELT (p, 1) =
6779 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6780 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6782 return true;
6785 /* Try SUBTRACT LOGICAL WITH BORROW. */
6786 if (increment == constm1_rtx)
6788 /* Determine CC mode to use. */
6789 if (cmp_code == EQ || cmp_code == NE)
6791 if (cmp_op1 != const0_rtx)
6793 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6794 NULL_RTX, 0, OPTAB_WIDEN);
6795 cmp_op1 = const0_rtx;
6798 cmp_code = cmp_code == EQ ? LEU : GTU;
6801 if (cmp_code == GTU || cmp_code == GEU)
6803 rtx tem = cmp_op0;
6804 cmp_op0 = cmp_op1;
6805 cmp_op1 = tem;
6806 cmp_code = swap_condition (cmp_code);
6809 switch (cmp_code)
6811 case LEU:
6812 cc_mode = CCUmode;
6813 break;
6815 case LTU:
6816 cc_mode = CCL3mode;
6817 break;
6819 default:
6820 return false;
6823 /* Emit comparison instruction pattern. */
6824 if (!register_operand (cmp_op0, cmp_mode))
6825 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6827 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6828 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6829 /* We use insn_invalid_p here to add clobbers if required. */
6830 ret = insn_invalid_p (emit_insn (insn), false);
6831 gcc_assert (!ret);
6833 /* Emit SLB instruction pattern. */
6834 if (!register_operand (src, GET_MODE (dst)))
6835 src = force_reg (GET_MODE (dst), src);
6837 op_res = gen_rtx_MINUS (GET_MODE (dst),
6838 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6839 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6840 gen_rtx_REG (cc_mode, CC_REGNUM),
6841 const0_rtx));
6842 p = rtvec_alloc (2);
6843 RTVEC_ELT (p, 0) =
6844 gen_rtx_SET (dst, op_res);
6845 RTVEC_ELT (p, 1) =
6846 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6847 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6849 return true;
6852 return false;
6855 /* Expand code for the insv template. Return true if successful. */
6857 bool
6858 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6860 int bitsize = INTVAL (op1);
6861 int bitpos = INTVAL (op2);
6862 machine_mode mode = GET_MODE (dest);
6863 machine_mode smode;
6864 int smode_bsize, mode_bsize;
6865 rtx op, clobber;
6867 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6868 return false;
6870 /* Just a move. */
6871 if (bitpos == 0
6872 && bitsize == GET_MODE_BITSIZE (GET_MODE (src))
6873 && mode == GET_MODE (src))
6875 emit_move_insn (dest, src);
6876 return true;
6879 /* Generate INSERT IMMEDIATE (IILL et al). */
6880 /* (set (ze (reg)) (const_int)). */
6881 if (TARGET_ZARCH
6882 && register_operand (dest, word_mode)
6883 && (bitpos % 16) == 0
6884 && (bitsize % 16) == 0
6885 && const_int_operand (src, VOIDmode))
6887 HOST_WIDE_INT val = INTVAL (src);
6888 int regpos = bitpos + bitsize;
6890 while (regpos > bitpos)
6892 machine_mode putmode;
6893 int putsize;
6895 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6896 putmode = SImode;
6897 else
6898 putmode = HImode;
6900 putsize = GET_MODE_BITSIZE (putmode);
6901 regpos -= putsize;
6902 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6903 GEN_INT (putsize),
6904 GEN_INT (regpos)),
6905 gen_int_mode (val, putmode));
6906 val >>= putsize;
6908 gcc_assert (regpos == bitpos);
6909 return true;
6912 smode = smallest_int_mode_for_size (bitsize);
6913 smode_bsize = GET_MODE_BITSIZE (smode);
6914 mode_bsize = GET_MODE_BITSIZE (mode);
6916 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6917 if (bitpos == 0
6918 && (bitsize % BITS_PER_UNIT) == 0
6919 && MEM_P (dest)
6920 && (register_operand (src, word_mode)
6921 || const_int_operand (src, VOIDmode)))
6923 /* Emit standard pattern if possible. */
6924 if (smode_bsize == bitsize)
6926 emit_move_insn (adjust_address (dest, smode, 0),
6927 gen_lowpart (smode, src));
6928 return true;
6931 /* (set (ze (mem)) (const_int)). */
6932 else if (const_int_operand (src, VOIDmode))
6934 int size = bitsize / BITS_PER_UNIT;
6935 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6936 BLKmode,
6937 UNITS_PER_WORD - size);
6939 dest = adjust_address (dest, BLKmode, 0);
6940 set_mem_size (dest, size);
6941 rtx size_rtx = GEN_INT (size);
6942 s390_expand_cpymem (dest, src_mem, size_rtx, size_rtx, size_rtx);
6943 return true;
6946 /* (set (ze (mem)) (reg)). */
6947 else if (register_operand (src, word_mode))
6949 if (bitsize <= 32)
6950 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6951 const0_rtx), src);
6952 else
6954 /* Emit st,stcmh sequence. */
6955 int stcmh_width = bitsize - 32;
6956 int size = stcmh_width / BITS_PER_UNIT;
6958 emit_move_insn (adjust_address (dest, SImode, size),
6959 gen_lowpart (SImode, src));
6960 set_mem_size (dest, size);
6961 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6962 GEN_INT (stcmh_width),
6963 const0_rtx),
6964 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6966 return true;
6970 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6971 if ((bitpos % BITS_PER_UNIT) == 0
6972 && (bitsize % BITS_PER_UNIT) == 0
6973 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6974 && MEM_P (src)
6975 && (mode == DImode || mode == SImode)
6976 && mode != smode
6977 && register_operand (dest, mode))
6979 /* Emit a strict_low_part pattern if possible. */
6980 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6982 rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
6983 rtx low_src = gen_lowpart (smode, src);
6985 switch (smode)
6987 case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
6988 case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
6989 case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
6990 default: break;
6994 /* ??? There are more powerful versions of ICM that are not
6995 completely represented in the md file. */
6998 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6999 if (TARGET_Z10 && (mode == DImode || mode == SImode))
7001 machine_mode mode_s = GET_MODE (src);
7003 if (CONSTANT_P (src))
7005 /* For constant zero values the representation with AND
7006 appears to be folded in more situations than the (set
7007 (zero_extract) ...).
7008 We only do this when the start and end of the bitfield
7009 remain in the same SImode chunk. That way nihf or nilf
7010 can be used.
7011 The AND patterns might still generate a risbg for this. */
7012 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
7013 return false;
7014 else
7015 src = force_reg (mode, src);
7017 else if (mode_s != mode)
7019 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
7020 src = force_reg (mode_s, src);
7021 src = gen_lowpart (mode, src);
7024 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
7025 op = gen_rtx_SET (op, src);
7027 if (!TARGET_ZEC12)
7029 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
7030 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
7032 emit_insn (op);
7034 return true;
7037 return false;
7040 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
7041 register that holds VAL of mode MODE shifted by COUNT bits. */
7043 static inline rtx
7044 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
7046 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
7047 NULL_RTX, 1, OPTAB_DIRECT);
7048 return expand_simple_binop (SImode, ASHIFT, val, count,
7049 NULL_RTX, 1, OPTAB_DIRECT);
7052 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
7053 the result in TARGET. */
7055 void
7056 s390_expand_vec_compare (rtx target, enum rtx_code cond,
7057 rtx cmp_op1, rtx cmp_op2)
7059 machine_mode mode = GET_MODE (target);
7060 bool neg_p = false, swap_p = false;
7061 rtx tmp;
7063 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
7065 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
7066 switch (cond)
7068 /* NE a != b -> !(a == b) */
7069 case NE: cond = EQ; neg_p = true; break;
7070 case UNGT:
7071 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
7072 return;
7073 case UNGE:
7074 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
7075 return;
7076 case LE: cond = GE; swap_p = true; break;
7077 /* UNLE: (a u<= b) -> (b u>= a). */
7078 case UNLE:
7079 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
7080 return;
7081 /* LT: a < b -> b > a */
7082 case LT: cond = GT; swap_p = true; break;
7083 /* UNLT: (a u< b) -> (b u> a). */
7084 case UNLT:
7085 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
7086 return;
7087 case UNEQ:
7088 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
7089 return;
7090 case LTGT:
7091 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
7092 return;
7093 case ORDERED:
7094 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
7095 return;
7096 case UNORDERED:
7097 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
7098 return;
7099 default: break;
7102 else
7104 /* Turn x < 0 into x >> (bits per element - 1) */
7105 if (cond == LT && cmp_op2 == CONST0_RTX (mode))
7107 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
7108 rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
7109 GEN_INT (shift), target,
7110 0, OPTAB_DIRECT);
7111 if (res != target)
7112 emit_move_insn (target, res);
7113 return;
7115 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
7117 switch (cond)
7119 /* NE: a != b -> !(a == b) */
7120 case NE: cond = EQ; neg_p = true; break;
7121 /* GE: a >= b -> !(b > a) */
7122 case GE: cond = GT; neg_p = true; swap_p = true; break;
7123 /* GEU: a >= b -> !(b > a) */
7124 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
7125 /* LE: a <= b -> !(a > b) */
7126 case LE: cond = GT; neg_p = true; break;
7127 /* LEU: a <= b -> !(a > b) */
7128 case LEU: cond = GTU; neg_p = true; break;
7129 /* LT: a < b -> b > a */
7130 case LT: cond = GT; swap_p = true; break;
7131 /* LTU: a < b -> b > a */
7132 case LTU: cond = GTU; swap_p = true; break;
7133 default: break;
7137 if (swap_p)
7139 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
7142 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
7143 mode,
7144 cmp_op1, cmp_op2)));
7145 if (neg_p)
7146 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
7149 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
7150 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
7151 elements in CMP1 and CMP2 fulfill the comparison.
7152 This function is only used to emit patterns for the vx builtins and
7153 therefore only handles comparison codes required by the
7154 builtins. */
7155 void
7156 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
7157 rtx cmp1, rtx cmp2, bool all_p)
7159 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
7160 rtx tmp_reg = gen_reg_rtx (SImode);
7161 bool swap_p = false;
7163 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
7165 switch (code)
7167 case EQ:
7168 case NE:
7169 cc_producer_mode = CCVEQmode;
7170 break;
7171 case GE:
7172 case LT:
7173 code = swap_condition (code);
7174 swap_p = true;
7175 /* fallthrough */
7176 case GT:
7177 case LE:
7178 cc_producer_mode = CCVIHmode;
7179 break;
7180 case GEU:
7181 case LTU:
7182 code = swap_condition (code);
7183 swap_p = true;
7184 /* fallthrough */
7185 case GTU:
7186 case LEU:
7187 cc_producer_mode = CCVIHUmode;
7188 break;
7189 default:
7190 gcc_unreachable ();
7193 scratch_mode = GET_MODE (cmp1);
7194 /* These codes represent inverted CC interpretations. Inverting
7195 an ALL CC mode results in an ANY CC mode and the other way
7196 around. Invert the all_p flag here to compensate for
7197 that. */
7198 if (code == NE || code == LE || code == LEU)
7199 all_p = !all_p;
7201 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
7203 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
7205 bool inv_p = false;
7207 switch (code)
7209 case EQ: cc_producer_mode = CCVEQmode; break;
7210 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
7211 case GT: cc_producer_mode = CCVFHmode; break;
7212 case GE: cc_producer_mode = CCVFHEmode; break;
7213 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
7214 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
7215 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
7216 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
7217 default: gcc_unreachable ();
7219 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
7221 if (inv_p)
7222 all_p = !all_p;
7224 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
7226 else
7227 gcc_unreachable ();
7229 if (swap_p)
7231 rtx tmp = cmp2;
7232 cmp2 = cmp1;
7233 cmp1 = tmp;
7236 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7237 gen_rtvec (2, gen_rtx_SET (
7238 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
7239 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
7240 gen_rtx_CLOBBER (VOIDmode,
7241 gen_rtx_SCRATCH (scratch_mode)))));
7242 emit_move_insn (target, const0_rtx);
7243 emit_move_insn (tmp_reg, const1_rtx);
7245 emit_move_insn (target,
7246 gen_rtx_IF_THEN_ELSE (SImode,
7247 gen_rtx_fmt_ee (code, VOIDmode,
7248 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
7249 const0_rtx),
7250 tmp_reg, target));
7253 /* Invert the comparison CODE applied to a CC mode. This is only safe
7254 if we know whether the result was created by a floating point
7255 compare or not. For the CCV modes this is encoded as part of the
7256 mode. */
7257 enum rtx_code
7258 s390_reverse_condition (machine_mode mode, enum rtx_code code)
7260 /* Reversal of FP compares needs care -- an ordered compare
7261 becomes an unordered compare and vice versa. */
7262 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
7263 return reverse_condition_maybe_unordered (code);
7264 else if (mode == CCVIALLmode || mode == CCVIANYmode)
7265 return reverse_condition (code);
7266 else
7267 gcc_unreachable ();
7270 /* Generate a vector comparison expression loading either elements of
7271 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
7272 and CMP_OP2. */
7274 void
7275 s390_expand_vcond (rtx target, rtx then, rtx els,
7276 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
7278 rtx tmp;
7279 machine_mode result_mode;
7280 rtx result_target;
7282 machine_mode target_mode = GET_MODE (target);
7283 machine_mode cmp_mode = GET_MODE (cmp_op1);
7284 rtx op = (cond == LT) ? els : then;
7286 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
7287 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
7288 for short and byte (x >> 15 and x >> 7 respectively). */
7289 if ((cond == LT || cond == GE)
7290 && target_mode == cmp_mode
7291 && cmp_op2 == CONST0_RTX (cmp_mode)
7292 && op == CONST0_RTX (target_mode)
7293 && s390_vector_mode_supported_p (target_mode)
7294 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
7296 rtx negop = (cond == LT) ? then : els;
7298 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
7300 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
7301 if (negop == CONST1_RTX (target_mode))
7303 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
7304 GEN_INT (shift), target,
7305 1, OPTAB_DIRECT);
7306 if (res != target)
7307 emit_move_insn (target, res);
7308 return;
7311 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
7312 else if (all_ones_operand (negop, target_mode))
7314 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
7315 GEN_INT (shift), target,
7316 0, OPTAB_DIRECT);
7317 if (res != target)
7318 emit_move_insn (target, res);
7319 return;
7323 /* We always use an integral type vector to hold the comparison
7324 result. */
7325 result_mode = related_int_vector_mode (cmp_mode).require ();
7326 result_target = gen_reg_rtx (result_mode);
7328 /* We allow vector immediates as comparison operands that
7329 can be handled by the optimization above but not by the
7330 following code. Hence, force them into registers here. */
7331 if (!REG_P (cmp_op1))
7332 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
7334 s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
7336 /* If the results are supposed to be either -1 or 0 we are done
7337 since this is what our compare instructions generate anyway. */
7338 if (all_ones_operand (then, GET_MODE (then))
7339 && const0_operand (els, GET_MODE (els)))
7341 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
7342 result_target, 0));
7343 return;
7346 /* Otherwise we will do a vsel afterwards. */
7347 /* This gets triggered e.g.
7348 with gcc.c-torture/compile/pr53410-1.c */
7349 if (!REG_P (then))
7350 then = force_reg (target_mode, then);
7352 if (!REG_P (els))
7353 els = force_reg (target_mode, els);
7355 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
7356 result_target,
7357 CONST0_RTX (result_mode));
7359 /* We compared the result against zero above so we have to swap then
7360 and els here. */
7361 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
7363 gcc_assert (target_mode == GET_MODE (then));
7364 emit_insn (gen_rtx_SET (target, tmp));
7367 /* Emit the RTX necessary to initialize the vector TARGET with values
7368 in VALS. */
7369 void
7370 s390_expand_vec_init (rtx target, rtx vals)
7372 machine_mode mode = GET_MODE (target);
7373 machine_mode inner_mode = GET_MODE_INNER (mode);
7374 int n_elts = GET_MODE_NUNITS (mode);
7375 bool all_same = true, all_regs = true, all_const_int = true;
7376 rtx x;
7377 int i;
7379 for (i = 0; i < n_elts; ++i)
7381 x = XVECEXP (vals, 0, i);
7383 if (!CONST_INT_P (x))
7384 all_const_int = false;
7386 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7387 all_same = false;
7389 if (!REG_P (x))
7390 all_regs = false;
7393 /* Use vector gen mask or vector gen byte mask if possible. */
7394 if (all_same && all_const_int)
7396 rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7397 if (XVECEXP (vals, 0, 0) == const0_rtx
7398 || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
7399 || s390_bytemask_vector_p (vec, NULL))
7401 emit_insn (gen_rtx_SET (target, vec));
7402 return;
7406 /* Use vector replicate instructions. vlrep/vrepi/vrep */
7407 if (all_same)
7409 rtx elem = XVECEXP (vals, 0, 0);
7411 /* vec_splats accepts general_operand as source. */
7412 if (!general_operand (elem, GET_MODE (elem)))
7413 elem = force_reg (inner_mode, elem);
7415 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
7416 return;
7419 if (all_regs
7420 && REG_P (target)
7421 && n_elts == 2
7422 && GET_MODE_SIZE (inner_mode) == 8)
7424 /* Use vector load pair. */
7425 emit_insn (gen_rtx_SET (target,
7426 gen_rtx_VEC_CONCAT (mode,
7427 XVECEXP (vals, 0, 0),
7428 XVECEXP (vals, 0, 1))));
7429 return;
7432 /* Use vector load logical element and zero. */
7433 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
7435 bool found = true;
7437 x = XVECEXP (vals, 0, 0);
7438 if (memory_operand (x, inner_mode))
7440 for (i = 1; i < n_elts; ++i)
7441 found = found && XVECEXP (vals, 0, i) == const0_rtx;
7443 if (found)
7445 machine_mode half_mode = (inner_mode == SFmode
7446 ? V2SFmode : V2SImode);
7447 emit_insn (gen_rtx_SET (target,
7448 gen_rtx_VEC_CONCAT (mode,
7449 gen_rtx_VEC_CONCAT (half_mode,
7451 const0_rtx),
7452 gen_rtx_VEC_CONCAT (half_mode,
7453 const0_rtx,
7454 const0_rtx))));
7455 return;
7460 /* We are about to set the vector elements one by one. Zero out the
7461 full register first in order to help the data flow framework to
7462 detect it as a full VR set. */
7463 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
7465 /* Unfortunately the vec_init expander is not allowed to fail. So
7466 we have to implement the fallback ourselves. */
7467 for (i = 0; i < n_elts; i++)
7469 rtx elem = XVECEXP (vals, 0, i);
7470 if (!general_operand (elem, GET_MODE (elem)))
7471 elem = force_reg (inner_mode, elem);
7473 if (elem != const0_rtx)
7474 emit_insn (gen_rtx_SET (target,
7475 gen_rtx_UNSPEC (mode,
7476 gen_rtvec (3, elem,
7477 GEN_INT (i), target),
7478 UNSPEC_VEC_SET)));
7482 /* Return a parallel of constant integers to be used as permutation
7483 vector for a vector merge operation in MODE. If HIGH_P is true the
7484 left-most elements of the source vectors are merged otherwise the
7485 right-most elements. */
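/* For example, for V4SImode this yields {0, 4, 1, 5} if HIGH_P is true
   (interleaving the left-most halves of the two concatenated inputs) and
   {2, 6, 3, 7} otherwise.  */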
7487 s390_expand_merge_perm_const (machine_mode mode, bool high_p)
7489 int nelts = GET_MODE_NUNITS (mode);
7490 rtx perm[16];
7491 int addend = high_p ? 0 : nelts;
7493 for (int i = 0; i < nelts; i++)
7494 perm[i] = GEN_INT ((i + addend) / 2 + (i % 2) * nelts);
7496 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelts, perm));
7499 /* Emit RTL to implement a vector merge operation of SRC1 and SRC2
7500 which creates the result in TARGET. HIGH_P determines whether a
7501 merge hi or lo will be generated. */
7502 void
7503 s390_expand_merge (rtx target, rtx src1, rtx src2, bool high_p)
7505 machine_mode mode = GET_MODE (target);
7506 opt_machine_mode opt_mode_2x = mode_for_vector (GET_MODE_INNER (mode),
7507 2 * GET_MODE_NUNITS (mode));
7508 gcc_assert (opt_mode_2x.exists ());
7509 machine_mode mode_double_nelts = opt_mode_2x.require ();
7510 rtx constv = s390_expand_merge_perm_const (mode, high_p);
7511 src1 = force_reg (GET_MODE (src1), src1);
7512 src2 = force_reg (GET_MODE (src2), src2);
7513 rtx x = gen_rtx_VEC_CONCAT (mode_double_nelts, src1, src2);
7514 x = gen_rtx_VEC_SELECT (mode, x, constv);
7515 emit_insn (gen_rtx_SET (target, x));
7518 /* Emit a vector constant that contains 1s in each element's sign bit position
7519 and 0s in other positions. MODE is the desired constant's mode. */
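/* For V2DFmode, for instance, the result is a vector of two copies of
   0x8000000000000000, suitable for flipping or clearing the sign bits
   with a vector xor or and.  */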
7520 extern rtx
7521 s390_build_signbit_mask (machine_mode mode)
7523 if (mode == TFmode && TARGET_VXE)
7525 wide_int mask_val = wi::set_bit_in_zero (127, 128);
7526 rtx mask = immed_wide_int_const (mask_val, TImode);
7527 return gen_lowpart (TFmode, mask);
7530 /* Generate the integral element mask value. */
7531 machine_mode inner_mode = GET_MODE_INNER (mode);
7532 int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
7533 wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
7535 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
7536 value to the desired mode. */
7537 machine_mode int_mode = related_int_vector_mode (mode).require ();
7538 rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
7539 mask = gen_lowpart (inner_mode, mask);
7541 /* Emit the vector mask rtx by replicating the element mask rtx. */
7542 int nunits = GET_MODE_NUNITS (mode);
7543 rtvec v = rtvec_alloc (nunits);
7544 for (int i = 0; i < nunits; i++)
7545 RTVEC_ELT (v, i) = mask;
7546 return gen_rtx_CONST_VECTOR (mode, v);
7549 /* Structure to hold the initial parameters for a compare_and_swap operation
7550 in HImode and QImode. */
7552 struct alignment_context
7554 rtx memsi; /* SI aligned memory location. */
7555 rtx shift; /* Bit offset with regard to lsb. */
7556 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
7557 rtx modemaski; /* ~modemask */
7558 bool aligned; /* True if memory is aligned, false otherwise. */
7561 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
7562 the structure AC for transparent simplification, if the memory alignment is
7563 known to be at least 32 bit. MEM is the memory location for the actual operation
7564 and MODE its mode. */
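/* In effect this yields the surrounding aligned SImode word together with
   the left-shift amount (in bits) which moves the HImode/QImode value into
   its position within that word; being big-endian, the byte at the lowest
   address ends up in the most significant bits.  */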
7566 static void
7567 init_alignment_context (struct alignment_context *ac, rtx mem,
7568 machine_mode mode)
7570 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
7571 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
7573 if (ac->aligned)
7574 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
7575 else
7577 /* Alignment is unknown. */
7578 rtx byteoffset, addr, align;
7580 /* Force the address into a register. */
7581 addr = force_reg (Pmode, XEXP (mem, 0));
7583 /* Align it to SImode. */
7584 align = expand_simple_binop (Pmode, AND, addr,
7585 GEN_INT (-GET_MODE_SIZE (SImode)),
7586 NULL_RTX, 1, OPTAB_DIRECT);
7587 /* Generate MEM. */
7588 ac->memsi = gen_rtx_MEM (SImode, align);
7589 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
7590 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
7591 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
7593 /* Calculate shiftcount. */
7594 byteoffset = expand_simple_binop (Pmode, AND, addr,
7595 GEN_INT (GET_MODE_SIZE (SImode) - 1),
7596 NULL_RTX, 1, OPTAB_DIRECT);
7597 /* As we already have some offset, evaluate the remaining distance. */
7598 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
7599 NULL_RTX, 1, OPTAB_DIRECT);
7602 /* Shift is the byte count, but we need the bitcount. */
7603 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
7604 NULL_RTX, 1, OPTAB_DIRECT);
7606 /* Calculate masks. */
7607 ac->modemask = expand_simple_binop (SImode, ASHIFT,
7608 GEN_INT (GET_MODE_MASK (mode)),
7609 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
7610 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
7611 NULL_RTX, 1);
7614 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7615 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7616 perform the merge in SEQ2. */
7618 static rtx
7619 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
7620 machine_mode mode, rtx val, rtx ins)
7622 rtx tmp;
7624 if (ac->aligned)
7626 start_sequence ();
7627 tmp = copy_to_mode_reg (SImode, val);
7628 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7629 const0_rtx, ins))
7631 *seq1 = NULL;
7632 *seq2 = get_insns ();
7633 end_sequence ();
7634 return tmp;
7636 end_sequence ();
7639 /* Failed to use insv. Generate a two part shift and mask. */
7640 start_sequence ();
7641 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7642 *seq1 = get_insns ();
7643 end_sequence ();
7645 start_sequence ();
7646 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7647 *seq2 = get_insns ();
7648 end_sequence ();
7650 return tmp;
7653 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7654 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7655 value to set if CMP == MEM. */
7657 static void
7658 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7659 rtx cmp, rtx new_rtx, bool is_weak)
7661 struct alignment_context ac;
7662 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7663 rtx res = gen_reg_rtx (SImode);
7664 rtx_code_label *csloop = NULL, *csend = NULL;
7666 gcc_assert (MEM_P (mem));
7668 init_alignment_context (&ac, mem, mode);
7670 /* Load full word. Subsequent loads are performed by CS. */
7671 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7672 NULL_RTX, 1, OPTAB_DIRECT);
7674 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7675 possible, we try to use insv to make this happen efficiently. If
7676 that fails we'll generate code both inside and outside the loop. */
7677 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7678 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7680 if (seq0)
7681 emit_insn (seq0);
7682 if (seq1)
7683 emit_insn (seq1);
7685 /* Start CS loop. */
7686 if (!is_weak)
7688 /* Begin assuming success. */
7689 emit_move_insn (btarget, const1_rtx);
7691 csloop = gen_label_rtx ();
7692 csend = gen_label_rtx ();
7693 emit_label (csloop);
7696 /* val = "<mem>00..0<mem>"
7697 * cmp = "00..0<cmp>00..0"
7698 * new = "00..0<new>00..0"
7701 emit_insn (seq2);
7702 emit_insn (seq3);
7704 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7705 if (is_weak)
7706 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7707 else
7709 rtx tmp;
7711 /* Jump to end if we're done (likely?). */
7712 s390_emit_jump (csend, cc);
7714 /* Check for changes outside mode, and loop internally if so.
7715 Arrange the moves so that the compare is adjacent to the
7716 branch so that we can generate CRJ. */
7717 tmp = copy_to_reg (val);
7718 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7719 1, OPTAB_DIRECT);
7720 cc = s390_emit_compare (NE, val, tmp);
7721 s390_emit_jump (csloop, cc);
7723 /* Failed. */
7724 emit_move_insn (btarget, const0_rtx);
7725 emit_label (csend);
7728 /* Return the correct part of the bitfield. */
7729 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7730 NULL_RTX, 1, OPTAB_DIRECT), 1);
7733 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7734 static void
7735 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7736 rtx cmp, rtx new_rtx, bool is_weak)
7738 rtx output = vtarget;
7739 rtx_code_label *skip_cs_label = NULL;
7740 bool do_const_opt = false;
7742 if (!register_operand (output, mode))
7743 output = gen_reg_rtx (mode);
7745 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7746 with the constant first and skip the compare_and_swap because it is very
7747 expensive and likely to fail anyway.
7748 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7749 cause spurious failures in that case.
7750 Note 2: It may be useful to do this also for non-constant INPUT.
7751 Note 3: Currently only targets with "load on condition" are supported
7752 (z196 and newer). */
7754 if (TARGET_Z196
7755 && (mode == SImode || mode == DImode))
7756 do_const_opt = (is_weak && CONST_INT_P (cmp));
7758 if (do_const_opt)
7760 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7762 skip_cs_label = gen_label_rtx ();
7763 emit_move_insn (btarget, const0_rtx);
7764 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7766 rtvec lt = rtvec_alloc (2);
7768 /* Load-and-test + conditional jump. */
7769 RTVEC_ELT (lt, 0)
7770 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7771 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7772 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7774 else
7776 emit_move_insn (output, mem);
7777 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7779 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7780 add_reg_br_prob_note (get_last_insn (),
7781 profile_probability::very_unlikely ());
7782 /* If the jump is not taken, OUTPUT is the expected value. */
7783 cmp = output;
7784 /* Reload newval to a register manually, *after* the compare and jump
7785 above. Otherwise Reload might place it before the jump. */
7787 else
7788 cmp = force_reg (mode, cmp);
7789 new_rtx = force_reg (mode, new_rtx);
7790 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7791 (do_const_opt) ? CCZmode : CCZ1mode);
7792 if (skip_cs_label != NULL)
7793 emit_label (skip_cs_label);
7795 /* We deliberately accept non-register operands in the predicate
7796 to ensure the write back to the output operand happens *before*
7797 the store-flags code below. This makes it easier for combine
7798 to merge the store-flags code with a potential test-and-branch
7799 pattern following (immediately!) afterwards. */
7800 if (output != vtarget)
7801 emit_move_insn (vtarget, output);
7803 if (do_const_opt)
7805 rtx cc, cond, ite;
7807 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7808 btarget has already been initialized with 0 above. */
7809 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7810 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7811 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7812 emit_insn (gen_rtx_SET (btarget, ite));
7814 else
7816 rtx cc, cond;
7818 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7819 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7820 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7824 /* Expand an atomic compare and swap operation. MEM is the memory location,
7825 CMP the old value to compare MEM with and NEW_RTX the value to set if
7826 CMP == MEM. */
7828 void
7829 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7830 rtx cmp, rtx new_rtx, bool is_weak)
7832 switch (mode)
7834 case E_TImode:
7835 case E_DImode:
7836 case E_SImode:
7837 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7838 break;
7839 case E_HImode:
7840 case E_QImode:
7841 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7842 break;
7843 default:
7844 gcc_unreachable ();
7848 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7849 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7850 of MEM. */
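/* Note (illustrative): on z196 and newer, exchanging in a constant zero is
   handled below without a CS loop by using an atomic fetch-and-AND with a
   zero mask (e.g. the interlocked-access LAN/LANG instructions), which
   returns the previous value while clearing MEM.  */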
7852 void
7853 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7855 machine_mode mode = GET_MODE (mem);
7856 rtx_code_label *csloop;
7858 if (TARGET_Z196
7859 && (mode == DImode || mode == SImode)
7860 && CONST_INT_P (input) && INTVAL (input) == 0)
7862 emit_move_insn (output, const0_rtx);
7863 if (mode == DImode)
7864 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7865 else
7866 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7867 return;
7870 input = force_reg (mode, input);
7871 emit_move_insn (output, mem);
7872 csloop = gen_label_rtx ();
7873 emit_label (csloop);
7874 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7875 input, CCZ1mode));
7878 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7879 and VAL the value to play with. If AFTER is true then store the value
7880 MEM holds after the operation, if AFTER is false then store the value MEM
7881 holds before the operation. If TARGET is zero then discard that value, else
7882 store it to TARGET. */
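/* For the narrow HImode/QImode operands this works on the surrounding
   aligned SImode word and commits the result with a CS loop.  A rough
   sketch (illustrative, not the literal emitted RTL):

       cmp = *memsi;                                // full containing word
     retry:
       new = apply CODE to cmp, with VAL shifted into place;
       if (!compare_and_swap (memsi, cmp, new))     // CS; cmp reloaded on failure
         goto retry;
       if (target)
         target = (after ? new : cmp) >> ac.shift;  // extract the field        */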
7884 void
7885 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7886 rtx target, rtx mem, rtx val, bool after)
7888 struct alignment_context ac;
7889 rtx cmp;
7890 rtx new_rtx = gen_reg_rtx (SImode);
7891 rtx orig = gen_reg_rtx (SImode);
7892 rtx_code_label *csloop = gen_label_rtx ();
7894 gcc_assert (!target || register_operand (target, VOIDmode));
7895 gcc_assert (MEM_P (mem));
7897 init_alignment_context (&ac, mem, mode);
7899 /* Shift val to the correct bit positions.
7900 Preserve "icm", but prevent "ex icm". */
7901 if (!(ac.aligned && code == SET && MEM_P (val)))
7902 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7904 /* Further preparation insns. */
7905 if (code == PLUS || code == MINUS)
7906 emit_move_insn (orig, val);
7907 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7908 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7909 NULL_RTX, 1, OPTAB_DIRECT);
7911 /* Load full word. Subsequent loads are performed by CS. */
7912 cmp = force_reg (SImode, ac.memsi);
7914 /* Start CS loop. */
7915 emit_label (csloop);
7916 emit_move_insn (new_rtx, cmp);
7918 /* Patch new with val at correct position. */
7919 switch (code)
7921 case PLUS:
7922 case MINUS:
7923 val = expand_simple_binop (SImode, code, new_rtx, orig,
7924 NULL_RTX, 1, OPTAB_DIRECT);
7925 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7926 NULL_RTX, 1, OPTAB_DIRECT);
7927 /* FALLTHRU */
7928 case SET:
7929 if (ac.aligned && MEM_P (val))
7930 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7931 0, 0, SImode, val, false, false);
7932 else
7934 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7935 NULL_RTX, 1, OPTAB_DIRECT);
7936 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7937 NULL_RTX, 1, OPTAB_DIRECT);
7939 break;
7940 case AND:
7941 case IOR:
7942 case XOR:
7943 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7944 NULL_RTX, 1, OPTAB_DIRECT);
7945 break;
7946 case MULT: /* NAND */
7947 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7948 NULL_RTX, 1, OPTAB_DIRECT);
7949 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7950 NULL_RTX, 1, OPTAB_DIRECT);
7951 break;
7952 default:
7953 gcc_unreachable ();
7956 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7957 ac.memsi, cmp, new_rtx,
7958 CCZ1mode));
7960 /* Return the correct part of the bitfield. */
7961 if (target)
7962 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7963 after ? new_rtx : cmp, ac.shift,
7964 NULL_RTX, 1, OPTAB_DIRECT), 1);
7967 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7968 We need to emit DTP-relative relocations. */
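/* For instance, for SIZE == 8 and a symbol FOO this emits a line of the
   form ".quad foo@DTPOFF" (illustrative).  */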
7970 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7972 static void
7973 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7975 switch (size)
7977 case 4:
7978 fputs ("\t.long\t", file);
7979 break;
7980 case 8:
7981 fputs ("\t.quad\t", file);
7982 break;
7983 default:
7984 gcc_unreachable ();
7986 output_addr_const (file, x);
7987 fputs ("@DTPOFF", file);
7990 /* Return the proper mode for REGNO being represented in the dwarf
7991 unwind table. */
7992 machine_mode
7993 s390_dwarf_frame_reg_mode (int regno)
7995 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7997 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7998 if (GENERAL_REGNO_P (regno))
7999 save_mode = Pmode;
8001 /* The rightmost 64 bits of vector registers are call-clobbered. */
8002 if (GET_MODE_SIZE (save_mode) > 8)
8003 save_mode = DImode;
8005 return save_mode;
8008 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
8009 /* Implement TARGET_MANGLE_TYPE. */
8011 static const char *
8012 s390_mangle_type (const_tree type)
8014 type = TYPE_MAIN_VARIANT (type);
8016 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
8017 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
8018 return NULL;
8020 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
8021 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
8022 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
8023 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
8025 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
8026 return "g";
8028 /* For all other types, use normal C++ mangling. */
8029 return NULL;
8031 #endif
8033 /* In the name of slightly smaller debug output, and to cater to
8034 general assembler lossage, recognize various UNSPEC sequences
8035 and turn them back into a direct symbol reference. */
8037 static rtx
8038 s390_delegitimize_address (rtx orig_x)
8040 rtx x, y;
8042 orig_x = delegitimize_mem_from_attrs (orig_x);
8043 x = orig_x;
8045 /* Extract the symbol ref from:
8046 (plus:SI (reg:SI 12 %r12)
8047 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
8048 UNSPEC_GOTOFF/PLTOFF)))
8050 (plus:SI (reg:SI 12 %r12)
8051 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
8052 UNSPEC_GOTOFF/PLTOFF)
8053 (const_int 4 [0x4])))) */
8054 if (GET_CODE (x) == PLUS
8055 && REG_P (XEXP (x, 0))
8056 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
8057 && GET_CODE (XEXP (x, 1)) == CONST)
8059 HOST_WIDE_INT offset = 0;
8061 /* The const operand. */
8062 y = XEXP (XEXP (x, 1), 0);
8064 if (GET_CODE (y) == PLUS
8065 && GET_CODE (XEXP (y, 1)) == CONST_INT)
8067 offset = INTVAL (XEXP (y, 1));
8068 y = XEXP (y, 0);
8071 if (GET_CODE (y) == UNSPEC
8072 && (XINT (y, 1) == UNSPEC_GOTOFF
8073 || XINT (y, 1) == UNSPEC_PLTOFF))
8074 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
8077 if (GET_CODE (x) != MEM)
8078 return orig_x;
8080 x = XEXP (x, 0);
8081 if (GET_CODE (x) == PLUS
8082 && GET_CODE (XEXP (x, 1)) == CONST
8083 && GET_CODE (XEXP (x, 0)) == REG
8084 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8086 y = XEXP (XEXP (x, 1), 0);
8087 if (GET_CODE (y) == UNSPEC
8088 && XINT (y, 1) == UNSPEC_GOT)
8089 y = XVECEXP (y, 0, 0);
8090 else
8091 return orig_x;
8093 else if (GET_CODE (x) == CONST)
8095 /* Extract the symbol ref from:
8096 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
8097 UNSPEC_PLT/GOTENT))) */
8099 y = XEXP (x, 0);
8100 if (GET_CODE (y) == UNSPEC
8101 && (XINT (y, 1) == UNSPEC_GOTENT
8102 || XINT (y, 1) == UNSPEC_PLT31))
8103 y = XVECEXP (y, 0, 0);
8104 else
8105 return orig_x;
8107 else
8108 return orig_x;
8110 if (GET_MODE (orig_x) != Pmode)
8112 if (GET_MODE (orig_x) == BLKmode)
8113 return orig_x;
8114 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
8115 if (y == NULL_RTX)
8116 return orig_x;
8118 return y;
8121 /* Output operand OP to stdio stream FILE.
8122 OP is an address (register + offset) which is not used to address data;
8123 instead the rightmost bits are interpreted as the value. */
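/* For example (illustrative): an operand (plus (reg %r1) (const_int 3)) is
   printed as "3(%r1)", and a plain (const_int 3) as just "3".  */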
8125 static void
8126 print_addrstyle_operand (FILE *file, rtx op)
8128 HOST_WIDE_INT offset;
8129 rtx base;
8131 /* Extract base register and offset. */
8132 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
8133 gcc_unreachable ();
8135 /* Sanity check. */
8136 if (base)
8138 gcc_assert (GET_CODE (base) == REG);
8139 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
8140 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
8143 /* Offsets are restricted to twelve bits. */
8144 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
8145 if (base)
8146 fprintf (file, "(%s)", reg_names[REGNO (base)]);
8149 /* Print the shift count operand OP to FILE.
8150 OP is an address-style operand in a form which
8151 s390_valid_shift_count permits. Subregs and no-op
8152 and-masking of the operand are stripped. */
8154 static void
8155 print_shift_count_operand (FILE *file, rtx op)
8157 /* No checking of the and mask required here. */
8158 if (!s390_valid_shift_count (op, 0))
8159 gcc_unreachable ();
8161 while (op && GET_CODE (op) == SUBREG)
8162 op = SUBREG_REG (op);
8164 if (GET_CODE (op) == AND)
8165 op = XEXP (op, 0);
8167 print_addrstyle_operand (file, op);
8170 /* Assigns the number of NOP halfwords to be emitted before and after the
8171 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
8172 If hotpatching is disabled for the function, the values are set to zero.
8175 static void
8176 s390_function_num_hotpatch_hw (tree decl,
8177 int *hw_before,
8178 int *hw_after)
8180 tree attr;
8182 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
8184 /* Handle the arguments of the hotpatch attribute. The values
8185 specified via attribute might override the cmdline argument
8186 values. */
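/* E.g. a function declared with __attribute__ ((hotpatch (1, 2))) gets
   *HW_BEFORE = 1 and *HW_AFTER = 2, regardless of any -mhotpatch= option
   (illustrative).  */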
8187 if (attr)
8189 tree args = TREE_VALUE (attr);
8191 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
8192 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
8194 else
8196 /* Use the values specified by the cmdline arguments. */
8197 *hw_before = s390_hotpatch_hw_before_label;
8198 *hw_after = s390_hotpatch_hw_after_label;
8202 /* Write the current .machine and .machinemode specification to the assembler
8203 file. */
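/* For example (illustrative), compiling with -march=z13 -mzarch might yield
     .machinemode zarch
     .machine "z13"
   with "+htm"/"+nohtm" or "+vx"/"+novx" appended when those flags deviate
   from the architecture default.  */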
8205 #ifdef HAVE_AS_MACHINE_MACHINEMODE
8206 static void
8207 s390_asm_output_machine_for_arch (FILE *asm_out_file)
8209 fprintf (asm_out_file, "\t.machinemode %s\n",
8210 (TARGET_ZARCH) ? "zarch" : "esa");
8211 fprintf (asm_out_file, "\t.machine \"%s",
8212 processor_table[s390_arch].binutils_name);
8213 if (S390_USE_ARCHITECTURE_MODIFIERS)
8215 int cpu_flags;
8217 cpu_flags = processor_flags_table[(int) s390_arch];
8218 if (TARGET_HTM && !(cpu_flags & PF_TX))
8219 fprintf (asm_out_file, "+htm");
8220 else if (!TARGET_HTM && (cpu_flags & PF_TX))
8221 fprintf (asm_out_file, "+nohtm");
8222 if (TARGET_VX && !(cpu_flags & PF_VX))
8223 fprintf (asm_out_file, "+vx");
8224 else if (!TARGET_VX && (cpu_flags & PF_VX))
8225 fprintf (asm_out_file, "+novx");
8227 fprintf (asm_out_file, "\"\n");
8230 /* Write an extra function header before the very start of the function. */
8232 void
8233 s390_asm_output_function_prefix (FILE *asm_out_file,
8234 const char *fnname ATTRIBUTE_UNUSED)
8236 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
8237 return;
8238 /* Since only the function-specific options are saved, but not an indication
8239 of which options were actually set, it is too much work here to figure out
8240 which options have actually changed. Thus, generate .machine and
8241 .machinemode whenever a function has the target attribute or pragma. */
8242 fprintf (asm_out_file, "\t.machinemode push\n");
8243 fprintf (asm_out_file, "\t.machine push\n");
8244 s390_asm_output_machine_for_arch (asm_out_file);
8247 /* Write an extra function footer after the very end of the function. */
8249 void
8250 s390_asm_declare_function_size (FILE *asm_out_file,
8251 const char *fnname, tree decl)
8253 if (!flag_inhibit_size_directive)
8254 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
8255 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
8256 return;
8257 fprintf (asm_out_file, "\t.machine pop\n");
8258 fprintf (asm_out_file, "\t.machinemode pop\n");
8260 #endif
8262 /* Write the extra assembler code needed to declare a function properly. */
8264 void
8265 s390_asm_output_function_label (FILE *out_file, const char *fname,
8266 tree decl)
8268 int hw_before, hw_after;
8270 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
8271 if (hw_before > 0)
8273 unsigned int function_alignment;
8274 int i;
8276 /* Add a trampoline code area before the function label and initialize it
8277 with two-byte nop instructions. This area can be overwritten with code
8278 that jumps to a patched version of the function. */
8279 asm_fprintf (out_file, "\tnopr\t%%r0"
8280 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
8281 hw_before);
8282 for (i = 1; i < hw_before; i++)
8283 fputs ("\tnopr\t%r0\n", out_file);
8285 /* Note: The function label must be aligned so that (a) the bytes of the
8286 following nop do not cross a cacheline boundary, and (b) a jump address
8287 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
8288 stored directly before the label without crossing a cacheline
8289 boundary. All this is necessary to make sure the trampoline code can
8290 be changed atomically.
8291 This alignment is done automatically using the FUNCTION_BOUNDARY, but
8292 if there are NOPs before the function label, the alignment is placed
8293 before them. So it is necessary to duplicate the alignment after the
8294 NOPs. */
8295 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
8296 if (! DECL_USER_ALIGN (decl))
8297 function_alignment
8298 = MAX (function_alignment,
8299 (unsigned int) align_functions.levels[0].get_value ());
8300 fputs ("\t# alignment for hotpatch\n", out_file);
8301 ASM_OUTPUT_ALIGN (out_file, align_functions.levels[0].log);
8304 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
8306 asm_fprintf (out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
8307 asm_fprintf (out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
8308 asm_fprintf (out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
8309 asm_fprintf (out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
8310 asm_fprintf (out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
8311 asm_fprintf (out_file, "\t# fn:%s wf%d\n", fname,
8312 s390_warn_framesize);
8313 asm_fprintf (out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
8314 asm_fprintf (out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
8315 asm_fprintf (out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
8316 asm_fprintf (out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
8317 asm_fprintf (out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
8318 asm_fprintf (out_file, "\t# fn:%s ps%d\n", fname,
8319 TARGET_PACKED_STACK);
8320 asm_fprintf (out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
8321 asm_fprintf (out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
8322 asm_fprintf (out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
8323 asm_fprintf (out_file, "\t# fn:%s wd%d\n", fname,
8324 s390_warn_dynamicstack_p);
8326 assemble_function_label_raw (out_file, fname);
8327 if (hw_after > 0)
8328 asm_fprintf (out_file,
8329 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
8330 hw_after);
8333 /* Output machine-dependent UNSPECs occurring in address constant X
8334 in assembler syntax to stdio stream FILE. Returns true if the
8335 constant X could be recognized, false otherwise. */
8337 static bool
8338 s390_output_addr_const_extra (FILE *file, rtx x)
8340 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
8341 switch (XINT (x, 1))
8343 case UNSPEC_GOTENT:
8344 output_addr_const (file, XVECEXP (x, 0, 0));
8345 fprintf (file, "@GOTENT");
8346 return true;
8347 case UNSPEC_GOT:
8348 output_addr_const (file, XVECEXP (x, 0, 0));
8349 fprintf (file, "@GOT");
8350 return true;
8351 case UNSPEC_GOTOFF:
8352 output_addr_const (file, XVECEXP (x, 0, 0));
8353 fprintf (file, "@GOTOFF");
8354 return true;
8355 case UNSPEC_PLT31:
8356 output_addr_const (file, XVECEXP (x, 0, 0));
8357 fprintf (file, "@PLT");
8358 return true;
8359 case UNSPEC_PLTOFF:
8360 output_addr_const (file, XVECEXP (x, 0, 0));
8361 fprintf (file, "@PLTOFF");
8362 return true;
8363 case UNSPEC_TLSGD:
8364 output_addr_const (file, XVECEXP (x, 0, 0));
8365 fprintf (file, "@TLSGD");
8366 return true;
8367 case UNSPEC_TLSLDM:
8368 assemble_name (file, get_some_local_dynamic_name ());
8369 fprintf (file, "@TLSLDM");
8370 return true;
8371 case UNSPEC_DTPOFF:
8372 output_addr_const (file, XVECEXP (x, 0, 0));
8373 fprintf (file, "@DTPOFF");
8374 return true;
8375 case UNSPEC_NTPOFF:
8376 output_addr_const (file, XVECEXP (x, 0, 0));
8377 fprintf (file, "@NTPOFF");
8378 return true;
8379 case UNSPEC_GOTNTPOFF:
8380 output_addr_const (file, XVECEXP (x, 0, 0));
8381 fprintf (file, "@GOTNTPOFF");
8382 return true;
8383 case UNSPEC_INDNTPOFF:
8384 output_addr_const (file, XVECEXP (x, 0, 0));
8385 fprintf (file, "@INDNTPOFF");
8386 return true;
8389 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
8390 switch (XINT (x, 1))
8392 case UNSPEC_POOL_OFFSET:
8393 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
8394 output_addr_const (file, x);
8395 return true;
8397 return false;
8400 /* Output address operand ADDR in assembler syntax to
8401 stdio stream FILE. */
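/* For example (illustrative): a (plus (reg %r2) (const_int 16)) address is
   printed as "16(%r2)", and one with an additional index register %r3 as
   "16(%r3,%r2)".  */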
8403 void
8404 print_operand_address (FILE *file, rtx addr)
8406 struct s390_address ad;
8407 memset (&ad, 0, sizeof (s390_address));
8409 if (s390_loadrelative_operand_p (addr, NULL, NULL))
8411 if (!TARGET_Z10)
8413 output_operand_lossage ("symbolic memory references are "
8414 "only supported on z10 or later");
8415 return;
8417 output_addr_const (file, addr);
8418 return;
8421 if (!s390_decompose_address (addr, &ad)
8422 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8423 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
8424 output_operand_lossage ("cannot decompose address");
8426 if (ad.disp)
8427 output_addr_const (file, ad.disp);
8428 else
8429 fprintf (file, "0");
8431 if (ad.base && ad.indx)
8432 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
8433 reg_names[REGNO (ad.base)]);
8434 else if (ad.base)
8435 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
8438 /* Output operand X in assembler syntax to stdio stream FILE.
8439 CODE specified the format flag. The following format flags
8440 are recognized:
8442 'A': On z14 or higher: if the operand is a MEM, print the alignment
8443 hint usable with vl/vst, prefixed by a comma.
8444 'C': print opcode suffix for branch condition.
8445 'D': print opcode suffix for inverse branch condition.
8446 'E': print opcode suffix for branch on index instruction.
8447 'G': print the size of the operand in bytes.
8448 'J': print tls_load/tls_gdcall/tls_ldcall suffix
8449 'K': print @PLT suffix for call targets and load address values.
8450 'M': print the second word of a TImode operand.
8451 'N': print the second word of a DImode operand.
8452 'O': print only the displacement of a memory reference or address.
8453 'R': print only the base register of a memory reference or address.
8454 'S': print S-type memory reference (base+displacement).
8455 'Y': print address style operand without index (e.g. shift count or setmem
8456 operand).
8458 'b': print integer X as if it's an unsigned byte.
8459 'c': print integer X as if it's a signed byte.
8460 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
8461 'f': "end" contiguous bitmask X in SImode.
8462 'h': print integer X as if it's a signed halfword.
8463 'i': print the first nonzero HImode part of X.
8464 'j': print the first HImode part unequal to -1 of X.
8465 'k': print the first nonzero SImode part of X.
8466 'm': print the first SImode part unequal to -1 of X.
8467 'o': print integer X as if it's an unsigned 32-bit word.
8468 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
8469 't': CONST_INT: "start" of contiguous bitmask X in SImode.
8470 CONST_VECTOR: Generate a bitmask for vgbm instruction.
8471 'x': print integer X as if it's an unsigned halfword.
8472 'v': print register number as vector register (v1 instead of f1).
8473 'V': print the second word of a TFmode operand as vector register.
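   Example (illustrative): with operand 1 being (const_int -1), "%x1" in an
   output template prints 65535, whereas "%h1" prints -1.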
8476 void
8477 print_operand (FILE *file, rtx x, int code)
8479 HOST_WIDE_INT ival;
8481 switch (code)
8483 case 'A':
8484 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
8486 if (MEM_ALIGN (x) >= 128)
8487 fprintf (file, ",4");
8488 else if (MEM_ALIGN (x) == 64)
8489 fprintf (file, ",3");
8491 return;
8492 case 'C':
8493 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
8494 return;
8496 case 'D':
8497 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
8498 return;
8500 case 'E':
8501 if (GET_CODE (x) == LE)
8502 fprintf (file, "l");
8503 else if (GET_CODE (x) == GT)
8504 fprintf (file, "h");
8505 else
8506 output_operand_lossage ("invalid comparison operator "
8507 "for 'E' output modifier");
8508 return;
8510 case 'J':
8511 if (GET_CODE (x) == SYMBOL_REF)
8513 fprintf (file, "%s", ":tls_load:");
8514 output_addr_const (file, x);
8516 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
8518 fprintf (file, "%s", ":tls_gdcall:");
8519 output_addr_const (file, XVECEXP (x, 0, 0));
8521 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
8523 fprintf (file, "%s", ":tls_ldcall:");
8524 const char *name = get_some_local_dynamic_name ();
8525 gcc_assert (name);
8526 assemble_name (file, name);
8528 else
8529 output_operand_lossage ("invalid reference for 'J' output modifier");
8530 return;
8532 case 'G':
8533 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
8534 return;
8536 case 'O':
8538 struct s390_address ad;
8539 int ret;
8541 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8543 if (!ret
8544 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8545 || ad.indx)
8547 output_operand_lossage ("invalid address for 'O' output modifier");
8548 return;
8551 if (ad.disp)
8552 output_addr_const (file, ad.disp);
8553 else
8554 fprintf (file, "0");
8556 return;
8558 case 'R':
8560 struct s390_address ad;
8561 int ret;
8563 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8565 if (!ret
8566 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8567 || ad.indx)
8569 output_operand_lossage ("invalid address for 'R' output modifier");
8570 return;
8573 if (ad.base)
8574 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
8575 else
8576 fprintf (file, "0");
8578 return;
8580 case 'S':
8582 struct s390_address ad;
8583 int ret;
8585 if (!MEM_P (x))
8587 output_operand_lossage ("memory reference expected for "
8588 "'S' output modifier");
8589 return;
8591 ret = s390_decompose_address (XEXP (x, 0), &ad);
8593 if (!ret
8594 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8595 || ad.indx)
8597 output_operand_lossage ("invalid address for 'S' output modifier");
8598 return;
8601 if (ad.disp)
8602 output_addr_const (file, ad.disp);
8603 else
8604 fprintf (file, "0");
8606 if (ad.base)
8607 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
8609 return;
8611 case 'N':
8612 if (GET_CODE (x) == REG)
8613 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8614 else if (GET_CODE (x) == MEM)
8615 x = change_address (x, VOIDmode,
8616 plus_constant (Pmode, XEXP (x, 0), 4));
8617 else
8618 output_operand_lossage ("register or memory expression expected "
8619 "for 'N' output modifier");
8620 break;
8622 case 'M':
8623 if (GET_CODE (x) == REG)
8624 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8625 else if (GET_CODE (x) == MEM)
8626 x = change_address (x, VOIDmode,
8627 plus_constant (Pmode, XEXP (x, 0), 8));
8628 else
8629 output_operand_lossage ("register or memory expression expected "
8630 "for 'M' output modifier");
8631 break;
8633 case 'Y':
8634 print_shift_count_operand (file, x);
8635 return;
8637 case 'K':
8638 /* Append @PLT to both local and non-local symbols in order to support
8639 Linux Kernel livepatching: patches contain individual functions and
8640 are loaded further than 2G away from vmlinux, and therefore they must
8641 call even static functions via PLT. ld will optimize @PLT away for
8642 normal code, and keep it for patches.
8644 Do not indiscriminately add @PLT in 31-bit mode due to the %r12
8645 restriction; use UNSPEC_PLT31 instead.
8647 @PLT only makes sense for functions, data is taken care of by
8648 -mno-pic-data-is-text-relative.
8650 Adding @PLT interferes with handling of weak symbols in non-PIC code,
8651 since their addresses are loaded with larl, which then always produces
8652 a non-NULL result, so skip them here as well. */
8653 if (TARGET_64BIT
8654 && GET_CODE (x) == SYMBOL_REF
8655 && SYMBOL_REF_FUNCTION_P (x)
8656 && !(SYMBOL_REF_WEAK (x) && !flag_pic))
8657 fprintf (file, "@PLT");
8658 return;
8661 switch (GET_CODE (x))
8663 case REG:
8664 /* Print FP regs as fx instead of vx when they are accessed
8665 through non-vector mode. */
8666 if ((code == 'v' || code == 'V')
8667 || VECTOR_NOFP_REG_P (x)
8668 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8669 || (VECTOR_REG_P (x)
8670 && (GET_MODE_SIZE (GET_MODE (x)) /
8671 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8672 fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
8673 else
8674 fprintf (file, "%s", reg_names[REGNO (x)]);
8675 break;
8677 case MEM:
8678 output_address (GET_MODE (x), XEXP (x, 0));
8679 break;
8681 case CONST:
8682 case CODE_LABEL:
8683 case LABEL_REF:
8684 case SYMBOL_REF:
8685 output_addr_const (file, x);
8686 break;
8688 case CONST_INT:
8689 ival = INTVAL (x);
8690 switch (code)
8692 case 0:
8693 break;
8694 case 'b':
8695 ival &= 0xff;
8696 break;
8697 case 'c':
8698 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8699 break;
8700 case 'x':
8701 ival &= 0xffff;
8702 break;
8703 case 'h':
8704 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8705 break;
8706 case 'i':
8707 ival = s390_extract_part (x, HImode, 0);
8708 break;
8709 case 'j':
8710 ival = s390_extract_part (x, HImode, -1);
8711 break;
8712 case 'k':
8713 ival = s390_extract_part (x, SImode, 0);
8714 break;
8715 case 'm':
8716 ival = s390_extract_part (x, SImode, -1);
8717 break;
8718 case 'o':
8719 ival &= 0xffffffff;
8720 break;
8721 case 'e': case 'f':
8722 case 's': case 't':
8724 int start, end;
8725 int len;
8726 bool ok;
8728 len = (code == 's' || code == 'e' ? 64 : 32);
8729 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8730 gcc_assert (ok);
8731 if (code == 's' || code == 't')
8732 ival = start;
8733 else
8734 ival = end;
8736 break;
8737 default:
8738 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8741 break;
8743 case CONST_WIDE_INT:
8744 if (code == 'b')
8745 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8746 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8747 else if (code == 'x')
8748 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8749 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8750 else if (code == 'h')
8751 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8752 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8753 else
8755 if (code == 0)
8756 output_operand_lossage ("invalid constant - try using "
8757 "an output modifier");
8758 else
8759 output_operand_lossage ("invalid constant for output modifier '%c'",
8760 code);
8762 break;
8763 case CONST_VECTOR:
8764 switch (code)
8766 case 'h':
8767 gcc_assert (const_vec_duplicate_p (x));
8768 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8769 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8770 break;
8771 case 'e':
8772 case 's':
8774 int start, end;
8775 bool ok;
8777 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8778 gcc_assert (ok);
8779 ival = (code == 's') ? start : end;
8780 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8782 break;
8783 case 't':
8785 unsigned mask;
8786 bool ok = s390_bytemask_vector_p (x, &mask);
8787 gcc_assert (ok);
8788 fprintf (file, "%u", mask);
8790 break;
8792 default:
8793 output_operand_lossage ("invalid constant vector for output "
8794 "modifier '%c'", code);
8796 break;
8798 default:
8799 if (code == 0)
8800 output_operand_lossage ("invalid expression - try using "
8801 "an output modifier");
8802 else
8803 output_operand_lossage ("invalid expression for output "
8804 "modifier '%c'", code);
8805 break;
8809 /* Target hook for assembling integer objects. We need to define it
8810 here to work around a bug in some versions of GAS, which couldn't
8811 handle values smaller than INT_MIN when printed in decimal. */
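/* E.g. (const_int -2147483649) is emitted as ".quad 0xffffffff7fffffff"
   rather than in decimal (illustrative).  */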
8813 static bool
8814 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8816 if (size == 8 && aligned_p
8817 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8819 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8820 INTVAL (x));
8821 return true;
8823 return default_assemble_integer (x, size, aligned_p);
8826 /* Returns true if register REGNO is used for forming
8827 a memory address in expression X. */
8829 static bool
8830 reg_used_in_mem_p (int regno, rtx x)
8832 enum rtx_code code = GET_CODE (x);
8833 int i, j;
8834 const char *fmt;
8836 if (code == MEM)
8838 if (refers_to_regno_p (regno, XEXP (x, 0)))
8839 return true;
8841 else if (code == SET
8842 && GET_CODE (SET_DEST (x)) == PC)
8844 if (refers_to_regno_p (regno, SET_SRC (x)))
8845 return true;
8848 fmt = GET_RTX_FORMAT (code);
8849 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8851 if (fmt[i] == 'e'
8852 && reg_used_in_mem_p (regno, XEXP (x, i)))
8853 return true;
8855 else if (fmt[i] == 'E')
8856 for (j = 0; j < XVECLEN (x, i); j++)
8857 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8858 return true;
8860 return false;
8863 /* Returns true if expression DEP_RTX sets an address register
8864 used by instruction INSN to address memory. */
8866 static bool
8867 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8869 rtx target, pat;
8871 if (NONJUMP_INSN_P (dep_rtx))
8872 dep_rtx = PATTERN (dep_rtx);
8874 if (GET_CODE (dep_rtx) == SET)
8876 target = SET_DEST (dep_rtx);
8877 if (GET_CODE (target) == STRICT_LOW_PART)
8878 target = XEXP (target, 0);
8879 while (GET_CODE (target) == SUBREG)
8880 target = SUBREG_REG (target);
8882 if (GET_CODE (target) == REG)
8884 int regno = REGNO (target);
8886 if (s390_safe_attr_type (insn) == TYPE_LA)
8888 pat = PATTERN (insn);
8889 if (GET_CODE (pat) == PARALLEL)
8891 gcc_assert (XVECLEN (pat, 0) == 2);
8892 pat = XVECEXP (pat, 0, 0);
8894 gcc_assert (GET_CODE (pat) == SET);
8895 return refers_to_regno_p (regno, SET_SRC (pat));
8897 else if (get_attr_atype (insn) == ATYPE_AGEN)
8898 return reg_used_in_mem_p (regno, PATTERN (insn));
8901 return false;
8904 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8907 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8909 rtx dep_rtx = PATTERN (dep_insn);
8910 int i;
8912 if (GET_CODE (dep_rtx) == SET
8913 && addr_generation_dependency_p (dep_rtx, insn))
8914 return 1;
8915 else if (GET_CODE (dep_rtx) == PARALLEL)
8917 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8919 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8920 return 1;
8923 return 0;
8927 /* A C statement (sans semicolon) to update the integer scheduling priority
8928 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8929 reduce the priority to execute INSN later. Do not define this macro if
8930 you do not need to adjust the scheduling priorities of insns.
8932 A STD instruction should be scheduled earlier,
8933 in order to use the bypass. */
8934 static int
8935 s390_adjust_priority (rtx_insn *insn, int priority)
8937 if (! INSN_P (insn))
8938 return priority;
8940 if (s390_tune <= PROCESSOR_2064_Z900)
8941 return priority;
8943 switch (s390_safe_attr_type (insn))
8945 case TYPE_FSTOREDF:
8946 case TYPE_FSTORESF:
8947 priority = priority << 3;
8948 break;
8949 case TYPE_STORE:
8950 case TYPE_STM:
8951 priority = priority << 1;
8952 break;
8953 default:
8954 break;
8956 return priority;
8960 /* The number of instructions that can be issued per cycle. */
8962 static int
8963 s390_issue_rate (void)
8965 switch (s390_tune)
8967 case PROCESSOR_2084_Z990:
8968 case PROCESSOR_2094_Z9_109:
8969 case PROCESSOR_2094_Z9_EC:
8970 case PROCESSOR_2817_Z196:
8971 return 3;
8972 case PROCESSOR_2097_Z10:
8973 return 2;
8974 case PROCESSOR_2064_Z900:
8975 /* Starting with EC12 we use the sched_reorder hook to take care
8976 of instruction dispatch constraints. The algorithm only
8977 picks the best instruction and assumes only a single
8978 instruction gets issued per cycle. */
8979 case PROCESSOR_2827_ZEC12:
8980 case PROCESSOR_2964_Z13:
8981 case PROCESSOR_3906_Z14:
8982 case PROCESSOR_8561_Z15:
8983 case PROCESSOR_3931_Z16:
8984 default:
8985 return 1;
8989 static int
8990 s390_first_cycle_multipass_dfa_lookahead (void)
8992 return 4;
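/* Recursive worker for annotate_constant_pool_refs: rewrite every literal
   pool reference found within *X (see the wrapper below for details).  */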
8995 static void
8996 annotate_constant_pool_refs_1 (rtx *x)
8998 int i, j;
8999 const char *fmt;
9001 gcc_assert (GET_CODE (*x) != SYMBOL_REF
9002 || !CONSTANT_POOL_ADDRESS_P (*x));
9004 /* Literal pool references can only occur inside a MEM ... */
9005 if (GET_CODE (*x) == MEM)
9007 rtx memref = XEXP (*x, 0);
9009 if (GET_CODE (memref) == SYMBOL_REF
9010 && CONSTANT_POOL_ADDRESS_P (memref))
9012 rtx base = cfun->machine->base_reg;
9013 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
9014 UNSPEC_LTREF);
9016 *x = replace_equiv_address (*x, addr);
9017 return;
9020 if (GET_CODE (memref) == CONST
9021 && GET_CODE (XEXP (memref, 0)) == PLUS
9022 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
9023 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
9024 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
9026 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
9027 rtx sym = XEXP (XEXP (memref, 0), 0);
9028 rtx base = cfun->machine->base_reg;
9029 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
9030 UNSPEC_LTREF);
9032 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
9033 return;
9037 /* ... or a load-address type pattern. */
9038 if (GET_CODE (*x) == SET)
9040 rtx addrref = SET_SRC (*x);
9042 if (GET_CODE (addrref) == SYMBOL_REF
9043 && CONSTANT_POOL_ADDRESS_P (addrref))
9045 rtx base = cfun->machine->base_reg;
9046 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
9047 UNSPEC_LTREF);
9049 SET_SRC (*x) = addr;
9050 return;
9053 if (GET_CODE (addrref) == CONST
9054 && GET_CODE (XEXP (addrref, 0)) == PLUS
9055 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
9056 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
9057 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
9059 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
9060 rtx sym = XEXP (XEXP (addrref, 0), 0);
9061 rtx base = cfun->machine->base_reg;
9062 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
9063 UNSPEC_LTREF);
9065 SET_SRC (*x) = plus_constant (Pmode, addr, off);
9066 return;
9070 fmt = GET_RTX_FORMAT (GET_CODE (*x));
9071 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
9073 if (fmt[i] == 'e')
9075 annotate_constant_pool_refs_1 (&XEXP (*x, i));
9077 else if (fmt[i] == 'E')
9079 for (j = 0; j < XVECLEN (*x, i); j++)
9080 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
9085 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
9086 Fix up MEMs as required.
9087 Skip insns which support relative addressing, because they do not use a base
9088 register. */
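/* E.g. (mem (symbol_ref ".LC0")) referring to the literal pool becomes
   (mem (unspec [(symbol_ref ".LC0") (base_reg)] UNSPEC_LTREF)), where
   base_reg is cfun->machine->base_reg (illustrative).  */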
9090 static void
9091 annotate_constant_pool_refs (rtx_insn *insn)
9093 if (s390_safe_relative_long_p (insn))
9094 return;
9095 annotate_constant_pool_refs_1 (&PATTERN (insn));
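/* Recursive worker for find_constant_pool_ref: scan X for an UNSPEC_LTREF
   annotated pool symbol and record it in *REF.  */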
9098 static void
9099 find_constant_pool_ref_1 (rtx x, rtx *ref)
9101 int i, j;
9102 const char *fmt;
9104 /* Skip POOL_ENTRY insns themselves. */
9105 if (GET_CODE (x) == UNSPEC_VOLATILE
9106 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
9107 return;
9109 gcc_assert (GET_CODE (x) != SYMBOL_REF
9110 || !CONSTANT_POOL_ADDRESS_P (x));
9112 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
9114 rtx sym = XVECEXP (x, 0, 0);
9115 gcc_assert (GET_CODE (sym) == SYMBOL_REF
9116 && CONSTANT_POOL_ADDRESS_P (sym));
9118 if (*ref == NULL_RTX)
9119 *ref = sym;
9120 else
9121 gcc_assert (*ref == sym);
9123 return;
9126 fmt = GET_RTX_FORMAT (GET_CODE (x));
9127 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9129 if (fmt[i] == 'e')
9131 find_constant_pool_ref_1 (XEXP (x, i), ref);
9133 else if (fmt[i] == 'E')
9135 for (j = 0; j < XVECLEN (x, i); j++)
9136 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
9141 /* Find an annotated literal pool symbol referenced in INSN,
9142 and store it at REF. Will abort if INSN contains references to
9143 more than one such pool symbol; multiple references to the same
9144 symbol are allowed, however.
9146 The rtx pointed to by REF must be initialized to NULL_RTX
9147 by the caller before calling this routine.
9149 Skip insns which support relative addressing, because they do not use a base
9150 register. */
9152 static void
9153 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
9155 if (s390_safe_relative_long_p (insn))
9156 return;
9157 find_constant_pool_ref_1 (PATTERN (insn), ref);
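/* Recursive worker for replace_constant_pool_ref: rewrite UNSPEC_LTREF
   references to REF within *X into base register plus OFFSET.  */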
9160 static void
9161 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
9163 int i, j;
9164 const char *fmt;
9166 gcc_assert (*x != ref);
9168 if (GET_CODE (*x) == UNSPEC
9169 && XINT (*x, 1) == UNSPEC_LTREF
9170 && XVECEXP (*x, 0, 0) == ref)
9172 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
9173 return;
9176 if (GET_CODE (*x) == PLUS
9177 && GET_CODE (XEXP (*x, 1)) == CONST_INT
9178 && GET_CODE (XEXP (*x, 0)) == UNSPEC
9179 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
9180 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
9182 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
9183 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
9184 return;
9187 fmt = GET_RTX_FORMAT (GET_CODE (*x));
9188 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
9190 if (fmt[i] == 'e')
9192 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
9194 else if (fmt[i] == 'E')
9196 for (j = 0; j < XVECLEN (*x, i); j++)
9197 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
9202 /* Replace every reference to the annotated literal pool
9203 symbol REF in INSN by its base plus OFFSET.
9204 Skip insns which support relative addressing, because they do not use a base
9205 register. */
9207 static void
9208 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
9210 if (s390_safe_relative_long_p (insn))
9211 return;
9212 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
9215 /* We keep a list of constants which we have to add to internal
9216 constant tables in the middle of large functions. */
9218 static machine_mode constant_modes[] =
9220 TFmode, FPRX2mode, TImode, TDmode,
9221 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
9222 V4SFmode, V2DFmode, V1TFmode,
9223 DFmode, DImode, DDmode,
9224 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
9225 SFmode, SImode, SDmode,
9226 V4QImode, V2HImode, V1SImode, V1SFmode,
9227 HImode,
9228 V2QImode, V1HImode,
9229 QImode,
9230 V1QImode
9232 #define NR_C_MODES (ARRAY_SIZE (constant_modes))
9234 struct constant
9236 struct constant *next;
9237 rtx value;
9238 rtx_code_label *label;
9241 struct constant_pool
9243 struct constant_pool *next;
9244 rtx_insn *first_insn;
9245 rtx_insn *pool_insn;
9246 bitmap insns;
9247 rtx_insn *emit_pool_after;
9249 struct constant *constants[NR_C_MODES];
9250 struct constant *execute;
9251 rtx_code_label *label;
9252 int size;
9255 /* Allocate new constant_pool structure. */
9257 static struct constant_pool *
9258 s390_alloc_pool (void)
9260 struct constant_pool *pool;
9261 size_t i;
9263 pool = (struct constant_pool *) xmalloc (sizeof *pool);
9264 pool->next = NULL;
9265 for (i = 0; i < NR_C_MODES; i++)
9266 pool->constants[i] = NULL;
9268 pool->execute = NULL;
9269 pool->label = gen_label_rtx ();
9270 pool->first_insn = NULL;
9271 pool->pool_insn = NULL;
9272 pool->insns = BITMAP_ALLOC (NULL);
9273 pool->size = 0;
9274 pool->emit_pool_after = NULL;
9276 return pool;
9279 /* Create new constant pool covering instructions starting at INSN
9280 and chain it to the end of POOL_LIST. */
9282 static struct constant_pool *
9283 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
9285 struct constant_pool *pool, **prev;
9287 pool = s390_alloc_pool ();
9288 pool->first_insn = insn;
9290 for (prev = pool_list; *prev; prev = &(*prev)->next)
9292 *prev = pool;
9294 return pool;
9297 /* End range of instructions covered by POOL at INSN and emit
9298 placeholder insn representing the pool. */
9300 static void
9301 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
9303 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
9305 if (!insn)
9306 insn = get_last_insn ();
9308 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
9309 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9312 /* Add INSN to the list of insns covered by POOL. */
9314 static void
9315 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
9317 bitmap_set_bit (pool->insns, INSN_UID (insn));
9320 /* Return pool out of POOL_LIST that covers INSN. */
9322 static struct constant_pool *
9323 s390_find_pool (struct constant_pool *pool_list, rtx insn)
9325 struct constant_pool *pool;
9327 for (pool = pool_list; pool; pool = pool->next)
9328 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
9329 break;
9331 return pool;
9334 /* Add constant VAL of mode MODE to the constant pool POOL. */
9336 static void
9337 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
9339 struct constant *c;
9340 size_t i;
9342 for (i = 0; i < NR_C_MODES; i++)
9343 if (constant_modes[i] == mode)
9344 break;
9345 gcc_assert (i != NR_C_MODES);
9347 for (c = pool->constants[i]; c != NULL; c = c->next)
9348 if (rtx_equal_p (val, c->value))
9349 break;
9351 if (c == NULL)
9353 c = (struct constant *) xmalloc (sizeof *c);
9354 c->value = val;
9355 c->label = gen_label_rtx ();
9356 c->next = pool->constants[i];
9357 pool->constants[i] = c;
9358 pool->size += GET_MODE_SIZE (mode);
9362 /* Return an rtx that represents the offset of X from the start of
9363 pool POOL. */
9365 static rtx
9366 s390_pool_offset (struct constant_pool *pool, rtx x)
9368 rtx label;
9370 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
9371 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
9372 UNSPEC_POOL_OFFSET);
9373 return gen_rtx_CONST (GET_MODE (x), x);
9376 /* Find constant VAL of mode MODE in the constant pool POOL.
9377 Return an RTX describing the distance from the start of
9378 the pool to the location of the new constant. */
9380 static rtx
9381 s390_find_constant (struct constant_pool *pool, rtx val,
9382 machine_mode mode)
9384 struct constant *c;
9385 size_t i;
9387 for (i = 0; i < NR_C_MODES; i++)
9388 if (constant_modes[i] == mode)
9389 break;
9390 gcc_assert (i != NR_C_MODES);
9392 for (c = pool->constants[i]; c != NULL; c = c->next)
9393 if (rtx_equal_p (val, c->value))
9394 break;
9396 gcc_assert (c);
9398 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
9401 /* Check whether INSN is an execute. Return the label_ref to its
9402 execute target template if so, NULL_RTX otherwise. */
9404 static rtx
9405 s390_execute_label (rtx insn)
9407 if (INSN_P (insn)
9408 && GET_CODE (PATTERN (insn)) == PARALLEL
9409 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
9410 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
9411 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
9413 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
9414 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
9415 else
9417 gcc_assert (JUMP_P (insn));
9418 /* For jump insns as execute target:
9419 - There is one operand less in the parallel (the
9420 modification register of the execute is always 0).
9421 - The execute target label is wrapped into an
9422 if_then_else in order to hide it from jump analysis. */
9423 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
9427 return NULL_RTX;
9430 /* Find execute target for INSN in the constant pool POOL.
9431 Return an RTX describing the distance from the start of
9432 the pool to the location of the execute target. */
9434 static rtx
9435 s390_find_execute (struct constant_pool *pool, rtx insn)
9437 struct constant *c;
9439 for (c = pool->execute; c != NULL; c = c->next)
9440 if (INSN_UID (insn) == INSN_UID (c->value))
9441 break;
9443 gcc_assert (c);
9445 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
9448 /* For an execute INSN, extract the execute target template. */
9450 static rtx
9451 s390_execute_target (rtx insn)
9453 rtx pattern = PATTERN (insn);
9454 gcc_assert (s390_execute_label (insn));
9456 if (XVECLEN (pattern, 0) == 2)
9458 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
9460 else
9462 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
9463 int i;
9465 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
9466 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
9468 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
9471 return pattern;
9474 /* Indicate that INSN cannot be duplicated. This is the case for
9475 execute insns that carry a unique label. */
9477 static bool
9478 s390_cannot_copy_insn_p (rtx_insn *insn)
9480 rtx label = s390_execute_label (insn);
9481 return label && label != const0_rtx;
9484 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
9485 do not emit the pool base label. */
9487 static void
9488 s390_dump_pool (struct constant_pool *pool, bool remote_label)
9490 struct constant *c;
9491 rtx_insn *insn = pool->pool_insn;
9492 size_t i;
9494 /* Switch to rodata section. */
9495 insn = emit_insn_after (gen_pool_section_start (), insn);
9496 INSN_ADDRESSES_NEW (insn, -1);
9498 /* Ensure minimum pool alignment. */
9499 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
9500 INSN_ADDRESSES_NEW (insn, -1);
9502 /* Emit pool base label. */
9503 if (!remote_label)
9505 insn = emit_label_after (pool->label, insn);
9506 INSN_ADDRESSES_NEW (insn, -1);
9509 /* Dump constants in descending alignment requirement order,
9510 ensuring proper alignment for every constant. */
9511 for (i = 0; i < NR_C_MODES; i++)
9512 for (c = pool->constants[i]; c; c = c->next)
9514 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
9515 rtx value = copy_rtx (c->value);
9516 if (GET_CODE (value) == CONST
9517 && GET_CODE (XEXP (value, 0)) == UNSPEC
9518 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
9519 && XVECLEN (XEXP (value, 0), 0) == 1)
9520 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
9522 insn = emit_label_after (c->label, insn);
9523 INSN_ADDRESSES_NEW (insn, -1);
9525 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
9526 gen_rtvec (1, value),
9527 UNSPECV_POOL_ENTRY);
9528 insn = emit_insn_after (value, insn);
9529 INSN_ADDRESSES_NEW (insn, -1);
9532 /* Ensure minimum alignment for instructions. */
9533 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
9534 INSN_ADDRESSES_NEW (insn, -1);
9536 /* Output in-pool execute template insns. */
9537 for (c = pool->execute; c; c = c->next)
9539 insn = emit_label_after (c->label, insn);
9540 INSN_ADDRESSES_NEW (insn, -1);
9542 insn = emit_insn_after (s390_execute_target (c->value), insn);
9543 INSN_ADDRESSES_NEW (insn, -1);
9546 /* Switch back to previous section. */
9547 insn = emit_insn_after (gen_pool_section_end (), insn);
9548 INSN_ADDRESSES_NEW (insn, -1);
9550 insn = emit_barrier_after (insn);
9551 INSN_ADDRESSES_NEW (insn, -1);
9553 /* Remove placeholder insn. */
9554 remove_insn (pool->pool_insn);
9557 /* Free all memory used by POOL. */
9559 static void
9560 s390_free_pool (struct constant_pool *pool)
9562 struct constant *c, *next;
9563 size_t i;
9565 for (i = 0; i < NR_C_MODES; i++)
9566 for (c = pool->constants[i]; c; c = next)
9568 next = c->next;
9569 free (c);
9572 for (c = pool->execute; c; c = next)
9574 next = c->next;
9575 free (c);
9578 BITMAP_FREE (pool->insns);
9579 free (pool);
9583 /* Collect main literal pool. Return NULL on overflow. */
9585 static struct constant_pool *
9586 s390_mainpool_start (void)
9588 struct constant_pool *pool;
9589 rtx_insn *insn;
9591 pool = s390_alloc_pool ();
9593 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9595 if (NONJUMP_INSN_P (insn)
9596 && GET_CODE (PATTERN (insn)) == SET
9597 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
9598 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
9600 /* There might be two main_pool instructions if base_reg
9601 is call-clobbered; one for shrink-wrapped code and one
9602 for the rest. We want to keep the first. */
9603 if (pool->pool_insn)
9605 insn = PREV_INSN (insn);
9606 delete_insn (NEXT_INSN (insn));
9607 continue;
9609 pool->pool_insn = insn;
9612 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9614 rtx pool_ref = NULL_RTX;
9615 find_constant_pool_ref (insn, &pool_ref);
9616 if (pool_ref)
9618 rtx constant = get_pool_constant (pool_ref);
9619 machine_mode mode = get_pool_mode (pool_ref);
9620 s390_add_constant (pool, constant, mode);
9624 /* If hot/cold partitioning is enabled we have to make sure that
9625 the literal pool is emitted in the same section where the
9626 initialization of the literal pool base pointer takes place.
9627 emit_pool_after is only used in the non-overflow case on non
9628 Z cpus where we can emit the literal pool at the end of the
9629 function body within the text section. */
9630 if (NOTE_P (insn)
9631 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9632 && !pool->emit_pool_after)
9633 pool->emit_pool_after = PREV_INSN (insn);
9636 gcc_assert (pool->pool_insn || pool->size == 0);
9638 if (pool->size >= 4096)
9640 /* We're going to chunkify the pool, so remove the main
9641 pool placeholder insn. */
9642 remove_insn (pool->pool_insn);
9644 s390_free_pool (pool);
9645 pool = NULL;
9648 /* If the function ends with the section where the literal pool
9649 should be emitted, set the marker to its end. */
9650 if (pool && !pool->emit_pool_after)
9651 pool->emit_pool_after = get_last_insn ();
9653 return pool;
9656 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9657 Modify the current function to output the pool constants as well as
9658 the pool register setup instruction. */
9660 static void
9661 s390_mainpool_finish (struct constant_pool *pool)
9663 rtx base_reg = cfun->machine->base_reg;
9664 rtx set;
9665 rtx_insn *insn;
9667 /* If the pool is empty, we're done. */
9668 if (pool->size == 0)
9670 /* We don't actually need a base register after all. */
9671 cfun->machine->base_reg = NULL_RTX;
9673 if (pool->pool_insn)
9674 remove_insn (pool->pool_insn);
9675 s390_free_pool (pool);
9676 return;
9679 /* We need correct insn addresses. */
9680 shorten_branches (get_insns ());
9682 /* Use a LARL to load the pool register. The pool is
9683 located in the .rodata section, so we emit it after the function. */
9684 set = gen_main_base_64 (base_reg, pool->label);
9685 insn = emit_insn_after (set, pool->pool_insn);
9686 INSN_ADDRESSES_NEW (insn, -1);
9687 remove_insn (pool->pool_insn);
9689 insn = get_last_insn ();
9690 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9691 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9693 s390_dump_pool (pool, 0);
9695 /* Replace all literal pool references. */
9697 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9699 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9701 rtx addr, pool_ref = NULL_RTX;
9702 find_constant_pool_ref (insn, &pool_ref);
9703 if (pool_ref)
9705 if (s390_execute_label (insn))
9706 addr = s390_find_execute (pool, insn);
9707 else
9708 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9709 get_pool_mode (pool_ref));
9711 replace_constant_pool_ref (insn, pool_ref, addr);
9712 INSN_CODE (insn) = -1;
9718 /* Free the pool. */
9719 s390_free_pool (pool);
9722 /* Chunkify the literal pool. */
9724 #define S390_POOL_CHUNK_MIN 0xc00
9725 #define S390_POOL_CHUNK_MAX 0xe00
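/* Note: both limits stay well below the 4096-byte range of a 12-bit
   unsigned displacement, so every constant in a chunk remains addressable
   from the chunk's base label, presumably leaving headroom for alignment
   padding and execute templates.  */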
9727 static struct constant_pool *
9728 s390_chunkify_start (void)
9730 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9731 bitmap far_labels;
9732 rtx_insn *insn;
9734 /* We need correct insn addresses. */
9736 shorten_branches (get_insns ());
9738 /* Scan all insns and move literals to pool chunks. */
9740 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9742 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9744 rtx pool_ref = NULL_RTX;
9745 find_constant_pool_ref (insn, &pool_ref);
9746 if (pool_ref)
9748 rtx constant = get_pool_constant (pool_ref);
9749 machine_mode mode = get_pool_mode (pool_ref);
9751 if (!curr_pool)
9752 curr_pool = s390_start_pool (&pool_list, insn);
9754 s390_add_constant (curr_pool, constant, mode);
9755 s390_add_pool_insn (curr_pool, insn);
9759 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9761 if (curr_pool)
9762 s390_add_pool_insn (curr_pool, insn);
9765 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9766 continue;
9768 if (!curr_pool
9769 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9770 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9771 continue;
9773 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9774 continue;
9776 s390_end_pool (curr_pool, NULL);
9777 curr_pool = NULL;
9780 if (curr_pool)
9781 s390_end_pool (curr_pool, NULL);
9783 /* Find all labels that are branched into
9784 from an insn belonging to a different chunk. */
9786 far_labels = BITMAP_ALLOC (NULL);
9788 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9790 rtx_jump_table_data *table;
9792 /* Labels marked with LABEL_PRESERVE_P can be targets
9793 of non-local jumps, so we have to mark them.
9794 The same holds for named labels.
9796 Don't do that, however, if it is the label before
9797 a jump table. */
9799 if (LABEL_P (insn)
9800 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9802 rtx_insn *vec_insn = NEXT_INSN (insn);
9803 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9804 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9806 /* Check potential targets in a table jump (casesi_jump). */
9807 else if (tablejump_p (insn, NULL, &table))
9809 rtx vec_pat = PATTERN (table);
9810 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9812 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9814 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9816 if (s390_find_pool (pool_list, label)
9817 != s390_find_pool (pool_list, insn))
9818 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9821 /* If we have a direct jump (conditional or unconditional),
9822 check all potential targets. */
9823 else if (JUMP_P (insn))
9825 rtx pat = PATTERN (insn);
9827 if (GET_CODE (pat) == PARALLEL)
9828 pat = XVECEXP (pat, 0, 0);
9830 if (GET_CODE (pat) == SET)
9832 rtx label = JUMP_LABEL (insn);
9833 if (label && !ANY_RETURN_P (label))
9835 if (s390_find_pool (pool_list, label)
9836 != s390_find_pool (pool_list, insn))
9837 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9843 /* Insert base register reload insns before every pool. */
9845 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9847 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9848 curr_pool->label);
9849 rtx_insn *insn = curr_pool->first_insn;
9850 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9853 /* Insert base register reload insns at every far label. */
9855 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9856 if (LABEL_P (insn)
9857 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9859 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9860 if (pool)
9862 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9863 pool->label);
9864 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9869 BITMAP_FREE (far_labels);
9872 /* Recompute insn addresses. */
9874 init_insn_lengths ();
9875 shorten_branches (get_insns ());
9877 return pool_list;
9880 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9881 After we have decided to use this list, finish implementing
9882 all changes to the current function as required. */
9884 static void
9885 s390_chunkify_finish (struct constant_pool *pool_list)
9887 struct constant_pool *curr_pool = NULL;
9888 rtx_insn *insn;
9891 /* Replace all literal pool references. */
9893 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9895 curr_pool = s390_find_pool (pool_list, insn);
9896 if (!curr_pool)
9897 continue;
9899 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9901 rtx addr, pool_ref = NULL_RTX;
9902 find_constant_pool_ref (insn, &pool_ref);
9903 if (pool_ref)
9905 if (s390_execute_label (insn))
9906 addr = s390_find_execute (curr_pool, insn);
9907 else
9908 addr = s390_find_constant (curr_pool,
9909 get_pool_constant (pool_ref),
9910 get_pool_mode (pool_ref));
9912 replace_constant_pool_ref (insn, pool_ref, addr);
9913 INSN_CODE (insn) = -1;
9918 /* Dump out all literal pools. */
9920 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9921 s390_dump_pool (curr_pool, 0);
9923 /* Free pool list. */
9925 while (pool_list)
9927 struct constant_pool *next = pool_list->next;
9928 s390_free_pool (pool_list);
9929 pool_list = next;
9933 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9935 void
9936 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9938 switch (GET_MODE_CLASS (mode))
9940 case MODE_FLOAT:
9941 case MODE_DECIMAL_FLOAT:
9942 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9944 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9945 as_a <scalar_float_mode> (mode), align);
9946 break;
9948 case MODE_INT:
9949 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9950 mark_symbol_refs_as_used (exp);
9951 break;
9953 case MODE_VECTOR_INT:
9954 case MODE_VECTOR_FLOAT:
9956 int i;
9957 machine_mode inner_mode;
9958 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9960 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9961 for (i = 0; i < XVECLEN (exp, 0); i++)
9962 s390_output_pool_entry (XVECEXP (exp, 0, i),
9963 inner_mode,
9964 i == 0
9965 ? align
9966 : GET_MODE_BITSIZE (inner_mode));
9968 break;
9970 default:
9971 gcc_unreachable ();
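/* As an illustration of the recursion above: element 0 is emitted with the
   alignment requested by the caller, every further element with the natural
   alignment of the inner mode, so a V4SImode constant requested at 64-bit
   alignment yields one 64-bit aligned word followed by three 32-bit aligned
   words.  */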
9975 /* Return true if MEM refers to an integer constant in the literal pool. If
9976 VAL is not nullptr, then also fill it with the constant's value. */
9978 bool
9979 s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
9981 /* Try to match the following:
9982 - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
9983 - (mem (symbol_ref)). */
9985 if (!MEM_P (mem))
9986 return false;
9988 rtx addr = XEXP (mem, 0);
9989 rtx sym;
9990 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
9991 sym = XVECEXP (addr, 0, 0);
9992 else
9993 sym = addr;
9995 if (!SYMBOL_REF_P (sym) || !CONSTANT_POOL_ADDRESS_P (sym))
9996 return false;
9998 rtx val_rtx = get_pool_constant (sym);
9999 if (!CONST_INT_P (val_rtx))
10000 return false;
10002 if (val != nullptr)
10003 *val = INTVAL (val_rtx);
10004 return true;
10007 /* Return an RTL expression representing the value of the return address
10008 for the frame COUNT steps up from the current frame. FRAME is the
10009 frame pointer of that frame. */
10012 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
10014 int offset;
10015 rtx addr;
10017 /* Without backchain, we fail for all but the current frame. */
10019 if (!TARGET_BACKCHAIN && count > 0)
10020 return NULL_RTX;
10022 /* For the current frame, we need to make sure the initial
10023 value of RETURN_REGNUM is actually saved. */
10025 if (count == 0)
10026 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
10028 if (TARGET_PACKED_STACK)
10029 offset = -2 * UNITS_PER_LONG;
10030 else
10031 offset = RETURN_REGNUM * UNITS_PER_LONG;
10033 addr = plus_constant (Pmode, frame, offset);
10034 addr = memory_address (Pmode, addr);
10035 return gen_rtx_MEM (Pmode, addr);
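/* Worked example, assuming the usual 64-bit layout (UNITS_PER_LONG == 8,
   RETURN_REGNUM == r14): for an outer frame the return address is loaded
   from FRAME + 112, i.e. the r14 slot of that frame's register save area,
   whereas with -mpacked-stack it is loaded from FRAME - 16.  */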
10038 /* Return an RTL expression representing the back chain stored in
10039 the current stack frame. */
10042 s390_back_chain_rtx (void)
10044 rtx chain;
10046 gcc_assert (TARGET_BACKCHAIN);
10048 if (TARGET_PACKED_STACK)
10049 chain = plus_constant (Pmode, stack_pointer_rtx,
10050 STACK_POINTER_OFFSET - UNITS_PER_LONG);
10051 else
10052 chain = stack_pointer_rtx;
10054 chain = gen_rtx_MEM (Pmode, chain);
10055 return chain;
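/* Illustration, assuming the usual 64-bit STACK_POINTER_OFFSET of 160:
   without -mpacked-stack the back chain is read from 0(%r15), with
   -mpacked-stack from the topmost slot at 152(%r15).  */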
10058 /* Find the first call-clobbered register unused in a function.
10059 This could be used as a base register in a leaf function
10060 or for holding the return address before the epilogue. */
10062 static int
10063 find_unused_clobbered_reg (void)
10065 int i;
10066 for (i = 0; i < 6; i++)
10067 if (!df_regs_ever_live_p (i))
10068 return i;
10069 return 0;
10073 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
10074 clobbered hard regs in SETREG. */
10076 static void
10077 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
10079 char *regs_ever_clobbered = (char *)data;
10080 unsigned int i, regno;
10081 machine_mode mode = GET_MODE (setreg);
10083 if (GET_CODE (setreg) == SUBREG)
10085 rtx inner = SUBREG_REG (setreg);
10086 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
10087 return;
10088 regno = subreg_regno (setreg);
10090 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
10091 regno = REGNO (setreg);
10092 else
10093 return;
10095 for (i = regno;
10096 i < end_hard_regno (mode, regno);
10097 i++)
10098 regs_ever_clobbered[i] = 1;
10101 /* Walks through all basic blocks of the current function looking
10102 for clobbered hard regs using s390_reg_clobbered_rtx. The entries
10103 of the passed char array REGS_EVER_CLOBBERED are set to one for
10104 each of those regs. */
10106 static void
10107 s390_regs_ever_clobbered (char regs_ever_clobbered[])
10109 basic_block cur_bb;
10110 rtx_insn *cur_insn;
10111 unsigned int i;
10113 memset (regs_ever_clobbered, 0, 32);
10115 /* For non-leaf functions we have to consider all call clobbered regs to be
10116 clobbered. */
10117 if (!crtl->is_leaf)
10119 for (i = 0; i < 32; i++)
10120 regs_ever_clobbered[i] = call_used_regs[i];
10123 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
10124 this work is done by liveness analysis (mark_regs_live_at_end).
10125 Special care is needed for functions containing landing pads. Landing pads
10126 may use the eh registers, but the code which sets these registers is not
10127 contained in that function. Hence s390_regs_ever_clobbered is not able to
10128 deal with this automatically. */
10129 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
10130 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
10131 if (crtl->calls_eh_return
10132 || (cfun->machine->has_landing_pad_p
10133 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
10134 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
10136 /* For nonlocal gotos all call-saved registers have to be saved.
10137 This flag is also set for the unwinding code in libgcc.
10138 See expand_builtin_unwind_init. For regs_ever_live this is done by
10139 reload. */
10140 if (crtl->saves_all_registers)
10141 for (i = 0; i < 32; i++)
10142 if (!call_used_regs[i])
10143 regs_ever_clobbered[i] = 1;
10145 FOR_EACH_BB_FN (cur_bb, cfun)
10147 FOR_BB_INSNS (cur_bb, cur_insn)
10149 rtx pat;
10151 if (!INSN_P (cur_insn))
10152 continue;
10154 pat = PATTERN (cur_insn);
10156 /* Ignore GPR restore insns. */
10157 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
10159 if (GET_CODE (pat) == SET
10160 && GENERAL_REG_P (SET_DEST (pat)))
10162 /* lgdr */
10163 if (GET_MODE (SET_SRC (pat)) == DImode
10164 && FP_REG_P (SET_SRC (pat)))
10165 continue;
10167 /* l / lg */
10168 if (GET_CODE (SET_SRC (pat)) == MEM)
10169 continue;
10172 /* lm / lmg */
10173 if (GET_CODE (pat) == PARALLEL
10174 && load_multiple_operation (pat, VOIDmode))
10175 continue;
10178 note_stores (cur_insn,
10179 s390_reg_clobbered_rtx,
10180 regs_ever_clobbered);
10185 /* Determine the frame area which actually has to be accessed
10186 in the function epilogue. The values are stored at the
10187 given pointers AREA_BOTTOM (address of the lowest used stack
10188 address) and AREA_TOP (address of the first item which does
10189 not belong to the stack frame). */
10191 static void
10192 s390_frame_area (int *area_bottom, int *area_top)
10194 int b, t;
10196 b = INT_MAX;
10197 t = INT_MIN;
10199 if (cfun_frame_layout.first_restore_gpr != -1)
10201 b = (cfun_frame_layout.gprs_offset
10202 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
10203 t = b + (cfun_frame_layout.last_restore_gpr
10204 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
10207 if (TARGET_64BIT && cfun_save_high_fprs_p)
10209 b = MIN (b, cfun_frame_layout.f8_offset);
10210 t = MAX (t, (cfun_frame_layout.f8_offset
10211 + cfun_frame_layout.high_fprs * 8));
10214 if (!TARGET_64BIT)
10216 if (cfun_fpr_save_p (FPR4_REGNUM))
10218 b = MIN (b, cfun_frame_layout.f4_offset);
10219 t = MAX (t, cfun_frame_layout.f4_offset + 8);
10221 if (cfun_fpr_save_p (FPR6_REGNUM))
10223 b = MIN (b, cfun_frame_layout.f4_offset + 8);
10224 t = MAX (t, cfun_frame_layout.f4_offset + 16);
10227 *area_bottom = b;
10228 *area_top = t;
10230 /* Update gpr_save_slots in the frame layout trying to make use of
10231 FPRs as GPR save slots.
10232 This is a helper routine of s390_register_info. */
10234 static void
10235 s390_register_info_gprtofpr ()
10237 int save_reg_slot = FPR0_REGNUM;
10238 int i, j;
10240 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10241 return;
10243 /* builtin_eh_return needs to be able to modify the return address
10244 on the stack. It could also adjust the FPR save slot instead but
10245 is it worth the trouble?! */
10246 if (crtl->calls_eh_return)
10247 return;
10249 for (i = 15; i >= 6; i--)
10251 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
10252 continue;
10254 /* Advance to the next FP register which can be used as a
10255 GPR save slot. */
10256 while ((!call_used_regs[save_reg_slot]
10257 || df_regs_ever_live_p (save_reg_slot)
10258 || cfun_fpr_save_p (save_reg_slot))
10259 && FP_REGNO_P (save_reg_slot))
10260 save_reg_slot++;
10261 if (!FP_REGNO_P (save_reg_slot))
10263 /* We only want to use ldgr/lgdr if we can get rid of
10264 stm/lm entirely. So undo the gpr slot allocation in
10265 case we ran out of FPR save slots. */
10266 for (j = 6; j <= 15; j++)
10267 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
10268 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
10269 break;
10271 cfun_gpr_save_slot (i) = save_reg_slot++;
10275 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
10276 stdarg or -mpreserve-args.
10277 This is a helper routine for s390_register_info. */
10278 static void
10279 s390_register_info_arg_fpr ()
10281 int i;
10282 int min_stdarg_fpr = INT_MAX, max_stdarg_fpr = -1;
10283 int min_preserve_fpr = INT_MAX, max_preserve_fpr = -1;
10284 int min_fpr, max_fpr;
10286 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
10287 f0, f2, f4, f6 for 64 bit. */
10288 if (cfun->stdarg
10289 && TARGET_HARD_FLOAT
10290 && cfun->va_list_fpr_size
10291 && crtl->args.info.fprs < FP_ARG_NUM_REG)
10293 min_stdarg_fpr = crtl->args.info.fprs;
10294 max_stdarg_fpr = min_stdarg_fpr + cfun->va_list_fpr_size - 1;
10295 if (max_stdarg_fpr >= FP_ARG_NUM_REG)
10296 max_stdarg_fpr = FP_ARG_NUM_REG - 1;
10298 /* FPR argument regs start at f0. */
10299 min_stdarg_fpr += FPR0_REGNUM;
10300 max_stdarg_fpr += FPR0_REGNUM;
10303 if (s390_preserve_args_p && crtl->args.info.fprs)
10305 min_preserve_fpr = FPR0_REGNUM;
10306 max_preserve_fpr = MIN (FPR0_REGNUM + FP_ARG_NUM_REG - 1,
10307 FPR0_REGNUM + crtl->args.info.fprs - 1);
10310 min_fpr = MIN (min_stdarg_fpr, min_preserve_fpr);
10311 max_fpr = MAX (max_stdarg_fpr, max_preserve_fpr);
10313 if (max_fpr == -1)
10314 return;
10316 for (i = min_fpr; i <= max_fpr; i++)
10317 cfun_set_fpr_save (i);
10321 /* Reserve the GPR save slots for GPRs which need to be saved due to
10322 stdarg or -mpreserve-args.
10323 This is a helper routine for s390_register_info. */
10325 static void
10326 s390_register_info_arg_gpr ()
10328 int i;
10329 int min_stdarg_gpr = INT_MAX, max_stdarg_gpr = -1;
10330 int min_preserve_gpr = INT_MAX, max_preserve_gpr = -1;
10331 int min_gpr, max_gpr;
10333 if (cfun->stdarg
10334 && cfun->va_list_gpr_size
10335 && crtl->args.info.gprs < GP_ARG_NUM_REG)
10337 min_stdarg_gpr = crtl->args.info.gprs;
10338 max_stdarg_gpr = min_stdarg_gpr + cfun->va_list_gpr_size - 1;
10339 if (max_stdarg_gpr >= GP_ARG_NUM_REG)
10340 max_stdarg_gpr = GP_ARG_NUM_REG - 1;
10342 /* GPR argument regs start at r2. */
10343 min_stdarg_gpr += GPR2_REGNUM;
10344 max_stdarg_gpr += GPR2_REGNUM;
10347 if (s390_preserve_args_p && crtl->args.info.gprs)
10349 min_preserve_gpr = GPR2_REGNUM;
10350 max_preserve_gpr = MIN (GPR6_REGNUM,
10351 GPR2_REGNUM + crtl->args.info.gprs - 1);
10354 min_gpr = MIN (min_stdarg_gpr, min_preserve_gpr);
10355 max_gpr = MAX (max_stdarg_gpr, max_preserve_gpr);
10357 if (max_gpr == -1)
10358 return;
10360 /* If r6 was supposed to be saved into an FPR and now needs to go to
10361 the stack for vararg we have to adjust the restore range to make
10362 sure that the restore is done from stack as well. */
10363 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
10364 && min_gpr <= GPR6_REGNUM
10365 && max_gpr >= GPR6_REGNUM)
10367 if (cfun_frame_layout.first_restore_gpr == -1
10368 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
10369 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
10370 if (cfun_frame_layout.last_restore_gpr == -1
10371 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
10372 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
10375 if (cfun_frame_layout.first_save_gpr == -1
10376 || cfun_frame_layout.first_save_gpr > min_gpr)
10377 cfun_frame_layout.first_save_gpr = min_gpr;
10379 if (cfun_frame_layout.last_save_gpr == -1
10380 || cfun_frame_layout.last_save_gpr < max_gpr)
10381 cfun_frame_layout.last_save_gpr = max_gpr;
10383 for (i = min_gpr; i <= max_gpr; i++)
10384 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10387 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
10388 prologue and epilogue. */
10390 static void
10391 s390_register_info_set_ranges ()
10393 int i, j;
10395 /* Find the first and the last save slot supposed to use the stack
10396 to set the restore range.
10397 Vararg regs might be marked to be saved to the stack but only the
10398 call-saved regs really need restoring (i.e. r6). This code
10399 assumes that the vararg regs have not yet been recorded in
10400 cfun_gpr_save_slot. */
10401 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
10402 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
10403 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
10404 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
10405 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
10406 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
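/* Note that the resulting range is contiguous even if some register in
   between needs no save slot: if, say, only r6 and r14 end up with
   SAVE_SLOT_STACK, the stm(g)/lm(g) range still covers r6-r14, since the
   store/load-multiple instructions operate on consecutive registers.  */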
10409 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
10410 for registers which need to be saved in function prologue.
10411 This function can be used until the insns emitted for save/restore
10412 of the regs are visible in the RTL stream. */
10414 static void
10415 s390_register_info ()
10417 int i;
10418 char clobbered_regs[32];
10420 gcc_assert (!epilogue_completed);
10422 if (reload_completed)
10423 /* After reload we rely on our own routine to determine which
10424 registers need saving. */
10425 s390_regs_ever_clobbered (clobbered_regs);
10426 else
10427 /* During reload we use regs_ever_live as a base since reload
10428 does changes in there which we otherwise would not be aware
10429 of. */
10430 for (i = 0; i < 32; i++)
10431 clobbered_regs[i] = df_regs_ever_live_p (i);
10433 for (i = 0; i < 32; i++)
10434 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10436 /* Mark the call-saved FPRs which need to be saved.
10437 This needs to be done before checking the special GPRs since the
10438 stack pointer usage depends on whether high FPRs have to be saved
10439 or not. */
10440 cfun_frame_layout.fpr_bitmap = 0;
10441 cfun_frame_layout.high_fprs = 0;
10442 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10443 if (clobbered_regs[i] && !call_used_regs[i])
10445 cfun_set_fpr_save (i);
10446 if (i >= FPR8_REGNUM)
10447 cfun_frame_layout.high_fprs++;
10450 /* Register 12 is used for GOT address, but also as temp in prologue
10451 for split-stack stdarg functions (unless r14 is available). */
10452 clobbered_regs[12]
10453 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10454 || (flag_split_stack && cfun->stdarg
10455 && (crtl->is_leaf || TARGET_TPF_PROFILING
10456 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
10458 clobbered_regs[BASE_REGNUM]
10459 |= (cfun->machine->base_reg
10460 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
10462 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
10463 |= !!frame_pointer_needed;
10465 /* On pre z900 machines this might take until machine dependent
10466 reorg to decide.
10467 save_return_addr_p will only be set on non-zarch machines so
10468 there is no risk that r14 goes into an FPR instead of a stack
10469 slot. */
10470 clobbered_regs[RETURN_REGNUM]
10471 |= (!crtl->is_leaf
10472 || TARGET_TPF_PROFILING
10473 || cfun_frame_layout.save_return_addr_p
10474 || crtl->calls_eh_return);
10476 clobbered_regs[STACK_POINTER_REGNUM]
10477 |= (!crtl->is_leaf
10478 || TARGET_TPF_PROFILING
10479 || cfun_save_high_fprs_p
10480 || get_frame_size () > 0
10481 || (reload_completed && cfun_frame_layout.frame_size > 0)
10482 || cfun->calls_alloca);
10484 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10486 for (i = 0; i < 16; i++)
10487 if (clobbered_regs[i] && !call_used_regs[i])
10488 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10490 s390_register_info_arg_fpr ();
10491 s390_register_info_gprtofpr ();
10492 s390_register_info_set_ranges ();
10494 /* Forcing argument registers to be saved on the stack might
10495 override the GPR->FPR save decision for r6 so this must come
10496 last. */
10497 s390_register_info_arg_gpr ();
10500 /* Return true if REGNO is a global register, but not one
10501 of the special ones that need to be saved/restored anyway.
10503 static inline bool
10504 global_not_special_regno_p (int regno)
10506 return (global_regs[regno]
10507 /* These registers are special and need to be
10508 restored in any case. */
10509 && !(regno == STACK_POINTER_REGNUM
10510 || regno == RETURN_REGNUM
10511 || regno == BASE_REGNUM
10512 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10515 /* This function is called by s390_optimize_prologue in order to get
10516 rid of unnecessary GPR save/restore instructions. The register info
10517 for the GPRs is re-computed and the ranges are re-calculated. */
10519 static void
10520 s390_optimize_register_info ()
10522 char clobbered_regs[32];
10523 int i;
10525 gcc_assert (epilogue_completed);
10527 s390_regs_ever_clobbered (clobbered_regs);
10529 /* Global registers do not need to be saved and restored unless it
10530 is one of our special regs. (r12, r13, r14, or r15). */
10531 for (i = 0; i < 32; i++)
10532 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
10534 /* There is still special treatment needed for cases invisible to
10535 s390_regs_ever_clobbered. */
10536 clobbered_regs[RETURN_REGNUM]
10537 |= (TARGET_TPF_PROFILING
10538 /* When expanding builtin_return_addr in ESA mode we do not
10539 know whether r14 will later be needed as scratch reg when
10540 doing branch splitting. So the builtin always accesses the
10541 r14 save slot and we need to stick to the save/restore
10542 decision for r14 even if it turns out that it didn't get
10543 clobbered. */
10544 || cfun_frame_layout.save_return_addr_p
10545 || crtl->calls_eh_return);
10547 for (i = 0; i < 16; i++)
10548 if (!clobbered_regs[i] || call_used_regs[i])
10549 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10551 s390_register_info_set_ranges ();
10552 s390_register_info_arg_gpr ();
10555 /* Fill cfun->machine with info about frame of current function. */
10557 static void
10558 s390_frame_info (void)
10560 HOST_WIDE_INT lowest_offset;
10562 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10563 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10565 /* The va_arg builtin uses a constant distance of 16 *
10566 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10567 pointer. So even if we are going to save the stack pointer in an
10568 FPR we need the stack space in order to keep the offsets
10569 correct. */
10570 if (cfun->stdarg && cfun_save_arg_fprs_p)
10572 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10574 if (cfun_frame_layout.first_save_gpr_slot == -1)
10575 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10578 cfun_frame_layout.frame_size = get_frame_size ();
10579 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10580 fatal_error (input_location,
10581 "total size of local variables exceeds architecture limit");
10583 if (!TARGET_PACKED_STACK)
10585 /* Fixed stack layout. */
10586 cfun_frame_layout.backchain_offset = 0;
10587 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10588 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10589 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10590 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10591 * UNITS_PER_LONG);
10593 else if (TARGET_BACKCHAIN)
10595 /* Kernel stack layout - packed stack, backchain, no float */
10596 gcc_assert (TARGET_SOFT_FLOAT);
10597 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10598 - UNITS_PER_LONG);
10600 /* The distance between the backchain and the return address
10601 save slot must not change. So we always need a slot for the
10602 stack pointer which resides in between. */
10603 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10605 cfun_frame_layout.gprs_offset
10606 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10608 /* FPRs will not be saved. Nevertheless pick sane values to
10609 keep area calculations valid. */
10610 cfun_frame_layout.f0_offset =
10611 cfun_frame_layout.f4_offset =
10612 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10614 else
10616 int num_fprs;
10618 /* Packed stack layout without backchain. */
10620 /* With stdarg FPRs need their dedicated slots. */
10621 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10622 : (cfun_fpr_save_p (FPR4_REGNUM) +
10623 cfun_fpr_save_p (FPR6_REGNUM)));
10624 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10626 num_fprs = (cfun->stdarg ? 2
10627 : (cfun_fpr_save_p (FPR0_REGNUM)
10628 + cfun_fpr_save_p (FPR2_REGNUM)));
10629 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10631 cfun_frame_layout.gprs_offset
10632 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10634 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10635 - cfun_frame_layout.high_fprs * 8);
10638 if (cfun_save_high_fprs_p)
10639 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10641 if (!crtl->is_leaf)
10642 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10644 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10645 sized area at the bottom of the stack. This is required also for
10646 leaf functions. When GCC generates a local stack reference it
10647 will always add STACK_POINTER_OFFSET to all these references. */
10648 if (crtl->is_leaf
10649 && !TARGET_TPF_PROFILING
10650 && cfun_frame_layout.frame_size == 0
10651 && !cfun->calls_alloca)
10652 return;
10654 /* Calculate the number of bytes we have used in our own register
10655 save area. With the packed stack layout we can re-use the
10656 remaining bytes for normal stack elements. */
10658 if (TARGET_PACKED_STACK)
10659 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10660 cfun_frame_layout.f4_offset),
10661 cfun_frame_layout.gprs_offset);
10662 else
10663 lowest_offset = 0;
10665 if (TARGET_BACKCHAIN)
10666 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10668 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10670 /* If, on 31 bit, an odd number of GPRs has to be saved, we have to
10671 adjust the frame size to maintain 8 byte alignment of stack
10672 frames. */
10673 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10674 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10675 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10678 /* Generate frame layout. Fills in register and frame data for the current
10679 function in cfun->machine. This routine can be called multiple times;
10680 it will re-do the complete frame layout every time. */
10682 static void
10683 s390_init_frame_layout (void)
10685 HOST_WIDE_INT frame_size;
10686 int base_used;
10688 /* After LRA the frame layout is supposed to be read-only and should
10689 not be re-computed. */
10690 if (reload_completed)
10691 return;
10695 frame_size = cfun_frame_layout.frame_size;
10697 /* Try to predict whether we'll need the base register. */
10698 base_used = crtl->uses_const_pool
10699 || (!DISP_IN_RANGE (frame_size)
10700 && !CONST_OK_FOR_K (frame_size));
10702 /* Decide which register to use as literal pool base. In small
10703 leaf functions, try to use an unused call-clobbered register
10704 as base register to avoid save/restore overhead. */
10705 if (!base_used)
10706 cfun->machine->base_reg = NULL_RTX;
10707 else
10709 int br = 0;
10711 if (crtl->is_leaf)
10712 /* Prefer r5 (most likely to be free). */
10713 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10715 cfun->machine->base_reg =
10716 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10719 s390_register_info ();
10720 s390_frame_info ();
10722 while (frame_size != cfun_frame_layout.frame_size);
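/* The do/while loop above iterates because the decisions are mutually
   dependent: whether a literal pool base register is needed depends on the
   frame size, while reserving the base register changes the register save
   layout and therefore the frame size again.  Iteration stops once the
   computed frame size reaches a fixed point.  */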
10725 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10726 the TX is nonescaping. A transaction is considered escaping if
10727 there is at least one path from tbegin returning CC0 to the
10728 function exit block without a tend.
10730 The check so far has some limitations:
10731 - only single tbegin/tend BBs are supported
10732 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10733 - when CC is copied to a GPR and the CC0 check is done on the GPR,
10734 this is not supported
10737 static void
10738 s390_optimize_nonescaping_tx (void)
10740 const unsigned int CC0 = 1 << 3;
10741 basic_block tbegin_bb = NULL;
10742 basic_block tend_bb = NULL;
10743 basic_block bb;
10744 rtx_insn *insn;
10745 bool result = true;
10746 int bb_index;
10747 rtx_insn *tbegin_insn = NULL;
10749 if (!cfun->machine->tbegin_p)
10750 return;
10752 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10754 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10756 if (!bb)
10757 continue;
10759 FOR_BB_INSNS (bb, insn)
10761 rtx ite, cc, pat, target;
10762 unsigned HOST_WIDE_INT mask;
10764 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10765 continue;
10767 pat = PATTERN (insn);
10769 if (GET_CODE (pat) == PARALLEL)
10770 pat = XVECEXP (pat, 0, 0);
10772 if (GET_CODE (pat) != SET
10773 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10774 continue;
10776 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10778 rtx_insn *tmp;
10780 tbegin_insn = insn;
10782 /* Just return if the tbegin doesn't have clobbers. */
10783 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10784 return;
10786 if (tbegin_bb != NULL)
10787 return;
10789 /* Find the next conditional jump. */
10790 for (tmp = NEXT_INSN (insn);
10791 tmp != NULL_RTX;
10792 tmp = NEXT_INSN (tmp))
10794 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10795 return;
10796 if (!JUMP_P (tmp))
10797 continue;
10799 ite = SET_SRC (PATTERN (tmp));
10800 if (GET_CODE (ite) != IF_THEN_ELSE)
10801 continue;
10803 cc = XEXP (XEXP (ite, 0), 0);
10804 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10805 || GET_MODE (cc) != CCRAWmode
10806 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10807 return;
10809 if (bb->succs->length () != 2)
10810 return;
10812 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10813 if (GET_CODE (XEXP (ite, 0)) == NE)
10814 mask ^= 0xf;
10816 if (mask == CC0)
10817 target = XEXP (ite, 1);
10818 else if (mask == (CC0 ^ 0xf))
10819 target = XEXP (ite, 2);
10820 else
10821 return;
10824 edge_iterator ei;
10825 edge e1, e2;
10827 ei = ei_start (bb->succs);
10828 e1 = ei_safe_edge (ei);
10829 ei_next (&ei);
10830 e2 = ei_safe_edge (ei);
10832 if (e2->flags & EDGE_FALLTHRU)
10834 e2 = e1;
10835 e1 = ei_safe_edge (ei);
10838 if (!(e1->flags & EDGE_FALLTHRU))
10839 return;
10841 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10843 if (tmp == BB_END (bb))
10844 break;
10848 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10850 if (tend_bb != NULL)
10851 return;
10852 tend_bb = bb;
10857 /* Either we successfully remove the FPR clobbers here or we are not
10858 able to do anything for this TX. Both cases don't qualify for
10859 another look. */
10860 cfun->machine->tbegin_p = false;
10862 if (tbegin_bb == NULL || tend_bb == NULL)
10863 return;
10865 calculate_dominance_info (CDI_POST_DOMINATORS);
10866 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10867 free_dominance_info (CDI_POST_DOMINATORS);
10869 if (!result)
10870 return;
10872 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10873 gen_rtvec (2,
10874 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10875 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10876 INSN_CODE (tbegin_insn) = -1;
10877 df_insn_rescan (tbegin_insn);
10879 return;
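/* The rewrite above keeps only the first two elements of the tbegin
   PARALLEL; the remaining elements, i.e. the FPR clobbers this pass tries
   to prove unnecessary, are dropped, and the insn is rescanned so the
   dataflow information matches the reduced clobber set.  */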
10882 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10883 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10885 static unsigned int
10886 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10888 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10891 /* Implement TARGET_HARD_REGNO_MODE_OK.
10893 Integer modes <= word size fit into any GPR.
10894 Integer modes > word size fit into successive GPRs, starting with
10895 an even-numbered register.
10896 SImode and DImode fit into FPRs as well.
10898 Floating point modes <= word size fit into any FPR or GPR.
10899 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10900 into any FPR, or an even-odd GPR pair.
10901 TFmode fits only into an even-odd FPR pair.
10903 Complex floating point modes fit either into two FPRs, or into
10904 successive GPRs (again starting with an even number).
10905 TCmode fits only into two successive even-odd FPR pairs.
10907 Condition code modes fit only into the CC register. */
10909 static bool
10910 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10912 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10913 return false;
10915 switch (REGNO_REG_CLASS (regno))
10917 case VEC_REGS:
10918 return ((GET_MODE_CLASS (mode) == MODE_INT
10919 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10920 || mode == DFmode
10921 || (TARGET_VXE && mode == SFmode)
10922 || s390_vector_mode_supported_p (mode));
10923 break;
10924 case FP_REGS:
10925 if (TARGET_VX
10926 && ((GET_MODE_CLASS (mode) == MODE_INT
10927 && s390_class_max_nregs (FP_REGS, mode) == 1)
10928 || mode == DFmode
10929 || s390_vector_mode_supported_p (mode)))
10930 return true;
10932 if (REGNO_PAIR_OK (regno, mode))
10934 if (mode == SImode || mode == DImode)
10935 return true;
10937 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10938 return true;
10940 break;
10941 case ADDR_REGS:
10942 if (FRAME_REGNO_P (regno) && mode == Pmode)
10943 return true;
10945 /* fallthrough */
10946 case GENERAL_REGS:
10947 if (REGNO_PAIR_OK (regno, mode))
10949 if (TARGET_ZARCH
10950 || (mode != TFmode && mode != TCmode && mode != TDmode))
10951 return true;
10953 break;
10954 case CC_REGS:
10955 if (GET_MODE_CLASS (mode) == MODE_CC)
10956 return true;
10957 break;
10958 case ACCESS_REGS:
10959 if (REGNO_PAIR_OK (regno, mode))
10961 if (mode == SImode || mode == Pmode)
10962 return true;
10964 break;
10965 default:
10966 return false;
10969 return false;
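/* A few illustrative consequences of the rules above: TImode requires an
   even/odd GPR pair, so on z/Architecture it is accepted in r2 but rejected
   in r3; SImode and DImode are accepted in a single FPR; condition code
   modes are only accepted in the CC register.  */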
10972 /* Implement TARGET_MODES_TIEABLE_P. */
10974 static bool
10975 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10977 return ((mode1 == SFmode || mode1 == DFmode)
10978 == (mode2 == SFmode || mode2 == DFmode));
10981 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10983 bool
10984 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10986 /* Once we've decided upon a register to use as base register, it must
10987 no longer be used for any other purpose. */
10988 if (cfun->machine->base_reg)
10989 if (REGNO (cfun->machine->base_reg) == old_reg
10990 || REGNO (cfun->machine->base_reg) == new_reg)
10991 return false;
10993 /* Prevent regrename from using call-saved regs which haven't
10994 actually been saved. This is necessary since regrename assumes
10995 the backend save/restore decisions are based on
10996 df_regs_ever_live. Since we have our own routine we have to tell
10997 regrename manually about it. */
10998 if (GENERAL_REGNO_P (new_reg)
10999 && !call_used_regs[new_reg]
11000 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
11001 return false;
11003 return true;
11006 /* Return nonzero if register REGNO can be used as a scratch register
11007 in peephole2. */
11009 static bool
11010 s390_hard_regno_scratch_ok (unsigned int regno)
11012 /* See s390_hard_regno_rename_ok. */
11013 if (GENERAL_REGNO_P (regno)
11014 && !call_used_regs[regno]
11015 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
11016 return false;
11018 return true;
11021 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
11022 code that runs in z/Architecture mode, but conforms to the 31-bit
11023 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
11024 bytes are saved across calls, however. */
11026 static bool
11027 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
11028 machine_mode mode)
11030 /* For r12 we know that the only bits we actually care about are
11031 preserved across function calls. Since r12 is a fixed reg all
11032 accesses to r12 are generated by the backend.
11034 This workaround is necessary until gcse implements proper
11035 tracking of partially clobbered registers. */
11036 if (!TARGET_64BIT
11037 && TARGET_ZARCH
11038 && GET_MODE_SIZE (mode) > 4
11039 && (!flag_pic || regno != PIC_OFFSET_TABLE_REGNUM)
11040 && ((regno >= 6 && regno <= 15) || regno == 32))
11041 return true;
11043 if (TARGET_VX
11044 && GET_MODE_SIZE (mode) > 8
11045 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
11046 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
11047 return true;
11049 return false;
11052 /* Maximum number of registers to represent a value of mode MODE
11053 in a register of class RCLASS. */
11056 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
11058 int reg_size;
11059 bool reg_pair_required_p = false;
11061 switch (rclass)
11063 case FP_REGS:
11064 case VEC_REGS:
11065 reg_size = TARGET_VX ? 16 : 8;
11067 /* TF and TD modes would fit into a VR but we put them into a
11068 register pair since we do not have 128bit FP instructions on
11069 full VRs. */
11070 if (TARGET_VX
11071 && SCALAR_FLOAT_MODE_P (mode)
11072 && GET_MODE_SIZE (mode) >= 16
11073 && !(TARGET_VXE && mode == TFmode))
11074 reg_pair_required_p = true;
11076 /* Even if complex types would fit into a single FPR/VR we force
11077 them into a register pair to deal with the parts more easily.
11078 (FIXME: What about complex ints?) */
11079 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
11080 reg_pair_required_p = true;
11081 break;
11082 case ACCESS_REGS:
11083 reg_size = 4;
11084 break;
11085 default:
11086 reg_size = UNITS_PER_WORD;
11087 break;
11090 if (reg_pair_required_p)
11091 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
11093 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
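/* Worked example: with the vector facility but without the vector
   enhancements facility, reg_size is 16 and a TFmode value in FP_REGS sets
   reg_pair_required_p, giving 2 * ((16/2 + 15) / 16) = 2 registers, i.e.
   an even/odd pair; with vector enhancements TFmode is not forced into a
   pair and (16 + 15) / 16 = 1 vector register suffices.  */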
11096 /* Return nonzero if mode M describes a 128-bit float in a floating point
11097 register pair. */
11099 static bool
11100 s390_is_fpr128 (machine_mode m)
11102 return m == FPRX2mode || (!TARGET_VXE && m == TFmode);
11105 /* Return nonzero if mode M describes a 128-bit float in a vector
11106 register. */
11108 static bool
11109 s390_is_vr128 (machine_mode m)
11111 return m == V1TFmode || (TARGET_VXE && m == TFmode);
11114 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
11116 static bool
11117 s390_can_change_mode_class (machine_mode from_mode,
11118 machine_mode to_mode,
11119 reg_class_t rclass)
11121 machine_mode small_mode;
11122 machine_mode big_mode;
11124 /* 128-bit values have different representations in floating point and
11125 vector registers. */
11126 if (reg_classes_intersect_p (VEC_REGS, rclass)
11127 && ((s390_is_fpr128 (from_mode) && s390_is_vr128 (to_mode))
11128 || (s390_is_vr128 (from_mode) && s390_is_fpr128 (to_mode))))
11129 return false;
11131 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
11132 return true;
11134 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
11136 small_mode = from_mode;
11137 big_mode = to_mode;
11139 else
11141 small_mode = to_mode;
11142 big_mode = from_mode;
11145 /* Values residing in VRs are little-endian style. All modes are
11146 placed left-aligned in an VR. This means that we cannot allow
11147 switching between modes with differing sizes. Also if the vector
11148 facility is available we still place TFmode values in VR register
11149 pairs, since the only instructions we have operating on TFmodes
11150 only deal with register pairs. Therefore we have to allow DFmode
11151 subregs of TFmodes to enable the TFmode splitters. */
11152 if (reg_classes_intersect_p (VEC_REGS, rclass)
11153 && (GET_MODE_SIZE (small_mode) < 8
11154 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
11155 return false;
11157 /* Likewise for access registers, since they have only half the
11158 word size on 64-bit. */
11159 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
11160 return false;
11162 return true;
11165 /* Return true if we use LRA instead of reload pass. */
11166 static bool
11167 s390_lra_p (void)
11169 return s390_lra_flag;
11172 /* Return true if register FROM can be eliminated via register TO. */
11174 static bool
11175 s390_can_eliminate (const int from, const int to)
11177 /* We have not marked the base register as fixed.
11178 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
11179 If a function requires the base register, we say here that this
11180 elimination cannot be performed. This will cause reload to free
11181 up the base register (as if it were fixed). On the other hand,
11182 if the current function does *not* require the base register, we
11183 say here the elimination succeeds, which in turn allows reload
11184 to allocate the base register for any other purpose. */
11185 if (from == BASE_REGNUM && to == BASE_REGNUM)
11187 s390_init_frame_layout ();
11188 return cfun->machine->base_reg == NULL_RTX;
11191 /* Everything else must point into the stack frame. */
11192 gcc_assert (to == STACK_POINTER_REGNUM
11193 || to == HARD_FRAME_POINTER_REGNUM);
11195 gcc_assert (from == FRAME_POINTER_REGNUM
11196 || from == ARG_POINTER_REGNUM
11197 || from == RETURN_ADDRESS_POINTER_REGNUM);
11199 /* Make sure we actually saved the return address. */
11200 if (from == RETURN_ADDRESS_POINTER_REGNUM)
11201 if (!crtl->calls_eh_return
11202 && !cfun->stdarg
11203 && !cfun_frame_layout.save_return_addr_p)
11204 return false;
11206 return true;
11209 /* Return offset between register FROM and TO initially after prolog. */
11211 HOST_WIDE_INT
11212 s390_initial_elimination_offset (int from, int to)
11214 HOST_WIDE_INT offset;
11216 /* ??? Why are we called for non-eliminable pairs? */
11217 if (!s390_can_eliminate (from, to))
11218 return 0;
11220 switch (from)
11222 case FRAME_POINTER_REGNUM:
11223 offset = (get_frame_size()
11224 + STACK_POINTER_OFFSET
11225 + crtl->outgoing_args_size);
11226 break;
11228 case ARG_POINTER_REGNUM:
11229 s390_init_frame_layout ();
11230 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
11231 break;
11233 case RETURN_ADDRESS_POINTER_REGNUM:
11234 s390_init_frame_layout ();
11236 if (cfun_frame_layout.first_save_gpr_slot == -1)
11238 /* If it turns out that for stdarg nothing went into the reg
11239 save area we also do not need the return address
11240 pointer. */
11241 if (cfun->stdarg && !cfun_save_arg_fprs_p)
11242 return 0;
11244 gcc_unreachable ();
11247 /* In order to make the following work it is not necessary for
11248 r14 to have a save slot. It is sufficient if one other GPR
11249 got one. Since the GPRs are always stored without gaps we
11250 are able to calculate where the r14 save slot would
11251 reside. */
11252 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
11253 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
11254 UNITS_PER_LONG);
11255 break;
11257 case BASE_REGNUM:
11258 offset = 0;
11259 break;
11261 default:
11262 gcc_unreachable ();
11265 return offset;
11268 /* Emit insn to save fpr REGNUM at offset OFFSET relative
11269 to register BASE. Return generated insn. */
11271 static rtx
11272 save_fpr (rtx base, int offset, int regnum)
11274 rtx addr;
11275 rtx insn;
11277 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
11279 if (regnum >= FPR0_REGNUM && regnum <= (FPR0_REGNUM + FP_ARG_NUM_REG))
11280 set_mem_alias_set (addr, get_varargs_alias_set ());
11281 else
11282 set_mem_alias_set (addr, get_frame_alias_set ());
11284 insn = emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
11286 if (!call_used_regs[regnum] || s390_preserve_fpr_arg_p (regnum))
11287 RTX_FRAME_RELATED_P (insn) = 1;
11289 if (s390_preserve_fpr_arg_p (regnum) && !cfun_fpr_save_p (regnum))
11291 rtx reg = gen_rtx_REG (DFmode, regnum);
11292 add_reg_note (insn, REG_CFA_NO_RESTORE, reg);
11293 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (addr, reg));
11296 return insn;
11299 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
11300 to register BASE. Return generated insn. */
11302 static rtx
11303 restore_fpr (rtx base, int offset, int regnum)
11305 rtx addr;
11306 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
11307 set_mem_alias_set (addr, get_frame_alias_set ());
11309 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
11312 /* Generate insn to save registers FIRST to LAST into
11313 the register save area located at offset OFFSET
11314 relative to register BASE. */
11316 static void
11317 save_gprs (rtx base, int offset, int first, int last, rtx_insn *before = NULL)
11319 rtx addr, insn, note;
11320 rtx_insn *out_insn;
11321 int i;
11323 addr = plus_constant (Pmode, base, offset);
11324 addr = gen_frame_mem (Pmode, addr);
11326 /* Special-case single register. */
11327 if (first == last)
11329 if (TARGET_64BIT)
11330 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
11331 else
11332 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
11334 if (!global_not_special_regno_p (first))
11335 RTX_FRAME_RELATED_P (insn) = 1;
11337 if (s390_preserve_gpr_arg_p (first) && !s390_restore_gpr_p (first))
11339 rtx reg = gen_rtx_REG (Pmode, first);
11340 add_reg_note (insn, REG_CFA_NO_RESTORE, reg);
11341 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (addr, reg));
11344 goto emit;
11348 insn = gen_store_multiple (addr,
11349 gen_rtx_REG (Pmode, first),
11350 GEN_INT (last - first + 1));
11352 if (first <= 6 && cfun->stdarg)
11353 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
11355 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
11357 if (first + i <= 6)
11358 set_mem_alias_set (mem, get_varargs_alias_set ());
11361 /* We need to set the FRAME_RELATED flag on all SETs
11362 inside the store-multiple pattern.
11364 However, we must not emit DWARF records for registers 2..5
11365 if they are stored for use by variable arguments ...
11367 ??? Unfortunately, it is not enough to simply not set the
11368 FRAME_RELATED flags for those SETs, because the first SET
11369 of the PARALLEL is always treated as if it had the flag
11370 set, even if it does not. Therefore we emit a new pattern
11371 without those registers as REG_FRAME_RELATED_EXPR note. */
11373 /* In these cases all of the sets are marked as frame related:
11374 1. call-save GPR saved and restored
11375 2. argument GPR saved because of -mpreserve-args */
11376 if ((first >= GPR6_REGNUM && !global_not_special_regno_p (first))
11377 || s390_preserve_gpr_arg_in_range_p (first, last))
11380 rtx pat = PATTERN (insn);
11382 for (i = 0; i < XVECLEN (pat, 0); i++)
11383 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
11384 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
11385 0, i)))))
11386 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
11388 RTX_FRAME_RELATED_P (insn) = 1;
11390 /* For the -mpreserve-args register saves no restore operations
11391 will be emitted. CFI checking would complain about this. We
11392 manually generate the REG_CFA notes here to be able to mark
11393 those operations with REG_CFA_NO_RESTORE. */
11394 if (s390_preserve_gpr_arg_in_range_p (first, last))
11396 for (int regno = first; regno <= last; regno++)
11398 rtx reg = gen_rtx_REG (Pmode, regno);
11399 rtx reg_addr = plus_constant (Pmode, base,
11400 offset + (regno - first) * UNITS_PER_LONG);
11401 if (!s390_restore_gpr_p (regno))
11402 add_reg_note (insn, REG_CFA_NO_RESTORE, reg);
11403 add_reg_note (insn, REG_CFA_OFFSET,
11404 gen_rtx_SET (gen_frame_mem (Pmode, reg_addr), reg));
11408 else if (last >= 6)
11410 int start;
11412 for (start = first >= 6 ? first : 6; start <= last; start++)
11413 if (!global_not_special_regno_p (start))
11414 break;
11416 if (start > last)
11417 goto emit;
11419 addr = plus_constant (Pmode, base,
11420 offset + (start - first) * UNITS_PER_LONG);
11422 if (start == last)
11424 if (TARGET_64BIT)
11425 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
11426 gen_rtx_REG (Pmode, start));
11427 else
11428 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
11429 gen_rtx_REG (Pmode, start));
11430 note = PATTERN (note);
11432 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
11433 RTX_FRAME_RELATED_P (insn) = 1;
11435 goto emit;
11438 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
11439 gen_rtx_REG (Pmode, start),
11440 GEN_INT (last - start + 1));
11441 note = PATTERN (note);
11443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
11445 for (i = 0; i < XVECLEN (note, 0); i++)
11446 if (GET_CODE (XVECEXP (note, 0, i)) == SET
11447 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
11448 0, i)))))
11449 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
11451 RTX_FRAME_RELATED_P (insn) = 1;
11454 emit:
11455 if (before != NULL_RTX)
11456 out_insn = emit_insn_before (insn, before);
11457 else
11458 out_insn = emit_insn (insn);
11459 INSN_ADDRESSES_NEW (out_insn, -1);
11463 /* Generate insn to restore registers FIRST to LAST from
11464 the register save area located at offset OFFSET
11465 relative to register BASE. */
11467 static rtx
11468 restore_gprs (rtx base, int offset, int first, int last)
11470 rtx addr, insn;
11472 addr = plus_constant (Pmode, base, offset);
11473 addr = gen_frame_mem (Pmode, addr);
11475 /* Special-case single register. */
11476 if (first == last)
11478 if (TARGET_64BIT)
11479 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
11480 else
11481 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
11483 RTX_FRAME_RELATED_P (insn) = 1;
11484 return insn;
11487 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
11488 addr,
11489 GEN_INT (last - first + 1));
11490 RTX_FRAME_RELATED_P (insn) = 1;
11491 return insn;
11494 /* Return insn sequence to load the GOT register. */
11496 rtx_insn *
11497 s390_load_got (void)
11499 rtx_insn *insns;
11501 /* We cannot use pic_offset_table_rtx here since we use this
11502 function also for non-pic if __tls_get_offset is called and in
11503 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
11504 aren't usable. */
11505 rtx got_rtx = gen_rtx_REG (Pmode, 12);
11507 start_sequence ();
11509 emit_move_insn (got_rtx, s390_got_symbol ());
11511 insns = get_insns ();
11512 end_sequence ();
11513 return insns;
11516 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
11517 and the change to the stack pointer. */
11519 static void
11520 s390_emit_stack_tie (void)
11522 rtx mem = gen_frame_mem (BLKmode, stack_pointer_rtx);
11523 if (frame_pointer_needed)
11524 emit_insn (gen_stack_tie (Pmode, mem, hard_frame_pointer_rtx));
11525 else
11526 emit_insn (gen_stack_tie (Pmode, mem, stack_pointer_rtx));
11529 /* Copy GPRS into FPR save slots. */
11531 static void
11532 s390_save_gprs_to_fprs (void)
11534 int i;
11536 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11537 return;
11539 for (i = 6; i < 16; i++)
11541 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11543 rtx_insn *insn =
11544 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11545 gen_rtx_REG (DImode, i));
11546 RTX_FRAME_RELATED_P (insn) = 1;
11547 /* This prevents dwarf2cfi from interpreting the set. Doing
11548 so it might emit def_cfa_register infos setting an FPR as
11549 new CFA. */
11550 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11555 /* Restore GPRs from FPR save slots. */
11557 static void
11558 s390_restore_gprs_from_fprs (void)
11560 int i;
11562 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11563 return;
11565 /* Restore the GPRs starting with the stack pointer. That way the
11566 stack pointer already has its original value when it comes to
11567 restoring the hard frame pointer. So we can set the cfa reg back
11568 to the stack pointer. */
11569 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
11571 rtx_insn *insn;
11573 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11574 continue;
11576 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11578 if (i == STACK_POINTER_REGNUM)
11579 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11580 else
11581 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11583 df_set_regs_ever_live (i, true);
11584 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11586 /* If either the stack pointer or the frame pointer gets restored,
11587 set the CFA value to its value at function start. Doing this
11588 for the frame pointer results in .cfi_def_cfa_register 15,
11589 which is ok since, if the stack pointer got modified, it has
11590 been restored already.
11591 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
11592 add_reg_note (insn, REG_CFA_DEF_CFA,
11593 plus_constant (Pmode, stack_pointer_rtx,
11594 STACK_POINTER_OFFSET));
11595 RTX_FRAME_RELATED_P (insn) = 1;
11600 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11601 generation. */
11603 namespace {
11605 const pass_data pass_data_s390_early_mach =
11607 RTL_PASS, /* type */
11608 "early_mach", /* name */
11609 OPTGROUP_NONE, /* optinfo_flags */
11610 TV_MACH_DEP, /* tv_id */
11611 0, /* properties_required */
11612 0, /* properties_provided */
11613 0, /* properties_destroyed */
11614 0, /* todo_flags_start */
11615 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11618 class pass_s390_early_mach : public rtl_opt_pass
11620 public:
11621 pass_s390_early_mach (gcc::context *ctxt)
11622 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11625 /* opt_pass methods: */
11626 virtual unsigned int execute (function *);
11628 }; // class pass_s390_early_mach
11630 unsigned int
11631 pass_s390_early_mach::execute (function *fun)
11633 rtx_insn *insn;
11635 /* Try to get rid of the FPR clobbers. */
11636 s390_optimize_nonescaping_tx ();
11638 /* Re-compute register info. */
11639 s390_register_info ();
11641 /* If we're using a base register, ensure that it is always valid for
11642 the first non-prologue instruction. */
11643 if (fun->machine->base_reg)
11644 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11646 /* Annotate all constant pool references to let the scheduler know
11647 they implicitly use the base register. */
11648 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11649 if (INSN_P (insn))
11651 annotate_constant_pool_refs (insn);
11652 df_insn_rescan (insn);
11654 return 0;
11657 } // anon namespace
11659 rtl_opt_pass *
11660 make_pass_s390_early_mach (gcc::context *ctxt)
11662 return new pass_s390_early_mach (ctxt);
11665 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11666 - push too big immediates to the literal pool and annotate the refs
11667 - emit frame related notes for stack pointer changes. */
11669 static rtx
11670 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11672 rtx_insn *insn;
11673 rtx orig_offset = offset;
11675 gcc_assert (REG_P (target));
11676 gcc_assert (REG_P (reg));
11677 gcc_assert (CONST_INT_P (offset));
11679 if (offset == const0_rtx) /* lr/lgr */
11681 insn = emit_move_insn (target, reg);
11683 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
11685 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11686 offset));
11688 else
11690 if (!satisfies_constraint_K (offset) /* ahi/aghi */
11691 && (!TARGET_EXTIMM
11692 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
11693 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11694 offset = force_const_mem (Pmode, offset);
11696 if (target != reg)
11698 insn = emit_move_insn (target, reg);
11699 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11702 insn = emit_insn (gen_add2_insn (target, offset));
11704 if (!CONST_INT_P (offset))
11706 annotate_constant_pool_refs (insn);
11708 if (frame_related_p)
11709 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11710 gen_rtx_SET (target,
11711 gen_rtx_PLUS (Pmode, target,
11712 orig_offset)));
11716 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11718 /* If this is a stack adjustment and we are generating a stack clash
11719 prologue, then add a REG_STACK_CHECK note to signal that this insn
11720 should be left alone. */
11721 if (flag_stack_clash_protection && target == stack_pointer_rtx)
11722 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11724 return insn;
11727 /* Emit a compare instruction with a volatile memory access as stack
11728 probe. It does not waste store tags and does not clobber any
11729 registers apart from the condition code. */
11730 static void
11731 s390_emit_stack_probe (rtx addr)
11733 rtx mem = gen_rtx_MEM (word_mode, addr);
11734 MEM_VOLATILE_P (mem) = 1;
11735 emit_insn (gen_probe_stack (mem));
11738 /* Use a runtime loop if we have to emit more probes than this. */
11739 #define MIN_UNROLL_PROBES 3
11741 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11742 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11743 probe relative to the stack pointer.
11745 Note that SIZE is negative.
11747 The return value is true if TEMP_REG has been clobbered. */
11748 static bool
11749 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11750 rtx temp_reg)
11752 bool temp_reg_clobbered_p = false;
11753 HOST_WIDE_INT probe_interval
11754 = 1 << param_stack_clash_protection_probe_interval;
11755 HOST_WIDE_INT guard_size
11756 = 1 << param_stack_clash_protection_guard_size;
11758 if (flag_stack_clash_protection)
11760 if (last_probe_offset + -INTVAL (size) < guard_size)
11761 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11762 else
11764 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11765 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11766 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11767 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11769 if (num_probes < MIN_UNROLL_PROBES)
11771 /* Emit unrolled probe statements. */
11773 for (unsigned int i = 0; i < num_probes; i++)
11775 s390_prologue_plus_offset (stack_pointer_rtx,
11776 stack_pointer_rtx,
11777 GEN_INT (-probe_interval), true);
11778 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11779 stack_pointer_rtx,
11780 offset));
11782 if (num_probes > 0)
11783 last_probe_offset = INTVAL (offset);
11784 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11786 else
11788 /* Emit a loop probing the pages. */
11790 rtx_code_label *loop_start_label = gen_label_rtx ();
11792 /* From now on temp_reg will be the CFA register. */
11793 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11794 GEN_INT (-rounded_size), true);
11795 emit_label (loop_start_label);
11797 s390_prologue_plus_offset (stack_pointer_rtx,
11798 stack_pointer_rtx,
11799 GEN_INT (-probe_interval), false);
11800 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11801 stack_pointer_rtx,
11802 offset));
11803 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11804 GT, NULL_RTX,
11805 Pmode, 1, loop_start_label);
11807 /* Without this make_edges ICEes. */
11808 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11809 LABEL_NUSES (loop_start_label) = 1;
11811 /* That's going to be a NOP since stack pointer and
11812 temp_reg are supposed to be the same here. We just
11813 emit it to set the CFA reg back to r15. */
11814 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11815 const0_rtx, true);
11816 temp_reg_clobbered_p = true;
11817 last_probe_offset = INTVAL (offset);
11818 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11821 /* Handle any residual allocation request. */
11822 s390_prologue_plus_offset (stack_pointer_rtx,
11823 stack_pointer_rtx,
11824 GEN_INT (-residual), true);
11825 last_probe_offset += residual;
11826 if (last_probe_offset >= probe_interval)
11827 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11828 stack_pointer_rtx,
11829 GEN_INT (residual
11830 - UNITS_PER_LONG)));
11832 return temp_reg_clobbered_p;
11836 /* Subtract frame size from stack pointer. */
11837 s390_prologue_plus_offset (stack_pointer_rtx,
11838 stack_pointer_rtx,
11839 size, true);
11841 return temp_reg_clobbered_p;
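/* Illustrative sketch, not part of the build: how the stack-clash code
   above splits an allocation into probe-interval sized chunks plus a
   residual.  The frame size and the 4 KiB interval are invented for the
   example; the real values come from SIZE and the
   stack-clash-protection-probe-interval param.  */
static void
stack_clash_probe_example (void)
{
  long long frame_size = 16 * 1024 + 200;   /* bytes to allocate */
  long long probe_interval = 1 << 12;       /* 4 KiB */

  /* Round down to a multiple of the probe interval, mirroring
     "-INTVAL (size) & -probe_interval" above.  */
  long long rounded_size = frame_size & -probe_interval;   /* 16384 */
  long long num_probes = rounded_size / probe_interval;    /* 4 */
  long long residual = frame_size - rounded_size;          /* 200 */

  /* With MIN_UNROLL_PROBES == 3, four probes are not emitted inline;
     the runtime probing loop is used instead.  */
  (void) num_probes;
  (void) residual;
}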
11844 /* Expand the prologue into a bunch of separate insns. */
11846 void
11847 s390_emit_prologue (void)
11849 rtx insn, addr;
11850 rtx temp_reg;
11851 int i;
11852 int offset;
11853 int next_fpr = 0;
11855 /* Choose best register to use for temp use within prologue.
11856 TPF with profiling must avoid the register 14 - the tracing function
11857 needs the original contents of r14 to be preserved. */
11859 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11860 && !crtl->is_leaf
11861 && !TARGET_TPF_PROFILING)
11862 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11863 else if (flag_split_stack && cfun->stdarg)
11864 temp_reg = gen_rtx_REG (Pmode, 12);
11865 else
11866 temp_reg = gen_rtx_REG (Pmode, 1);
11868 /* When probing for stack-clash mitigation, we have to track the distance
11869 between the stack pointer and closest known reference.
11871 Most of the time we have to make a worst case assumption. The
11872 only exception is when TARGET_BACKCHAIN is active, in which case
11873 we know *sp (offset 0) was written. */
11874 HOST_WIDE_INT probe_interval
11875 = 1 << param_stack_clash_protection_probe_interval;
11876 HOST_WIDE_INT last_probe_offset
11877 = (TARGET_BACKCHAIN
11878 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11879 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11881 s390_save_gprs_to_fprs ();
11883 /* Save call saved gprs. */
11884 if (cfun_frame_layout.first_save_gpr != -1)
11886 save_gprs (stack_pointer_rtx,
11887 cfun_frame_layout.gprs_offset +
11888 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11889 - cfun_frame_layout.first_save_gpr_slot),
11890 cfun_frame_layout.first_save_gpr,
11891 cfun_frame_layout.last_save_gpr);
11893 /* This is not 100% correct. If we have more than one register saved,
11894 then LAST_PROBE_OFFSET can move even closer to sp. */
11895 last_probe_offset
11896 = (cfun_frame_layout.gprs_offset +
11897 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11898 - cfun_frame_layout.first_save_gpr_slot));
11901 /* Dummy insn to mark literal pool slot. */
11903 if (cfun->machine->base_reg)
11904 emit_insn (gen_main_pool (cfun->machine->base_reg));
11906 offset = cfun_frame_layout.f0_offset;
11908 /* Save f0 and f2. */
11909 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11911 if (cfun_fpr_save_p (i))
11913 save_fpr (stack_pointer_rtx, offset, i);
11914 if (offset < last_probe_offset)
11915 last_probe_offset = offset;
11916 offset += 8;
11918 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11919 offset += 8;
11922 /* Save f4 and f6. */
11923 offset = cfun_frame_layout.f4_offset;
11924 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11926 if (cfun_fpr_save_p (i))
11928 save_fpr (stack_pointer_rtx, offset, i);
11929 if (offset < last_probe_offset)
11930 last_probe_offset = offset;
11931 offset += 8;
11933 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11934 offset += 8;
11937 if (TARGET_PACKED_STACK
11938 && cfun_save_high_fprs_p
11939 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11941 offset = (cfun_frame_layout.f8_offset
11942 + (cfun_frame_layout.high_fprs - 1) * 8);
11944 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11945 if (cfun_fpr_save_p (i))
11947 save_fpr (stack_pointer_rtx, offset, i);
11948 if (offset < last_probe_offset)
11949 last_probe_offset = offset;
11951 offset -= 8;
11953 if (offset >= cfun_frame_layout.f8_offset)
11954 next_fpr = i;
11957 if (!TARGET_PACKED_STACK)
11958 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11960 if (flag_stack_usage_info)
11961 current_function_static_stack_size = cfun_frame_layout.frame_size;
11963 /* Decrement stack pointer. */
11965 if (cfun_frame_layout.frame_size > 0)
11967 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11968 rtx_insn *stack_pointer_backup_loc;
11969 bool temp_reg_clobbered_p;
11971 if (s390_stack_size)
11973 HOST_WIDE_INT stack_guard;
11975 if (s390_stack_guard)
11976 stack_guard = s390_stack_guard;
11977 else
11979 /* If no value for stack guard is provided the smallest power of 2
11980 larger than the current frame size is chosen. */
11981 stack_guard = 1;
11982 while (stack_guard < cfun_frame_layout.frame_size)
11983 stack_guard <<= 1;
11986 if (cfun_frame_layout.frame_size >= s390_stack_size)
11988 warning (0, "frame size of function %qs is %wd"
11989 " bytes exceeding user provided stack limit of "
11990 "%d bytes; "
11991 "an unconditional trap is added",
11992 current_function_name(), cfun_frame_layout.frame_size,
11993 s390_stack_size);
11994 emit_insn (gen_trap ());
11995 emit_barrier ();
11997 else
11999 /* stack_guard has to be smaller than s390_stack_size.
12000 Otherwise we would emit an AND with zero which would
12001 not match the test under mask pattern. */
12002 if (stack_guard >= s390_stack_size)
12004 warning (0, "frame size of function %qs is %wd"
12005 " bytes which is more than half the stack size; "
12006 "the dynamic check would not be reliable; "
12007 "no check emitted for this function",
12008 current_function_name(),
12009 cfun_frame_layout.frame_size);
12011 else
12013 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
12014 & ~(stack_guard - 1));
12016 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
12017 GEN_INT (stack_check_mask));
12018 if (TARGET_64BIT)
12019 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
12020 t, const0_rtx),
12021 t, const0_rtx, const0_rtx));
12022 else
12023 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
12024 t, const0_rtx),
12025 t, const0_rtx, const0_rtx));
12030 if (s390_warn_framesize > 0
12031 && cfun_frame_layout.frame_size >= s390_warn_framesize)
12032 warning (0, "frame size of %qs is %wd bytes",
12033 current_function_name (), cfun_frame_layout.frame_size);
12035 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
12036 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
12038 /* Save the location where we could backup the incoming stack
12039 pointer. */
12040 stack_pointer_backup_loc = get_last_insn ();
12042 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
12043 temp_reg);
12045 if (TARGET_BACKCHAIN || next_fpr)
12047 if (temp_reg_clobbered_p)
12049 /* allocate_stack_space had to make use of temp_reg and
12050 we need it to hold a backup of the incoming stack
12051 pointer. Calculate back that value from the current
12052 stack pointer. */
12053 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
12054 GEN_INT (cfun_frame_layout.frame_size),
12055 false);
12057 else
12059 /* allocate_stack_space didn't actually require
12060 temp_reg. Insert the stack pointer backup insn
12061 before the stack pointer decrement code - knowing now
12062 that the value will survive. */
12063 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
12064 stack_pointer_backup_loc);
12068 /* Set backchain. */
12070 if (TARGET_BACKCHAIN)
12072 if (cfun_frame_layout.backchain_offset)
12073 addr = gen_rtx_MEM (Pmode,
12074 plus_constant (Pmode, stack_pointer_rtx,
12075 cfun_frame_layout.backchain_offset));
12076 else
12077 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
12078 set_mem_alias_set (addr, get_frame_alias_set ());
12079 insn = emit_insn (gen_move_insn (addr, temp_reg));
12082 /* If we support non-call exceptions (e.g. for Java),
12083 we need to make sure the backchain pointer is set up
12084 before any possibly trapping memory access. */
12085 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
12087 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
12088 emit_clobber (addr);
12091 else if (flag_stack_clash_protection)
12092 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
12094 /* Save fprs 8 - 15 (64 bit ABI). */
12096 if (cfun_save_high_fprs_p && next_fpr)
12098 /* If the stack might be accessed through a different register
12099 we have to make sure that the stack pointer decrement is not
12100 moved below the use of the stack slots. */
12101 s390_emit_stack_tie ();
12103 insn = emit_insn (gen_add2_insn (temp_reg,
12104 GEN_INT (cfun_frame_layout.f8_offset)));
12106 offset = 0;
12108 for (i = FPR8_REGNUM; i <= next_fpr; i++)
12109 if (cfun_fpr_save_p (i))
12111 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
12112 cfun_frame_layout.frame_size
12113 + cfun_frame_layout.f8_offset
12114 + offset);
12116 insn = save_fpr (temp_reg, offset, i);
12117 offset += 8;
12118 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12119 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
12120 gen_rtx_REG (DFmode, i)));
12124 /* Set frame pointer, if needed. */
12126 if (frame_pointer_needed)
12128 s390_emit_stack_tie ();
12129 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12130 RTX_FRAME_RELATED_P (insn) = 1;
12133 /* Set up got pointer, if needed. */
12135 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
12137 rtx_insn *insns = s390_load_got ();
12139 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
12140 annotate_constant_pool_refs (insn);
12142 emit_insn (insns);
12145 #if TARGET_TPF != 0
12146 if (TARGET_TPF_PROFILING)
12148 /* Generate a BAS instruction to serve as a function entry
12149 intercept to facilitate the use of tracing algorithms located
12150 at the branch target. */
12151 emit_insn (gen_prologue_tpf (
12152 GEN_INT (s390_tpf_trace_hook_prologue_check),
12153 GEN_INT (s390_tpf_trace_hook_prologue_target)));
12155 /* Emit a blockage here so that all code lies between the
12156 profiling mechanisms. */
12157 emit_insn (gen_blockage ());
12159 #endif
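/* Illustrative sketch, not part of the build: the -mstack-size /
   -mstack-guard check emitted in the prologue above.  The option
   values are invented; both must be powers of two.  */
static int
stack_guard_check_example (unsigned long sp)
{
  unsigned long stack_size = 1UL << 16;   /* -mstack-size=65536 */
  unsigned long stack_guard = 1UL << 12;  /* -mstack-guard=4096 */

  /* Same mask as stack_check_mask above.  */
  unsigned long mask = (stack_size - 1) & ~(stack_guard - 1);

  /* The generated test-under-mask traps when these bits of the stack
     pointer are all zero; a non-zero result means no trap.  */
  return (sp & mask) != 0;
}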
12162 /* Expand the epilogue into a bunch of separate insns. */
12164 void
12165 s390_emit_epilogue (bool sibcall)
12167 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
12168 int area_bottom, area_top, offset = 0;
12169 int next_offset;
12170 int i;
12172 #if TARGET_TPF != 0
12173 if (TARGET_TPF_PROFILING)
12175 /* Generate a BAS instruction to serve as a function entry
12176 intercept to facilitate the use of tracing algorithms located
12177 at the branch target. */
12179 /* Emit a blockage here so that all code lies between the
12180 profiling mechanisms. */
12181 emit_insn (gen_blockage ());
12183 emit_insn (gen_epilogue_tpf (
12184 GEN_INT (s390_tpf_trace_hook_epilogue_check),
12185 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
12187 #endif
12189 /* Check whether to use frame or stack pointer for restore. */
12191 frame_pointer = (frame_pointer_needed
12192 ? hard_frame_pointer_rtx : stack_pointer_rtx);
12194 s390_frame_area (&area_bottom, &area_top);
12196 /* Check whether we can access the register save area.
12197 If not, increment the frame pointer as required. */
12199 if (area_top <= area_bottom)
12201 /* Nothing to restore. */
12203 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
12204 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
12206 /* Area is in range. */
12207 offset = cfun_frame_layout.frame_size;
12209 else
12211 rtx_insn *insn;
12212 rtx frame_off, cfa;
12214 offset = area_bottom < 0 ? -area_bottom : 0;
12215 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
12217 cfa = gen_rtx_SET (frame_pointer,
12218 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
12219 if (DISP_IN_RANGE (INTVAL (frame_off)))
12221 rtx set;
12223 set = gen_rtx_SET (frame_pointer,
12224 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
12225 insn = emit_insn (set);
12227 else
12229 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
12230 frame_off = force_const_mem (Pmode, frame_off);
12232 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
12233 annotate_constant_pool_refs (insn);
12235 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
12236 RTX_FRAME_RELATED_P (insn) = 1;
12239 /* Restore call saved fprs. */
12241 if (TARGET_64BIT)
12243 if (cfun_save_high_fprs_p)
12245 next_offset = cfun_frame_layout.f8_offset;
12246 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12248 if (cfun_fpr_save_p (i))
12250 restore_fpr (frame_pointer,
12251 offset + next_offset, i);
12252 cfa_restores
12253 = alloc_reg_note (REG_CFA_RESTORE,
12254 gen_rtx_REG (DFmode, i), cfa_restores);
12255 next_offset += 8;
12261 else
12263 next_offset = cfun_frame_layout.f4_offset;
12264 /* f4, f6 */
12265 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
12267 if (cfun_fpr_save_p (i))
12269 restore_fpr (frame_pointer,
12270 offset + next_offset, i);
12271 cfa_restores
12272 = alloc_reg_note (REG_CFA_RESTORE,
12273 gen_rtx_REG (DFmode, i), cfa_restores);
12274 next_offset += 8;
12276 else if (!TARGET_PACKED_STACK)
12277 next_offset += 8;
12282 /* Restore call saved gprs. */
12284 if (cfun_frame_layout.first_restore_gpr != -1)
12286 rtx insn, addr;
12287 int i;
12289 /* Check for global register and save them
12290 to stack location from where they get restored. */
12292 for (i = cfun_frame_layout.first_restore_gpr;
12293 i <= cfun_frame_layout.last_restore_gpr;
12294 i++)
12296 if (global_not_special_regno_p (i))
12298 addr = plus_constant (Pmode, frame_pointer,
12299 offset + cfun_frame_layout.gprs_offset
12300 + (i - cfun_frame_layout.first_save_gpr_slot)
12301 * UNITS_PER_LONG);
12302 addr = gen_rtx_MEM (Pmode, addr);
12303 set_mem_alias_set (addr, get_frame_alias_set ());
12304 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
12306 else
12307 cfa_restores
12308 = alloc_reg_note (REG_CFA_RESTORE,
12309 gen_rtx_REG (Pmode, i), cfa_restores);
12312 /* Fetch return address from stack before load multiple,
12313 this will do good for scheduling.
12315 Only do this if we already decided that r14 needs to be
12316 saved to a stack slot. (And not just because r14 happens to
12317 be in between two GPRs which need saving.) Otherwise it
12318 would be difficult to take that decision back in
12319 s390_optimize_prologue.
12321 This optimization is only helpful on in-order machines. */
12322 if (! sibcall
12323 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
12324 && s390_tune <= PROCESSOR_2097_Z10)
12326 int return_regnum = find_unused_clobbered_reg();
12327 if (!return_regnum
12328 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
12329 && !TARGET_CPU_Z10
12330 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
12332 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
12333 return_regnum = 4;
12335 return_reg = gen_rtx_REG (Pmode, return_regnum);
12337 addr = plus_constant (Pmode, frame_pointer,
12338 offset + cfun_frame_layout.gprs_offset
12339 + (RETURN_REGNUM
12340 - cfun_frame_layout.first_save_gpr_slot)
12341 * UNITS_PER_LONG);
12342 addr = gen_rtx_MEM (Pmode, addr);
12343 set_mem_alias_set (addr, get_frame_alias_set ());
12344 emit_move_insn (return_reg, addr);
12346 /* Once we did that optimization we have to make sure
12347 s390_optimize_prologue does not try to remove the store
12348 of r14 since we will not be able to find the load issued
12349 here. */
12350 cfun_frame_layout.save_return_addr_p = true;
12353 insn = restore_gprs (frame_pointer,
12354 offset + cfun_frame_layout.gprs_offset
12355 + (cfun_frame_layout.first_restore_gpr
12356 - cfun_frame_layout.first_save_gpr_slot)
12357 * UNITS_PER_LONG,
12358 cfun_frame_layout.first_restore_gpr,
12359 cfun_frame_layout.last_restore_gpr);
12360 insn = emit_insn (insn);
12361 REG_NOTES (insn) = cfa_restores;
12362 add_reg_note (insn, REG_CFA_DEF_CFA,
12363 plus_constant (Pmode, stack_pointer_rtx,
12364 STACK_POINTER_OFFSET));
12365 RTX_FRAME_RELATED_P (insn) = 1;
12368 s390_restore_gprs_from_fprs ();
12370 if (! sibcall)
12372 if (!return_reg && !s390_can_use_return_insn ())
12373 /* We planned to emit (return), but we are not allowed to. */
12374 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
12376 if (return_reg)
12377 /* Emit (return) and (use). */
12378 emit_jump_insn (gen_return_use (return_reg));
12379 else
12380 /* The fact that RETURN_REGNUM is used is already reflected by
12381 EPILOGUE_USES. Emit plain (return). */
12382 emit_jump_insn (gen_return ());
12386 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
12388 static void
12389 s300_set_up_by_prologue (hard_reg_set_container *regs)
12391 if (cfun->machine->base_reg
12392 && !call_used_regs[REGNO (cfun->machine->base_reg)])
12393 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
12396 /* -fsplit-stack support. */
12398 /* A SYMBOL_REF for __morestack. */
12399 static GTY(()) rtx morestack_ref;
12401 /* When using -fsplit-stack, the allocation routines set a field in
12402 the TCB to the bottom of the stack plus this much space, measured
12403 in bytes. */
12405 #define SPLIT_STACK_AVAILABLE 1024
12407 /* Emit the parmblock for __morestack into .rodata section. It
12408 consists of 3 pointer size entries:
12409 - frame size
12410 - size of stack arguments
12411 - offset between parm block and __morestack return label */
12413 void
12414 s390_output_split_stack_data (rtx parm_block, rtx call_done,
12415 rtx frame_size, rtx args_size)
12417 rtx ops[] = { parm_block, call_done };
12419 switch_to_section (targetm.asm_out.function_rodata_section
12420 (current_function_decl, false));
12422 if (TARGET_64BIT)
12423 output_asm_insn (".align\t8", NULL);
12424 else
12425 output_asm_insn (".align\t4", NULL);
12427 (*targetm.asm_out.internal_label) (asm_out_file, "L",
12428 CODE_LABEL_NUMBER (parm_block));
12429 if (TARGET_64BIT)
12431 output_asm_insn (".quad\t%0", &frame_size);
12432 output_asm_insn (".quad\t%0", &args_size);
12433 output_asm_insn (".quad\t%1-%0", ops);
12435 else
12437 output_asm_insn (".long\t%0", &frame_size);
12438 output_asm_insn (".long\t%0", &args_size);
12439 output_asm_insn (".long\t%1-%0", ops);
12442 switch_to_section (current_function_section ());
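/* Illustrative sketch, not part of the build: the parm block emitted
   above viewed as a C structure.  The struct and field names are
   invented; __morestack only relies on the layout of the three
   pointer-sized entries.  */
struct split_stack_parm_block_example
{
  unsigned long frame_size;     /* first .quad/.long: frame size */
  unsigned long args_size;      /* second .quad/.long: stack argument size */
  long call_done_offset;        /* third .quad/.long: call_done - parm block */
};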
12445 /* Emit -fsplit-stack prologue, which goes before the regular function
12446 prologue. */
12448 void
12449 s390_expand_split_stack_prologue (void)
12451 rtx r1, guard, cc = NULL;
12452 rtx_insn *insn;
12453 /* Offset from thread pointer to __private_ss. */
12454 int psso = TARGET_64BIT ? 0x38 : 0x20;
12455 /* Pointer size in bytes. */
12456 /* Frame size and argument size - the two parameters to __morestack. */
12457 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
12458 /* Align argument size to 8 bytes - simplifies __morestack code. */
12459 HOST_WIDE_INT args_size = crtl->args.size >= 0
12460 ? ((crtl->args.size + 7) & ~7)
12461 : 0;
12462 /* Label to be called by __morestack. */
12463 rtx_code_label *call_done = NULL;
12464 rtx_code_label *parm_base = NULL;
12465 rtx tmp;
12467 gcc_assert (flag_split_stack && reload_completed);
12469 r1 = gen_rtx_REG (Pmode, 1);
12471 /* If no stack frame will be allocated, don't do anything. */
12472 if (!frame_size)
12474 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12476 /* If va_start is used, just use r15. */
12477 emit_move_insn (r1,
12478 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12479 GEN_INT (STACK_POINTER_OFFSET)));
12482 return;
12485 if (morestack_ref == NULL_RTX)
12487 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12488 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
12489 | SYMBOL_FLAG_FUNCTION);
12492 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
12494 /* If frame_size will fit in an add instruction, do a stack space
12495 check, and only call __morestack if there's not enough space. */
12497 /* Get thread pointer. r1 is the only register we can always destroy - r0
12498 could contain a static chain (and cannot be used to address memory
12499 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
12500 emit_insn (gen_get_thread_pointer (Pmode, r1));
12501 /* Aim at __private_ss. */
12502 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
12504 /* If less than 1kiB used, skip addition and compare directly with
12505 __private_ss. */
12506 if (frame_size > SPLIT_STACK_AVAILABLE)
12508 emit_move_insn (r1, guard);
12509 if (TARGET_64BIT)
12510 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
12511 else
12512 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
12513 guard = r1;
12516 /* Compare the (maybe adjusted) guard with the stack pointer. */
12517 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
12520 call_done = gen_label_rtx ();
12521 parm_base = gen_label_rtx ();
12522 LABEL_NUSES (parm_base)++;
12523 LABEL_NUSES (call_done)++;
12525 /* %r1 = litbase. */
12526 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
12527 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12528 LABEL_NUSES (parm_base)++;
12530 /* Now, we need to call __morestack. It has very special calling
12531 conventions: it preserves param/return/static chain registers for
12532 calling main function body, and looks for its own parameters at %r1. */
12533 if (cc != NULL)
12534 tmp = gen_split_stack_cond_call (Pmode,
12535 morestack_ref,
12536 parm_base,
12537 call_done,
12538 GEN_INT (frame_size),
12539 GEN_INT (args_size),
12540 cc);
12541 else
12542 tmp = gen_split_stack_call (Pmode,
12543 morestack_ref,
12544 parm_base,
12545 call_done,
12546 GEN_INT (frame_size),
12547 GEN_INT (args_size));
12549 insn = emit_jump_insn (tmp);
12550 JUMP_LABEL (insn) = call_done;
12551 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12552 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
12554 if (cc != NULL)
12556 /* Mark the jump as very unlikely to be taken. */
12557 add_reg_br_prob_note (insn,
12558 profile_probability::very_unlikely ());
12560 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12562 /* If va_start is used, and __morestack was not called, just use
12563 r15. */
12564 emit_move_insn (r1,
12565 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12566 GEN_INT (STACK_POINTER_OFFSET)));
12569 else
12571 emit_barrier ();
12574 /* __morestack will call us here. */
12576 emit_label (call_done);
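/* Illustrative sketch, not part of the build: the stack-space check set
   up above, written in C.  tcb_private_ss stands for the __private_ss
   field read through the thread pointer; the name and the helper are
   invented.  */
static int
split_stack_needs_morestack_example (unsigned long sp,
                                     unsigned long tcb_private_ss,
                                     unsigned long frame_size)
{
  unsigned long guard = tcb_private_ss;

  /* Frames of up to SPLIT_STACK_AVAILABLE (1024) bytes rely on the
     slack the allocator already left and compare against __private_ss
     directly; larger frames add their size first.  */
  if (frame_size > 1024)
    guard += frame_size;

  /* True: the conditional branch to the __morestack call is taken.  */
  return sp < guard;
}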
12579 /* We may have to tell the dataflow pass that the split stack prologue
12580 is initializing a register. */
12582 static void
12583 s390_live_on_entry (bitmap regs)
12585 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12587 gcc_assert (flag_split_stack);
12588 bitmap_set_bit (regs, 1);
12592 /* Return true if the function can use simple_return to return outside
12593 of a shrink-wrapped region. At present shrink-wrapping is supported
12594 in all cases. */
12596 bool
12597 s390_can_use_simple_return_insn (void)
12599 return true;
12602 /* Return true if the epilogue is guaranteed to contain only a return
12603 instruction and if a direct return can therefore be used instead.
12604 One of the main advantages of using direct return instructions
12605 is that we can then use conditional returns. */
12607 bool
12608 s390_can_use_return_insn (void)
12610 int i;
12612 if (!reload_completed)
12613 return false;
12615 if (crtl->profile)
12616 return false;
12618 if (TARGET_TPF_PROFILING)
12619 return false;
12621 for (i = 0; i < 16; i++)
12622 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12623 return false;
12625 /* For 31 bit this is not covered by the frame_size check below
12626 since f4, f6 are saved in the register save area without needing
12627 additional stack space. */
12628 if (!TARGET_64BIT
12629 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12630 return false;
12632 if (cfun->machine->base_reg
12633 && !call_used_regs[REGNO (cfun->machine->base_reg)])
12634 return false;
12636 return cfun_frame_layout.frame_size == 0;
12639 /* The VX ABI differs for vararg functions. Therefore we need the
12640 prototype of the callee to be available when passing vector type
12641 values. */
12642 static const char *
12643 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12645 return ((TARGET_VX_ABI
12646 && typelist == 0
12647 && VECTOR_TYPE_P (TREE_TYPE (val))
12648 && (funcdecl == NULL_TREE
12649 || (TREE_CODE (funcdecl) == FUNCTION_DECL
12650 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
12651 && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
12652 ? N_("vector argument passed to unprototyped function")
12653 : NULL);
12657 /* Return the size in bytes of a function argument of
12658 type TYPE and/or mode MODE. At least one of TYPE or
12659 MODE must be specified. */
12661 static int
12662 s390_function_arg_size (machine_mode mode, const_tree type)
12664 if (type)
12665 return int_size_in_bytes (type);
12667 /* No type info available for some library calls ... */
12668 if (mode != BLKmode)
12669 return GET_MODE_SIZE (mode);
12671 /* If we have neither type nor mode, abort */
12672 gcc_unreachable ();
12675 /* Return true if a variable of TYPE should be passed as single value
12676 with type CODE. If STRICT_SIZE_CHECK_P is true the sizes of the
12677 record type and the field type must match.
12679 The ABI says that record types with a single member are treated
12680 just like that member would be. This function is a helper to
12681 detect such cases. The function also produces the proper
12682 diagnostics for cases where the outcome might be different
12683 depending on the GCC version. */
12684 static bool
12685 s390_single_field_struct_p (enum tree_code code, const_tree type,
12686 bool strict_size_check_p)
12688 int empty_base_seen = 0;
12689 bool zero_width_bf_skipped_p = false;
12690 const_tree orig_type = type;
12692 while (TREE_CODE (type) == RECORD_TYPE)
12694 tree field, single_type = NULL_TREE;
12696 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12698 if (TREE_CODE (field) != FIELD_DECL)
12699 continue;
12701 if (DECL_FIELD_ABI_IGNORED (field))
12703 if (lookup_attribute ("no_unique_address",
12704 DECL_ATTRIBUTES (field)))
12705 empty_base_seen |= 2;
12706 else
12707 empty_base_seen |= 1;
12708 continue;
12711 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
12713 zero_width_bf_skipped_p = true;
12714 continue;
12717 if (single_type == NULL_TREE)
12718 single_type = TREE_TYPE (field);
12719 else
12720 return false;
12723 if (single_type == NULL_TREE)
12724 return false;
12726 /* Reaching this point we have a struct with a single member and
12727 zero or more zero-sized bit-fields which have been skipped in the
12728 past. */
12730 /* If ZERO_WIDTH_BF_SKIPPED_P then the struct will not be accepted. If
12731 we are not supposed to emit a warning, exit early. */
12732 if (zero_width_bf_skipped_p && !warn_psabi)
12733 return false;
12735 /* If the field declaration adds extra bytes due to padding this
12736 is not accepted with STRICT_SIZE_CHECK_P. */
12737 if (strict_size_check_p
12738 && (int_size_in_bytes (single_type) <= 0
12739 || int_size_in_bytes (single_type) != int_size_in_bytes (type)))
12740 return false;
12742 type = single_type;
12745 if (TREE_CODE (type) != code)
12746 return false;
12748 if (warn_psabi)
12750 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12752 if (empty_base_seen)
12754 static unsigned last_reported_type_uid_empty_base;
12755 if (uid != last_reported_type_uid_empty_base)
12757 last_reported_type_uid_empty_base = uid;
12758 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12759 if (empty_base_seen & 1)
12760 inform (input_location,
12761 "parameter passing for argument of type %qT when C++17 "
12762 "is enabled changed to match C++14 %{in GCC 10.1%}",
12763 orig_type, url);
12764 else
12765 inform (input_location,
12766 "parameter passing for argument of type %qT with "
12767 "%<[[no_unique_address]]%> members changed "
12768 "%{in GCC 10.1%}", orig_type, url);
12772 /* For C++, older GCCs ignored zero-width bit-fields and therefore
12773 passed structs more often as single values than GCC 12 does.
12774 So diagnostics are only required in cases where we do NOT
12775 accept the struct to be passed as single value. */
12776 if (zero_width_bf_skipped_p)
12778 static unsigned last_reported_type_uid_zero_width;
12779 if (uid != last_reported_type_uid_zero_width)
12781 last_reported_type_uid_zero_width = uid;
12782 inform (input_location,
12783 "parameter passing for argument of type %qT with "
12784 "zero-width bit fields members changed in GCC 12",
12785 orig_type);
12790 return !zero_width_bf_skipped_p;
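/* Illustrative sketch, not part of the build: the single-member record
   rule checked above, for two invented types.  With hardware float,
   wrapped_double_example is passed exactly like a plain double (an FPR
   candidate), while two_doubles_example has two members and is passed
   by reference instead.  */
struct wrapped_double_example { double d; };
struct two_doubles_example { double a; double b; };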
12794 /* Return true if a function argument of type TYPE and mode MODE
12795 is to be passed in a vector register, if available. */
12797 static bool
12798 s390_function_arg_vector (machine_mode mode, const_tree type)
12800 if (!TARGET_VX_ABI)
12801 return false;
12803 if (s390_function_arg_size (mode, type) > 16)
12804 return false;
12806 /* No type info available for some library calls ... */
12807 if (!type)
12808 return VECTOR_MODE_P (mode);
12810 if (!s390_single_field_struct_p (VECTOR_TYPE, type, true))
12811 return false;
12813 return true;
12816 /* Return true if a function argument of type TYPE and mode MODE
12817 is to be passed in a floating-point register, if available. */
12819 static bool
12820 s390_function_arg_float (machine_mode mode, const_tree type)
12822 if (s390_function_arg_size (mode, type) > 8)
12823 return false;
12825 /* Soft-float changes the ABI: no floating-point registers are used. */
12826 if (TARGET_SOFT_FLOAT)
12827 return false;
12829 /* No type info available for some library calls ... */
12830 if (!type)
12831 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12833 if (!s390_single_field_struct_p (REAL_TYPE, type, false))
12834 return false;
12836 return true;
12839 /* Return true if a function argument of type TYPE and mode MODE
12840 is to be passed in an integer register, or a pair of integer
12841 registers, if available. */
12843 static bool
12844 s390_function_arg_integer (machine_mode mode, const_tree type)
12846 int size = s390_function_arg_size (mode, type);
12847 if (size > 8)
12848 return false;
12850 /* No type info available for some library calls ... */
12851 if (!type)
12852 return GET_MODE_CLASS (mode) == MODE_INT
12853 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12855 /* We accept small integral (and similar) types. */
12856 if (INTEGRAL_TYPE_P (type)
12857 || POINTER_TYPE_P (type)
12858 || TREE_CODE (type) == NULLPTR_TYPE
12859 || TREE_CODE (type) == OFFSET_TYPE
12860 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_TYPE_P (type)))
12861 return true;
12863 /* We also accept structs of size 1, 2, 4, 8 that are not
12864 passed in floating-point registers. */
12865 if (AGGREGATE_TYPE_P (type)
12866 && exact_log2 (size) >= 0
12867 && !s390_function_arg_float (mode, type))
12868 return true;
12870 return false;
12873 /* Return 1 if a function argument ARG is to be passed by reference.
12874 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12875 are passed by value, all other structures (and complex numbers) are
12876 passed by reference. */
12878 static bool
12879 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12881 int size = s390_function_arg_size (arg.mode, arg.type);
12883 if (s390_function_arg_vector (arg.mode, arg.type))
12884 return false;
12886 if (size > 8)
12887 return true;
12889 if (tree type = arg.type)
12891 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12892 return true;
12894 if (TREE_CODE (type) == COMPLEX_TYPE
12895 || TREE_CODE (type) == VECTOR_TYPE)
12896 return true;
12899 return false;
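/* Illustrative sketch, not part of the build: the pass-by-reference
   rule above applied to invented aggregate types.  */
struct pbv4_example { char c[4]; };    /* size 4: passed by value */
struct pbr3_example { char c[3]; };    /* size 3, not a power of two: by reference */
struct pbr16_example { char c[16]; };  /* size 16, larger than 8: by reference */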
12902 /* Update the data in CUM to advance over argument ARG. */
12904 static void
12905 s390_function_arg_advance (cumulative_args_t cum_v,
12906 const function_arg_info &arg)
12908 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12910 if (s390_function_arg_vector (arg.mode, arg.type))
12912 /* We are called for unnamed vector stdarg arguments which are
12913 passed on the stack. In this case this hook does not have to
12914 do anything since stack arguments are tracked by common
12915 code. */
12916 if (!arg.named)
12917 return;
12918 cum->vrs += 1;
12920 else if (s390_function_arg_float (arg.mode, arg.type))
12922 cum->fprs += 1;
12924 else if (s390_function_arg_integer (arg.mode, arg.type))
12926 int size = s390_function_arg_size (arg.mode, arg.type);
12927 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12929 else
12930 gcc_unreachable ();
12933 /* Define where to put the arguments to a function.
12934 Value is zero to push the argument on the stack,
12935 or a hard register in which to store the argument.
12937 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12938 the preceding args and about the function being called.
12939 ARG is a description of the argument.
12941 On S/390, we use general purpose registers 2 through 6 to
12942 pass integer, pointer, and certain structure arguments, and
12943 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12944 to pass floating point arguments. All remaining arguments
12945 are pushed to the stack. */
12947 static rtx
12948 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12950 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12952 if (!arg.named)
12953 s390_check_type_for_vector_abi (arg.type, true, false);
12955 if (s390_function_arg_vector (arg.mode, arg.type))
12957 /* Vector arguments being part of the ellipsis are passed on the
12958 stack. */
12959 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12960 return NULL_RTX;
12962 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12964 else if (s390_function_arg_float (arg.mode, arg.type))
12966 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12967 return NULL_RTX;
12968 else
12969 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12971 else if (s390_function_arg_integer (arg.mode, arg.type))
12973 int size = s390_function_arg_size (arg.mode, arg.type);
12974 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12976 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12977 return NULL_RTX;
12978 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12979 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12980 else if (n_gprs == 2)
12982 rtvec p = rtvec_alloc (2);
12984 RTVEC_ELT (p, 0)
12985 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12986 const0_rtx);
12987 RTVEC_ELT (p, 1)
12988 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12989 GEN_INT (4));
12991 return gen_rtx_PARALLEL (arg.mode, p);
12995 /* After the real arguments, expand_call calls us once again with an
12996 end marker. Whatever we return here is passed as operand 2 to the
12997 call expanders.
12999 We don't need this feature ... */
13000 else if (arg.end_marker_p ())
13001 return const0_rtx;
13003 gcc_unreachable ();
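/* Illustrative sketch, not part of the build: where the arguments of
   this invented prototype land under the rules above (64-bit ABI with
   hardware float): a -> r2, b -> f0, c -> r3, d -> f2, e -> r4.
   Further integer arguments would use r5 and r6 before spilling to the
   stack; further doubles would use f4 and f6.  */
extern void s390_arg_passing_example (int a, double b, void *c,
                                      double d, long e);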
13006 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
13007 left-justified when placed on the stack during parameter passing. */
13009 static pad_direction
13010 s390_function_arg_padding (machine_mode mode, const_tree type)
13012 if (s390_function_arg_vector (mode, type))
13013 return PAD_UPWARD;
13015 return default_function_arg_padding (mode, type);
13018 /* Return true if return values of type TYPE should be returned
13019 in a memory buffer whose address is passed by the caller as
13020 hidden first argument. */
13022 static bool
13023 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
13025 /* We accept small integral (and similar) types. */
13026 if (INTEGRAL_TYPE_P (type)
13027 || POINTER_TYPE_P (type)
13028 || TREE_CODE (type) == OFFSET_TYPE
13029 || SCALAR_FLOAT_TYPE_P (type))
13030 return int_size_in_bytes (type) > 8;
13032 /* vector types which fit into a VR. */
13033 if (TARGET_VX_ABI
13034 && VECTOR_TYPE_P (type)
13035 && int_size_in_bytes (type) <= 16)
13036 return false;
13038 /* Aggregates and similar constructs are always returned
13039 in memory. */
13040 if (AGGREGATE_TYPE_P (type)
13041 || TREE_CODE (type) == COMPLEX_TYPE
13042 || VECTOR_TYPE_P (type))
13043 return true;
13045 /* ??? We get called on all sorts of random stuff from
13046 aggregate_value_p. We can't abort, but it's not clear
13047 what's safe to return. Pretend it's a struct I guess. */
13048 return true;
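/* Illustrative sketch, not part of the build: two invented return types
   under the rules above.  The aggregate is returned through the hidden
   buffer; the 16-byte vector is returned in a vector register when the
   vector ABI is in effect.  */
struct returned_in_memory_example { long a, b; };
typedef double returned_in_vr_example __attribute__ ((vector_size (16)));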
13051 /* Function arguments and return values are promoted to word size. */
13053 static machine_mode
13054 s390_promote_function_mode (const_tree type, machine_mode mode,
13055 int *punsignedp,
13056 const_tree fntype ATTRIBUTE_UNUSED,
13057 int for_return ATTRIBUTE_UNUSED)
13059 if (INTEGRAL_MODE_P (mode)
13060 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
13062 if (type != NULL_TREE && POINTER_TYPE_P (type))
13063 *punsignedp = POINTERS_EXTEND_UNSIGNED;
13064 return Pmode;
13067 return mode;
13070 /* Define where to return a (scalar) value of type RET_TYPE.
13071 If RET_TYPE is null, define where to return a (scalar)
13072 value of mode MODE from a libcall. */
13074 static rtx
13075 s390_function_and_libcall_value (machine_mode mode,
13076 const_tree ret_type,
13077 const_tree fntype_or_decl,
13078 bool outgoing ATTRIBUTE_UNUSED)
13080 /* For vector return types it is important to use the RET_TYPE
13081 argument whenever available since the middle-end might have
13082 changed the mode to a scalar mode. */
13083 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
13084 || (!ret_type && VECTOR_MODE_P (mode)));
13086 /* For normal functions perform the promotion as
13087 promote_function_mode would do. */
13088 if (ret_type)
13090 int unsignedp = TYPE_UNSIGNED (ret_type);
13091 mode = promote_function_mode (ret_type, mode, &unsignedp,
13092 fntype_or_decl, 1);
13095 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
13096 || SCALAR_FLOAT_MODE_P (mode)
13097 || (TARGET_VX_ABI && vector_ret_type_p));
13098 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
13100 if (TARGET_VX_ABI && vector_ret_type_p)
13101 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
13102 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
13103 return gen_rtx_REG (mode, 16);
13104 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
13105 || UNITS_PER_LONG == UNITS_PER_WORD)
13106 return gen_rtx_REG (mode, 2);
13107 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
13109 /* This case is triggered when returning a 64 bit value with
13110 -m31 -mzarch. Although the value would fit into a single
13111 register it has to be forced into a 32 bit register pair in
13112 order to match the ABI. */
13113 rtvec p = rtvec_alloc (2);
13115 RTVEC_ELT (p, 0)
13116 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
13117 RTVEC_ELT (p, 1)
13118 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
13120 return gen_rtx_PARALLEL (mode, p);
13123 gcc_unreachable ();
13126 /* Define where to return a scalar return value of type RET_TYPE. */
13128 static rtx
13129 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
13130 bool outgoing)
13132 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
13133 fn_decl_or_type, outgoing);
13136 /* Define where to return a scalar libcall return value of mode
13137 MODE. */
13139 static rtx
13140 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
13142 return s390_function_and_libcall_value (mode, NULL_TREE,
13143 NULL_TREE, true);
13147 /* Create and return the va_list datatype.
13149 On S/390, va_list is an array type equivalent to
13151 typedef struct __va_list_tag
13153 long __gpr;
13154 long __fpr;
13155 void *__overflow_arg_area;
13156 void *__reg_save_area;
13157 } va_list[1];
13159 where __gpr and __fpr hold the number of general purpose
13160 or floating point arguments used up to now, respectively,
13161 __overflow_arg_area points to the stack location of the
13162 next argument passed on the stack, and __reg_save_area
13163 always points to the start of the register area in the
13164 call frame of the current function. The function prologue
13165 saves all registers used for argument passing into this
13166 area if the function uses variable arguments. */
13168 static tree
13169 s390_build_builtin_va_list (void)
13171 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
13173 record = lang_hooks.types.make_type (RECORD_TYPE);
13175 type_decl =
13176 build_decl (BUILTINS_LOCATION,
13177 TYPE_DECL, get_identifier ("__va_list_tag"), record);
13179 f_gpr = build_decl (BUILTINS_LOCATION,
13180 FIELD_DECL, get_identifier ("__gpr"),
13181 long_integer_type_node);
13182 f_fpr = build_decl (BUILTINS_LOCATION,
13183 FIELD_DECL, get_identifier ("__fpr"),
13184 long_integer_type_node);
13185 f_ovf = build_decl (BUILTINS_LOCATION,
13186 FIELD_DECL, get_identifier ("__overflow_arg_area"),
13187 ptr_type_node);
13188 f_sav = build_decl (BUILTINS_LOCATION,
13189 FIELD_DECL, get_identifier ("__reg_save_area"),
13190 ptr_type_node);
13192 va_list_gpr_counter_field = f_gpr;
13193 va_list_fpr_counter_field = f_fpr;
13195 DECL_FIELD_CONTEXT (f_gpr) = record;
13196 DECL_FIELD_CONTEXT (f_fpr) = record;
13197 DECL_FIELD_CONTEXT (f_ovf) = record;
13198 DECL_FIELD_CONTEXT (f_sav) = record;
13200 TYPE_STUB_DECL (record) = type_decl;
13201 TYPE_NAME (record) = type_decl;
13202 TYPE_FIELDS (record) = f_gpr;
13203 DECL_CHAIN (f_gpr) = f_fpr;
13204 DECL_CHAIN (f_fpr) = f_ovf;
13205 DECL_CHAIN (f_ovf) = f_sav;
13207 layout_type (record);
13209 /* The correct type is an array type of one element. */
13210 return build_array_type (record, build_index_type (size_zero_node));
13213 /* Implement va_start by filling the va_list structure VALIST.
13214 STDARG_P is always true, and ignored.
13215 NEXTARG points to the first anonymous stack argument.
13217 The following global variables are used to initialize
13218 the va_list structure:
13220 crtl->args.info:
13221 holds number of gprs and fprs used for named arguments.
13222 crtl->args.arg_offset_rtx:
13223 holds the offset of the first anonymous stack argument
13224 (relative to the virtual arg pointer). */
13226 static void
13227 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
13229 HOST_WIDE_INT n_gpr, n_fpr;
13230 int off;
13231 tree f_gpr, f_fpr, f_ovf, f_sav;
13232 tree gpr, fpr, ovf, sav, t;
13234 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13235 f_fpr = DECL_CHAIN (f_gpr);
13236 f_ovf = DECL_CHAIN (f_fpr);
13237 f_sav = DECL_CHAIN (f_ovf);
13239 valist = build_simple_mem_ref (valist);
13240 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13241 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
13242 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
13243 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
13245 /* Count number of gp and fp argument registers used. */
13247 n_gpr = crtl->args.info.gprs;
13248 n_fpr = crtl->args.info.fprs;
13250 if (cfun->va_list_gpr_size)
13252 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13253 build_int_cst (NULL_TREE, n_gpr));
13254 TREE_SIDE_EFFECTS (t) = 1;
13255 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13258 if (cfun->va_list_fpr_size)
13260 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13261 build_int_cst (NULL_TREE, n_fpr));
13262 TREE_SIDE_EFFECTS (t) = 1;
13263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13266 if (flag_split_stack
13267 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
13268 == NULL)
13269 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
13271 rtx reg;
13272 rtx_insn *seq;
13274 reg = gen_reg_rtx (Pmode);
13275 cfun->machine->split_stack_varargs_pointer = reg;
13277 start_sequence ();
13278 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
13279 seq = get_insns ();
13280 end_sequence ();
13282 push_topmost_sequence ();
13283 emit_insn_after (seq, entry_of_function ());
13284 pop_topmost_sequence ();
13287 /* Find the overflow area.
13288 FIXME: This currently is too pessimistic when the vector ABI is
13289 enabled. In that case we *always* set up the overflow area
13290 pointer. */
13291 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
13292 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
13293 || TARGET_VX_ABI)
13295 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
13296 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
13297 else
13298 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
13300 off = INTVAL (crtl->args.arg_offset_rtx);
13301 off = off < 0 ? 0 : off;
13302 if (TARGET_DEBUG_ARG)
13303 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
13304 (int)n_gpr, (int)n_fpr, off);
13306 t = fold_build_pointer_plus_hwi (t, off);
13308 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13309 TREE_SIDE_EFFECTS (t) = 1;
13310 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13313 /* Find the register save area. */
13314 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
13315 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
13317 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
13318 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
13320 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13321 TREE_SIDE_EFFECTS (t) = 1;
13322 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13326 /* Implement va_arg by updating the va_list structure
13327 VALIST as required to retrieve an argument of type
13328 TYPE, and returning that argument.
13330 Generates code equivalent to:
13332 if (integral value) {
13333 if (size <= 4 && args.gpr < 5 ||
13334 size > 4 && args.gpr < 4 )
13335 ret = args.reg_save_area[args.gpr+8]
13336 else
13337 ret = *args.overflow_arg_area++;
13338 } else if (vector value) {
13339 ret = *args.overflow_arg_area;
13340 args.overflow_arg_area += size / 8;
13341 } else if (float value) {
13342 if (args.fgpr < 2)
13343 ret = args.reg_save_area[args.fpr+64]
13344 else
13345 ret = *args.overflow_arg_area++;
13346 } else if (aggregate value) {
13347 if (args.gpr < 5)
13348 ret = *args.reg_save_area[args.gpr]
13349 else
13350 ret = **args.overflow_arg_area++;
13351 } */
13353 static tree
13354 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13355 gimple_seq *post_p ATTRIBUTE_UNUSED)
13357 tree f_gpr, f_fpr, f_ovf, f_sav;
13358 tree gpr, fpr, ovf, sav, reg, t, u;
13359 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
13360 tree lab_false, lab_over = NULL_TREE;
13361 tree addr = create_tmp_var (ptr_type_node, "addr");
13362 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
13363 a stack slot. */
13365 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13366 f_fpr = DECL_CHAIN (f_gpr);
13367 f_ovf = DECL_CHAIN (f_fpr);
13368 f_sav = DECL_CHAIN (f_ovf);
13370 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13371 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
13372 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
13374 /* The tree for args* cannot be shared between gpr/fpr and ovf since
13375 both appear on a lhs. */
13376 valist = unshare_expr (valist);
13377 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
13379 size = int_size_in_bytes (type);
13381 s390_check_type_for_vector_abi (type, true, false);
13383 if (pass_va_arg_by_reference (type))
13385 if (TARGET_DEBUG_ARG)
13387 fprintf (stderr, "va_arg: aggregate type");
13388 debug_tree (type);
13391 /* Aggregates are passed by reference. */
13392 indirect_p = 1;
13393 reg = gpr;
13394 n_reg = 1;
13396 /* Kernel stack layout on 31 bit: it is assumed here that no padding
13397 will be added by s390_frame_info because for va_args an even number
13398 of gprs always has to be saved (r15-r2 = 14 regs). */
13399 sav_ofs = 2 * UNITS_PER_LONG;
13400 sav_scale = UNITS_PER_LONG;
13401 size = UNITS_PER_LONG;
13402 max_reg = GP_ARG_NUM_REG - n_reg;
13403 left_align_p = false;
13405 else if (s390_function_arg_vector (TYPE_MODE (type), type))
13407 if (TARGET_DEBUG_ARG)
13409 fprintf (stderr, "va_arg: vector type");
13410 debug_tree (type);
13413 indirect_p = 0;
13414 reg = NULL_TREE;
13415 n_reg = 0;
13416 sav_ofs = 0;
13417 sav_scale = 8;
13418 max_reg = 0;
13419 left_align_p = true;
13421 else if (s390_function_arg_float (TYPE_MODE (type), type))
13423 if (TARGET_DEBUG_ARG)
13425 fprintf (stderr, "va_arg: float type");
13426 debug_tree (type);
13429 /* FP args go in FP registers, if present. */
13430 indirect_p = 0;
13431 reg = fpr;
13432 n_reg = 1;
13433 sav_ofs = 16 * UNITS_PER_LONG;
13434 sav_scale = 8;
13435 max_reg = FP_ARG_NUM_REG - n_reg;
13436 left_align_p = false;
13438 else
13440 if (TARGET_DEBUG_ARG)
13442 fprintf (stderr, "va_arg: other type");
13443 debug_tree (type);
13446 /* Otherwise into GP registers. */
13447 indirect_p = 0;
13448 reg = gpr;
13449 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
13451 /* Kernel stack layout on 31 bit: it is assumed here that no padding
13452 will be added by s390_frame_info because for va_args an even number
13453 of gprs always has to be saved (r15-r2 = 14 regs). */
13454 sav_ofs = 2 * UNITS_PER_LONG;
13456 if (size < UNITS_PER_LONG)
13457 sav_ofs += UNITS_PER_LONG - size;
13459 sav_scale = UNITS_PER_LONG;
13460 max_reg = GP_ARG_NUM_REG - n_reg;
13461 left_align_p = false;
13464 /* Pull the value out of the saved registers ... */
13466 if (reg != NULL_TREE)
13469 /* if (reg > ((typeof (reg))max_reg))
13470 goto lab_false;
13472 addr = sav + sav_ofs + reg * sav_scale;
13474 goto lab_over;
13476 lab_false: */
13479 lab_false = create_artificial_label (UNKNOWN_LOCATION);
13480 lab_over = create_artificial_label (UNKNOWN_LOCATION);
13482 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
13483 t = build2 (GT_EXPR, boolean_type_node, reg, t);
13484 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13485 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13486 gimplify_and_add (t, pre_p);
13488 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13489 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
13490 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
13491 t = fold_build_pointer_plus (t, u);
13493 gimplify_assign (addr, t, pre_p);
13495 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13497 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
13500 /* ... Otherwise out of the overflow area. */
13502 t = ovf;
13503 if (size < UNITS_PER_LONG && !left_align_p)
13504 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
13506 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13508 gimplify_assign (addr, t, pre_p);
13510 if (size < UNITS_PER_LONG && left_align_p)
13511 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
13512 else
13513 t = fold_build_pointer_plus_hwi (t, size);
13515 gimplify_assign (ovf, t, pre_p);
13517 if (reg != NULL_TREE)
13518 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
13521 /* Increment register save count. */
13523 if (n_reg > 0)
13525 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
13526 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
13527 gimplify_and_add (u, pre_p);
13530 if (indirect_p)
13532 t = build_pointer_type_for_mode (build_pointer_type (type),
13533 ptr_mode, true);
13534 addr = fold_convert (t, addr);
13535 addr = build_va_arg_indirect_ref (addr);
13537 else
13539 t = build_pointer_type_for_mode (type, ptr_mode, true);
13540 addr = fold_convert (t, addr);
13543 return build_va_arg_indirect_ref (addr);
13546 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
13547 expanders.
13548 DEST - Register location where CC will be stored.
13549 TDB - Pointer to a 256 byte area where to store the transaction.
13550 diagnostic block. NULL if TDB is not needed.
13551 RETRY - Retry count value. If non-NULL a retry loop for CC2
13552 is emitted
13553 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
13554 of the tbegin instruction pattern. */
13556 void
13557 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
13559 rtx retry_plus_two = gen_reg_rtx (SImode);
13560 rtx retry_reg = gen_reg_rtx (SImode);
13561 rtx_code_label *retry_label = NULL;
13563 if (retry != NULL_RTX)
13565 emit_move_insn (retry_reg, retry);
13566 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
13567 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
13568 retry_label = gen_label_rtx ();
13569 emit_label (retry_label);
13572 if (clobber_fprs_p)
13574 if (TARGET_VX)
13575 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13576 tdb));
13577 else
13578 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13579 tdb));
13581 else
13582 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13583 tdb));
13585 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13586 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13587 CC_REGNUM)),
13588 UNSPEC_CC_TO_INT));
13589 if (retry != NULL_RTX)
13591 const int CC0 = 1 << 3;
13592 const int CC1 = 1 << 2;
13593 const int CC3 = 1 << 0;
13594 rtx jump;
13595 rtx count = gen_reg_rtx (SImode);
13596 rtx_code_label *leave_label = gen_label_rtx ();
13598 /* Exit for success and permanent failures. */
13599 jump = s390_emit_jump (leave_label,
13600 gen_rtx_EQ (VOIDmode,
13601 gen_rtx_REG (CCRAWmode, CC_REGNUM),
13602 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13603 LABEL_NUSES (leave_label) = 1;
13605 /* CC2 - transient failure. Perform retry with ppa. */
13606 emit_move_insn (count, retry_plus_two);
13607 emit_insn (gen_subsi3 (count, count, retry_reg));
13608 emit_insn (gen_tx_assist (count));
13609 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13610 retry_reg,
13611 retry_reg));
13612 JUMP_LABEL (jump) = retry_label;
13613 LABEL_NUSES (retry_label) = 1;
13614 emit_label (leave_label);
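/* Illustrative sketch, not part of the build: what the tbegin_retry
   expansion above provides to users of the HTM builtins (requires
   -mhtm; the helper and the retry count of 5 are invented).  A return
   value of 0 from the builtin means the transaction has started.  */
static int
tbegin_retry_example (int *counter)
{
  /* The expander adds the CC2 retry loop and the PPA hint itself.  */
  if (__builtin_tbegin_retry ((void *) 0, 5) == 0)
    {
      ++*counter;               /* transactional work */
      __builtin_tend ();
      return 0;
    }
  return -1;                    /* persistent failure */
}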
13619 /* Return the decl for the target specific builtin with the function
13620 code FCODE. */
13622 static tree
13623 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13625 if (fcode >= S390_BUILTIN_MAX)
13626 return error_mark_node;
13628 return s390_builtin_decls[fcode];
13631 /* We call mcount before the function prologue. So a profiled leaf
13632 function should stay a leaf function. */
13634 static bool
13635 s390_keep_leaf_when_profiled ()
13637 return true;
13640 /* Output assembly code for the trampoline template to
13641 stdio stream FILE.
13643 On S/390, we use gpr 1 internally in the trampoline code;
13644 gpr 0 is used to hold the static chain. */
13646 static void
13647 s390_asm_trampoline_template (FILE *file)
13649 rtx op[2];
13650 op[0] = gen_rtx_REG (Pmode, 0);
13651 op[1] = gen_rtx_REG (Pmode, 1);
13653 if (TARGET_64BIT)
13655 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13656 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
13657 output_asm_insn ("br\t%1", op); /* 2 byte */
13658 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13660 else
13662 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13663 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
13664 output_asm_insn ("br\t%1", op); /* 2 byte */
13665 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13669 /* Emit RTL insns to initialize the variable parts of a trampoline.
13670 FNADDR is an RTX for the address of the function's pure code.
13671 CXT is an RTX for the static chain value for the function. */
13673 static void
13674 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13676 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13677 rtx mem;
13679 emit_block_move (m_tramp, assemble_trampoline_template (),
13680 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13682 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13683 emit_move_insn (mem, cxt);
13684 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13685 emit_move_insn (mem, fnaddr);
13688 static void
13689 output_asm_nops (const char *user, int hw)
13691 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
13692 while (hw > 0)
13694 if (hw >= 3)
13696 output_asm_insn ("brcl\t0,0", NULL);
13697 hw -= 3;
13699 else if (hw >= 2)
13701 output_asm_insn ("bc\t0,0", NULL);
13702 hw -= 2;
13704 else
13706 output_asm_insn ("bcr\t0,0", NULL);
13707 hw -= 1;
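/* Illustrative sketch, not part of the build: the decomposition
   performed by the loop above, e.g. 7 halfwords become two brcl NOPs
   and one bcr NOP.  */
static void
nop_padding_example (int hw, int *brcl, int *bc, int *bcr)
{
  *brcl = hw / 3;       /* 6-byte NOPs */
  hw %= 3;
  *bc = hw / 2;         /* at most one 4-byte NOP */
  *bcr = hw % 2;        /* at most one 2-byte NOP */
}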
13712 /* Output assembler code to FILE to call a profiler hook. */
13714 void
13715 s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
13717 rtx op[4];
13719 fprintf (file, "# function profiler \n");
13721 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13722 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13723 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13724 op[3] = GEN_INT (UNITS_PER_LONG);
13726 op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
13727 SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION;
13728 if (flag_pic && !TARGET_64BIT)
13730 op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31);
13731 op[2] = gen_rtx_CONST (Pmode, op[2]);
13734 if (flag_record_mcount)
13735 fprintf (file, "1:\n");
13737 if (flag_fentry)
13739 if (flag_nop_mcount)
13740 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13741 else if (cfun->static_chain_decl)
13742 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
13743 "with %<-mfentry%> on s390");
13744 else
13745 output_asm_insn ("brasl\t0,%2%K2", op);
13747 else if (TARGET_64BIT)
13749 if (flag_nop_mcount)
13750 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3 +
13751 /* lg */ 3);
13752 else
13754 output_asm_insn ("stg\t%0,%1", op);
13755 if (flag_dwarf2_cfi_asm)
13756 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13757 output_asm_insn ("brasl\t%0,%2%K2", op);
13758 output_asm_insn ("lg\t%0,%1", op);
13759 if (flag_dwarf2_cfi_asm)
13760 output_asm_insn (".cfi_restore\t%0", op);
13763 else
13765 if (flag_nop_mcount)
13766 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3 +
13767 /* l */ 2);
13768 else
13770 output_asm_insn ("st\t%0,%1", op);
13771 if (flag_dwarf2_cfi_asm)
13772 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13773 output_asm_insn ("brasl\t%0,%2%K2", op);
13774 output_asm_insn ("l\t%0,%1", op);
13775 if (flag_dwarf2_cfi_asm)
13776 output_asm_insn (".cfi_restore\t%0", op);
13780 if (flag_record_mcount)
13782 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13783 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13784 fprintf (file, "\t.previous\n");
13788 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13789 into its SYMBOL_REF_FLAGS. */
13791 static void
13792 s390_encode_section_info (tree decl, rtx rtl, int first)
13794 default_encode_section_info (decl, rtl, first);
13796 if (VAR_P (decl))
13798 /* Store the alignment to be able to check if we can use
13799 a larl/load-relative instruction. We only handle the cases
13800 that can go wrong (i.e. no FUNC_DECLs).
13801 All symbols without an explicit alignment are assumed to be 2
13802 byte aligned as mandated by our ABI. This behavior can be
13803 overridden for external symbols with the -munaligned-symbols
13804 switch. */
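/* Note that DECL_ALIGN is measured in bits, so e.g. an alignment not
divisible by 32 means the symbol may not be 4 byte aligned. */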
13805 if ((DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) % 16)
13806 || (s390_unaligned_symbols_p && !decl_binds_to_current_def_p (decl)))
13807 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13808 else if (DECL_ALIGN (decl) % 32)
13809 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13810 else if (DECL_ALIGN (decl) % 64)
13811 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13814 /* Literal pool references don't have a decl so they are handled
13815 differently here. We rely on the information in the MEM_ALIGN
13816 entry to decide upon the alignment. */
13817 if (MEM_P (rtl)
13818 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13819 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13821 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13822 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13823 else if (MEM_ALIGN (rtl) % 32)
13824 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13825 else if (MEM_ALIGN (rtl) % 64)
13826 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13830 /* Output thunk to FILE that implements a C++ virtual function call (with
13831 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13832 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13833 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13834 relative to the resulting this pointer. */
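/* In C-like pseudo code the emitted thunk performs roughly
this += DELTA;
if (VCALL_OFFSET != 0)
this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
goto FUNCTION;
This is only a sketch; the actual instruction sequences are chosen
below depending on the operand ranges and on -m31 vs. -m64. */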
13836 static void
13837 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13838 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13839 tree function)
13841 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13842 rtx op[10];
13843 int nonlocal = 0;
13845 assemble_start_function (thunk, fnname);
13846 /* Make sure unwind info is emitted for the thunk if needed. */
13847 final_start_function (emit_barrier (), file, 1);
13849 /* Operand 0 is the target function. */
13850 op[0] = XEXP (DECL_RTL (function), 0);
13851 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13853 nonlocal = 1;
13854 if (!TARGET_64BIT)
13856 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT);
13857 op[0] = gen_rtx_CONST (Pmode, op[0]);
13861 /* Operand 1 is the 'this' pointer. */
13862 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13863 op[1] = gen_rtx_REG (Pmode, 3);
13864 else
13865 op[1] = gen_rtx_REG (Pmode, 2);
13867 /* Operand 2 is the delta. */
13868 op[2] = GEN_INT (delta);
13870 /* Operand 3 is the vcall_offset. */
13871 op[3] = GEN_INT (vcall_offset);
13873 /* Operand 4 is the temporary register. */
13874 op[4] = gen_rtx_REG (Pmode, 1);
13876 /* Operands 5 to 8 can be used as labels. */
13877 op[5] = NULL_RTX;
13878 op[6] = NULL_RTX;
13879 op[7] = NULL_RTX;
13880 op[8] = NULL_RTX;
13882 /* Operand 9 can be used for temporary register. */
13883 op[9] = NULL_RTX;
13885 /* Generate code. */
13886 if (TARGET_64BIT)
13888 /* Setup literal pool pointer if required. */
13889 if ((!DISP_IN_RANGE (delta)
13890 && !CONST_OK_FOR_K (delta)
13891 && !CONST_OK_FOR_Os (delta))
13892 || (!DISP_IN_RANGE (vcall_offset)
13893 && !CONST_OK_FOR_K (vcall_offset)
13894 && !CONST_OK_FOR_Os (vcall_offset)))
13896 op[5] = gen_label_rtx ();
13897 output_asm_insn ("larl\t%4,%5", op);
13900 /* Add DELTA to this pointer. */
13901 if (delta)
13903 if (CONST_OK_FOR_J (delta))
13904 output_asm_insn ("la\t%1,%2(%1)", op);
13905 else if (DISP_IN_RANGE (delta))
13906 output_asm_insn ("lay\t%1,%2(%1)", op);
13907 else if (CONST_OK_FOR_K (delta))
13908 output_asm_insn ("aghi\t%1,%2", op);
13909 else if (CONST_OK_FOR_Os (delta))
13910 output_asm_insn ("agfi\t%1,%2", op);
13911 else
13913 op[6] = gen_label_rtx ();
13914 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13918 /* Perform vcall adjustment. */
13919 if (vcall_offset)
13921 if (DISP_IN_RANGE (vcall_offset))
13923 output_asm_insn ("lg\t%4,0(%1)", op);
13924 output_asm_insn ("ag\t%1,%3(%4)", op);
13926 else if (CONST_OK_FOR_K (vcall_offset))
13928 output_asm_insn ("lghi\t%4,%3", op);
13929 output_asm_insn ("ag\t%4,0(%1)", op);
13930 output_asm_insn ("ag\t%1,0(%4)", op);
13932 else if (CONST_OK_FOR_Os (vcall_offset))
13934 output_asm_insn ("lgfi\t%4,%3", op);
13935 output_asm_insn ("ag\t%4,0(%1)", op);
13936 output_asm_insn ("ag\t%1,0(%4)", op);
13938 else
13940 op[7] = gen_label_rtx ();
13941 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13942 output_asm_insn ("ag\t%4,0(%1)", op);
13943 output_asm_insn ("ag\t%1,0(%4)", op);
13947 /* Jump to target. */
13948 output_asm_insn ("jg\t%0%K0", op);
13950 /* Output literal pool if required. */
13951 if (op[5])
13953 output_asm_insn (".align\t4", op);
13954 targetm.asm_out.internal_label (file, "L",
13955 CODE_LABEL_NUMBER (op[5]));
13957 if (op[6])
13959 targetm.asm_out.internal_label (file, "L",
13960 CODE_LABEL_NUMBER (op[6]));
13961 output_asm_insn (".long\t%2", op);
13963 if (op[7])
13965 targetm.asm_out.internal_label (file, "L",
13966 CODE_LABEL_NUMBER (op[7]));
13967 output_asm_insn (".long\t%3", op);
13970 else
13972 /* Setup base pointer if required. */
13973 if (!vcall_offset
13974 || (!DISP_IN_RANGE (delta)
13975 && !CONST_OK_FOR_K (delta)
13976 && !CONST_OK_FOR_Os (delta))
13977 || (!DISP_IN_RANGE (delta)
13978 && !CONST_OK_FOR_K (vcall_offset)
13979 && !CONST_OK_FOR_Os (vcall_offset)))
13981 op[5] = gen_label_rtx ();
13982 output_asm_insn ("basr\t%4,0", op);
13983 targetm.asm_out.internal_label (file, "L",
13984 CODE_LABEL_NUMBER (op[5]));
13987 /* Add DELTA to this pointer. */
13988 if (delta)
13990 if (CONST_OK_FOR_J (delta))
13991 output_asm_insn ("la\t%1,%2(%1)", op);
13992 else if (DISP_IN_RANGE (delta))
13993 output_asm_insn ("lay\t%1,%2(%1)", op);
13994 else if (CONST_OK_FOR_K (delta))
13995 output_asm_insn ("ahi\t%1,%2", op);
13996 else if (CONST_OK_FOR_Os (delta))
13997 output_asm_insn ("afi\t%1,%2", op);
13998 else
14000 op[6] = gen_label_rtx ();
14001 output_asm_insn ("a\t%1,%6-%5(%4)", op);
14005 /* Perform vcall adjustment. */
14006 if (vcall_offset)
14008 if (CONST_OK_FOR_J (vcall_offset))
14010 output_asm_insn ("l\t%4,0(%1)", op);
14011 output_asm_insn ("a\t%1,%3(%4)", op);
14013 else if (DISP_IN_RANGE (vcall_offset))
14015 output_asm_insn ("l\t%4,0(%1)", op);
14016 output_asm_insn ("ay\t%1,%3(%4)", op);
14018 else if (CONST_OK_FOR_K (vcall_offset))
14020 output_asm_insn ("lhi\t%4,%3", op);
14021 output_asm_insn ("a\t%4,0(%1)", op);
14022 output_asm_insn ("a\t%1,0(%4)", op);
14024 else if (CONST_OK_FOR_Os (vcall_offset))
14026 output_asm_insn ("iilf\t%4,%3", op);
14027 output_asm_insn ("a\t%4,0(%1)", op);
14028 output_asm_insn ("a\t%1,0(%4)", op);
14030 else
14032 op[7] = gen_label_rtx ();
14033 output_asm_insn ("l\t%4,%7-%5(%4)", op);
14034 output_asm_insn ("a\t%4,0(%1)", op);
14035 output_asm_insn ("a\t%1,0(%4)", op);
14038 /* We had to clobber the base pointer register.
14039 Re-setup the base pointer (with a different base). */
14040 op[5] = gen_label_rtx ();
14041 output_asm_insn ("basr\t%4,0", op);
14042 targetm.asm_out.internal_label (file, "L",
14043 CODE_LABEL_NUMBER (op[5]));
14046 /* Jump to target. */
14047 op[8] = gen_label_rtx ();
14049 if (!flag_pic)
14050 output_asm_insn ("l\t%4,%8-%5(%4)", op);
14051 else if (!nonlocal)
14052 output_asm_insn ("a\t%4,%8-%5(%4)", op);
14053 /* We cannot call through .plt, since .plt requires %r12 loaded. */
14054 else if (flag_pic == 1)
14056 output_asm_insn ("a\t%4,%8-%5(%4)", op);
14057 output_asm_insn ("l\t%4,%0(%4)", op);
14059 else if (flag_pic == 2)
14061 op[9] = gen_rtx_REG (Pmode, 0);
14062 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
14063 output_asm_insn ("a\t%4,%8-%5(%4)", op);
14064 output_asm_insn ("ar\t%4,%9", op);
14065 output_asm_insn ("l\t%4,0(%4)", op);
14068 output_asm_insn ("br\t%4", op);
14070 /* Output literal pool. */
14071 output_asm_insn (".align\t4", op);
14073 if (nonlocal && flag_pic == 2)
14074 output_asm_insn (".long\t%0", op);
14075 if (nonlocal)
14077 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
14078 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
14081 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
14082 if (!flag_pic)
14083 output_asm_insn (".long\t%0", op);
14084 else
14085 output_asm_insn (".long\t%0-%5", op);
14087 if (op[6])
14089 targetm.asm_out.internal_label (file, "L",
14090 CODE_LABEL_NUMBER (op[6]));
14091 output_asm_insn (".long\t%2", op);
14093 if (op[7])
14095 targetm.asm_out.internal_label (file, "L",
14096 CODE_LABEL_NUMBER (op[7]));
14097 output_asm_insn (".long\t%3", op);
14100 final_end_function ();
14101 assemble_end_function (thunk, fnname);
14104 /* Output either an indirect jump or an indirect call
14105 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
14106 using a branch trampoline disabling branch target prediction. */
14108 void
14109 s390_indirect_branch_via_thunk (unsigned int regno,
14110 unsigned int return_addr_regno,
14111 rtx comparison_operator,
14112 enum s390_indirect_branch_type type)
14114 enum s390_indirect_branch_option option;
14116 if (type == s390_indirect_branch_type_return)
14118 if (s390_return_addr_from_memory ())
14119 option = s390_opt_function_return_mem;
14120 else
14121 option = s390_opt_function_return_reg;
14123 else if (type == s390_indirect_branch_type_jump)
14124 option = s390_opt_indirect_branch_jump;
14125 else if (type == s390_indirect_branch_type_call)
14126 option = s390_opt_indirect_branch_call;
14127 else
14128 gcc_unreachable ();
14130 if (TARGET_INDIRECT_BRANCH_TABLE)
14132 char label[32];
14134 ASM_GENERATE_INTERNAL_LABEL (label,
14135 indirect_branch_table_label[option],
14136 indirect_branch_table_label_no[option]++);
14137 ASM_OUTPUT_LABEL (asm_out_file, label);
14140 if (return_addr_regno != INVALID_REGNUM)
14142 gcc_assert (comparison_operator == NULL_RTX);
14143 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
14145 else
14147 fputs (" \tjg", asm_out_file);
14148 if (comparison_operator != NULL_RTX)
14149 print_operand (asm_out_file, comparison_operator, 'C');
14151 fputs ("\t", asm_out_file);
14154 if (TARGET_CPU_Z10)
14155 fprintf (asm_out_file,
14156 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
14157 regno);
14158 else
14159 fprintf (asm_out_file,
14160 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
14161 INDIRECT_BRANCH_THUNK_REGNUM, regno);
14163 if ((option == s390_opt_indirect_branch_jump
14164 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
14165 || (option == s390_opt_indirect_branch_call
14166 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
14167 || (option == s390_opt_function_return_reg
14168 && cfun->machine->function_return_reg == indirect_branch_thunk)
14169 || (option == s390_opt_function_return_mem
14170 && cfun->machine->function_return_mem == indirect_branch_thunk))
14172 if (TARGET_CPU_Z10)
14173 indirect_branch_z10thunk_mask |= (1 << regno);
14174 else
14175 indirect_branch_prez10thunk_mask |= (1 << regno);
14179 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
14180 either be an address register or a label pointing to the location
14181 of the jump instruction. */
14183 void
14184 s390_indirect_branch_via_inline_thunk (rtx execute_target)
14186 if (TARGET_INDIRECT_BRANCH_TABLE)
14188 char label[32];
14190 ASM_GENERATE_INTERNAL_LABEL (label,
14191 indirect_branch_table_label[s390_opt_indirect_branch_jump],
14192 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
14193 ASM_OUTPUT_LABEL (asm_out_file, label);
14196 if (!TARGET_ZARCH)
14197 fputs ("\t.machinemode zarch\n", asm_out_file);
14199 if (REG_P (execute_target))
14200 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
14201 else
14202 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
14204 if (!TARGET_ZARCH)
14205 fputs ("\t.machinemode esa\n", asm_out_file);
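/* The executed branch above transfers control out of the thunk; the
self-loop emitted next is never reached architecturally and serves to
catch mispredicted speculative execution of the indirect branch. */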
14207 fputs ("0:\tj\t0b\n", asm_out_file);
14210 static bool
14211 s390_valid_pointer_mode (scalar_int_mode mode)
14213 return (mode == SImode || (TARGET_64BIT && mode == DImode));
14216 /* Checks whether the given CALL_EXPR would use a call-saved
14217 register. This is used to decide whether sibling call
14218 optimization could be performed on the respective function
14219 call. */
14221 static bool
14222 s390_call_saved_register_used (tree call_expr)
14224 CUMULATIVE_ARGS cum_v;
14225 cumulative_args_t cum;
14226 tree parameter;
14227 rtx parm_rtx;
14228 int reg, i;
14230 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
14231 cum = pack_cumulative_args (&cum_v);
14233 for (i = 0; i < call_expr_nargs (call_expr); i++)
14235 parameter = CALL_EXPR_ARG (call_expr, i);
14236 gcc_assert (parameter);
14238 /* For an undeclared variable passed as parameter we will get
14239 an ERROR_MARK node here. */
14240 if (TREE_CODE (parameter) == ERROR_MARK)
14241 return true;
14243 /* We assume that in the target function all parameters are
14244 named. This only has an impact on vector argument register
14245 usage, none of which is call-saved. */
14246 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
14247 apply_pass_by_reference_rules (&cum_v, arg);
14249 parm_rtx = s390_function_arg (cum, arg);
14251 s390_function_arg_advance (cum, arg);
14253 if (!parm_rtx)
14254 continue;
14256 if (REG_P (parm_rtx))
14258 int size = s390_function_arg_size (arg.mode, arg.type);
14259 int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
14261 for (reg = 0; reg < nregs; reg++)
14262 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
14263 return true;
14265 else if (GET_CODE (parm_rtx) == PARALLEL)
14267 int i;
14269 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
14271 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
14273 gcc_assert (REG_P (r));
14274 gcc_assert (REG_NREGS (r) == 1);
14276 if (!call_used_or_fixed_reg_p (REGNO (r)))
14277 return true;
14281 return false;
14284 /* Return true if the given call expression can be
14285 turned into a sibling call.
14286 DECL holds the declaration of the function to be called whereas
14287 EXP is the call expression itself. */
14289 static bool
14290 s390_function_ok_for_sibcall (tree decl, tree exp)
14292 /* The TPF epilogue uses register 1. */
14293 if (TARGET_TPF_PROFILING)
14294 return false;
14296 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
14297 which would have to be restored before the sibcall. */
14298 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
14299 return false;
14301 /* The thunks for indirect branches require r1 if no exrl is
14302 available. r1 might not be available when doing a sibling
14303 call. */
14304 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
14305 && !TARGET_CPU_Z10
14306 && !decl)
14307 return false;
14309 /* Register 6 on s390 is available as an argument register but unfortunately
14310 "caller saved". This makes functions needing this register for arguments
14311 not suitable for sibcalls. */
14312 return !s390_call_saved_register_used (exp);
14315 /* Return the fixed registers used for condition codes. */
14317 static bool
14318 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14320 *p1 = CC_REGNUM;
14321 *p2 = INVALID_REGNUM;
14323 return true;
14326 /* This function is used by the call expanders of the machine description.
14327 It emits the call insn itself together with the necessary operations
14328 to adjust the target address and returns the emitted insn.
14329 ADDR_LOCATION is the target address rtx
14330 TLS_CALL the location of the thread-local symbol
14331 RESULT_REG the register where the result of the call should be stored
14332 RETADDR_REG the register where the return address should be stored
14333 If this parameter is NULL_RTX the call is considered
14334 to be a sibling call. */
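/* Depending on which optional operands are present, the emitted call
insn is either a plain CALL or, as sketched here, a PARALLEL of the form
(parallel [(set RESULT_REG (call ...))
(clobber RETADDR_REG)
(use TLS_CALL)
(clobber <thunk register>)])
with unused elements omitted; see the handling of vec[] below. */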
14336 rtx_insn *
14337 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
14338 rtx retaddr_reg)
14340 bool plt31_call_p = false;
14341 rtx_insn *insn;
14342 rtx vec[4] = { NULL_RTX };
14343 int elts = 0;
14344 rtx *call = &vec[0];
14345 rtx *clobber_ret_reg = &vec[1];
14346 rtx *use = &vec[2];
14347 rtx *clobber_thunk_reg = &vec[3];
14348 int i;
14350 /* Direct function calls need special treatment. */
14351 if (GET_CODE (addr_location) == SYMBOL_REF)
14353 /* When calling a global routine in PIC mode, we must
14354 replace the symbol itself with the PLT stub. */
14355 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT)
14357 if (retaddr_reg != NULL_RTX)
14359 addr_location = gen_rtx_UNSPEC (Pmode,
14360 gen_rtvec (1, addr_location),
14361 UNSPEC_PLT31);
14362 addr_location = gen_rtx_CONST (Pmode, addr_location);
14363 plt31_call_p = true;
14365 else
14366 /* For -fpic code the PLT entries might use r12 which is
14367 call-saved. Therefore we cannot do a sibcall when
14368 calling directly using a symbol ref. When reaching
14369 this point we decided (in s390_function_ok_for_sibcall)
14370 to do a sibcall for a function pointer but one of the
14371 optimizers was able to get rid of the function pointer
14372 by propagating the symbol ref into the call. This
14373 optimization is illegal for S/390 so we turn the direct
14374 call into an indirect call again. */
14375 addr_location = force_reg (Pmode, addr_location);
14379 /* If it is already an indirect call or the code above moved the
14380 SYMBOL_REF to somewhere else make sure the address can be found in
14381 register 1. */
14382 if (retaddr_reg == NULL_RTX
14383 && GET_CODE (addr_location) != SYMBOL_REF
14384 && !plt31_call_p)
14386 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
14387 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
14390 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
14391 && GET_CODE (addr_location) != SYMBOL_REF
14392 && !plt31_call_p)
14394 /* Indirect branch thunks require the target to be a single GPR. */
14395 addr_location = force_reg (Pmode, addr_location);
14397 /* Without exrl the indirect branch thunks need an additional
14398 register for larl;ex. */
14399 if (!TARGET_CPU_Z10)
14401 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
14402 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
14406 addr_location = gen_rtx_MEM (QImode, addr_location);
14407 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
14409 if (result_reg != NULL_RTX)
14410 *call = gen_rtx_SET (result_reg, *call);
14412 if (retaddr_reg != NULL_RTX)
14414 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
14416 if (tls_call != NULL_RTX)
14417 *use = gen_rtx_USE (VOIDmode, tls_call);
14421 for (i = 0; i < 4; i++)
14422 if (vec[i] != NULL_RTX)
14423 elts++;
14425 if (elts > 1)
14427 rtvec v;
14428 int e = 0;
14430 v = rtvec_alloc (elts);
14431 for (i = 0; i < 4; i++)
14432 if (vec[i] != NULL_RTX)
14434 RTVEC_ELT (v, e) = vec[i];
14435 e++;
14438 *call = gen_rtx_PARALLEL (VOIDmode, v);
14441 insn = emit_call_insn (*call);
14443 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
14444 if (plt31_call_p || tls_call != NULL_RTX)
14446 /* s390_function_ok_for_sibcall should
14447 have denied sibcalls in this case. */
14448 gcc_assert (retaddr_reg != NULL_RTX);
14449 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
14451 return insn;
14454 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
14456 static void
14457 s390_conditional_register_usage (void)
14459 int i;
14461 if (flag_pic)
14462 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
14463 fixed_regs[BASE_REGNUM] = 0;
14464 fixed_regs[RETURN_REGNUM] = 0;
14465 if (TARGET_64BIT)
14467 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
14468 call_used_regs[i] = 0;
14470 else
14472 call_used_regs[FPR4_REGNUM] = 0;
14473 call_used_regs[FPR6_REGNUM] = 0;
14476 if (TARGET_SOFT_FLOAT)
14478 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
14479 fixed_regs[i] = 1;
14482 /* Disable v16 - v31 for non-vector target. */
14483 if (!TARGET_VX)
14485 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
14486 fixed_regs[i] = call_used_regs[i] = 1;
14490 /* Corresponding function to eh_return expander. */
14492 static GTY(()) rtx s390_tpf_eh_return_symbol;
14493 void
14494 s390_emit_tpf_eh_return (rtx target)
14496 rtx_insn *insn;
14497 rtx reg, orig_ra;
14499 if (!s390_tpf_eh_return_symbol)
14501 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
14502 SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION;
14505 reg = gen_rtx_REG (Pmode, 2);
14506 orig_ra = gen_rtx_REG (Pmode, 3);
14508 emit_move_insn (reg, target);
14509 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
14510 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
14511 gen_rtx_REG (Pmode, RETURN_REGNUM));
14512 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
14513 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
14515 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
14518 /* Rework the prologue/epilogue to avoid saving/restoring
14519 registers unnecessarily. */
14521 static void
14522 s390_optimize_prologue (void)
14524 rtx_insn *insn, *new_insn, *next_insn;
14526 /* Do a final recompute of the frame-related data. */
14527 s390_optimize_register_info ();
14529 /* If all special registers are in fact used, there's nothing we
14530 can do, so no point in walking the insn list. */
14532 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
14533 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
14534 return;
14536 /* Search for prologue/epilogue insns and replace them. */
14537 for (insn = get_insns (); insn; insn = next_insn)
14539 int first, last, off;
14540 rtx set, base, offset;
14541 rtx pat;
14543 next_insn = NEXT_INSN (insn);
14545 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
14546 continue;
14548 pat = PATTERN (insn);
14550 /* Remove ldgr/lgdr instructions used for saving and restoring
14551 GPRs if possible. */
14552 if (TARGET_Z10)
14554 rtx tmp_pat = pat;
14556 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
14557 tmp_pat = XVECEXP (pat, 0, 0);
14559 if (GET_CODE (tmp_pat) == SET
14560 && GET_MODE (SET_SRC (tmp_pat)) == DImode
14561 && REG_P (SET_SRC (tmp_pat))
14562 && REG_P (SET_DEST (tmp_pat)))
14564 int src_regno = REGNO (SET_SRC (tmp_pat));
14565 int dest_regno = REGNO (SET_DEST (tmp_pat));
14566 int gpr_regno;
14567 int fpr_regno;
14569 if (!((GENERAL_REGNO_P (src_regno)
14570 && FP_REGNO_P (dest_regno))
14571 || (FP_REGNO_P (src_regno)
14572 && GENERAL_REGNO_P (dest_regno))))
14573 continue;
14575 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
14576 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14578 /* GPR must be call-saved, FPR must be call-clobbered. */
14579 if (!call_used_regs[fpr_regno]
14580 || call_used_regs[gpr_regno])
14581 continue;
14583 /* It must not happen that what we once saved in an FPR now
14584 needs a stack slot. */
14585 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14587 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14589 remove_insn (insn);
14590 continue;
14595 if (GET_CODE (pat) == PARALLEL
14596 && store_multiple_operation (pat, VOIDmode))
14598 set = XVECEXP (pat, 0, 0);
14599 first = REGNO (SET_SRC (set));
14600 last = first + XVECLEN (pat, 0) - 1;
14601 offset = const0_rtx;
14602 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14603 off = INTVAL (offset);
14605 if (GET_CODE (base) != REG || off < 0)
14606 continue;
14607 if (cfun_frame_layout.first_save_gpr != -1
14608 && (cfun_frame_layout.first_save_gpr < first
14609 || cfun_frame_layout.last_save_gpr > last))
14610 continue;
14611 if (REGNO (base) != STACK_POINTER_REGNUM
14612 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14613 continue;
14614 if (first > BASE_REGNUM || last < BASE_REGNUM)
14615 continue;
14617 if (cfun_frame_layout.first_save_gpr != -1)
14618 save_gprs (base,
14619 off + (cfun_frame_layout.first_save_gpr
14620 - first) * UNITS_PER_LONG,
14621 cfun_frame_layout.first_save_gpr,
14622 cfun_frame_layout.last_save_gpr, insn);
14624 remove_insn (insn);
14625 continue;
14628 if (cfun_frame_layout.first_save_gpr == -1
14629 && GET_CODE (pat) == SET
14630 && GENERAL_REG_P (SET_SRC (pat))
14631 && GET_CODE (SET_DEST (pat)) == MEM)
14633 set = pat;
14634 first = REGNO (SET_SRC (set));
14635 offset = const0_rtx;
14636 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14637 off = INTVAL (offset);
14639 if (GET_CODE (base) != REG || off < 0)
14640 continue;
14641 if (REGNO (base) != STACK_POINTER_REGNUM
14642 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14643 continue;
14645 remove_insn (insn);
14646 continue;
14649 if (GET_CODE (pat) == PARALLEL
14650 && load_multiple_operation (pat, VOIDmode))
14652 set = XVECEXP (pat, 0, 0);
14653 first = REGNO (SET_DEST (set));
14654 last = first + XVECLEN (pat, 0) - 1;
14655 offset = const0_rtx;
14656 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14657 off = INTVAL (offset);
14659 if (GET_CODE (base) != REG || off < 0)
14660 continue;
14662 if (cfun_frame_layout.first_restore_gpr != -1
14663 && (cfun_frame_layout.first_restore_gpr < first
14664 || cfun_frame_layout.last_restore_gpr > last))
14665 continue;
14666 if (REGNO (base) != STACK_POINTER_REGNUM
14667 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14668 continue;
14669 if (first > BASE_REGNUM || last < BASE_REGNUM)
14670 continue;
14672 if (cfun_frame_layout.first_restore_gpr != -1)
14674 rtx rpat = restore_gprs (base,
14675 off + (cfun_frame_layout.first_restore_gpr
14676 - first) * UNITS_PER_LONG,
14677 cfun_frame_layout.first_restore_gpr,
14678 cfun_frame_layout.last_restore_gpr);
14680 /* Remove REG_CFA_RESTOREs for registers that we no
14681 longer need to save. */
14682 REG_NOTES (rpat) = REG_NOTES (insn);
14683 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14684 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14685 && ((int) REGNO (XEXP (*ptr, 0))
14686 < cfun_frame_layout.first_restore_gpr))
14687 *ptr = XEXP (*ptr, 1);
14688 else
14689 ptr = &XEXP (*ptr, 1);
14690 new_insn = emit_insn_before (rpat, insn);
14691 RTX_FRAME_RELATED_P (new_insn) = 1;
14692 INSN_ADDRESSES_NEW (new_insn, -1);
14695 remove_insn (insn);
14696 continue;
14699 if (cfun_frame_layout.first_restore_gpr == -1
14700 && GET_CODE (pat) == SET
14701 && GENERAL_REG_P (SET_DEST (pat))
14702 && GET_CODE (SET_SRC (pat)) == MEM)
14704 set = pat;
14705 first = REGNO (SET_DEST (set));
14706 offset = const0_rtx;
14707 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14708 off = INTVAL (offset);
14710 if (GET_CODE (base) != REG || off < 0)
14711 continue;
14713 if (REGNO (base) != STACK_POINTER_REGNUM
14714 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14715 continue;
14717 remove_insn (insn);
14718 continue;
14723 /* On z10 and later the dynamic branch prediction must see the
14724 backward jump within a certain window. If not, it falls back to
14725 the static prediction. This function rearranges the loop backward
14726 branch in a way which makes the static prediction always correct.
14727 The function returns true if it added an instruction. */
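/* Sketch of the transformation done below: a conditional backward
branch whose target lies outside the prediction window

j<cond> .Lloop_head

is rewritten as

j<inverted cond> .Lnew
j .Lloop_head
.Lnew:

so the loop-closing branch becomes an unconditional backward jump,
which the static prediction always gets right, while the conditional
branch turns into a short forward jump. */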
14728 static bool
14729 s390_fix_long_loop_prediction (rtx_insn *insn)
14731 rtx set = single_set (insn);
14732 rtx code_label, label_ref;
14733 rtx_insn *uncond_jump;
14734 rtx_insn *cur_insn;
14735 rtx tmp;
14736 int distance;
14738 /* This will exclude branch on count and branch on index patterns
14739 since these are correctly statically predicted.
14741 The additional check for a PARALLEL is required here since
14742 single_set might be != NULL for PARALLELs where the set of the
14743 iteration variable is dead. */
14744 if (GET_CODE (PATTERN (insn)) == PARALLEL
14745 || !set
14746 || SET_DEST (set) != pc_rtx
14747 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14748 return false;
14750 /* Skip conditional returns. */
14751 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14752 && XEXP (SET_SRC (set), 2) == pc_rtx)
14753 return false;
14755 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14756 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14758 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14760 code_label = XEXP (label_ref, 0);
14762 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14763 || INSN_ADDRESSES (INSN_UID (insn)) == -1
14764 || (INSN_ADDRESSES (INSN_UID (insn))
14765 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14766 return false;
14768 for (distance = 0, cur_insn = PREV_INSN (insn);
14769 distance < PREDICT_DISTANCE - 6;
14770 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14771 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14772 return false;
14774 rtx_code_label *new_label = gen_label_rtx ();
14775 uncond_jump = emit_jump_insn_after (
14776 gen_rtx_SET (pc_rtx,
14777 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14778 insn);
14779 emit_label_after (new_label, uncond_jump);
14781 tmp = XEXP (SET_SRC (set), 1);
14782 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14783 XEXP (SET_SRC (set), 2) = tmp;
14784 INSN_CODE (insn) = -1;
14786 XEXP (label_ref, 0) = new_label;
14787 JUMP_LABEL (insn) = new_label;
14788 JUMP_LABEL (uncond_jump) = code_label;
14790 return true;
14793 /* Returns 1 if INSN reads the value of REG for purposes not related
14794 to addressing of memory, and 0 otherwise. */
14795 static int
14796 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14798 return reg_referenced_p (reg, PATTERN (insn))
14799 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14802 /* Starting from INSN find_cond_jump looks downwards in the insn
14803 stream for a single jump insn which is the last user of the
14804 condition code set in INSN. */
14805 static rtx_insn *
14806 find_cond_jump (rtx_insn *insn)
14808 for (; insn; insn = NEXT_INSN (insn))
14810 rtx ite, cc;
14812 if (LABEL_P (insn))
14813 break;
14815 if (!JUMP_P (insn))
14817 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14818 break;
14819 continue;
14822 /* This will be triggered by a return. */
14823 if (GET_CODE (PATTERN (insn)) != SET)
14824 break;
14826 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14827 ite = SET_SRC (PATTERN (insn));
14829 if (GET_CODE (ite) != IF_THEN_ELSE)
14830 break;
14832 cc = XEXP (XEXP (ite, 0), 0);
14833 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14834 break;
14836 if (find_reg_note (insn, REG_DEAD, cc))
14837 return insn;
14838 break;
14841 return NULL;
14844 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14845 the semantics does not change. If NULL_RTX is passed as COND the
14846 function tries to find the conditional jump starting with INSN. */
14847 static void
14848 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14850 rtx tmp = *op0;
14852 if (cond == NULL_RTX)
14854 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14855 rtx set = jump ? single_set (jump) : NULL_RTX;
14857 if (set == NULL_RTX)
14858 return;
14860 cond = XEXP (SET_SRC (set), 0);
14863 *op0 = *op1;
14864 *op1 = tmp;
14865 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14868 /* On z10, instructions of the compare-and-branch family have the
14869 property to access the register occurring as second operand with
14870 its bits complemented. If such a compare is grouped with a second
14871 instruction that accesses the same register non-complemented, and
14872 if that register's value is delivered via a bypass, then the
14873 pipeline recycles, thereby causing significant performance decline.
14874 This function locates such situations and exchanges the two
14875 operands of the compare. The function returns true whenever it
14876 added an insn. */
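/* Sketch of the fix: for a z10 compare-and-branch such as
crj %r1,%r2,<cond>,<target>
grouped with a neighbouring insn that also reads %r2, the compare
operands are swapped (and the condition swapped accordingly) to
crj %r2,%r1,<swapped cond>,<target>
or, when swapping would only create a conflict on the other side, a
register-copy NOP is emitted after the compare to separate the two
insns. */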
14877 static bool
14878 s390_z10_optimize_cmp (rtx_insn *insn)
14880 rtx_insn *prev_insn, *next_insn;
14881 bool insn_added_p = false;
14882 rtx cond, *op0, *op1;
14884 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14886 /* Handle compare and branch and branch on count
14887 instructions. */
14888 rtx pattern = single_set (insn);
14890 if (!pattern
14891 || SET_DEST (pattern) != pc_rtx
14892 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14893 return false;
14895 cond = XEXP (SET_SRC (pattern), 0);
14896 op0 = &XEXP (cond, 0);
14897 op1 = &XEXP (cond, 1);
14899 else if (GET_CODE (PATTERN (insn)) == SET)
14901 rtx src, dest;
14903 /* Handle normal compare instructions. */
14904 src = SET_SRC (PATTERN (insn));
14905 dest = SET_DEST (PATTERN (insn));
14907 if (!REG_P (dest)
14908 || !CC_REGNO_P (REGNO (dest))
14909 || GET_CODE (src) != COMPARE)
14910 return false;
14912 /* s390_swap_cmp will try to find the conditional
14913 jump when passing NULL_RTX as condition. */
14914 cond = NULL_RTX;
14915 op0 = &XEXP (src, 0);
14916 op1 = &XEXP (src, 1);
14918 else
14919 return false;
14921 if (!REG_P (*op0) || !REG_P (*op1))
14922 return false;
14924 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14925 return false;
14927 /* Swap the COMPARE arguments and its mask if there is a
14928 conflicting access in the previous insn. */
14929 prev_insn = prev_active_insn (insn);
14930 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14931 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14932 s390_swap_cmp (cond, op0, op1, insn);
14934 /* Check if there is a conflict with the next insn. If there
14935 was no conflict with the previous insn, then swap the
14936 COMPARE arguments and its mask. If we already swapped
14937 the operands, or if swapping them would cause a conflict
14938 with the previous insn, issue a NOP after the COMPARE in
14939 order to separate the two instructions. */
14940 next_insn = next_active_insn (insn);
14941 if (next_insn != NULL_RTX && INSN_P (next_insn)
14942 && s390_non_addr_reg_read_p (*op1, next_insn))
14944 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14945 && s390_non_addr_reg_read_p (*op0, prev_insn))
14947 if (REGNO (*op1) == 0)
14948 emit_insn_after (gen_nop_lr1 (), insn);
14949 else
14950 emit_insn_after (gen_nop_lr0 (), insn);
14951 insn_added_p = true;
14953 else
14954 s390_swap_cmp (cond, op0, op1, insn);
14956 return insn_added_p;
14959 /* Number of INSNs to be scanned backward in the last BB of the loop
14960 and forward in the first BB of the loop. This usually should be a
14961 bit more than the number of INSNs which could go into one
14962 group. */
14963 #define S390_OSC_SCAN_INSN_NUM 5
14965 /* Scan LOOP for static OSC collisions and return true if an osc_break
14966 should be issued for this loop. */
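/* The pattern looked for is, roughly: a store near the end of the
loop latch and a load in the loop header from the same decomposed
address (base/index/displacement), where the base or index register
has been modified in between. The value stored at the end of
iteration N is then reloaded right at the start of iteration N+1,
which can cause an operand-store-compare (OSC) hazard; emitting an
osc_break avoids it. */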
14967 static bool
14968 s390_adjust_loop_scan_osc (struct loop* loop)
14971 HARD_REG_SET modregs, newregs;
14972 rtx_insn *insn, *store_insn = NULL;
14973 rtx set;
14974 struct s390_address addr_store, addr_load;
14975 subrtx_iterator::array_type array;
14976 int insn_count;
14978 CLEAR_HARD_REG_SET (modregs);
14980 insn_count = 0;
14981 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14983 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14984 continue;
14986 insn_count++;
14987 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14988 return false;
14990 find_all_hard_reg_sets (insn, &newregs, true);
14991 modregs |= newregs;
14993 set = single_set (insn);
14994 if (!set)
14995 continue;
14997 if (MEM_P (SET_DEST (set))
14998 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
15000 store_insn = insn;
15001 break;
15005 if (store_insn == NULL_RTX)
15006 return false;
15008 insn_count = 0;
15009 FOR_BB_INSNS (loop->header, insn)
15011 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
15012 continue;
15014 if (insn == store_insn)
15015 return false;
15017 insn_count++;
15018 if (insn_count > S390_OSC_SCAN_INSN_NUM)
15019 return false;
15021 find_all_hard_reg_sets (insn, &newregs, true);
15022 modregs |= newregs;
15024 set = single_set (insn);
15025 if (!set)
15026 continue;
15028 /* An intermediate store disrupts static OSC checking
15029 anyway. */
15030 if (MEM_P (SET_DEST (set))
15031 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
15032 return false;
15034 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
15035 if (MEM_P (*iter)
15036 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
15037 && rtx_equal_p (addr_load.base, addr_store.base)
15038 && rtx_equal_p (addr_load.indx, addr_store.indx)
15039 && rtx_equal_p (addr_load.disp, addr_store.disp))
15041 if ((addr_load.base != NULL_RTX
15042 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
15043 || (addr_load.indx != NULL_RTX
15044 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
15045 return true;
15048 return false;
15051 /* Look for adjustments which can be done on simple innermost
15052 loops. */
15053 static void
15054 s390_adjust_loops ()
15056 df_analyze ();
15057 compute_bb_for_insn ();
15059 /* Find the loops. */
15060 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
15062 for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
15064 if (dump_file)
15066 flow_loop_dump (loop, dump_file, NULL, 0);
15067 fprintf (dump_file, ";; OSC loop scan Loop: ");
15069 if (loop->latch == NULL
15070 || pc_set (BB_END (loop->latch)) == NULL_RTX
15071 || !s390_adjust_loop_scan_osc (loop))
15073 if (dump_file)
15075 if (loop->latch == NULL)
15076 fprintf (dump_file, " multiple backward jumps\n");
15077 else
15079 fprintf (dump_file, " header insn: %d latch insn: %d ",
15080 INSN_UID (BB_HEAD (loop->header)),
15081 INSN_UID (BB_END (loop->latch)));
15082 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
15083 fprintf (dump_file, " loop does not end with jump\n");
15084 else
15085 fprintf (dump_file, " not instrumented\n");
15089 else
15091 rtx_insn *new_insn;
15093 if (dump_file)
15094 fprintf (dump_file, " adding OSC break insn: ");
15095 new_insn = emit_insn_before (gen_osc_break (),
15096 BB_END (loop->latch));
15097 INSN_ADDRESSES_NEW (new_insn, -1);
15101 loop_optimizer_finalize ();
15103 df_finish_pass (false);
15106 /* Perform machine-dependent processing. */
15108 static void
15109 s390_reorg (void)
15111 struct constant_pool *pool;
15112 rtx_insn *insn;
15113 int hw_before, hw_after;
15115 if (s390_tune == PROCESSOR_2964_Z13)
15116 s390_adjust_loops ();
15118 /* Make sure all splits have been performed; splits after
15119 machine_dependent_reorg might confuse insn length counts. */
15120 split_all_insns_noflow ();
15122 /* Install the main literal pool and the associated base
15123 register load insns. The literal pool might be > 4096 bytes in
15124 size, so that some of its elements cannot be directly accessed.
15126 To fix this, we split the single literal pool into multiple
15127 pool chunks, reloading the pool base register at various
15128 points throughout the function to ensure it always points to
15129 the pool chunk the following code expects. */
15131 /* Collect the literal pool. */
15132 pool = s390_mainpool_start ();
15133 if (pool)
15135 /* Finish up literal pool related changes. */
15136 s390_mainpool_finish (pool);
15138 else
15140 /* If literal pool overflowed, chunkify it. */
15141 pool = s390_chunkify_start ();
15142 s390_chunkify_finish (pool);
15145 /* Generate out-of-pool execute target insns. */
15146 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15148 rtx label;
15149 rtx_insn *target;
15151 label = s390_execute_label (insn);
15152 if (!label)
15153 continue;
15155 gcc_assert (label != const0_rtx);
15157 target = emit_label (XEXP (label, 0));
15158 INSN_ADDRESSES_NEW (target, -1);
15160 if (JUMP_P (insn))
15162 target = emit_jump_insn (s390_execute_target (insn));
15163 /* This is important in order to keep a table jump
15164 pointing at the jump table label. Only then is it
15165 recognized as a table jump. */
15166 JUMP_LABEL (target) = JUMP_LABEL (insn);
15168 else
15169 target = emit_insn (s390_execute_target (insn));
15170 INSN_ADDRESSES_NEW (target, -1);
15173 /* Try to optimize prologue and epilogue further. */
15174 s390_optimize_prologue ();
15176 /* Walk over the insns and do some >=z10 specific changes. */
15177 if (s390_tune >= PROCESSOR_2097_Z10)
15179 rtx_insn *insn;
15180 bool insn_added_p = false;
15182 /* The insn lengths and addresses have to be up to date for the
15183 following manipulations. */
15184 shorten_branches (get_insns ());
15186 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15188 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
15189 continue;
15191 if (JUMP_P (insn))
15192 insn_added_p |= s390_fix_long_loop_prediction (insn);
15194 if ((GET_CODE (PATTERN (insn)) == PARALLEL
15195 || GET_CODE (PATTERN (insn)) == SET)
15196 && s390_tune == PROCESSOR_2097_Z10)
15197 insn_added_p |= s390_z10_optimize_cmp (insn);
15200 /* Adjust branches if we added new instructions. */
15201 if (insn_added_p)
15202 shorten_branches (get_insns ());
15205 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
15206 if (hw_after > 0)
15208 rtx_insn *insn;
15210 /* Insert NOPs for hotpatching. */
15211 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15212 /* Emit NOPs
15213 1. inside the area covered by debug information to allow setting
15214 breakpoints at the NOPs,
15215 2. before any insn which results in an asm instruction,
15216 3. before in-function labels to avoid jumping to the NOPs, for
15217 example as part of a loop,
15218 4. before any barrier in case the function is completely empty
15219 (__builtin_unreachable ()) and has neither internal labels nor
15220 active insns.
15222 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
15223 break;
15224 /* Output a series of NOPs before the first active insn. */
15225 while (insn && hw_after > 0)
15227 if (hw_after >= 3)
15229 emit_insn_before (gen_nop_6_byte (), insn);
15230 hw_after -= 3;
15232 else if (hw_after >= 2)
15234 emit_insn_before (gen_nop_4_byte (), insn);
15235 hw_after -= 2;
15237 else
15239 emit_insn_before (gen_nop_2_byte (), insn);
15240 hw_after -= 1;
15246 /* Return true if INSN is a fp load insn writing register REGNO. */
15247 static inline bool
15248 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
15250 rtx set;
15251 enum attr_type flag = s390_safe_attr_type (insn);
15253 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
15254 return false;
15256 set = single_set (insn);
15258 if (set == NULL_RTX)
15259 return false;
15261 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
15262 return false;
15264 if (REGNO (SET_DEST (set)) != regno)
15265 return false;
15267 return true;
15270 /* This value describes the distance to be avoided between an
15271 arithmetic fp instruction and an fp load writing the same register.
15272 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
15273 fine, but the exact value has to be avoided. Otherwise the FP
15274 pipeline will throw an exception causing a major penalty. */
15275 #define Z10_EARLYLOAD_DISTANCE 7
15277 /* Rearrange the ready list in order to avoid the situation described
15278 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
15279 moved to the very end of the ready list. */
15280 static void
15281 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
15283 unsigned int regno;
15284 int nready = *nready_p;
15285 rtx_insn *tmp;
15286 int i;
15287 rtx_insn *insn;
15288 rtx set;
15289 enum attr_type flag;
15290 int distance;
15292 /* Skip DISTANCE - 1 active insns. */
15293 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
15294 distance > 0 && insn != NULL_RTX;
15295 distance--, insn = prev_active_insn (insn))
15296 if (CALL_P (insn) || JUMP_P (insn))
15297 return;
15299 if (insn == NULL_RTX)
15300 return;
15302 set = single_set (insn);
15304 if (set == NULL_RTX || !REG_P (SET_DEST (set))
15305 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
15306 return;
15308 flag = s390_safe_attr_type (insn);
15310 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
15311 return;
15313 regno = REGNO (SET_DEST (set));
15314 i = nready - 1;
15316 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
15317 i--;
15319 if (!i)
15320 return;
15322 tmp = ready[i];
15323 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
15324 ready[0] = tmp;
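/* Note that the scheduler issues from the top (highest index) of the
ready list, so placing the problematic load at index 0 delays it as
long as possible. */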
15327 struct s390_sched_state
15329 /* Number of insns in the group. */
15330 int group_state;
15331 /* Execution side of the group. */
15332 int side;
15333 /* Group can only hold two insns. */
15334 bool group_of_two;
15335 } s390_sched_state;
15337 static struct s390_sched_state sched_state;
15339 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
15340 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
15341 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
15342 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
15343 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
15345 static unsigned int
15346 s390_get_sched_attrmask (rtx_insn *insn)
15348 unsigned int mask = 0;
15350 switch (s390_tune)
15352 case PROCESSOR_2827_ZEC12:
15353 if (get_attr_zEC12_cracked (insn))
15354 mask |= S390_SCHED_ATTR_MASK_CRACKED;
15355 if (get_attr_zEC12_expanded (insn))
15356 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
15357 if (get_attr_zEC12_endgroup (insn))
15358 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
15359 if (get_attr_zEC12_groupalone (insn))
15360 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
15361 break;
15362 case PROCESSOR_2964_Z13:
15363 if (get_attr_z13_cracked (insn))
15364 mask |= S390_SCHED_ATTR_MASK_CRACKED;
15365 if (get_attr_z13_expanded (insn))
15366 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
15367 if (get_attr_z13_endgroup (insn))
15368 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
15369 if (get_attr_z13_groupalone (insn))
15370 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
15371 if (get_attr_z13_groupoftwo (insn))
15372 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
15373 break;
15374 case PROCESSOR_3906_Z14:
15375 if (get_attr_z14_cracked (insn))
15376 mask |= S390_SCHED_ATTR_MASK_CRACKED;
15377 if (get_attr_z14_expanded (insn))
15378 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
15379 if (get_attr_z14_endgroup (insn))
15380 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
15381 if (get_attr_z14_groupalone (insn))
15382 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
15383 if (get_attr_z14_groupoftwo (insn))
15384 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
15385 break;
15386 case PROCESSOR_8561_Z15:
15387 if (get_attr_z15_cracked (insn))
15388 mask |= S390_SCHED_ATTR_MASK_CRACKED;
15389 if (get_attr_z15_expanded (insn))
15390 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
15391 if (get_attr_z15_endgroup (insn))
15392 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
15393 if (get_attr_z15_groupalone (insn))
15394 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
15395 if (get_attr_z15_groupoftwo (insn))
15396 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
15397 break;
15398 case PROCESSOR_3931_Z16:
15399 if (get_attr_z16_cracked (insn))
15400 mask |= S390_SCHED_ATTR_MASK_CRACKED;
15401 if (get_attr_z16_expanded (insn))
15402 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
15403 if (get_attr_z16_endgroup (insn))
15404 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
15405 if (get_attr_z16_groupalone (insn))
15406 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
15407 if (get_attr_z16_groupoftwo (insn))
15408 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
15409 break;
15410 default:
15411 gcc_unreachable ();
15413 return mask;
15416 static unsigned int
15417 s390_get_unit_mask (rtx_insn *insn, int *units)
15419 unsigned int mask = 0;
15421 switch (s390_tune)
15423 case PROCESSOR_2964_Z13:
15424 *units = 4;
15425 if (get_attr_z13_unit_lsu (insn))
15426 mask |= 1 << 0;
15427 if (get_attr_z13_unit_fxa (insn))
15428 mask |= 1 << 1;
15429 if (get_attr_z13_unit_fxb (insn))
15430 mask |= 1 << 2;
15431 if (get_attr_z13_unit_vfu (insn))
15432 mask |= 1 << 3;
15433 break;
15434 case PROCESSOR_3906_Z14:
15435 *units = 4;
15436 if (get_attr_z14_unit_lsu (insn))
15437 mask |= 1 << 0;
15438 if (get_attr_z14_unit_fxa (insn))
15439 mask |= 1 << 1;
15440 if (get_attr_z14_unit_fxb (insn))
15441 mask |= 1 << 2;
15442 if (get_attr_z14_unit_vfu (insn))
15443 mask |= 1 << 3;
15444 break;
15445 case PROCESSOR_8561_Z15:
15446 *units = 4;
15447 if (get_attr_z15_unit_lsu (insn))
15448 mask |= 1 << 0;
15449 if (get_attr_z15_unit_fxa (insn))
15450 mask |= 1 << 1;
15451 if (get_attr_z15_unit_fxb (insn))
15452 mask |= 1 << 2;
15453 if (get_attr_z15_unit_vfu (insn))
15454 mask |= 1 << 3;
15455 break;
15456 case PROCESSOR_3931_Z16:
15457 *units = 4;
15458 if (get_attr_z16_unit_lsu (insn))
15459 mask |= 1 << 0;
15460 if (get_attr_z16_unit_fxa (insn))
15461 mask |= 1 << 1;
15462 if (get_attr_z16_unit_fxb (insn))
15463 mask |= 1 << 2;
15464 if (get_attr_z16_unit_vfu (insn))
15465 mask |= 1 << 3;
15466 break;
15467 default:
15468 gcc_unreachable ();
15470 return mask;
15473 static bool
15474 s390_is_fpd (rtx_insn *insn)
15476 if (insn == NULL_RTX)
15477 return false;
15479 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
15480 || get_attr_z15_unit_fpd (insn) || get_attr_z16_unit_fpd (insn);
15483 static bool
15484 s390_is_fxd (rtx_insn *insn)
15486 if (insn == NULL_RTX)
15487 return false;
15489 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
15490 || get_attr_z15_unit_fxd (insn) || get_attr_z16_unit_fxd (insn);
15493 /* Returns TRUE if INSN is a long-running instruction. */
15494 static bool
15495 s390_is_longrunning (rtx_insn *insn)
15497 if (insn == NULL_RTX)
15498 return false;
15500 return s390_is_fxd (insn) || s390_is_fpd (insn);
15504 /* Return the scheduling score for INSN. The higher the score the
15505 better. The score is calculated from the OOO scheduling attributes
15506 of INSN and the scheduling state sched_state. */
15507 static int
15508 s390_sched_score (rtx_insn *insn)
15510 unsigned int mask = s390_get_sched_attrmask (insn);
15511 int score = 0;
15513 switch (sched_state.group_state)
15515 case 0:
15516 /* Try to put insns into the first slot which would otherwise
15517 break a group. */
15518 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15519 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
15520 score += 5;
15521 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
15522 score += 10;
15523 break;
15524 case 1:
15525 /* Prefer not cracked insns while trying to put together a
15526 group. */
15527 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
15528 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
15529 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
15530 score += 10;
15531 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
15532 score += 5;
15533 /* If we are in a group of two already, try to schedule another
15534 group-of-two insn to avoid shortening another group. */
15535 if (sched_state.group_of_two
15536 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15537 score += 15;
15538 break;
15539 case 2:
15540 /* Prefer not cracked insns while trying to put together a
15541 group. */
15542 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
15543 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
15544 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
15545 score += 10;
15546 /* Prefer endgroup insns in the last slot. */
15547 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15548 score += 10;
15549 /* Try to avoid group-of-two insns in the last slot as they will
15550 shorten this group as well as the next one. */
15551 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15552 score = MAX (0, score - 15);
15553 break;
15556 if (s390_tune >= PROCESSOR_2964_Z13)
15558 int units, i;
15559 unsigned unit_mask, m = 1;
15561 unit_mask = s390_get_unit_mask (insn, &units);
15562 gcc_assert (units <= MAX_SCHED_UNITS);
15564 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
15565 ago the last insn of this unit type got scheduled. This is
15566 supposed to help provide a proper instruction mix to the
15567 CPU. */
15568 for (i = 0; i < units; i++, m <<= 1)
15569 if (m & unit_mask)
15570 score += (last_scheduled_unit_distance[i][sched_state.side]
15571 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
15573 int other_side = 1 - sched_state.side;
15575 /* Try to delay long-running insns when side is busy. */
15576 if (s390_is_longrunning (insn))
15578 if (s390_is_fxd (insn))
15580 if (fxd_longrunning[sched_state.side]
15581 && fxd_longrunning[other_side]
15582 <= fxd_longrunning[sched_state.side])
15583 score = MAX (0, score - 10);
15585 else if (fxd_longrunning[other_side]
15586 >= fxd_longrunning[sched_state.side])
15587 score += 10;
15590 if (s390_is_fpd (insn))
15592 if (fpd_longrunning[sched_state.side]
15593 && fpd_longrunning[other_side]
15594 <= fpd_longrunning[sched_state.side])
15595 score = MAX (0, score - 10);
15597 else if (fpd_longrunning[other_side]
15598 >= fpd_longrunning[sched_state.side])
15599 score += 10;
15604 return score;
15607 /* This function is called via hook TARGET_SCHED_REORDER before
15608 issuing one insn from list READY which contains *NREADYP entries.
15609 For target z10 it reorders load instructions to avoid early load
15610 conflicts in the floating point pipeline. */
15611 static int
15612 s390_sched_reorder (FILE *file, int verbose,
15613 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15615 if (s390_tune == PROCESSOR_2097_Z10
15616 && reload_completed
15617 && *nreadyp > 1)
15618 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15620 if (s390_tune >= PROCESSOR_2827_ZEC12
15621 && reload_completed
15622 && *nreadyp > 1)
15624 int i;
15625 int last_index = *nreadyp - 1;
15626 int max_index = -1;
15627 int max_score = -1;
15628 rtx_insn *tmp;
15630 /* Just move the insn with the highest score to the top (the
15631 end) of the list. A full sort is not needed since a conflict
15632 in the hazard recognition cannot happen. So the top insn in
15633 the ready list will always be taken. */
15634 for (i = last_index; i >= 0; i--)
15636 int score;
15638 if (recog_memoized (ready[i]) < 0)
15639 continue;
15641 score = s390_sched_score (ready[i]);
15642 if (score > max_score)
15644 max_score = score;
15645 max_index = i;
15649 if (max_index != -1)
15651 if (max_index != last_index)
15653 tmp = ready[max_index];
15654 ready[max_index] = ready[last_index];
15655 ready[last_index] = tmp;
15657 if (verbose > 5)
15658 fprintf (file,
15659 ";;\t\tBACKEND: move insn %d to the top of list\n",
15660 INSN_UID (ready[last_index]));
15662 else if (verbose > 5)
15663 fprintf (file,
15664 ";;\t\tBACKEND: best insn %d already on top\n",
15665 INSN_UID (ready[last_index]));
15668 if (verbose > 5)
15670 fprintf (file, "ready list ooo attributes - sched state: %d\n",
15671 sched_state.group_state);
15673 for (i = last_index; i >= 0; i--)
15675 unsigned int sched_mask;
15676 rtx_insn *insn = ready[i];
15678 if (recog_memoized (insn) < 0)
15679 continue;
15681 sched_mask = s390_get_sched_attrmask (insn);
15682 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15683 INSN_UID (insn),
15684 s390_sched_score (insn));
15685 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15686 ((M) & sched_mask) ? #ATTR : "");
15687 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15688 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15689 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15690 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15691 #undef PRINT_SCHED_ATTR
15692 if (s390_tune >= PROCESSOR_2964_Z13)
15694 unsigned int unit_mask, m = 1;
15695 int units, j;
15697 unit_mask = s390_get_unit_mask (insn, &units);
15698 fprintf (file, "(units:");
15699 for (j = 0; j < units; j++, m <<= 1)
15700 if (m & unit_mask)
15701 fprintf (file, " u%d", j);
15702 fprintf (file, ")");
15704 fprintf (file, "\n");
15709 return s390_issue_rate ();
15713 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15714 the scheduler has issued INSN. It stores the last issued insn into
15715 last_scheduled_insn in order to make it available for
15716 s390_sched_reorder. */
15717 static int
15718 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15720 last_scheduled_insn = insn;
15722 bool ends_group = false;
15724 if (s390_tune >= PROCESSOR_2827_ZEC12
15725 && reload_completed
15726 && recog_memoized (insn) >= 0)
15728 unsigned int mask = s390_get_sched_attrmask (insn);
15730 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15731 sched_state.group_of_two = true;
15733 /* If this is a group-of-two insn, we actually ended the last group
15734 and this insn is the first one of the new group. */
15735 if (sched_state.group_state == 2 && sched_state.group_of_two)
15737 sched_state.side = sched_state.side ? 0 : 1;
15738 sched_state.group_state = 0;
15741 /* Longrunning and side bookkeeping. */
15742 for (int i = 0; i < 2; i++)
15744 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
15745 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
15748 unsigned latency = insn_default_latency (insn);
15749 if (s390_is_longrunning (insn))
15751 if (s390_is_fxd (insn))
15752 fxd_longrunning[sched_state.side] = latency;
15753 else
15754 fpd_longrunning[sched_state.side] = latency;
15757 if (s390_tune >= PROCESSOR_2964_Z13)
15759 int units, i;
15760 unsigned unit_mask, m = 1;
15762 unit_mask = s390_get_unit_mask (insn, &units);
15763 gcc_assert (units <= MAX_SCHED_UNITS);
15765 for (i = 0; i < units; i++, m <<= 1)
15766 if (m & unit_mask)
15767 last_scheduled_unit_distance[i][sched_state.side] = 0;
15768 else if (last_scheduled_unit_distance[i][sched_state.side]
15769 < MAX_SCHED_MIX_DISTANCE)
15770 last_scheduled_unit_distance[i][sched_state.side]++;
15773 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15774 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15775 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
15776 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15778 sched_state.group_state = 0;
15779 ends_group = true;
15781 else
15783 switch (sched_state.group_state)
15785 case 0:
15786 sched_state.group_state++;
15787 break;
15788 case 1:
15789 sched_state.group_state++;
15790 if (sched_state.group_of_two)
15792 sched_state.group_state = 0;
15793 ends_group = true;
15795 break;
15796 case 2:
15797 sched_state.group_state++;
15798 ends_group = true;
15799 break;
15803 if (verbose > 5)
15805 unsigned int sched_mask;
15807 sched_mask = s390_get_sched_attrmask (insn);
15809 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15810 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15811 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15812 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15813 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15814 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15815 #undef PRINT_SCHED_ATTR
15817 if (s390_tune >= PROCESSOR_2964_Z13)
15819 unsigned int unit_mask, m = 1;
15820 int units, j;
15822 unit_mask = s390_get_unit_mask (insn, &units);
15823 fprintf (file, "(units:");
15824 for (j = 0; j < units; j++, m <<= 1)
15825 if (m & unit_mask)
15826 fprintf (file, " %d", j);
15827 fprintf (file, ")");
15829 fprintf (file, " sched state: %d\n", sched_state.group_state);
15831 if (s390_tune >= PROCESSOR_2964_Z13)
15833 int units, j;
15835 s390_get_unit_mask (insn, &units);
15837 fprintf (file, ";;\t\tBACKEND: units on this side (%d) unused for: ", sched_state.side);
15838 for (j = 0; j < units; j++)
15839 fprintf (file, "%d:%d ", j,
15840 last_scheduled_unit_distance[j][sched_state.side]);
15841 fprintf (file, "\n");
15845 /* If this insn ended a group, the next will be on the other side. */
15846 if (ends_group)
15848 sched_state.group_state = 0;
15849 sched_state.side = sched_state.side ? 0 : 1;
15850 sched_state.group_of_two = false;
15854 if (GET_CODE (PATTERN (insn)) != USE
15855 && GET_CODE (PATTERN (insn)) != CLOBBER)
15856 return more - 1;
15857 else
15858 return more;
15861 static void
15862 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15863 int verbose ATTRIBUTE_UNUSED,
15864 int max_ready ATTRIBUTE_UNUSED)
15866 /* If the next basic block is most likely entered via a fallthru edge
15867 we keep the last sched state. Otherwise we start a new group.
15868 The scheduler traverses basic blocks in "instruction stream" ordering
15869 so if we see a fallthru edge here, sched_state will be of its
15870 source block.
15872 current_sched_info->prev_head is the insn before the first insn of the
15873 block of insns to be scheduled. */
15875 last_scheduled_insn = NULL;
15876 memset (last_scheduled_unit_distance, 0,
15877 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15878 memset (fpd_longrunning, 0, NUM_SIDES * sizeof (int));
15879 memset (fxd_longrunning, 0, NUM_SIDES * sizeof (int));
15880 sched_state = {};
15883 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15884 a new number of times struct loop *loop should be unrolled if tuned for
15885 cpus with a built-in stride prefetcher.
15886 The loop is analyzed for memory accesses by calling check_dpu for
15887 each rtx of the loop. Depending on the loop_depth and the number of
15888 memory accesses a new number <=nunroll is returned to improve the
15889 behavior of the hardware prefetch unit. */
15890 static unsigned
15891 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15893 basic_block *bbs;
15894 rtx_insn *insn;
15895 unsigned i;
15896 unsigned mem_count = 0;
15898 if (s390_tune < PROCESSOR_2097_Z10)
15899 return nunroll;
15901 if (unroll_only_small_loops)
15903 /* Only unroll loops smaller than or equal to 12 insns. */
15904 const unsigned int small_threshold = 12;
15906 if (loop->ninsns > small_threshold)
15907 return 0;
15909 /* ???: Make this dependent on the type of registers in
15910 the loop. Increase the limit for vector registers. */
15911 const unsigned int max_insns = optimize >= 3 ? 36 : 24;
15913 nunroll = MIN (nunroll, max_insns / loop->ninsns);
15916 /* Count the number of memory references within the loop body. */
15917 bbs = get_loop_body (loop);
15918 subrtx_iterator::array_type array;
15919 for (i = 0; i < loop->num_nodes; i++)
15920 FOR_BB_INSNS (bbs[i], insn)
15921 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15923 rtx set;
15925 /* The runtime of small loops with memory block operations
15926 will be determined by the memory operation. Doing
15927 unrolling doesn't help here. Measurements to confirm
15928 this were only done on recent CPU levels. So better do
15929 not change anything for older CPUs. */
15930 if (s390_tune >= PROCESSOR_2964_Z13
15931 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15932 && ((set = single_set (insn)) != NULL_RTX)
15933 && ((GET_MODE (SET_DEST (set)) == BLKmode
15934 && (GET_MODE (SET_SRC (set)) == BLKmode
15935 || SET_SRC (set) == const0_rtx))
15936 || (GET_CODE (SET_SRC (set)) == COMPARE
15937 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15938 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15940 free (bbs);
15941 return 1;
15944 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15945 if (MEM_P (*iter))
15946 mem_count += 1;
15948 free (bbs);
15950 /* Prevent division by zero; nunroll does not need adjusting in this case. */
15951 if (mem_count == 0)
15952 return nunroll;
15954 switch (loop_depth(loop))
15956 case 1:
15957 return MIN (nunroll, 28 / mem_count);
15958 case 2:
15959 return MIN (nunroll, 22 / mem_count);
15960 default:
15961 return MIN (nunroll, 16 / mem_count);
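/* Worked example (illustrative only, not from the original sources): for a
   depth-1 loop with nunroll == 8 and mem_count == 5 the cap above is
   MIN (8, 28 / 5) == 5, i.e. the loop would be unrolled at most 5 times.  */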
15965 /* Restore the current options. This is a hook function and also called
15966 internally. */
15968 static void
15969 s390_function_specific_restore (struct gcc_options *opts,
15970 struct gcc_options */* opts_set */,
15971 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15973 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15976 static void
15977 s390_default_align (struct gcc_options *opts)
15979 /* Set the default function alignment to 16 in order to get rid of
15980 some unwanted performance effects. */
15981 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15982 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15983 opts->x_str_align_functions = "16";
15986 static void
15987 s390_override_options_after_change (void)
15989 s390_default_align (&global_options);
15991 /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
15992 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
15993 || (OPTION_SET_P (flag_unroll_all_loops)
15994 && flag_unroll_all_loops))
15996 if (!OPTION_SET_P (unroll_only_small_loops))
15997 unroll_only_small_loops = 0;
15998 if (!OPTION_SET_P (flag_cunroll_grow_size))
15999 flag_cunroll_grow_size = 1;
16001 else if (!OPTION_SET_P (flag_cunroll_grow_size))
16002 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
16005 static void
16006 s390_option_override_internal (struct gcc_options *opts,
16007 struct gcc_options *opts_set)
16009 /* Architecture mode defaults according to ABI. */
16010 if (!(opts_set->x_target_flags & MASK_ZARCH))
16012 if (TARGET_64BIT)
16013 opts->x_target_flags |= MASK_ZARCH;
16014 else
16015 opts->x_target_flags &= ~MASK_ZARCH;
16018 /* Set the march default in case it hasn't been specified on cmdline. */
16019 if (!opts_set->x_s390_arch)
16020 opts->x_s390_arch = PROCESSOR_2064_Z900;
16022 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
16024 /* Determine processor to tune for. */
16025 if (!opts_set->x_s390_tune)
16026 opts->x_s390_tune = opts->x_s390_arch;
16028 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
16030 /* Sanity checks. */
16031 if (opts->x_s390_arch == PROCESSOR_NATIVE
16032 || opts->x_s390_tune == PROCESSOR_NATIVE)
16033 gcc_unreachable ();
16034 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
16035 error ("64-bit ABI not supported in ESA/390 mode");
16037 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
16038 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
16039 || opts->x_s390_function_return == indirect_branch_thunk_inline
16040 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
16041 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
16042 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
16044 if (opts->x_s390_indirect_branch != indirect_branch_keep)
16046 if (!opts_set->x_s390_indirect_branch_call)
16047 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
16049 if (!opts_set->x_s390_indirect_branch_jump)
16050 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
16053 if (opts->x_s390_function_return != indirect_branch_keep)
16055 if (!opts_set->x_s390_function_return_reg)
16056 opts->x_s390_function_return_reg = opts->x_s390_function_return;
16058 if (!opts_set->x_s390_function_return_mem)
16059 opts->x_s390_function_return_mem = opts->x_s390_function_return;
16062 /* Enable hardware transactions if available and not explicitly
16063 disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
16064 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
16066 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
16067 opts->x_target_flags |= MASK_OPT_HTM;
16068 else
16069 opts->x_target_flags &= ~MASK_OPT_HTM;
16072 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
16074 if (TARGET_OPT_VX_P (opts->x_target_flags))
16076 if (!TARGET_CPU_VX_P (opts))
16077 error ("hardware vector support not available on %s",
16078 processor_table[(int)opts->x_s390_arch].name);
16079 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
16080 error ("hardware vector support not available with "
16081 "%<-msoft-float%>");
16084 else
16086 if (TARGET_CPU_VX_P (opts))
16087 /* Enable vector support if available and not explicitly disabled
16088 by the user, e.g. with -m31 -march=z13 -mzarch. */
16089 opts->x_target_flags |= MASK_OPT_VX;
16090 else
16091 opts->x_target_flags &= ~MASK_OPT_VX;
16094 /* Use hardware DFP if available and not explicitly disabled by
16095 the user, e.g. with -m31 -march=z10 -mzarch. */
16096 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
16098 if (TARGET_DFP_P (opts))
16099 opts->x_target_flags |= MASK_HARD_DFP;
16100 else
16101 opts->x_target_flags &= ~MASK_HARD_DFP;
16104 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
16106 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
16108 if (!TARGET_CPU_DFP_P (opts))
16109 error ("hardware decimal floating-point instructions"
16110 " not available on %s",
16111 processor_table[(int)opts->x_s390_arch].name);
16112 if (!TARGET_ZARCH_P (opts->x_target_flags))
16113 error ("hardware decimal floating-point instructions"
16114 " not available in ESA/390 mode");
16116 else
16117 opts->x_target_flags &= ~MASK_HARD_DFP;
16120 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
16121 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
16123 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
16124 && TARGET_HARD_DFP_P (opts->x_target_flags))
16125 error ("%<-mhard-dfp%> cannot be used in conjunction with "
16126 "%<-msoft-float%>");
16128 opts->x_target_flags &= ~MASK_HARD_DFP;
16131 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
16132 && TARGET_PACKED_STACK_P (opts->x_target_flags)
16133 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
16134 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
16135 "supported in combination");
16137 if (opts->x_s390_stack_size)
16139 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
16140 error ("stack size must be greater than the stack guard value");
16141 else if (opts->x_s390_stack_size > 1 << 16)
16142 error ("stack size must not be greater than 64k");
16144 else if (opts->x_s390_stack_guard)
16145 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
16147 /* Our implementation of the stack probe requires the probe interval
16148 to be used as displacement in an address operand. The maximum
16149 probe interval currently is 64k. This would exceed short
16150 displacements. Trim that value down to 4k if that happens. This
16151 might result in too many probes being generated only on the
16152 oldest supported machine level z900. */
16153 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
16154 param_stack_clash_protection_probe_interval = 12;
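/* Note: the parameter holds the log2 of the probe interval (see the
   1 << param_stack_clash_protection_probe_interval test above), so the
   value 12 corresponds to a 4 KiB probe interval.  */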
16156 #if TARGET_TPF != 0
16157 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
16158 error ("%<-mtpf-trace-hook-prologue-check%> requires integer in range 0-4095");
16160 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
16161 error ("%<-mtpf-trace-hook-prologue-target%> requires integer in range 0-4095");
16163 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
16164 error ("%<-mtpf-trace-hook-epilogue-check%> requires integer in range 0-4095");
16166 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
16167 error ("%<-mtpf-trace-hook-epilogue-target%> requires integer in range 0-4095");
16169 if (s390_tpf_trace_skip)
16171 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
16172 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
16174 #endif
16176 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
16177 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
16178 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
16179 #endif
16181 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
16183 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
16184 100);
16185 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
16186 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
16187 2000);
16188 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
16189 64);
16192 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
16193 256);
16194 /* values for loop prefetching */
16195 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
16196 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
16197 /* s390 has more than 2 cache levels and the sizes are much larger.
16198 Since we are always running virtualized, assume that we only get a
16199 small part of the caches above L1. */
16200 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
16201 SET_OPTION_IF_UNSET (opts, opts_set,
16202 param_prefetch_min_insn_to_mem_ratio, 2);
16203 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
16205 /* Use the alternative scheduling-pressure algorithm by default. */
16206 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
16208 /* Allow simple vector masking using vll/vstl for epilogues. */
16209 if (TARGET_Z13)
16210 SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 1);
16211 else
16212 SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0);
16214 /* Do not vectorize loops with a low trip count for now. */
16215 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
16217 /* Set the default alignment. */
16218 s390_default_align (opts);
16220 /* Set unroll options. */
16221 s390_override_options_after_change ();
16223 /* Call target specific restore function to do post-init work. At the moment,
16224 this just sets opts->x_s390_cost_pointer. */
16225 s390_function_specific_restore (opts, opts_set, NULL);
16227 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
16228 because 31-bit PLT stubs assume that %r12 contains the GOT address, which
16229 is not the case when the code runs before the prologue. */
16230 if (opts->x_flag_fentry && !TARGET_64BIT)
16231 error ("%<-mfentry%> is supported only for 64-bit CPUs");
16234 static void
16235 s390_option_override (void)
16237 unsigned int i;
16238 cl_deferred_option *opt;
16239 vec<cl_deferred_option> *v =
16240 (vec<cl_deferred_option> *) s390_deferred_options;
16242 if (v)
16243 FOR_EACH_VEC_ELT (*v, i, opt)
16245 switch (opt->opt_index)
16247 case OPT_mhotpatch_:
16249 int val1;
16250 int val2;
16251 char *s = strtok (ASTRDUP (opt->arg), ",");
16252 char *t = strtok (NULL, "\0");
16254 if (t != NULL)
16256 val1 = integral_argument (s);
16257 val2 = integral_argument (t);
16259 else
16261 val1 = -1;
16262 val2 = -1;
16264 if (val1 == -1 || val2 == -1)
16266 /* argument is not a plain number */
16267 error ("arguments to %qs should be non-negative integers",
16268 "-mhotpatch=n,m");
16269 break;
16271 else if (val1 > s390_hotpatch_hw_max
16272 || val2 > s390_hotpatch_hw_max)
16274 error ("argument to %qs is too large (max. %d)",
16275 "-mhotpatch=n,m", s390_hotpatch_hw_max);
16276 break;
16278 s390_hotpatch_hw_before_label = val1;
16279 s390_hotpatch_hw_after_label = val2;
16280 break;
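/* Illustrative example: -mhotpatch=1,2 arrives here as the deferred
   argument string "1,2", yielding s390_hotpatch_hw_before_label == 1 and
   s390_hotpatch_hw_after_label == 2 halfwords of hotpatch padding.  */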
16282 default:
16283 gcc_unreachable ();
16287 /* Set up function hooks. */
16288 init_machine_status = s390_init_machine_status;
16290 s390_option_override_internal (&global_options, &global_options_set);
16292 /* Save the initial options in case the user does function specific
16293 options. */
16294 target_option_default_node
16295 = build_target_option_node (&global_options, &global_options_set);
16296 target_option_current_node = target_option_default_node;
16298 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
16299 requires the arch flags to be evaluated already. Since prefetching
16300 is beneficial on s390, we enable it if available. */
16301 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
16302 flag_prefetch_loop_arrays = 1;
16304 if (!s390_pic_data_is_text_relative && !flag_pic)
16305 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
16306 "%<-fpic%>/%<-fPIC%>");
16308 if (TARGET_TPF)
16310 /* Don't emit DWARF3/4 unless specifically selected. The TPF
16311 debuggers do not yet support DWARF 3/4. */
16312 if (!OPTION_SET_P (dwarf_strict))
16313 dwarf_strict = 1;
16314 if (!OPTION_SET_P (dwarf_version))
16315 dwarf_version = 2;
16319 #if S390_USE_TARGET_ATTRIBUTE
16320 /* Inner function to process the attribute((target(...))), take an argument and
16321 set the current options from the argument. If we have a list, recursively go
16322 over the list. */
16324 static bool
16325 s390_valid_target_attribute_inner_p (tree args,
16326 struct gcc_options *opts,
16327 struct gcc_options *new_opts_set,
16328 bool force_pragma)
16330 char *next_optstr;
16331 bool ret = true;
16333 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
16334 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
16335 static const struct
16337 const char *string;
16338 size_t len;
16339 int opt;
16340 int has_arg;
16341 int only_as_pragma;
16342 } attrs[] = {
16343 /* enum options */
16344 S390_ATTRIB ("arch=", OPT_march_, 1),
16345 S390_ATTRIB ("tune=", OPT_mtune_, 1),
16346 /* uinteger options */
16347 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
16348 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
16349 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
16350 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
16351 /* flag options */
16352 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
16353 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
16354 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
16355 S390_ATTRIB ("htm", OPT_mhtm, 0),
16356 S390_ATTRIB ("vx", OPT_mvx, 0),
16357 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
16358 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
16359 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
16360 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
16361 S390_PRAGMA ("zvector", OPT_mzvector, 0),
16362 /* boolean options */
16363 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
16365 #undef S390_ATTRIB
16366 #undef S390_PRAGMA
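/* Usage example (illustrative only, not part of this file): with the table
   above, a declaration such as

     __attribute__ ((target ("arch=z13,no-vx")))
     int foo (int x) { return x + 1; }

   is parsed below into OPT_march_ with argument "z13" plus the negated
   form of OPT_mvx.  */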
16368 /* If this is a list, recurse to get the options. */
16369 if (TREE_CODE (args) == TREE_LIST)
16371 bool ret = true;
16372 int num_pragma_values;
16373 int i;
16375 /* Note: attribs.cc:decl_attributes prepends the values from
16376 current_target_pragma to the list of target attributes. To determine
16377 whether we're looking at a value of the attribute or the pragma we
16378 assume that the first [list_length (current_target_pragma)] values in
16379 the list are the values from the pragma. */
16380 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
16381 ? list_length (current_target_pragma) : 0;
16382 for (i = 0; args; args = TREE_CHAIN (args), i++)
16384 bool is_pragma;
16386 is_pragma = (force_pragma || i < num_pragma_values);
16387 if (TREE_VALUE (args)
16388 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
16389 opts, new_opts_set,
16390 is_pragma))
16392 ret = false;
16395 return ret;
16398 else if (TREE_CODE (args) != STRING_CST)
16400 error ("attribute %<target%> argument not a string");
16401 return false;
16404 /* Handle multiple arguments separated by commas. */
16405 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
16407 while (next_optstr && *next_optstr != '\0')
16409 char *p = next_optstr;
16410 char *orig_p = p;
16411 char *comma = strchr (next_optstr, ',');
16412 size_t len, opt_len;
16413 int opt;
16414 bool opt_set_p;
16415 char ch;
16416 unsigned i;
16417 int mask = 0;
16418 enum cl_var_type var_type;
16419 bool found;
16421 if (comma)
16423 *comma = '\0';
16424 len = comma - next_optstr;
16425 next_optstr = comma + 1;
16427 else
16429 len = strlen (p);
16430 next_optstr = NULL;
16433 /* Recognize no-xxx. */
16434 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
16436 opt_set_p = false;
16437 p += 3;
16438 len -= 3;
16440 else
16441 opt_set_p = true;
16443 /* Find the option. */
16444 ch = *p;
16445 found = false;
16446 for (i = 0; i < ARRAY_SIZE (attrs); i++)
16448 opt_len = attrs[i].len;
16449 if (ch == attrs[i].string[0]
16450 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
16451 && memcmp (p, attrs[i].string, opt_len) == 0)
16453 opt = attrs[i].opt;
16454 if (!opt_set_p && cl_options[opt].cl_reject_negative)
16455 continue;
16456 mask = cl_options[opt].var_value;
16457 var_type = cl_options[opt].var_type;
16458 found = true;
16459 break;
16463 /* Process the option. */
16464 if (!found)
16466 error ("attribute %<target%> argument %qs is unknown", orig_p);
16467 return false;
16469 else if (attrs[i].only_as_pragma && !force_pragma)
16471 /* Value is not allowed for the target attribute. */
16472 error ("value %qs is not supported by attribute %<target%>",
16473 attrs[i].string);
16474 return false;
16477 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
16479 if (var_type == CLVC_BIT_CLEAR)
16480 opt_set_p = !opt_set_p;
16482 if (opt_set_p)
16483 opts->x_target_flags |= mask;
16484 else
16485 opts->x_target_flags &= ~mask;
16486 new_opts_set->x_target_flags |= mask;
16489 else if (cl_options[opt].var_type == CLVC_INTEGER)
16491 int value;
16493 if (cl_options[opt].cl_uinteger)
16495 /* Unsigned integer argument. Code based on the function
16496 decode_cmdline_option () in opts-common.cc. */
16497 value = integral_argument (p + opt_len);
16499 else
16500 value = (opt_set_p) ? 1 : 0;
16502 if (value != -1)
16504 struct cl_decoded_option decoded;
16506 /* Value range check; only implemented for numeric and boolean
16507 options at the moment. */
16508 generate_option (opt, NULL, value, CL_TARGET, &decoded);
16509 s390_handle_option (opts, new_opts_set, &decoded, input_location);
16510 set_option (opts, new_opts_set, opt, value,
16511 p + opt_len, DK_UNSPECIFIED, input_location,
16512 global_dc);
16514 else
16516 error ("attribute %<target%> argument %qs is unknown", orig_p);
16517 ret = false;
16521 else if (cl_options[opt].var_type == CLVC_ENUM)
16523 bool arg_ok;
16524 int value;
16526 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
16527 if (arg_ok)
16528 set_option (opts, new_opts_set, opt, value,
16529 p + opt_len, DK_UNSPECIFIED, input_location,
16530 global_dc);
16531 else
16533 error ("attribute %<target%> argument %qs is unknown", orig_p);
16534 ret = false;
16538 else
16539 gcc_unreachable ();
16541 return ret;
16544 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
16546 tree
16547 s390_valid_target_attribute_tree (tree args,
16548 struct gcc_options *opts,
16549 const struct gcc_options *opts_set,
16550 bool force_pragma)
16552 tree t = NULL_TREE;
16553 struct gcc_options new_opts_set;
16555 memset (&new_opts_set, 0, sizeof (new_opts_set));
16557 /* Process each of the options on the chain. */
16558 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
16559 force_pragma))
16560 return error_mark_node;
16562 /* If some option was set (even if it has not changed), rerun
16563 s390_option_override_internal, and then save the options away. */
16564 if (new_opts_set.x_target_flags
16565 || new_opts_set.x_s390_arch
16566 || new_opts_set.x_s390_tune
16567 || new_opts_set.x_s390_stack_guard
16568 || new_opts_set.x_s390_stack_size
16569 || new_opts_set.x_s390_branch_cost
16570 || new_opts_set.x_s390_warn_framesize
16571 || new_opts_set.x_s390_warn_dynamicstack_p)
16573 const unsigned char *src = (const unsigned char *)opts_set;
16574 unsigned char *dest = (unsigned char *)&new_opts_set;
16575 unsigned int i;
16577 /* Merge the original option flags into the new ones. */
16578 for (i = 0; i < sizeof(*opts_set); i++)
16579 dest[i] |= src[i];
16581 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
16582 s390_option_override_internal (opts, &new_opts_set);
16583 /* Save the current options unless we are validating options for
16584 #pragma. */
16585 t = build_target_option_node (opts, &new_opts_set);
16587 return t;
16590 /* Hook to validate attribute((target("string"))). */
16592 static bool
16593 s390_valid_target_attribute_p (tree fndecl,
16594 tree ARG_UNUSED (name),
16595 tree args,
16596 int ARG_UNUSED (flags))
16598 struct gcc_options func_options, func_options_set;
16599 tree new_target, new_optimize;
16600 bool ret = true;
16602 /* attribute((target("default"))) does nothing, beyond
16603 affecting multi-versioning. */
16604 if (TREE_VALUE (args)
16605 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
16606 && TREE_CHAIN (args) == NULL_TREE
16607 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
16608 return true;
16610 tree old_optimize
16611 = build_optimization_node (&global_options, &global_options_set);
16613 /* Get the optimization options of the current function. */
16614 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
16616 if (!func_optimize)
16617 func_optimize = old_optimize;
16619 /* Init func_options. */
16620 memset (&func_options, 0, sizeof (func_options));
16621 init_options_struct (&func_options, NULL);
16622 lang_hooks.init_options_struct (&func_options);
16623 memset (&func_options_set, 0, sizeof (func_options_set));
16625 cl_optimization_restore (&func_options, &func_options_set,
16626 TREE_OPTIMIZATION (func_optimize));
16628 /* Initialize func_options to the default before its target options can
16629 be set. */
16630 cl_target_option_restore (&func_options, &func_options_set,
16631 TREE_TARGET_OPTION (target_option_default_node));
16633 new_target = s390_valid_target_attribute_tree (args, &func_options,
16634 &global_options_set,
16635 (args ==
16636 current_target_pragma));
16637 new_optimize = build_optimization_node (&func_options, &func_options_set);
16638 if (new_target == error_mark_node)
16639 ret = false;
16640 else if (fndecl && new_target)
16642 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
16643 if (old_optimize != new_optimize)
16644 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
16646 return ret;
16649 /* Hook to determine if one function can safely inline another. */
16651 static bool
16652 s390_can_inline_p (tree caller, tree callee)
16654 /* Flags which if present in the callee are required in the caller as well. */
16655 const unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM;
16657 /* Flags which affect the ABI and in general prevent inlining. */
16658 unsigned HOST_WIDE_INT must_match_masks
16659 = (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT
16660 | MASK_LONG_DOUBLE_128 | MASK_OPT_VX);
16662 /* Flags for which a mismatch should in general prevent inlining but
16663 which we accept for always_inline. */
16664 const unsigned HOST_WIDE_INT always_inline_safe_masks
16665 = MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC;
16667 const HOST_WIDE_INT all_masks
16668 = (caller_required_masks | must_match_masks | always_inline_safe_masks
16669 | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR);
16671 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16672 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16674 if (!callee_tree)
16675 callee_tree = target_option_default_node;
16676 if (!caller_tree)
16677 caller_tree = target_option_default_node;
16678 if (callee_tree == caller_tree)
16679 return true;
16681 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16682 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16684 /* If one of these triggers, make sure to add proper handling of your
16685 new flag to this hook. */
16686 gcc_assert (!(caller_opts->x_target_flags & ~all_masks));
16687 gcc_assert (!(callee_opts->x_target_flags & ~all_masks));
16689 bool always_inline
16690 = (DECL_DISREGARD_INLINE_LIMITS (callee)
16691 && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));
16693 if (!always_inline)
16694 must_match_masks |= always_inline_safe_masks;
16696 /* Inlining a hard float function into a soft float function is only
16697 allowed if the hard float function doesn't actually make use of
16698 floating point.
16700 We are called from FEs for multi-versioning call optimization, so
16701 beware that ipa_fn_summaries may not be available. */
16702 if (always_inline && ipa_fn_summaries
16703 && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions)
16704 must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT);
16706 if ((caller_opts->x_target_flags & must_match_masks)
16707 != (callee_opts->x_target_flags & must_match_masks))
16708 return false;
16710 if (~(caller_opts->x_target_flags & caller_required_masks)
16711 & (callee_opts->x_target_flags & caller_required_masks))
16712 return false;
16714 /* Don't inline functions to be compiled for a more recent arch into a
16715 function for an older arch. */
16716 if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16717 return false;
16719 if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune)
16720 return false;
16722 return true;
16724 #endif
16726 /* Set VAL to correct enum value according to the indirect-branch or
16727 function-return attribute in ATTR. */
16729 static inline void
16730 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16732 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16733 if (strcmp (str, "keep") == 0)
16734 *val = indirect_branch_keep;
16735 else if (strcmp (str, "thunk") == 0)
16736 *val = indirect_branch_thunk;
16737 else if (strcmp (str, "thunk-inline") == 0)
16738 *val = indirect_branch_thunk_inline;
16739 else if (strcmp (str, "thunk-extern") == 0)
16740 *val = indirect_branch_thunk_extern;
16743 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16744 from either the cmdline or the function attributes in
16745 cfun->machine. */
16747 static void
16748 s390_indirect_branch_settings (tree fndecl)
16750 tree attr;
16752 if (!fndecl)
16753 return;
16755 /* Initialize with the cmdline options and let the attributes
16756 override it. */
16757 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16758 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16760 cfun->machine->function_return_reg = s390_function_return_reg;
16761 cfun->machine->function_return_mem = s390_function_return_mem;
16763 if ((attr = lookup_attribute ("indirect_branch",
16764 DECL_ATTRIBUTES (fndecl))))
16766 s390_indirect_branch_attrvalue (attr,
16767 &cfun->machine->indirect_branch_jump);
16768 s390_indirect_branch_attrvalue (attr,
16769 &cfun->machine->indirect_branch_call);
16772 if ((attr = lookup_attribute ("indirect_branch_jump",
16773 DECL_ATTRIBUTES (fndecl))))
16774 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16776 if ((attr = lookup_attribute ("indirect_branch_call",
16777 DECL_ATTRIBUTES (fndecl))))
16778 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16780 if ((attr = lookup_attribute ("function_return",
16781 DECL_ATTRIBUTES (fndecl))))
16783 s390_indirect_branch_attrvalue (attr,
16784 &cfun->machine->function_return_reg);
16785 s390_indirect_branch_attrvalue (attr,
16786 &cfun->machine->function_return_mem);
16789 if ((attr = lookup_attribute ("function_return_reg",
16790 DECL_ATTRIBUTES (fndecl))))
16791 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16793 if ((attr = lookup_attribute ("function_return_mem",
16794 DECL_ATTRIBUTES (fndecl))))
16795 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16798 #if S390_USE_TARGET_ATTRIBUTE
16799 /* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl
16800 cache. */
16802 void
16803 s390_activate_target_options (tree new_tree)
16805 cl_target_option_restore (&global_options, &global_options_set,
16806 TREE_TARGET_OPTION (new_tree));
16807 if (TREE_TARGET_GLOBALS (new_tree))
16808 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16809 else if (new_tree == target_option_default_node)
16810 restore_target_globals (&default_target_globals);
16811 else
16812 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16813 s390_previous_fndecl = NULL_TREE;
16815 #endif
16817 /* Establish appropriate back-end context for processing the function
16818 FNDECL. The argument might be NULL to indicate processing at top
16819 level, outside of any function scope. */
16820 static void
16821 s390_set_current_function (tree fndecl)
16823 #if S390_USE_TARGET_ATTRIBUTE
16824 /* Only change the context if the function changes. This hook is called
16825 several times in the course of compiling a function, and we don't want to
16826 slow things down too much or call target_reinit when it isn't safe. */
16827 if (fndecl == s390_previous_fndecl)
16829 s390_indirect_branch_settings (fndecl);
16830 return;
16833 tree old_tree;
16834 if (s390_previous_fndecl == NULL_TREE)
16835 old_tree = target_option_current_node;
16836 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16837 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16838 else
16839 old_tree = target_option_default_node;
16841 if (fndecl == NULL_TREE)
16843 if (old_tree != target_option_current_node)
16844 s390_activate_target_options (target_option_current_node);
16845 return;
16848 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16849 if (new_tree == NULL_TREE)
16850 new_tree = target_option_default_node;
16852 if (old_tree != new_tree)
16853 s390_activate_target_options (new_tree);
16854 s390_previous_fndecl = fndecl;
16855 #endif
16856 s390_indirect_branch_settings (fndecl);
16859 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16861 static bool
16862 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16863 unsigned int align ATTRIBUTE_UNUSED,
16864 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16865 bool speed_p ATTRIBUTE_UNUSED)
16867 return (size == 1 || size == 2
16868 || size == 4 || (TARGET_ZARCH && size == 8));
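/* Only sizes that a single GPR load/store can handle are expanded through
   the by-pieces infrastructure: 1, 2 and 4 bytes always, and 8 bytes once
   64-bit instructions (z/Architecture mode) are available.  */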
16871 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16873 static void
16874 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16876 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16877 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16878 tree call_efpc = build_call_expr (efpc, 0);
16879 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16881 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16882 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16883 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16884 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16885 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16886 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16888 /* Generates the equivalent of feholdexcept (&fenv_var)
16890 fenv_var = __builtin_s390_efpc ();
16891 __builtin_s390_sfpc (fenv_var & mask) */
16892 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16893 NULL_TREE, NULL_TREE);
16894 tree new_fpc
16895 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16896 build_int_cst (unsigned_type_node,
16897 ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16898 | FPC_EXCEPTION_MASK)));
16899 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16900 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16902 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16904 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16905 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16906 build_int_cst (unsigned_type_node,
16907 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16908 *clear = build_call_expr (sfpc, 1, new_fpc);
16910 /* Generates the equivalent of feupdateenv (fenv_var)
16912 old_fpc = __builtin_s390_efpc ();
16913 __builtin_s390_sfpc (fenv_var);
16914 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16916 old_fpc = create_tmp_var_raw (unsigned_type_node);
16917 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16918 NULL_TREE, NULL_TREE);
16920 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16922 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16923 build_int_cst (unsigned_type_node,
16924 FPC_FLAGS_MASK));
16925 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16926 build_int_cst (unsigned_type_node,
16927 FPC_FLAGS_SHIFT));
16928 tree atomic_feraiseexcept
16929 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16930 raise_old_except = build_call_expr (atomic_feraiseexcept,
16931 1, raise_old_except);
16933 *update = build2 (COMPOUND_EXPR, void_type_node,
16934 build2 (COMPOUND_EXPR, void_type_node,
16935 store_old_fpc, set_new_fpc),
16936 raise_old_except);
16938 #undef FPC_EXCEPTION_MASK
16939 #undef FPC_FLAGS_MASK
16940 #undef FPC_DXC_MASK
16941 #undef FPC_EXCEPTION_MASK_SHIFT
16942 #undef FPC_FLAGS_SHIFT
16943 #undef FPC_DXC_SHIFT
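/* Summary of the sequences built above: *hold saves the FPC and installs a
   copy with trap masks, flags and DXC cleared; *clear zeroes the flag and
   DXC fields; *update restores the saved FPC and re-raises whatever flags
   accumulated in the meantime.  The FPC_* masks show the layout: trap masks
   in the high byte, flags in the second byte, DXC in the third byte.  */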
16946 /* Return the vector mode to be used for inner mode MODE when doing
16947 vectorization. */
16948 static machine_mode
16949 s390_preferred_simd_mode (scalar_mode mode)
16951 if (TARGET_VXE)
16952 switch (mode)
16954 case E_SFmode:
16955 return V4SFmode;
16956 default:;
16959 if (TARGET_VX)
16960 switch (mode)
16962 case E_DFmode:
16963 return V2DFmode;
16964 case E_DImode:
16965 return V2DImode;
16966 case E_SImode:
16967 return V4SImode;
16968 case E_HImode:
16969 return V8HImode;
16970 case E_QImode:
16971 return V16QImode;
16972 default:;
16974 return word_mode;
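/* For example, with -march=z13 a loop over 'double' is vectorized using
   V2DFmode (two elements per 16-byte vector register), whereas SFmode only
   gets a vector mode (V4SFmode) once the vector enhancements facility
   (TARGET_VXE) is available.  */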
16977 /* Our hardware does not require vectors to be strictly aligned. */
16978 static bool
16979 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16980 const_tree type ATTRIBUTE_UNUSED,
16981 int misalignment ATTRIBUTE_UNUSED,
16982 bool is_packed ATTRIBUTE_UNUSED)
16984 if (TARGET_VX)
16985 return true;
16987 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16988 is_packed);
16991 /* The vector ABI requires vector types to be aligned on an 8 byte
16992 boundary (our stack alignment). However, we allow this to be
16993 overridden by the user, even though this definitely breaks the ABI. */
16994 static HOST_WIDE_INT
16995 s390_vector_alignment (const_tree type)
16997 tree size = TYPE_SIZE (type);
16999 if (!TARGET_VX_ABI)
17000 return default_vector_alignment (type);
17002 if (TYPE_USER_ALIGN (type))
17003 return TYPE_ALIGN (type);
17005 if (tree_fits_uhwi_p (size)
17006 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
17007 return tree_to_uhwi (size);
17009 return BIGGEST_ALIGNMENT;
17012 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
17013 LARL instruction. */
17015 static HOST_WIDE_INT
17016 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
17018 return MAX (align, 16);
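/* The value returned is a bit alignment, so constants end up on at least a
   2-byte boundary, satisfying the even-address requirement of LARL.  */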
17021 #ifdef HAVE_AS_MACHINE_MACHINEMODE
17022 /* Implement TARGET_ASM_FILE_START. */
17023 static void
17024 s390_asm_file_start (void)
17026 default_file_start ();
17027 s390_asm_output_machine_for_arch (asm_out_file);
17029 #endif
17031 /* Implement TARGET_ASM_FILE_END. */
17032 static void
17033 s390_asm_file_end (void)
17035 #ifdef HAVE_AS_GNU_ATTRIBUTE
17036 varpool_node *vnode;
17037 cgraph_node *cnode;
17039 FOR_EACH_VARIABLE (vnode)
17040 if (TREE_PUBLIC (vnode->decl))
17041 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
17043 FOR_EACH_FUNCTION (cnode)
17044 if (TREE_PUBLIC (cnode->decl))
17045 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
17048 if (s390_vector_abi != 0)
17049 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
17050 s390_vector_abi);
17051 #endif
17052 file_end_indicate_exec_stack ();
17054 if (flag_split_stack)
17055 file_end_indicate_split_stack ();
17058 /* Return true if TYPE is a vector bool type. */
17059 static inline bool
17060 s390_vector_bool_type_p (const_tree type)
17062 return TYPE_VECTOR_OPAQUE (type);
17065 /* Return the diagnostic message string if the binary operation OP is
17066 not permitted on TYPE1 and TYPE2, NULL otherwise. */
17067 static const char*
17068 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
17070 bool bool1_p, bool2_p;
17071 bool plusminus_p;
17072 bool muldiv_p;
17073 bool compare_p;
17074 machine_mode mode1, mode2;
17076 if (!TARGET_ZVECTOR)
17077 return NULL;
17079 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
17080 return NULL;
17082 bool1_p = s390_vector_bool_type_p (type1);
17083 bool2_p = s390_vector_bool_type_p (type2);
17085 /* Mixing signed and unsigned types is forbidden for all
17086 operators. */
17087 if (!bool1_p && !bool2_p
17088 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
17089 return N_("types differ in signedness");
17091 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
17092 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
17093 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
17094 || op == ROUND_DIV_EXPR);
17095 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
17096 || op == EQ_EXPR || op == NE_EXPR);
17098 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
17099 return N_("binary operator does not support two vector bool operands");
17101 if (bool1_p != bool2_p && (muldiv_p || compare_p))
17102 return N_("binary operator does not support vector bool operand");
17104 mode1 = TYPE_MODE (type1);
17105 mode2 = TYPE_MODE (type2);
17107 if (bool1_p != bool2_p && plusminus_p
17108 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
17109 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
17110 return N_("binary operator does not support mixing vector "
17111 "bool with floating point vector operands");
17113 return NULL;
17116 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
17117 /* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with older
17118 glibc versions
17120 For historical reasons, float_t and double_t had been typedef'ed to
17121 double on s390, causing operations on float_t to operate in a higher
17122 precision than is necessary. However, it is not the case that SFmode
17123 operations have implicit excess precision, and we generate more optimal
17124 code if we let the compiler know no implicit extra precision is added.
17126 With a glibc with that "historic" definition, configure will enable this hook
17127 to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as implied
17128 by -std=cXY). That means when we are compiling with -fexcess-precision=fast,
17129 the value we set for FLT_EVAL_METHOD will be out of line with the actual
17130 precision of float_t.
17132 Newer versions of glibc will be modified to derive the definition of float_t
17133 from FLT_EVAL_METHOD on s390x, as on many other architectures. There,
17134 configure will disable this hook by default, so that we defer to the default
17135 of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef of float_t to
17136 float. Note that in that scenario, float_t and FLT_EVAL_METHOD will be in
17137 line independent of -fexcess-precision. */
17139 static enum flt_eval_method
17140 s390_excess_precision (enum excess_precision_type type)
17142 switch (type)
17144 case EXCESS_PRECISION_TYPE_IMPLICIT:
17145 case EXCESS_PRECISION_TYPE_FAST:
17146 /* The fastest type to promote to will always be the native type,
17147 whether that occurs with implicit excess precision or
17148 otherwise. */
17149 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
17150 case EXCESS_PRECISION_TYPE_STANDARD:
17151 /* Otherwise, when we are in a standards compliant mode, to
17152 ensure consistency with the implementation in glibc, report that
17153 float is evaluated to the range and precision of double. */
17154 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
17155 case EXCESS_PRECISION_TYPE_FLOAT16:
17156 error ("%<-fexcess-precision=16%> is not supported on this target");
17157 break;
17158 default:
17159 gcc_unreachable ();
17161 return FLT_EVAL_METHOD_UNPREDICTABLE;
17163 #endif
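/* Expand a rawmemchr-style search: set DST to the address of the first
   element of mode ELT_MODE in SRC that equals PAT.  PAT is broadcast into a
   vector register, a first (possibly unaligned) block is checked via
   vlbb/lcbb, and the remaining 16-byte aligned blocks are scanned in a loop
   with the vector find-element-equal instruction until a match is found.  */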
17165 void
17166 s390_rawmemchr (machine_mode elt_mode, rtx dst, rtx src, rtx pat)
17168 machine_mode vec_mode = mode_for_vector (as_a <scalar_int_mode> (elt_mode),
17169 16 / GET_MODE_SIZE (elt_mode)).require();
17170 rtx lens = gen_reg_rtx (V16QImode);
17171 rtx pattern = gen_reg_rtx (vec_mode);
17172 rtx loop_start = gen_label_rtx ();
17173 rtx loop_end = gen_label_rtx ();
17174 rtx addr = gen_reg_rtx (Pmode);
17175 rtx offset = gen_reg_rtx (Pmode);
17176 rtx loadlen = gen_reg_rtx (SImode);
17177 rtx matchlen = gen_reg_rtx (SImode);
17178 rtx mem;
17180 pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
17181 emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));
17183 emit_move_insn (addr, XEXP (src, 0));
17185 // Alignment handling: load up to the next block boundary (vlbb) and note how many bytes were loaded (lcbb).
17186 emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
17187 emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
17188 lens = convert_to_mode (vec_mode, lens, 1);
17189 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (0)));
17190 lens = convert_to_mode (V4SImode, lens, 1);
17191 emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
17192 lens = convert_to_mode (vec_mode, lens, 1);
17193 emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1, loop_end);
17194 force_expand_binop (Pmode, add_optab, addr, GEN_INT(16), addr, 1, OPTAB_DIRECT);
17195 force_expand_binop (Pmode, and_optab, addr, GEN_INT(~HOST_WIDE_INT_UC(0xf)), addr, 1, OPTAB_DIRECT);
17196 // now, addr is 16-byte aligned
17198 mem = gen_rtx_MEM (vec_mode, addr);
17199 set_mem_align (mem, 128);
17200 emit_move_insn (lens, mem);
17201 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
17202 add_int_reg_note (s390_emit_ccraw_jump (4, EQ, loop_end),
17203 REG_BR_PROB,
17204 profile_probability::very_unlikely ().to_reg_br_prob_note ());
17206 emit_label (loop_start);
17207 LABEL_NUSES (loop_start) = 1;
17209 force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1, OPTAB_DIRECT);
17210 mem = gen_rtx_MEM (vec_mode, addr);
17211 set_mem_align (mem, 128);
17212 emit_move_insn (lens, mem);
17213 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
17214 add_int_reg_note (s390_emit_ccraw_jump (4, NE, loop_start),
17215 REG_BR_PROB,
17216 profile_probability::very_likely ().to_reg_br_prob_note ());
17218 emit_label (loop_end);
17219 LABEL_NUSES (loop_end) = 1;
17221 if (TARGET_64BIT)
17223 lens = convert_to_mode (V2DImode, lens, 1);
17224 emit_insn (gen_vec_extractv2didi (offset, lens, GEN_INT (0)));
17226 else
17228 lens = convert_to_mode (V4SImode, lens, 1);
17229 emit_insn (gen_vec_extractv4sisi (offset, lens, GEN_INT (1)));
17231 force_expand_binop (Pmode, add_optab, addr, offset, dst, 1, OPTAB_DIRECT);
17234 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
17236 static unsigned HOST_WIDE_INT
17237 s390_asan_shadow_offset (void)
17239 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
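/* With the default shadow scale of 3, ASan computes shadow addresses as
   (addr >> 3) + this offset, so the shadow region starts at 1 << 52 for
   64-bit code and at 0x20000000 for 31-bit code.  */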
17242 #ifdef HAVE_GAS_HIDDEN
17243 # define USE_HIDDEN_LINKONCE 1
17244 #else
17245 # define USE_HIDDEN_LINKONCE 0
17246 #endif
17248 /* Output an indirect branch trampoline for target register REGNO. */
17250 static void
17251 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
17253 tree decl;
17254 char thunk_label[32];
17255 int i;
17257 if (z10_p)
17258 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
17259 else
17260 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
17261 INDIRECT_BRANCH_THUNK_REGNUM, regno);
17263 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
17264 get_identifier (thunk_label),
17265 build_function_type_list (void_type_node, NULL_TREE));
17266 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
17267 NULL_TREE, void_type_node);
17268 TREE_PUBLIC (decl) = 1;
17269 TREE_STATIC (decl) = 1;
17270 DECL_IGNORED_P (decl) = 1;
17272 if (USE_HIDDEN_LINKONCE)
17274 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
17276 targetm.asm_out.unique_section (decl, 0);
17277 switch_to_section (get_named_section (decl, NULL, 0));
17279 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
17280 fputs ("\t.hidden\t", asm_out_file);
17281 assemble_name (asm_out_file, thunk_label);
17282 putc ('\n', asm_out_file);
17283 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
17285 else
17287 switch_to_section (text_section);
17288 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
17291 DECL_INITIAL (decl) = make_node (BLOCK);
17292 current_function_decl = decl;
17293 allocate_struct_function (decl, false);
17294 init_function_start (decl);
17295 cfun->is_thunk = true;
17296 first_function_block_is_cold = false;
17297 final_start_function (emit_barrier (), asm_out_file, 1);
17299 /* This makes CFI at least usable for indirect jumps.
17301 Stopping in the thunk: the backtrace will point to the thunk target
17302 as if it was interrupted by a signal. For a call this means that
17303 the call chain will be: caller->callee->thunk */
17304 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
17306 fputs ("\t.cfi_signal_frame\n", asm_out_file);
17307 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
17308 for (i = 0; i < FPR15_REGNUM; i++)
17309 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
17312 if (z10_p)
17314 /* exrl 0,1f */
17316 /* We generate a thunk for z10 compiled code although z10 is
17317 currently not enabled. Tell the assembler to accept the
17318 instruction. */
17319 if (!TARGET_CPU_Z10)
17321 fputs ("\t.machine push\n", asm_out_file);
17322 fputs ("\t.machine z10\n", asm_out_file);
17324 /* We use exrl even if -mzarch hasn't been specified on the
17325 command line so we have to tell the assembler to accept
17326 it. */
17327 if (!TARGET_ZARCH)
17328 fputs ("\t.machinemode zarch\n", asm_out_file);
17330 fputs ("\texrl\t0,1f\n", asm_out_file);
17332 if (!TARGET_ZARCH)
17333 fputs ("\t.machinemode esa\n", asm_out_file);
17335 if (!TARGET_CPU_Z10)
17336 fputs ("\t.machine pop\n", asm_out_file);
17338 else
17340 /* larl %r1,1f */
17341 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
17342 INDIRECT_BRANCH_THUNK_REGNUM);
17344 /* ex 0,0(%r1) */
17345 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
17346 INDIRECT_BRANCH_THUNK_REGNUM);
17349 /* 0: j 0b */
17350 fputs ("0:\tj\t0b\n", asm_out_file);
17352 /* 1: br <regno> */
17353 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
17355 final_end_function ();
17356 init_insn_lengths ();
17357 free_after_compilation (cfun);
17358 set_cfun (NULL);
17359 current_function_decl = NULL;
17362 /* Implement the asm.code_end target hook. */
17364 static void
17365 s390_code_end (void)
17367 int i;
17369 for (i = 1; i < 16; i++)
17371 if (indirect_branch_z10thunk_mask & (1 << i))
17372 s390_output_indirect_thunk_function (i, true);
17374 if (indirect_branch_prez10thunk_mask & (1 << i))
17375 s390_output_indirect_thunk_function (i, false);
17378 if (TARGET_INDIRECT_BRANCH_TABLE)
17380 int o;
17381 int i;
17383 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
17385 if (indirect_branch_table_label_no[o] == 0)
17386 continue;
17388 switch_to_section (get_section (indirect_branch_table_name[o],
17390 NULL_TREE));
17391 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
17393 char label_start[32];
17395 ASM_GENERATE_INTERNAL_LABEL (label_start,
17396 indirect_branch_table_label[o], i);
17398 fputs ("\t.long\t", asm_out_file);
17399 assemble_name_raw (asm_out_file, label_start);
17400 fputs ("-.\n", asm_out_file);
17406 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
17408 unsigned int
17409 s390_case_values_threshold (void)
17411 /* Disabling branch prediction for indirect jumps makes jump tables
17412 much more expensive. */
17413 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
17414 return 20;
17416 return default_case_values_threshold ();
17419 /* Evaluate the insns between HEAD and TAIL and install
17420 back-end specific dependencies.
17422 Establish an ANTI dependency between r11 and r15 restores from FPRs
17423 to prevent the instruction scheduler from reordering them since
17424 this would break CFI. No further handling in the sched_reorder
17425 hook is required since the r11 and r15 restore will never appear in
17426 the same ready list with that change. */
17427 void
17428 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
17430 if (!frame_pointer_needed || !epilogue_completed)
17431 return;
17433 while (head != tail && DEBUG_INSN_P (head))
17434 head = NEXT_INSN (head);
17436 rtx_insn *r15_restore = NULL, *r11_restore = NULL;
17438 for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
17440 rtx set = single_set (insn);
17441 if (!INSN_P (insn)
17442 || !RTX_FRAME_RELATED_P (insn)
17443 || set == NULL_RTX
17444 || !REG_P (SET_DEST (set))
17445 || !FP_REG_P (SET_SRC (set)))
17446 continue;
17448 if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
17449 r11_restore = insn;
17451 if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
17452 r15_restore = insn;
17455 if (r11_restore == NULL || r15_restore == NULL)
17456 return;
17457 add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
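/* Illustrative RTL sketch (register choices assumed) of the epilogue
   pattern this hook protects:

     (set (reg:DI %r11) (reg:DI %f8))    ; frame pointer restored from an FPR
     (set (reg:DI %r15) (reg:DI %f10))   ; stack pointer restored from an FPR

   The anti dependency keeps the scheduler from swapping the two
   frame-related restores, which would invalidate the CFI notes emitted
   for them.  */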
17460 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
17462 static unsigned HOST_WIDE_INT
17463 s390_shift_truncation_mask (machine_mode mode)
17465 return mode == DImode || mode == SImode ? 63 : 0;
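/* Illustrative only: because the mask is 63 for SImode and DImode, an
   explicit truncation of the shift count such as

     x << (n & 63)

   can be emitted as a plain shift; the hardware only consults the low
   six bits of the shift amount anyway.  */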
17468 /* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
17469 modifiers. */
17471 static bool
17472 f_constraint_p (const char *constraint)
17474 bool seen_f_p = false;
17475 bool seen_v_p = false;
17477 for (size_t i = 0, c_len = strlen (constraint); i < c_len;
17478 i += CONSTRAINT_LEN (constraint[i], constraint + i))
17480 if (constraint[i] == 'f')
17481 seen_f_p = true;
17482 if (constraint[i] == 'v')
17483 seen_v_p = true;
17486 /* Treat "fv" constraints as "v", because LRA will choose the widest register
17487 * class. */
17488 return seen_f_p && !seen_v_p;
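/* Illustrative only: "f", "=f" and "+f" yield true here, while "v" and
   the combined "fv" yield false - the latter because LRA will pick the
   wider vector class, as noted above.  */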
17491 /* Return TRUE iff X is a hard floating-point (and not a vector) register. */
17493 static bool
17494 s390_hard_fp_reg_p (rtx x)
17496 if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
17497 return false;
17499 tree decl = REG_EXPR (x);
17500 if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
17501 return false;
17503 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
17505 return name[0] == '*' && name[1] == 'f';
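/* Illustrative only (assumed user-level example): a declaration such as

     register double x asm ("f4");

   gives the decl an assembler name of "*f4", which is exactly the
   pattern the check above looks for.  */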
17508 /* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
17509 constraints when long doubles are stored in vector registers. */
17511 static rtx_insn *
17512 s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
17513 vec<machine_mode> &input_modes,
17514 vec<const char *> &constraints,
17515 vec<rtx> &/*uses*/, vec<rtx> &/*clobbers*/,
17516 HARD_REG_SET &clobbered_regs, location_t loc)
17519 rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
17520 bool saw_cc = false;
17522 unsigned ninputs = inputs.length ();
17523 unsigned noutputs = outputs.length ();
17524 for (unsigned i = 0; i < noutputs; i++)
17526 const char *constraint = constraints[i];
17527 if (strncmp (constraint, "=@cc", 4) == 0)
17529 if (constraint[4] != 0)
17531 error_at (loc, "invalid cc output constraint: %qs", constraint);
17532 continue;
17534 if (saw_cc)
17536 error_at (loc, "multiple cc output constraints not supported");
17537 continue;
17539 if (TEST_HARD_REG_BIT (clobbered_regs, CC_REGNUM))
17541 error_at (loc, "%<asm%> specifier for cc output conflicts with %<asm%> clobber list");
17542 continue;
17544 rtx dest = outputs[i];
17545 if (GET_MODE (dest) != SImode)
17547 error_at (loc, "invalid type for cc output constraint");
17548 continue;
17550 saw_cc = true;
17551 constraints[i] = "=c";
17552 outputs[i] = gen_rtx_REG (CCRAWmode, CC_REGNUM);
17554 push_to_sequence2 (after_md_seq, after_md_end);
17555 emit_insn (gen_rtx_SET (dest,
17556 gen_rtx_UNSPEC (SImode,
17557 gen_rtvec (1, outputs[i]),
17558 UNSPEC_CC_TO_INT)));
17559 after_md_seq = get_insns ();
17560 after_md_end = get_last_insn ();
17561 end_sequence ();
17562 continue;
17564 if (!TARGET_VXE)
17565 /* Long doubles are stored in FPR pairs - nothing to do. */
17566 continue;
17567 if (GET_MODE (outputs[i]) != TFmode)
17568 /* Not a long double - nothing to do. */
17569 continue;
17570 bool allows_mem, allows_reg, is_inout;
17571 bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
17572 &allows_mem, &allows_reg, &is_inout);
17573 gcc_assert (ok);
17574 if (!f_constraint_p (constraint))
17575 /* Long double with a constraint other than "=f" - nothing to do. */
17576 continue;
17577 gcc_assert (allows_reg);
17578 gcc_assert (!is_inout);
17579 /* Copy output value from a FPR pair into a vector register. */
17580 rtx fprx2;
17581 push_to_sequence2 (after_md_seq, after_md_end);
17582 if (s390_hard_fp_reg_p (outputs[i]))
17584 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
17585 /* The first half is already at the correct location, copy only the
17586 * second one. Use the UNSPEC pattern instead of the SUBREG one,
17587 * since s390_can_change_mode_class() rejects
17588 * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */
17589 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
17590 rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
17591 emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
17593 else
17595 fprx2 = gen_reg_rtx (FPRX2mode);
17596 emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
17598 after_md_seq = get_insns ();
17599 after_md_end = get_last_insn ();
17600 end_sequence ();
17601 outputs[i] = fprx2;
17604 if (!TARGET_VXE)
17605 /* Long doubles are stored in FPR pairs - nothing left to do. */
17606 return after_md_seq;
17608 for (unsigned i = 0; i < ninputs; i++)
17610 if (GET_MODE (inputs[i]) != TFmode)
17611 /* Not a long double - nothing to do. */
17612 continue;
17613 const char *constraint = constraints[noutputs + i];
17614 bool allows_mem, allows_reg;
17615 bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
17616 constraints.address (), &allows_mem,
17617 &allows_reg);
17618 gcc_assert (ok);
17619 if (!f_constraint_p (constraint))
17620 /* Long double with a constraint other than "f" (or "=f" for inout
17621 operands) - nothing to do. */
17622 continue;
17623 gcc_assert (allows_reg);
17624 /* Copy input value from a vector register into a FPR pair. */
17625 rtx fprx2;
17626 if (s390_hard_fp_reg_p (inputs[i]))
17628 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
17629 /* Copy only the second half. */
17630 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
17631 rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
17632 emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
17634 else
17636 fprx2 = gen_reg_rtx (FPRX2mode);
17637 emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
17639 inputs[i] = fprx2;
17640 input_modes[i] = FPRX2mode;
17643 return after_md_seq;
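/* Illustrative only (user-level sketch, operand names assumed):

     int cc;
     long double r, x;
     asm ("..." : "=@cc" (cc), "=f" (r) : "f" (x));

   The "=@cc" output is rewritten above into a CC_REGNUM reference with
   an UNSPEC_CC_TO_INT copy appended after the asm, and - when long
   doubles live in vector registers (TARGET_VXE) - the "f"-constrained
   TFmode operands are routed through FPRX2 register pairs before and
   after the asm.  */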
17646 #define MAX_VECT_LEN 16
17648 struct expand_vec_perm_d
17650 rtx target, op0, op1;
17651 unsigned char perm[MAX_VECT_LEN];
17652 machine_mode vmode;
17653 unsigned char nelt;
17654 bool testing_p;
17655 bool only_op0;
17656 bool only_op1;
17659 /* Try to expand the vector permute operation described by D using the
17660 vector merge instructions vml and vmh. Return true if vector merge
17661 could be used. */
17662 static bool
17663 expand_perm_with_merge (const struct expand_vec_perm_d &d)
17665 static const unsigned char hi_perm_di[2] = {0, 2};
17666 static const unsigned char hi_perm_si[4] = {0, 4, 1, 5};
17667 static const unsigned char hi_perm_hi[8] = {0, 8, 1, 9, 2, 10, 3, 11};
17668 static const unsigned char hi_perm_qi[16]
17669 = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
17671 static const unsigned char hi_perm_di_swap[2] = {2, 0};
17672 static const unsigned char hi_perm_si_swap[4] = {4, 0, 6, 2};
17673 static const unsigned char hi_perm_hi_swap[8] = {8, 0, 10, 2, 12, 4, 14, 6};
17674 static const unsigned char hi_perm_qi_swap[16]
17675 = {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14};
17677 static const unsigned char lo_perm_di[2] = {1, 3};
17678 static const unsigned char lo_perm_si[4] = {2, 6, 3, 7};
17679 static const unsigned char lo_perm_hi[8] = {4, 12, 5, 13, 6, 14, 7, 15};
17680 static const unsigned char lo_perm_qi[16]
17681 = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
17683 static const unsigned char lo_perm_di_swap[2] = {3, 1};
17684 static const unsigned char lo_perm_si_swap[4] = {5, 1, 7, 3};
17685 static const unsigned char lo_perm_hi_swap[8] = {9, 1, 11, 3, 13, 5, 15, 7};
17686 static const unsigned char lo_perm_qi_swap[16]
17687 = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
17689 bool merge_lo_p = false;
17690 bool merge_hi_p = false;
17691 bool swap_operands_p = false;
17693 if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
17694 || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
17695 || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi, 8) == 0)
17696 || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi, 16) == 0))
17698 merge_hi_p = true;
17700 else if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di_swap, 2) == 0)
17701 || (d.nelt == 4 && memcmp (d.perm, hi_perm_si_swap, 4) == 0)
17702 || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi_swap, 8) == 0)
17703 || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi_swap, 16) == 0))
17705 merge_hi_p = true;
17706 swap_operands_p = true;
17708 else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di, 2) == 0)
17709 || (d.nelt == 4 && memcmp (d.perm, lo_perm_si, 4) == 0)
17710 || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi, 8) == 0)
17711 || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi, 16) == 0))
17713 merge_lo_p = true;
17715 else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di_swap, 2) == 0)
17716 || (d.nelt == 4 && memcmp (d.perm, lo_perm_si_swap, 4) == 0)
17717 || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi_swap, 8) == 0)
17718 || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi_swap, 16) == 0))
17720 merge_lo_p = true;
17721 swap_operands_p = true;
17724 if (!merge_lo_p && !merge_hi_p)
17725 return false;
17727 if (d.testing_p)
17728 return merge_lo_p || merge_hi_p;
17730 rtx op0, op1;
17731 if (swap_operands_p)
17733 op0 = d.op1;
17734 op1 = d.op0;
17736 else
17738 op0 = d.op0;
17739 op1 = d.op1;
17742 s390_expand_merge (d.target, op0, op1, merge_hi_p);
17744 return true;
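/* Illustrative only: for V4SI, merging {a0,a1,a2,a3} and {b0,b1,b2,b3}
   "high" yields {a0,b0,a1,b1}, which is exactly the index pattern
   hi_perm_si = {0, 4, 1, 5} above; the *_swap tables describe the same
   selections with the two input operands exchanged.  */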
17747 /* Try to expand the vector permute operation described by D using the
17748 vector permute doubleword immediate instruction vpdi. Return true
17749 if vpdi could be used.
17751 VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5
17752 cases are covered by vmrhg and vmrlg already. So we only care
17753 about the 1, 4 cases here.
17754 1 - First element of src1 and second of src2
17755 4 - Second element of src1 and first of src2 */
17756 static bool
17757 expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
17759 bool vpdi1_p = false;
17760 bool vpdi4_p = false;
17761 bool swap_operands_p = false;
17762 rtx op0_reg, op1_reg;
17764 // Only V2DI and V2DF are supported here.
17765 if (d.nelt != 2)
17766 return false;
17768 if (d.perm[0] == 0 && d.perm[1] == 3)
17769 vpdi1_p = true;
17770 else if (d.perm[0] == 2 && d.perm[1] == 1)
17772 vpdi1_p = true;
17773 swap_operands_p = true;
17775 else if ((d.perm[0] == 1 && d.perm[1] == 2)
17776 || (d.perm[0] == 1 && d.perm[1] == 0)
17777 || (d.perm[0] == 3 && d.perm[1] == 2))
17778 vpdi4_p = true;
17779 else if (d.perm[0] == 3 && d.perm[1] == 0)
17781 vpdi4_p = true;
17782 swap_operands_p = true;
17785 if (!vpdi1_p && !vpdi4_p)
17786 return false;
17788 if (d.testing_p)
17789 return true;
17791 op0_reg = force_reg (GET_MODE (d.op0), d.op0);
17792 op1_reg = force_reg (GET_MODE (d.op1), d.op1);
17794 /* If we only reference either of the operands in
17795 the permute mask, just use one of them. */
17796 if (d.only_op0)
17797 op1_reg = op0_reg;
17798 else if (d.only_op1)
17799 op0_reg = op1_reg;
17800 else if (swap_operands_p)
17802 rtx tmp = op0_reg;
17803 op0_reg = op1_reg;
17804 op1_reg = tmp;
17807 if (vpdi1_p)
17808 emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
17809 if (vpdi4_p)
17810 emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg));
17812 return true;
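/* Illustrative only: with V2DI operands {a0,a1} and {b0,b1}, vpdi with
   immediate 1 selects {a0,b1} (perm {0,3}) and immediate 4 selects
   {a1,b0} (perm {1,2}); the remaining accepted masks are the same
   selections applied to a single operand or with the operands swapped.  */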
17815 /* Helper that checks if a vector permutation mask D
17816 represents a reversal of the vector's elements. */
17817 static inline bool
17818 is_reverse_perm_mask (const struct expand_vec_perm_d &d)
17820 for (int i = 0; i < d.nelt; i++)
17821 if (d.perm[i] != d.nelt - i - 1)
17822 return false;
17823 return true;
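/* Illustrative only: for a four-element vector the reversing mask is
   {3, 2, 1, 0}; any other index pattern makes this helper return
   false.  */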
17826 static bool
17827 expand_perm_reverse_elements (const struct expand_vec_perm_d &d)
17829 if (d.op0 != d.op1 || !is_reverse_perm_mask (d))
17830 return false;
17832 if (d.testing_p)
17833 return true;
17835 switch (d.vmode)
17837 case V1TImode: emit_move_insn (d.target, d.op0); break;
17838 case V2DImode: emit_insn (gen_eltswapv2di (d.target, d.op0)); break;
17839 case V4SImode: emit_insn (gen_eltswapv4si (d.target, d.op0)); break;
17840 case V8HImode: emit_insn (gen_eltswapv8hi (d.target, d.op0)); break;
17841 case V16QImode: emit_insn (gen_eltswapv16qi (d.target, d.op0)); break;
17842 case V2DFmode: emit_insn (gen_eltswapv2df (d.target, d.op0)); break;
17843 case V4SFmode: emit_insn (gen_eltswapv4sf (d.target, d.op0)); break;
17844 default: gcc_unreachable();
17847 return true;
17850 /* Try to emit vlbr/vstbr. Note, this is only a candidate insn since
17851 TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only. Thus,
17852 either fwprop, combine et al. "fix" one of the input/output operands into
17853 a memory operand or a splitter has to reverse this into a general vperm
17854 operation. */
17856 static bool
17857 expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
17859 static const char perm[4][MAX_VECT_LEN]
17860 = { { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 },
17861 { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
17862 { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 },
17863 { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } };
17865 if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
17866 return false;
17868 if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
17870 if (!d.testing_p)
17872 rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
17873 rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
17874 emit_insn (gen_bswapv8hi (target, op0));
17876 return true;
17879 if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
17881 if (!d.testing_p)
17883 rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
17884 rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
17885 emit_insn (gen_bswapv4si (target, op0));
17887 return true;
17890 if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
17892 if (!d.testing_p)
17894 rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
17895 rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
17896 emit_insn (gen_bswapv2di (target, op0));
17898 return true;
17901 if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
17903 if (!d.testing_p)
17905 rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
17906 rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
17907 emit_insn (gen_bswapv1ti (target, op0));
17909 return true;
17912 return false;
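/* Illustrative only: each perm[] row above reverses the bytes within
   elements of one width - e.g. {1,0, 3,2, ...} swaps the two bytes of
   every halfword and is emitted as bswapv8hi, while the last row
   reverses all 16 bytes and maps to bswapv1ti.  */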
17915 /* Try to find the best sequence for the vector permute operation
17916 described by D. Return true if the operation could be
17917 expanded. */
17918 static bool
17919 vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
17921 if (expand_perm_reverse_elements (d))
17922 return true;
17924 if (expand_perm_with_merge (d))
17925 return true;
17927 if (expand_perm_with_vpdi (d))
17928 return true;
17930 if (expand_perm_as_a_vlbr_vstbr_candidate (d))
17931 return true;
17933 return false;
17936 /* Return true if we can emit instructions for the constant
17937 permutation vector in SEL. If TARGET, OP0 and OP1 are non-null, the
17938 hook is supposed to emit the required INSNs. */
17940 bool
17941 s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
17942 rtx target, rtx op0, rtx op1,
17943 const vec_perm_indices &sel)
17945 if (vmode != op_mode)
17946 return false;
17948 struct expand_vec_perm_d d;
17949 unsigned int i, nelt;
17951 if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
17952 return false;
17954 d.target = target;
17955 d.op0 = op0;
17956 d.op1 = op1;
17958 d.vmode = vmode;
17959 gcc_assert (VECTOR_MODE_P (d.vmode));
17960 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
17961 d.testing_p = target == NULL_RTX;
17962 d.only_op0 = false;
17963 d.only_op1 = false;
17965 gcc_assert (target == NULL_RTX || REG_P (target));
17966 gcc_assert (sel.length () == nelt);
17968 unsigned int highest = 0, lowest = 2 * nelt - 1;
17969 for (i = 0; i < nelt; i++)
17971 unsigned char e = sel[i];
17972 lowest = MIN (lowest, e);
17973 highest = MAX (highest, e);
17974 gcc_assert (e < 2 * nelt);
17975 d.perm[i] = e;
17978 if (lowest < nelt && highest < nelt)
17979 d.only_op0 = true;
17980 else if (lowest >= nelt && highest >= nelt)
17981 d.only_op1 = true;
17983 return vectorize_vec_perm_const_1 (d);
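/* Illustrative only: a selector such as {0, 1, 2, 3} on V4SI references
   the first input exclusively, so only_op0 is set and the expanders may
   ignore op1; likewise {4, 5, 6, 7} sets only_op1.  */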
17986 /* Initialize GCC target structure. */
17988 #undef TARGET_ASM_ALIGNED_HI_OP
17989 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
17990 #undef TARGET_ASM_ALIGNED_DI_OP
17991 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
17992 #undef TARGET_ASM_INTEGER
17993 #define TARGET_ASM_INTEGER s390_assemble_integer
17995 #undef TARGET_ASM_OPEN_PAREN
17996 #define TARGET_ASM_OPEN_PAREN ""
17998 #undef TARGET_ASM_CLOSE_PAREN
17999 #define TARGET_ASM_CLOSE_PAREN ""
18001 #undef TARGET_OPTION_OVERRIDE
18002 #define TARGET_OPTION_OVERRIDE s390_option_override
18004 #ifdef TARGET_THREAD_SSP_OFFSET
18005 #undef TARGET_STACK_PROTECT_GUARD
18006 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
18007 #endif
18009 #undef TARGET_ENCODE_SECTION_INFO
18010 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
18012 #undef TARGET_SCALAR_MODE_SUPPORTED_P
18013 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
18015 #ifdef HAVE_AS_TLS
18016 #undef TARGET_HAVE_TLS
18017 #define TARGET_HAVE_TLS true
18018 #endif
18019 #undef TARGET_CANNOT_FORCE_CONST_MEM
18020 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
18022 #undef TARGET_DELEGITIMIZE_ADDRESS
18023 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
18025 #undef TARGET_LEGITIMIZE_ADDRESS
18026 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
18028 #undef TARGET_RETURN_IN_MEMORY
18029 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
18031 #undef TARGET_INIT_BUILTINS
18032 #define TARGET_INIT_BUILTINS s390_init_builtins
18033 #undef TARGET_EXPAND_BUILTIN
18034 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
18035 #undef TARGET_BUILTIN_DECL
18036 #define TARGET_BUILTIN_DECL s390_builtin_decl
18038 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
18039 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
18041 #undef TARGET_ASM_OUTPUT_MI_THUNK
18042 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
18043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
18044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
18046 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
18047 /* This hook is only needed to maintain the historic behavior with glibc
18048 versions that typedef float_t to double. */
18049 #undef TARGET_C_EXCESS_PRECISION
18050 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
18051 #endif
18053 #undef TARGET_SCHED_ADJUST_PRIORITY
18054 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
18055 #undef TARGET_SCHED_ISSUE_RATE
18056 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
18057 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
18058 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
18060 #undef TARGET_SCHED_VARIABLE_ISSUE
18061 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
18062 #undef TARGET_SCHED_REORDER
18063 #define TARGET_SCHED_REORDER s390_sched_reorder
18064 #undef TARGET_SCHED_INIT
18065 #define TARGET_SCHED_INIT s390_sched_init
18067 #undef TARGET_CANNOT_COPY_INSN_P
18068 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
18069 #undef TARGET_RTX_COSTS
18070 #define TARGET_RTX_COSTS s390_rtx_costs
18071 #undef TARGET_ADDRESS_COST
18072 #define TARGET_ADDRESS_COST s390_address_cost
18073 #undef TARGET_REGISTER_MOVE_COST
18074 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
18075 #undef TARGET_MEMORY_MOVE_COST
18076 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
18077 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
18078 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
18079 s390_builtin_vectorization_cost
18081 #undef TARGET_MACHINE_DEPENDENT_REORG
18082 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
18084 #undef TARGET_VALID_POINTER_MODE
18085 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
18087 #undef TARGET_BUILD_BUILTIN_VA_LIST
18088 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
18089 #undef TARGET_EXPAND_BUILTIN_VA_START
18090 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
18091 #undef TARGET_ASAN_SHADOW_OFFSET
18092 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
18093 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
18094 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
18096 #undef TARGET_PROMOTE_FUNCTION_MODE
18097 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
18098 #undef TARGET_PASS_BY_REFERENCE
18099 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
18101 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
18102 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
18104 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
18105 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
18106 #undef TARGET_FUNCTION_ARG
18107 #define TARGET_FUNCTION_ARG s390_function_arg
18108 #undef TARGET_FUNCTION_ARG_ADVANCE
18109 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
18110 #undef TARGET_FUNCTION_ARG_PADDING
18111 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
18112 #undef TARGET_FUNCTION_VALUE
18113 #define TARGET_FUNCTION_VALUE s390_function_value
18114 #undef TARGET_LIBCALL_VALUE
18115 #define TARGET_LIBCALL_VALUE s390_libcall_value
18116 #undef TARGET_STRICT_ARGUMENT_NAMING
18117 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
18119 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
18120 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
18122 #undef TARGET_FIXED_CONDITION_CODE_REGS
18123 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
18125 #undef TARGET_CC_MODES_COMPATIBLE
18126 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
18128 #undef TARGET_INVALID_WITHIN_DOLOOP
18129 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
18131 #ifdef HAVE_AS_TLS
18132 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
18133 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
18134 #endif
18136 #undef TARGET_DWARF_FRAME_REG_MODE
18137 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
18139 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
18140 #undef TARGET_MANGLE_TYPE
18141 #define TARGET_MANGLE_TYPE s390_mangle_type
18142 #endif
18144 #undef TARGET_SCALAR_MODE_SUPPORTED_P
18145 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
18147 #undef TARGET_VECTOR_MODE_SUPPORTED_P
18148 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
18150 #undef TARGET_PREFERRED_RELOAD_CLASS
18151 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
18153 #undef TARGET_SECONDARY_RELOAD
18154 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
18155 #undef TARGET_SECONDARY_MEMORY_NEEDED
18156 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
18157 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
18158 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
18160 #undef TARGET_LIBGCC_CMP_RETURN_MODE
18161 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
18163 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
18164 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
18166 #undef TARGET_LEGITIMATE_ADDRESS_P
18167 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
18169 #undef TARGET_LEGITIMATE_CONSTANT_P
18170 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
18172 #undef TARGET_LRA_P
18173 #define TARGET_LRA_P s390_lra_p
18175 #undef TARGET_CAN_ELIMINATE
18176 #define TARGET_CAN_ELIMINATE s390_can_eliminate
18178 #undef TARGET_CONDITIONAL_REGISTER_USAGE
18179 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
18181 #undef TARGET_LOOP_UNROLL_ADJUST
18182 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
18184 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
18185 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
18186 #undef TARGET_TRAMPOLINE_INIT
18187 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
18189 /* PR 79421 */
18190 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
18191 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
18193 #undef TARGET_UNWIND_WORD_MODE
18194 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
18196 #undef TARGET_CANONICALIZE_COMPARISON
18197 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
18199 #undef TARGET_HARD_REGNO_SCRATCH_OK
18200 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
18202 #undef TARGET_HARD_REGNO_NREGS
18203 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
18204 #undef TARGET_HARD_REGNO_MODE_OK
18205 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
18206 #undef TARGET_MODES_TIEABLE_P
18207 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
18209 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
18210 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
18211 s390_hard_regno_call_part_clobbered
18213 #undef TARGET_ATTRIBUTE_TABLE
18214 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
18216 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
18217 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
18219 #undef TARGET_SET_UP_BY_PROLOGUE
18220 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
18222 #undef TARGET_EXTRA_LIVE_ON_ENTRY
18223 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
18225 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
18226 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
18227 s390_use_by_pieces_infrastructure_p
18229 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
18230 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
18232 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
18233 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
18235 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
18236 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
18238 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
18239 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
18241 #undef TARGET_VECTOR_ALIGNMENT
18242 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
18244 #undef TARGET_INVALID_BINARY_OP
18245 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
18247 #ifdef HAVE_AS_MACHINE_MACHINEMODE
18248 #undef TARGET_ASM_FILE_START
18249 #define TARGET_ASM_FILE_START s390_asm_file_start
18250 #endif
18252 #undef TARGET_ASM_FILE_END
18253 #define TARGET_ASM_FILE_END s390_asm_file_end
18255 #undef TARGET_SET_CURRENT_FUNCTION
18256 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
18258 #if S390_USE_TARGET_ATTRIBUTE
18259 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
18260 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
18262 #undef TARGET_CAN_INLINE_P
18263 #define TARGET_CAN_INLINE_P s390_can_inline_p
18264 #endif
18266 #undef TARGET_OPTION_RESTORE
18267 #define TARGET_OPTION_RESTORE s390_function_specific_restore
18269 #undef TARGET_CAN_CHANGE_MODE_CLASS
18270 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
18272 #undef TARGET_CONSTANT_ALIGNMENT
18273 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
18275 #undef TARGET_ASM_CODE_END
18276 #define TARGET_ASM_CODE_END s390_code_end
18278 #undef TARGET_CASE_VALUES_THRESHOLD
18279 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
18281 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
18282 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
18283 s390_sched_dependencies_evaluation
18285 #undef TARGET_SHIFT_TRUNCATION_MASK
18286 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
18288 /* Use only short displacement, since long displacement is not available for
18289 the floating point instructions. */
18290 #undef TARGET_MAX_ANCHOR_OFFSET
18291 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
18293 #undef TARGET_MD_ASM_ADJUST
18294 #define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
18296 #undef TARGET_VECTORIZE_VEC_PERM_CONST
18297 #define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
18299 struct gcc_target targetm = TARGET_INITIALIZER;
18301 #include "gt-s390.h"