IBM Z: Use @PLT symbols for local functions in 64-bit mode
[official-gcc.git] / gcc / config / s390 / s390.c
blobb1d3b99784de5be41175e13fb757bb23b8e8ba02
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2021 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
89 /* This file should be included last. */
90 #include "target-def.h"
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl;
97 /* Define the specific costs for a given cpu. */
99 struct processor_costs
101 /* multiplication */
102 const int m; /* cost of an M instruction. */
103 const int mghi; /* cost of an MGHI instruction. */
104 const int mh; /* cost of an MH instruction. */
105 const int mhi; /* cost of an MHI instruction. */
106 const int ml; /* cost of an ML instruction. */
107 const int mr; /* cost of an MR instruction. */
108 const int ms; /* cost of an MS instruction. */
109 const int msg; /* cost of an MSG instruction. */
110 const int msgf; /* cost of an MSGF instruction. */
111 const int msgfr; /* cost of an MSGFR instruction. */
112 const int msgr; /* cost of an MSGR instruction. */
113 const int msr; /* cost of an MSR instruction. */
114 const int mult_df; /* cost of multiplication in DFmode. */
115 const int mxbr;
116 /* square root */
117 const int sqxbr; /* cost of square root in TFmode. */
118 const int sqdbr; /* cost of square root in DFmode. */
119 const int sqebr; /* cost of square root in SFmode. */
120 /* multiply and add */
121 const int madbr; /* cost of multiply and add in DFmode. */
122 const int maebr; /* cost of multiply and add in SFmode. */
123 /* division */
124 const int dxbr;
125 const int ddbr;
126 const int debr;
127 const int dlgr;
128 const int dlr;
129 const int dr;
130 const int dsgfr;
131 const int dsgr;
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
136 static const
137 struct processor_costs z900_cost =
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
168 static const
169 struct processor_costs z990_cost =
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
200 static const
201 struct processor_costs z9_109_cost =
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
232 static const
233 struct processor_costs z10_cost =
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1) , /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
264 static const
265 struct processor_costs z196_cost =
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1) , /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
296 static const
297 struct processor_costs zEC12_cost =
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1) , /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
328 const struct s390_processor processor_table[] =
330 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
340 { "arch14", "arch14", PROCESSOR_ARCH14, &zEC12_cost, 14 },
341 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
344 extern int reload_completed;
346 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
347 static rtx_insn *last_scheduled_insn;
348 #define NUM_SIDES 2
350 #define MAX_SCHED_UNITS 4
351 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
353 /* Estimate of number of cycles a long-running insn occupies an
354 execution unit. */
355 static int fxd_longrunning[NUM_SIDES];
356 static int fpd_longrunning[NUM_SIDES];
358 /* The maximum score added for an instruction whose unit hasn't been
359 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
360 give instruction mix scheduling more priority over instruction
361 grouping. */
362 #define MAX_SCHED_MIX_SCORE 2
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
366 Increase this with the OOO windows size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 70
369 /* Structure used to hold the components of a S/390 memory
370 address. A legitimate address on S/390 is of the general
371 form
372 base + index + displacement
373 where any of the components is optional.
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
378 /* The max number of insns of backend generated memset/memcpy/memcmp
379 loops. This value is used in the unroll adjust hook to detect such
380 loops. Current max is 9 coming from the memcmp loop. */
381 #define BLOCK_MEM_OPS_LOOP_INSNS 9
383 struct s390_address
385 rtx base;
386 rtx indx;
387 rtx disp;
388 bool pointer;
389 bool literal_pool;
392 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
394 #define cfun_frame_layout (cfun->machine->frame_layout)
395 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
396 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
397 ? cfun_frame_layout.fpr_bitmap & 0x0f \
398 : cfun_frame_layout.fpr_bitmap & 0x03))
399 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
400 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
401 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
402 (1 << (REGNO - FPR0_REGNUM)))
403 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
404 (1 << (REGNO - FPR0_REGNUM))))
405 #define cfun_gpr_save_slot(REGNO) \
406 cfun->machine->frame_layout.gpr_save_slots[REGNO]
408 /* Number of GPRs and FPRs used for argument passing. */
409 #define GP_ARG_NUM_REG 5
410 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
411 #define VEC_ARG_NUM_REG 8
413 /* A couple of shortcuts. */
414 #define CONST_OK_FOR_J(x) \
415 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
416 #define CONST_OK_FOR_K(x) \
417 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
418 #define CONST_OK_FOR_Os(x) \
419 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
420 #define CONST_OK_FOR_Op(x) \
421 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
422 #define CONST_OK_FOR_On(x) \
423 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
425 #define REGNO_PAIR_OK(REGNO, MODE) \
426 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
428 /* That's the read ahead of the dynamic branch prediction unit in
429 bytes on a z10 (or higher) CPU. */
430 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
432 /* Masks per jump target register indicating which thunk need to be
433 generated. */
434 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
435 static GTY(()) int indirect_branch_z10thunk_mask = 0;
437 #define INDIRECT_BRANCH_NUM_OPTIONS 4
439 enum s390_indirect_branch_option
441 s390_opt_indirect_branch_jump = 0,
442 s390_opt_indirect_branch_call,
443 s390_opt_function_return_reg,
444 s390_opt_function_return_mem
447 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
448 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
449 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
450 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
451 { ".s390_indirect_jump", ".s390_indirect_call",
452 ".s390_return_reg", ".s390_return_mem" };
454 bool
455 s390_return_addr_from_memory ()
457 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
460 /* Return nonzero if it's OK to use fused multiply-add for MODE. */
461 bool
462 s390_fma_allowed_p (machine_mode mode)
464 if (TARGET_VXE && mode == TFmode)
465 return flag_vx_long_double_fma;
467 return true;
470 /* Indicate which ABI has been used for passing vector args.
471 0 - no vector type arguments have been passed where the ABI is relevant
472 1 - the old ABI has been used
473 2 - a vector type argument has been passed either in a vector register
474 or on the stack by value */
475 static int s390_vector_abi = 0;
477 /* Set the vector ABI marker if TYPE is subject to the vector ABI
478 switch. The vector ABI affects only vector data types. There are
479 two aspects of the vector ABI relevant here:
481 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
482 ABI and natural alignment with the old.
484 2. vector <= 16 bytes are passed in VRs or by value on the stack
485 with the new ABI but by reference on the stack with the old.
487 If ARG_P is true TYPE is used for a function argument or return
488 value. The ABI marker then is set for all vector data types. If
489 ARG_P is false only type 1 vectors are being checked. */
491 static void
492 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
494 static hash_set<const_tree> visited_types_hash;
496 if (s390_vector_abi)
497 return;
499 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
500 return;
502 if (visited_types_hash.contains (type))
503 return;
505 visited_types_hash.add (type);
507 if (VECTOR_TYPE_P (type))
509 int type_size = int_size_in_bytes (type);
511 /* Outside arguments only the alignment is changing and this
512 only happens for vector types >= 16 bytes. */
513 if (!arg_p && type_size < 16)
514 return;
516 /* In arguments vector types > 16 are passed as before (GCC
517 never enforced the bigger alignment for arguments which was
518 required by the old vector ABI). However, it might still be
519 ABI relevant due to the changed alignment if it is a struct
520 member. */
521 if (arg_p && type_size > 16 && !in_struct_p)
522 return;
524 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
526 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
528 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
529 natural alignment there will never be ABI dependent padding
530 in an array type. That's why we do not set in_struct_p to
531 true here. */
532 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
534 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
536 tree arg_chain;
538 /* Check the return type. */
539 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
541 for (arg_chain = TYPE_ARG_TYPES (type);
542 arg_chain;
543 arg_chain = TREE_CHAIN (arg_chain))
544 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
546 else if (RECORD_OR_UNION_TYPE_P (type))
548 tree field;
550 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
552 if (TREE_CODE (field) != FIELD_DECL)
553 continue;
555 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
561 /* System z builtins. */
563 #include "s390-builtins.h"
565 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
567 #undef B_DEF
568 #undef OB_DEF
569 #undef OB_DEF_VAR
570 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
571 #define OB_DEF(...)
572 #define OB_DEF_VAR(...)
573 #include "s390-builtins.def"
577 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
579 #undef B_DEF
580 #undef OB_DEF
581 #undef OB_DEF_VAR
582 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
583 #define OB_DEF(...)
584 #define OB_DEF_VAR(...)
585 #include "s390-builtins.def"
589 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
591 #undef B_DEF
592 #undef OB_DEF
593 #undef OB_DEF_VAR
594 #define B_DEF(...)
595 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
596 #define OB_DEF_VAR(...)
597 #include "s390-builtins.def"
601 const unsigned int
602 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(...)
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
610 #include "s390-builtins.def"
614 const unsigned int
615 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
617 #undef B_DEF
618 #undef OB_DEF
619 #undef OB_DEF_VAR
620 #define B_DEF(...)
621 #define OB_DEF(...)
622 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
623 #include "s390-builtins.def"
627 tree s390_builtin_types[BT_MAX];
628 tree s390_builtin_fn_types[BT_FN_MAX];
629 tree s390_builtin_decls[S390_BUILTIN_MAX +
630 S390_OVERLOADED_BUILTIN_MAX +
631 S390_OVERLOADED_BUILTIN_VAR_MAX];
633 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
634 #undef B_DEF
635 #undef OB_DEF
636 #undef OB_DEF_VAR
637 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
638 #define OB_DEF(...)
639 #define OB_DEF_VAR(...)
641 #include "s390-builtins.def"
642 CODE_FOR_nothing
645 static void
646 s390_init_builtins (void)
648 /* These definitions are being used in s390-builtins.def. */
649 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
650 NULL, NULL);
651 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
652 tree c_uint64_type_node;
654 /* The uint64_type_node from tree.c is not compatible to the C99
655 uint64_t data type. What we want is c_uint64_type_node from
656 c-common.c. But since backend code is not supposed to interface
657 with the frontend we recreate it here. */
658 if (TARGET_64BIT)
659 c_uint64_type_node = long_unsigned_type_node;
660 else
661 c_uint64_type_node = long_long_unsigned_type_node;
663 #undef DEF_TYPE
664 #define DEF_TYPE(INDEX, NODE, CONST_P) \
665 if (s390_builtin_types[INDEX] == NULL) \
666 s390_builtin_types[INDEX] = (!CONST_P) ? \
667 (NODE) : build_type_variant ((NODE), 1, 0);
669 #undef DEF_POINTER_TYPE
670 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
671 if (s390_builtin_types[INDEX] == NULL) \
672 s390_builtin_types[INDEX] = \
673 build_pointer_type (s390_builtin_types[INDEX_BASE]);
675 #undef DEF_DISTINCT_TYPE
676 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
677 if (s390_builtin_types[INDEX] == NULL) \
678 s390_builtin_types[INDEX] = \
679 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
681 #undef DEF_VECTOR_TYPE
682 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
683 if (s390_builtin_types[INDEX] == NULL) \
684 s390_builtin_types[INDEX] = \
685 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
687 #undef DEF_OPAQUE_VECTOR_TYPE
688 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
689 if (s390_builtin_types[INDEX] == NULL) \
690 s390_builtin_types[INDEX] = \
691 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
693 #undef DEF_FN_TYPE
694 #define DEF_FN_TYPE(INDEX, args...) \
695 if (s390_builtin_fn_types[INDEX] == NULL) \
696 s390_builtin_fn_types[INDEX] = \
697 build_function_type_list (args, NULL_TREE);
698 #undef DEF_OV_TYPE
699 #define DEF_OV_TYPE(...)
700 #include "s390-builtin-types.def"
702 #undef B_DEF
703 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
704 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
705 s390_builtin_decls[S390_BUILTIN_##NAME] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_BUILTIN_##NAME, \
709 BUILT_IN_MD, \
710 NULL, \
711 ATTRS);
712 #undef OB_DEF
713 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
714 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
715 == NULL) \
716 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
717 add_builtin_function ("__builtin_" #NAME, \
718 s390_builtin_fn_types[FNTYPE], \
719 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
720 BUILT_IN_MD, \
721 NULL, \
723 #undef OB_DEF_VAR
724 #define OB_DEF_VAR(...)
725 #include "s390-builtins.def"
729 /* Return true if ARG is appropriate as argument number ARGNUM of
730 builtin DECL. The operand flags from s390-builtins.def have to
731 passed as OP_FLAGS. */
732 bool
733 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
735 if (O_UIMM_P (op_flags))
737 unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
738 unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
739 unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
740 unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
742 gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1));
743 gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
745 if (!tree_fits_uhwi_p (arg)
746 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
747 || (bitmask && tree_to_uhwi (arg) & ~bitmask))
749 if (bitmask)
751 gcc_assert (bitmask < 16);
752 char values[120] = "";
754 for (unsigned HOST_WIDE_INT i = 0; i <= bitmask; i++)
756 char buf[5];
757 if (i & ~bitmask)
758 continue;
759 int ret = snprintf (buf, 5, HOST_WIDE_INT_PRINT_UNSIGNED, i & bitmask);
760 gcc_assert (ret < 5);
761 strcat (values, buf);
762 if (i < bitmask)
763 strcat (values, ", ");
765 error ("constant argument %d for builtin %qF is invalid (%s)",
766 argnum, decl, values);
768 else
769 error ("constant argument %d for builtin %qF is out of range (0..%wu)",
770 argnum, decl, (HOST_WIDE_INT_1U << bitwidth) - 1);
772 return false;
776 if (O_SIMM_P (op_flags))
778 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
779 int bitwidth = bitwidths[op_flags - O_S2];
781 if (!tree_fits_shwi_p (arg)
782 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
783 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
785 error ("constant argument %d for builtin %qF is out of range "
786 "(%wd..%wd)", argnum, decl,
787 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
788 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
789 return false;
792 return true;
795 /* Expand an expression EXP that calls a built-in function,
796 with result going to TARGET if that's convenient
797 (and in mode MODE if that's convenient).
798 SUBTARGET may be used as the target for computing one of EXP's operands.
799 IGNORE is nonzero if the value is to be ignored. */
801 static rtx
802 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
803 machine_mode mode ATTRIBUTE_UNUSED,
804 int ignore ATTRIBUTE_UNUSED)
806 #define MAX_ARGS 6
808 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
809 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
810 enum insn_code icode;
811 rtx op[MAX_ARGS], pat;
812 int arity;
813 bool nonvoid;
814 tree arg;
815 call_expr_arg_iterator iter;
816 unsigned int all_op_flags = opflags_for_builtin (fcode);
817 machine_mode last_vec_mode = VOIDmode;
819 if (TARGET_DEBUG_ARG)
821 fprintf (stderr,
822 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
823 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
824 bflags_for_builtin (fcode));
827 if (S390_USE_TARGET_ATTRIBUTE)
829 unsigned int bflags;
831 bflags = bflags_for_builtin (fcode);
832 if ((bflags & B_HTM) && !TARGET_HTM)
834 error ("builtin %qF is not supported without %<-mhtm%> "
835 "(default with %<-march=zEC12%> and higher).", fndecl);
836 return const0_rtx;
838 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
840 error ("builtin %qF requires %<-mvx%> "
841 "(default with %<-march=z13%> and higher).", fndecl);
842 return const0_rtx;
845 if ((bflags & B_VXE) && !TARGET_VXE)
847 error ("Builtin %qF requires z14 or higher.", fndecl);
848 return const0_rtx;
851 if ((bflags & B_VXE2) && !TARGET_VXE2)
853 error ("Builtin %qF requires z15 or higher.", fndecl);
854 return const0_rtx;
857 if ((bflags & B_NNPA) && !TARGET_NNPA)
859 error ("Builtin %qF requires arch14 or higher.", fndecl);
860 return const0_rtx;
863 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
864 && fcode < S390_ALL_BUILTIN_MAX)
866 gcc_unreachable ();
868 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
870 icode = code_for_builtin[fcode];
871 /* Set a flag in the machine specific cfun part in order to support
872 saving/restoring of FPRs. */
873 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
874 cfun->machine->tbegin_p = true;
876 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
878 error ("unresolved overloaded builtin");
879 return const0_rtx;
881 else
882 internal_error ("bad builtin fcode");
884 if (icode == 0)
885 internal_error ("bad builtin icode");
887 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
889 if (nonvoid)
891 machine_mode tmode = insn_data[icode].operand[0].mode;
892 if (!target
893 || GET_MODE (target) != tmode
894 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
895 target = gen_reg_rtx (tmode);
897 /* There are builtins (e.g. vec_promote) with no vector
898 arguments but an element selector. So we have to also look
899 at the vector return type when emitting the modulo
900 operation. */
901 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
902 last_vec_mode = insn_data[icode].operand[0].mode;
905 arity = 0;
906 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
908 rtx tmp_rtx;
909 const struct insn_operand_data *insn_op;
910 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
912 all_op_flags = all_op_flags >> O_SHIFT;
914 if (arg == error_mark_node)
915 return NULL_RTX;
916 if (arity >= MAX_ARGS)
917 return NULL_RTX;
919 if (O_IMM_P (op_flags)
920 && TREE_CODE (arg) != INTEGER_CST)
922 error ("constant value required for builtin %qF argument %d",
923 fndecl, arity + 1);
924 return const0_rtx;
927 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
928 return const0_rtx;
930 insn_op = &insn_data[icode].operand[arity + nonvoid];
931 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
933 /* expand_expr truncates constants to the target mode only if it
934 is "convenient". However, our checks below rely on this
935 being done. */
936 if (CONST_INT_P (op[arity])
937 && SCALAR_INT_MODE_P (insn_op->mode)
938 && GET_MODE (op[arity]) != insn_op->mode)
939 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
940 insn_op->mode));
942 /* Wrap the expanded RTX for pointer types into a MEM expr with
943 the proper mode. This allows us to use e.g. (match_operand
944 "memory_operand"..) in the insn patterns instead of (mem
945 (match_operand "address_operand)). This is helpful for
946 patterns not just accepting MEMs. */
947 if (POINTER_TYPE_P (TREE_TYPE (arg))
948 && insn_op->predicate != address_operand)
949 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
951 /* Expand the module operation required on element selectors. */
952 if (op_flags == O_ELEM)
954 gcc_assert (last_vec_mode != VOIDmode);
955 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
956 op[arity],
957 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
958 NULL_RTX, 1, OPTAB_DIRECT);
961 /* Record the vector mode used for an element selector. This assumes:
962 1. There is no builtin with two different vector modes and an element selector
963 2. The element selector comes after the vector type it is referring to.
964 This currently the true for all the builtins but FIXME we
965 should better check for that. */
966 if (VECTOR_MODE_P (insn_op->mode))
967 last_vec_mode = insn_op->mode;
969 if (insn_op->predicate (op[arity], insn_op->mode))
971 arity++;
972 continue;
975 /* A memory operand is rejected by the memory_operand predicate.
976 Try making the address legal by copying it into a register. */
977 if (MEM_P (op[arity])
978 && insn_op->predicate == memory_operand
979 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
980 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
982 op[arity] = replace_equiv_address (op[arity],
983 copy_to_mode_reg (Pmode,
984 XEXP (op[arity], 0)));
986 /* Some of the builtins require different modes/types than the
987 pattern in order to implement a specific API. Instead of
988 adding many expanders which do the mode change we do it here.
989 E.g. s390_vec_add_u128 required to have vector unsigned char
990 arguments is mapped to addti3. */
991 else if (insn_op->mode != VOIDmode
992 && GET_MODE (op[arity]) != VOIDmode
993 && GET_MODE (op[arity]) != insn_op->mode
994 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
995 GET_MODE (op[arity]), 0))
996 != NULL_RTX))
998 op[arity] = tmp_rtx;
1001 /* The predicate rejects the operand although the mode is fine.
1002 Copy the operand to register. */
1003 if (!insn_op->predicate (op[arity], insn_op->mode)
1004 && (GET_MODE (op[arity]) == insn_op->mode
1005 || GET_MODE (op[arity]) == VOIDmode
1006 || (insn_op->predicate == address_operand
1007 && GET_MODE (op[arity]) == Pmode)))
1009 /* An address_operand usually has VOIDmode in the expander
1010 so we cannot use this. */
1011 machine_mode target_mode =
1012 (insn_op->predicate == address_operand
1013 ? (machine_mode) Pmode : insn_op->mode);
1014 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1017 if (!insn_op->predicate (op[arity], insn_op->mode))
1019 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1020 return const0_rtx;
1022 arity++;
1025 switch (arity)
1027 case 0:
1028 pat = GEN_FCN (icode) (target);
1029 break;
1030 case 1:
1031 if (nonvoid)
1032 pat = GEN_FCN (icode) (target, op[0]);
1033 else
1034 pat = GEN_FCN (icode) (op[0]);
1035 break;
1036 case 2:
1037 if (nonvoid)
1038 pat = GEN_FCN (icode) (target, op[0], op[1]);
1039 else
1040 pat = GEN_FCN (icode) (op[0], op[1]);
1041 break;
1042 case 3:
1043 if (nonvoid)
1044 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1045 else
1046 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1047 break;
1048 case 4:
1049 if (nonvoid)
1050 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1051 else
1052 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1053 break;
1054 case 5:
1055 if (nonvoid)
1056 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1057 else
1058 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1059 break;
1060 case 6:
1061 if (nonvoid)
1062 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1063 else
1064 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1065 break;
1066 default:
1067 gcc_unreachable ();
1069 if (!pat)
1070 return NULL_RTX;
1071 emit_insn (pat);
1073 if (nonvoid)
1074 return target;
1075 else
1076 return const0_rtx;
1080 static const int s390_hotpatch_hw_max = 1000000;
1081 static int s390_hotpatch_hw_before_label = 0;
1082 static int s390_hotpatch_hw_after_label = 0;
1084 /* Check whether the hotpatch attribute is applied to a function and, if it has
1085 an argument, the argument is valid. */
1087 static tree
1088 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1089 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1091 tree expr;
1092 tree expr2;
1093 int err;
1095 if (TREE_CODE (*node) != FUNCTION_DECL)
1097 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1098 name);
1099 *no_add_attrs = true;
1101 if (args != NULL && TREE_CHAIN (args) != NULL)
1103 expr = TREE_VALUE (args);
1104 expr2 = TREE_VALUE (TREE_CHAIN (args));
1106 if (args == NULL || TREE_CHAIN (args) == NULL)
1107 err = 1;
1108 else if (TREE_CODE (expr) != INTEGER_CST
1109 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1110 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1111 err = 1;
1112 else if (TREE_CODE (expr2) != INTEGER_CST
1113 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1114 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1115 err = 1;
1116 else
1117 err = 0;
1118 if (err)
1120 error ("requested %qE attribute is not a comma separated pair of"
1121 " non-negative integer constants or too large (max. %d)", name,
1122 s390_hotpatch_hw_max);
1123 *no_add_attrs = true;
1126 return NULL_TREE;
1129 /* Expand the s390_vector_bool type attribute. */
1131 static tree
1132 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 tree args ATTRIBUTE_UNUSED,
1134 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1136 tree type = *node, result = NULL_TREE;
1137 machine_mode mode;
1139 while (POINTER_TYPE_P (type)
1140 || TREE_CODE (type) == FUNCTION_TYPE
1141 || TREE_CODE (type) == METHOD_TYPE
1142 || TREE_CODE (type) == ARRAY_TYPE)
1143 type = TREE_TYPE (type);
1145 mode = TYPE_MODE (type);
1146 switch (mode)
1148 case E_DImode: case E_V2DImode:
1149 result = s390_builtin_types[BT_BV2DI];
1150 break;
1151 case E_SImode: case E_V4SImode:
1152 result = s390_builtin_types[BT_BV4SI];
1153 break;
1154 case E_HImode: case E_V8HImode:
1155 result = s390_builtin_types[BT_BV8HI];
1156 break;
1157 case E_QImode: case E_V16QImode:
1158 result = s390_builtin_types[BT_BV16QI];
1159 break;
1160 default:
1161 break;
1164 *no_add_attrs = true; /* No need to hang on to the attribute. */
1166 if (result)
1167 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1169 return NULL_TREE;
1172 /* Check syntax of function decl attributes having a string type value. */
1174 static tree
1175 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1176 tree args ATTRIBUTE_UNUSED,
1177 int flags ATTRIBUTE_UNUSED,
1178 bool *no_add_attrs)
1180 tree cst;
1182 if (TREE_CODE (*node) != FUNCTION_DECL)
1184 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1185 name);
1186 *no_add_attrs = true;
1189 cst = TREE_VALUE (args);
1191 if (TREE_CODE (cst) != STRING_CST)
1193 warning (OPT_Wattributes,
1194 "%qE attribute requires a string constant argument",
1195 name);
1196 *no_add_attrs = true;
1199 if (is_attribute_p ("indirect_branch", name)
1200 || is_attribute_p ("indirect_branch_call", name)
1201 || is_attribute_p ("function_return", name)
1202 || is_attribute_p ("function_return_reg", name)
1203 || is_attribute_p ("function_return_mem", name))
1205 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1206 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1207 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1209 warning (OPT_Wattributes,
1210 "argument to %qE attribute is not "
1211 "(keep|thunk|thunk-extern)", name);
1212 *no_add_attrs = true;
1216 if (is_attribute_p ("indirect_branch_jump", name)
1217 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1218 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1219 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1220 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1222 warning (OPT_Wattributes,
1223 "argument to %qE attribute is not "
1224 "(keep|thunk|thunk-inline|thunk-extern)", name);
1225 *no_add_attrs = true;
1228 return NULL_TREE;
1231 static const struct attribute_spec s390_attribute_table[] = {
1232 { "hotpatch", 2, 2, true, false, false, false,
1233 s390_handle_hotpatch_attribute, NULL },
1234 { "s390_vector_bool", 0, 0, false, true, false, true,
1235 s390_handle_vectorbool_attribute, NULL },
1236 { "indirect_branch", 1, 1, true, false, false, false,
1237 s390_handle_string_attribute, NULL },
1238 { "indirect_branch_jump", 1, 1, true, false, false, false,
1239 s390_handle_string_attribute, NULL },
1240 { "indirect_branch_call", 1, 1, true, false, false, false,
1241 s390_handle_string_attribute, NULL },
1242 { "function_return", 1, 1, true, false, false, false,
1243 s390_handle_string_attribute, NULL },
1244 { "function_return_reg", 1, 1, true, false, false, false,
1245 s390_handle_string_attribute, NULL },
1246 { "function_return_mem", 1, 1, true, false, false, false,
1247 s390_handle_string_attribute, NULL },
1249 /* End element. */
1250 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1253 /* Return the alignment for LABEL. We default to the -falign-labels
1254 value except for the literal pool base label. */
1256 s390_label_align (rtx_insn *label)
1258 rtx_insn *prev_insn = prev_active_insn (label);
1259 rtx set, src;
1261 if (prev_insn == NULL_RTX)
1262 goto old;
1264 set = single_set (prev_insn);
1266 if (set == NULL_RTX)
1267 goto old;
1269 src = SET_SRC (set);
1271 /* Don't align literal pool base labels. */
1272 if (GET_CODE (src) == UNSPEC
1273 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1274 return 0;
1276 old:
1277 return align_labels.levels[0].log;
1280 static GTY(()) rtx got_symbol;
1282 /* Return the GOT table symbol. The symbol will be created when the
1283 function is invoked for the first time. */
1285 static rtx
1286 s390_got_symbol (void)
1288 if (!got_symbol)
1290 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1291 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1294 return got_symbol;
1297 static scalar_int_mode
1298 s390_libgcc_cmp_return_mode (void)
1300 return TARGET_64BIT ? DImode : SImode;
1303 static scalar_int_mode
1304 s390_libgcc_shift_count_mode (void)
1306 return TARGET_64BIT ? DImode : SImode;
1309 static scalar_int_mode
1310 s390_unwind_word_mode (void)
1312 return TARGET_64BIT ? DImode : SImode;
1315 /* Return true if the back end supports mode MODE. */
1316 static bool
1317 s390_scalar_mode_supported_p (scalar_mode mode)
1319 /* In contrast to the default implementation reject TImode constants on 31bit
1320 TARGET_ZARCH for ABI compliance. */
1321 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1322 return false;
1324 if (DECIMAL_FLOAT_MODE_P (mode))
1325 return default_decimal_float_supported_p ();
1327 return default_scalar_mode_supported_p (mode);
1330 /* Return true if the back end supports vector mode MODE. */
1331 static bool
1332 s390_vector_mode_supported_p (machine_mode mode)
1334 machine_mode inner;
1336 if (!VECTOR_MODE_P (mode)
1337 || !TARGET_VX
1338 || GET_MODE_SIZE (mode) > 16)
1339 return false;
1341 inner = GET_MODE_INNER (mode);
1343 switch (inner)
1345 case E_QImode:
1346 case E_HImode:
1347 case E_SImode:
1348 case E_DImode:
1349 case E_TImode:
1350 case E_SFmode:
1351 case E_DFmode:
1352 case E_TFmode:
1353 return true;
1354 default:
1355 return false;
1359 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1361 void
1362 s390_set_has_landing_pad_p (bool value)
1364 cfun->machine->has_landing_pad_p = value;
1367 /* If two condition code modes are compatible, return a condition code
1368 mode which is compatible with both. Otherwise, return
1369 VOIDmode. */
1371 static machine_mode
1372 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1374 if (m1 == m2)
1375 return m1;
1377 switch (m1)
1379 case E_CCZmode:
1380 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1381 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1382 return m2;
1383 return VOIDmode;
1385 case E_CCSmode:
1386 case E_CCUmode:
1387 case E_CCTmode:
1388 case E_CCSRmode:
1389 case E_CCURmode:
1390 case E_CCZ1mode:
1391 if (m2 == CCZmode)
1392 return m1;
1394 return VOIDmode;
1396 default:
1397 return VOIDmode;
1399 return VOIDmode;
1402 /* Return true if SET either doesn't set the CC register, or else
1403 the source and destination have matching CC modes and that
1404 CC mode is at least as constrained as REQ_MODE. */
1406 static bool
1407 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1409 machine_mode set_mode;
1411 gcc_assert (GET_CODE (set) == SET);
1413 /* These modes are supposed to be used only in CC consumer
1414 patterns. */
1415 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1416 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1418 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1419 return 1;
1421 set_mode = GET_MODE (SET_DEST (set));
1422 switch (set_mode)
1424 case E_CCZ1mode:
1425 case E_CCSmode:
1426 case E_CCSRmode:
1427 case E_CCSFPSmode:
1428 case E_CCUmode:
1429 case E_CCURmode:
1430 case E_CCOmode:
1431 case E_CCLmode:
1432 case E_CCL1mode:
1433 case E_CCL2mode:
1434 case E_CCL3mode:
1435 case E_CCT1mode:
1436 case E_CCT2mode:
1437 case E_CCT3mode:
1438 case E_CCVEQmode:
1439 case E_CCVIHmode:
1440 case E_CCVIHUmode:
1441 case E_CCVFHmode:
1442 case E_CCVFHEmode:
1443 if (req_mode != set_mode)
1444 return 0;
1445 break;
1447 case E_CCZmode:
1448 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1449 && req_mode != CCSRmode && req_mode != CCURmode
1450 && req_mode != CCZ1mode)
1451 return 0;
1452 break;
1454 case E_CCAPmode:
1455 case E_CCANmode:
1456 if (req_mode != CCAmode)
1457 return 0;
1458 break;
1460 default:
1461 gcc_unreachable ();
1464 return (GET_MODE (SET_SRC (set)) == set_mode);
1467 /* Return true if every SET in INSN that sets the CC register
1468 has source and destination with matching CC modes and that
1469 CC mode is at least as constrained as REQ_MODE.
1470 If REQ_MODE is VOIDmode, always return false. */
1472 bool
1473 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1475 int i;
1477 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1478 if (req_mode == VOIDmode)
1479 return false;
1481 if (GET_CODE (PATTERN (insn)) == SET)
1482 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1484 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1485 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1487 rtx set = XVECEXP (PATTERN (insn), 0, i);
1488 if (GET_CODE (set) == SET)
1489 if (!s390_match_ccmode_set (set, req_mode))
1490 return false;
1493 return true;
1496 /* If a test-under-mask instruction can be used to implement
1497 (compare (and ... OP1) OP2), return the CC mode required
1498 to do that. Otherwise, return VOIDmode.
1499 MIXED is true if the instruction can distinguish between
1500 CC1 and CC2 for mixed selected bits (TMxx), it is false
1501 if the instruction cannot (TM). */
1503 machine_mode
1504 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1506 int bit0, bit1;
1508 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1509 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1510 return VOIDmode;
1512 /* Selected bits all zero: CC0.
1513 e.g.: int a; if ((a & (16 + 128)) == 0) */
1514 if (INTVAL (op2) == 0)
1515 return CCTmode;
1517 /* Selected bits all one: CC3.
1518 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1519 if (INTVAL (op2) == INTVAL (op1))
1520 return CCT3mode;
1522 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1523 int a;
1524 if ((a & (16 + 128)) == 16) -> CCT1
1525 if ((a & (16 + 128)) == 128) -> CCT2 */
1526 if (mixed)
1528 bit1 = exact_log2 (INTVAL (op2));
1529 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1530 if (bit0 != -1 && bit1 != -1)
1531 return bit0 > bit1 ? CCT1mode : CCT2mode;
1534 return VOIDmode;
1537 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1538 OP0 and OP1 of a COMPARE, return the mode to be used for the
1539 comparison. */
1541 machine_mode
1542 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1544 switch (code)
1546 case EQ:
1547 case NE:
1548 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 return CCAPmode;
1551 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1552 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1553 return CCAPmode;
1554 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1555 || GET_CODE (op1) == NEG)
1556 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1557 return CCLmode;
1559 if (GET_CODE (op0) == AND)
1561 /* Check whether we can potentially do it via TM. */
1562 machine_mode ccmode;
1563 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1564 if (ccmode != VOIDmode)
1566 /* Relax CCTmode to CCZmode to allow fall-back to AND
1567 if that turns out to be beneficial. */
1568 return ccmode == CCTmode ? CCZmode : ccmode;
1572 if (register_operand (op0, HImode)
1573 && GET_CODE (op1) == CONST_INT
1574 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1575 return CCT3mode;
1576 if (register_operand (op0, QImode)
1577 && GET_CODE (op1) == CONST_INT
1578 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1579 return CCT3mode;
1581 return CCZmode;
1583 case LE:
1584 case LT:
1585 case GE:
1586 case GT:
1587 /* The only overflow condition of NEG and ABS happens when
1588 -INT_MAX is used as parameter, which stays negative. So
1589 we have an overflow from a positive value to a negative.
1590 Using CCAP mode the resulting cc can be used for comparisons. */
1591 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1592 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1593 return CCAPmode;
1595 /* If constants are involved in an add instruction it is possible to use
1596 the resulting cc for comparisons with zero. Knowing the sign of the
1597 constant the overflow behavior gets predictable. e.g.:
1598 int a, b; if ((b = a + c) > 0)
1599 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1600 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1601 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1602 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1603 /* Avoid INT32_MIN on 32 bit. */
1604 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1606 if (INTVAL (XEXP((op0), 1)) < 0)
1607 return CCANmode;
1608 else
1609 return CCAPmode;
1612 /* Fall through. */
1613 case LTGT:
1614 if (HONOR_NANS (op0) || HONOR_NANS (op1))
1615 return CCSFPSmode;
1617 /* Fall through. */
1618 case UNORDERED:
1619 case ORDERED:
1620 case UNEQ:
1621 case UNLE:
1622 case UNLT:
1623 case UNGE:
1624 case UNGT:
1625 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1626 && GET_CODE (op1) != CONST_INT)
1627 return CCSRmode;
1628 return CCSmode;
1630 case LTU:
1631 case GEU:
1632 if (GET_CODE (op0) == PLUS
1633 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1634 return CCL1mode;
1636 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1637 && GET_CODE (op1) != CONST_INT)
1638 return CCURmode;
1639 return CCUmode;
1641 case LEU:
1642 case GTU:
1643 if (GET_CODE (op0) == MINUS
1644 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1645 return CCL2mode;
1647 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1648 && GET_CODE (op1) != CONST_INT)
1649 return CCURmode;
1650 return CCUmode;
1652 default:
1653 gcc_unreachable ();
1657 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1658 that we can implement more efficiently. */
1660 static void
1661 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1662 bool op0_preserve_value)
1664 if (op0_preserve_value)
1665 return;
1667 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1668 if ((*code == EQ || *code == NE)
1669 && *op1 == const0_rtx
1670 && GET_CODE (*op0) == ZERO_EXTRACT
1671 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1672 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1673 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1675 rtx inner = XEXP (*op0, 0);
1676 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1677 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1678 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1680 if (len > 0 && len < modesize
1681 && pos >= 0 && pos + len <= modesize
1682 && modesize <= HOST_BITS_PER_WIDE_INT)
1684 unsigned HOST_WIDE_INT block;
1685 block = (HOST_WIDE_INT_1U << len) - 1;
1686 block <<= modesize - pos - len;
1688 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1689 gen_int_mode (block, GET_MODE (inner)));
1693 /* Narrow AND of memory against immediate to enable TM. */
1694 if ((*code == EQ || *code == NE)
1695 && *op1 == const0_rtx
1696 && GET_CODE (*op0) == AND
1697 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1698 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1700 rtx inner = XEXP (*op0, 0);
1701 rtx mask = XEXP (*op0, 1);
1703 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1704 if (GET_CODE (inner) == SUBREG
1705 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1706 && (GET_MODE_SIZE (GET_MODE (inner))
1707 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1708 && ((INTVAL (mask)
1709 & GET_MODE_MASK (GET_MODE (inner))
1710 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1711 == 0))
1712 inner = SUBREG_REG (inner);
1714 /* Do not change volatile MEMs. */
1715 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1717 int part = s390_single_part (XEXP (*op0, 1),
1718 GET_MODE (inner), QImode, 0);
1719 if (part >= 0)
1721 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1722 inner = adjust_address_nv (inner, QImode, part);
1723 *op0 = gen_rtx_AND (QImode, inner, mask);
1728 /* Narrow comparisons against 0xffff to HImode if possible. */
1729 if ((*code == EQ || *code == NE)
1730 && GET_CODE (*op1) == CONST_INT
1731 && INTVAL (*op1) == 0xffff
1732 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1733 && (nonzero_bits (*op0, GET_MODE (*op0))
1734 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1736 *op0 = gen_lowpart (HImode, *op0);
1737 *op1 = constm1_rtx;
1740 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1741 if (GET_CODE (*op0) == UNSPEC
1742 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1743 && XVECLEN (*op0, 0) == 1
1744 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1745 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1746 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1747 && *op1 == const0_rtx)
1749 enum rtx_code new_code = UNKNOWN;
1750 switch (*code)
1752 case EQ: new_code = EQ; break;
1753 case NE: new_code = NE; break;
1754 case LT: new_code = GTU; break;
1755 case GT: new_code = LTU; break;
1756 case LE: new_code = GEU; break;
1757 case GE: new_code = LEU; break;
1758 default: break;
1761 if (new_code != UNKNOWN)
1763 *op0 = XVECEXP (*op0, 0, 0);
1764 *code = new_code;
1768 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1769 if (GET_CODE (*op0) == UNSPEC
1770 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1771 && XVECLEN (*op0, 0) == 1
1772 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1773 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1774 && CONST_INT_P (*op1))
1776 enum rtx_code new_code = UNKNOWN;
1777 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1779 case E_CCZmode:
1780 case E_CCRAWmode:
1781 switch (*code)
1783 case EQ: new_code = EQ; break;
1784 case NE: new_code = NE; break;
1785 default: break;
1787 break;
1788 default: break;
1791 if (new_code != UNKNOWN)
1793 /* For CCRAWmode put the required cc mask into the second
1794 operand. */
1795 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1796 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1797 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1798 *op0 = XVECEXP (*op0, 0, 0);
1799 *code = new_code;
1803 /* Simplify cascaded EQ, NE with const0_rtx. */
1804 if ((*code == NE || *code == EQ)
1805 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1806 && GET_MODE (*op0) == SImode
1807 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1808 && REG_P (XEXP (*op0, 0))
1809 && XEXP (*op0, 1) == const0_rtx
1810 && *op1 == const0_rtx)
1812 if ((*code == EQ && GET_CODE (*op0) == NE)
1813 || (*code == NE && GET_CODE (*op0) == EQ))
1814 *code = EQ;
1815 else
1816 *code = NE;
1817 *op0 = XEXP (*op0, 0);
1820 /* Prefer register over memory as first operand. */
1821 if (MEM_P (*op0) && REG_P (*op1))
1823 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1824 *code = (int)swap_condition ((enum rtx_code)*code);
1827 /* A comparison result is compared against zero. Replace it with
1828 the (perhaps inverted) original comparison.
1829 This probably should be done by simplify_relational_operation. */
1830 if ((*code == EQ || *code == NE)
1831 && *op1 == const0_rtx
1832 && COMPARISON_P (*op0)
1833 && CC_REG_P (XEXP (*op0, 0)))
1835 enum rtx_code new_code;
1837 if (*code == EQ)
1838 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1839 XEXP (*op0, 0),
1840 XEXP (*op0, 1), NULL);
1841 else
1842 new_code = GET_CODE (*op0);
1844 if (new_code != UNKNOWN)
1846 *code = new_code;
1847 *op1 = XEXP (*op0, 1);
1848 *op0 = XEXP (*op0, 0);
1852 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1853 if (TARGET_Z15
1854 && (*code == EQ || *code == NE)
1855 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1856 && GET_CODE (*op0) == NOT)
1858 machine_mode mode = GET_MODE (*op0);
1859 *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1860 *op0 = gen_rtx_NOT (mode, *op0);
1861 *op1 = const0_rtx;
1864 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1865 if (TARGET_Z15
1866 && (*code == EQ || *code == NE)
1867 && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1868 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1869 && CONST_INT_P (*op1)
1870 && *op1 == constm1_rtx)
1872 machine_mode mode = GET_MODE (*op0);
1873 rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1874 rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1876 if (GET_CODE (*op0) == AND)
1877 *op0 = gen_rtx_IOR (mode, op00, op01);
1878 else
1879 *op0 = gen_rtx_AND (mode, op00, op01);
1881 *op1 = const0_rtx;
1886 /* Emit a compare instruction suitable to implement the comparison
1887 OP0 CODE OP1. Return the correct condition RTL to be placed in
1888 the IF_THEN_ELSE of the conditional branch testing the result. */
1891 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1893 machine_mode mode = s390_select_ccmode (code, op0, op1);
1894 rtx cc;
1896 /* Force OP1 into register in order to satisfy VXE TFmode patterns. */
1897 if (TARGET_VXE && GET_MODE (op1) == TFmode)
1898 op1 = force_reg (TFmode, op1);
1900 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1902 /* Do not output a redundant compare instruction if a
1903 compare_and_swap pattern already computed the result and the
1904 machine modes are compatible. */
1905 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1906 == GET_MODE (op0));
1907 cc = op0;
1909 else
1911 cc = gen_rtx_REG (mode, CC_REGNUM);
1912 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1915 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1918 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1919 MEM, whose address is a pseudo containing the original MEM's address. */
1921 static rtx
1922 s390_legitimize_cs_operand (rtx mem)
1924 rtx tmp;
1926 if (!contains_symbol_ref_p (mem))
1927 return mem;
1928 tmp = gen_reg_rtx (Pmode);
1929 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1930 return change_address (mem, VOIDmode, tmp);
1933 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1934 matches CMP.
1935 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1936 conditional branch testing the result. */
1938 static rtx
1939 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1940 rtx cmp, rtx new_rtx, machine_mode ccmode)
1942 rtx cc;
1944 mem = s390_legitimize_cs_operand (mem);
1945 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1946 switch (GET_MODE (mem))
1948 case E_SImode:
1949 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1950 new_rtx, cc));
1951 break;
1952 case E_DImode:
1953 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1954 new_rtx, cc));
1955 break;
1956 case E_TImode:
1957 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1958 new_rtx, cc));
1959 break;
1960 case E_QImode:
1961 case E_HImode:
1962 default:
1963 gcc_unreachable ();
1965 return s390_emit_compare (code, cc, const0_rtx);
1968 /* Emit a jump instruction to TARGET and return it. If COND is
1969 NULL_RTX, emit an unconditional jump, else a conditional jump under
1970 condition COND. */
1972 rtx_insn *
1973 s390_emit_jump (rtx target, rtx cond)
1975 rtx insn;
1977 target = gen_rtx_LABEL_REF (VOIDmode, target);
1978 if (cond)
1979 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1981 insn = gen_rtx_SET (pc_rtx, target);
1982 return emit_jump_insn (insn);
1985 /* Return branch condition mask to implement a branch
1986 specified by CODE. Return -1 for invalid comparisons. */
1989 s390_branch_condition_mask (rtx code)
1991 const int CC0 = 1 << 3;
1992 const int CC1 = 1 << 2;
1993 const int CC2 = 1 << 1;
1994 const int CC3 = 1 << 0;
1996 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1997 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1998 gcc_assert (XEXP (code, 1) == const0_rtx
1999 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2000 && CONST_INT_P (XEXP (code, 1))));
2003 switch (GET_MODE (XEXP (code, 0)))
2005 case E_CCZmode:
2006 case E_CCZ1mode:
2007 switch (GET_CODE (code))
2009 case EQ: return CC0;
2010 case NE: return CC1 | CC2 | CC3;
2011 default: return -1;
2013 break;
2015 case E_CCT1mode:
2016 switch (GET_CODE (code))
2018 case EQ: return CC1;
2019 case NE: return CC0 | CC2 | CC3;
2020 default: return -1;
2022 break;
2024 case E_CCT2mode:
2025 switch (GET_CODE (code))
2027 case EQ: return CC2;
2028 case NE: return CC0 | CC1 | CC3;
2029 default: return -1;
2031 break;
2033 case E_CCT3mode:
2034 switch (GET_CODE (code))
2036 case EQ: return CC3;
2037 case NE: return CC0 | CC1 | CC2;
2038 default: return -1;
2040 break;
2042 case E_CCLmode:
2043 switch (GET_CODE (code))
2045 case EQ: return CC0 | CC2;
2046 case NE: return CC1 | CC3;
2047 default: return -1;
2049 break;
2051 case E_CCL1mode:
2052 switch (GET_CODE (code))
2054 case LTU: return CC2 | CC3; /* carry */
2055 case GEU: return CC0 | CC1; /* no carry */
2056 default: return -1;
2058 break;
2060 case E_CCL2mode:
2061 switch (GET_CODE (code))
2063 case GTU: return CC0 | CC1; /* borrow */
2064 case LEU: return CC2 | CC3; /* no borrow */
2065 default: return -1;
2067 break;
2069 case E_CCL3mode:
2070 switch (GET_CODE (code))
2072 case EQ: return CC0 | CC2;
2073 case NE: return CC1 | CC3;
2074 case LTU: return CC1;
2075 case GTU: return CC3;
2076 case LEU: return CC1 | CC2;
2077 case GEU: return CC2 | CC3;
2078 default: return -1;
2081 case E_CCUmode:
2082 switch (GET_CODE (code))
2084 case EQ: return CC0;
2085 case NE: return CC1 | CC2 | CC3;
2086 case LTU: return CC1;
2087 case GTU: return CC2;
2088 case LEU: return CC0 | CC1;
2089 case GEU: return CC0 | CC2;
2090 default: return -1;
2092 break;
2094 case E_CCURmode:
2095 switch (GET_CODE (code))
2097 case EQ: return CC0;
2098 case NE: return CC2 | CC1 | CC3;
2099 case LTU: return CC2;
2100 case GTU: return CC1;
2101 case LEU: return CC0 | CC2;
2102 case GEU: return CC0 | CC1;
2103 default: return -1;
2105 break;
2107 case E_CCAPmode:
2108 switch (GET_CODE (code))
2110 case EQ: return CC0;
2111 case NE: return CC1 | CC2 | CC3;
2112 case LT: return CC1 | CC3;
2113 case GT: return CC2;
2114 case LE: return CC0 | CC1 | CC3;
2115 case GE: return CC0 | CC2;
2116 default: return -1;
2118 break;
2120 case E_CCANmode:
2121 switch (GET_CODE (code))
2123 case EQ: return CC0;
2124 case NE: return CC1 | CC2 | CC3;
2125 case LT: return CC1;
2126 case GT: return CC2 | CC3;
2127 case LE: return CC0 | CC1;
2128 case GE: return CC0 | CC2 | CC3;
2129 default: return -1;
2131 break;
2133 case E_CCOmode:
2134 switch (GET_CODE (code))
2136 case EQ: return CC0 | CC1 | CC2;
2137 case NE: return CC3;
2138 default: return -1;
2140 break;
2142 case E_CCSmode:
2143 case E_CCSFPSmode:
2144 switch (GET_CODE (code))
2146 case EQ: return CC0;
2147 case NE: return CC1 | CC2 | CC3;
2148 case LT: return CC1;
2149 case GT: return CC2;
2150 case LE: return CC0 | CC1;
2151 case GE: return CC0 | CC2;
2152 case UNORDERED: return CC3;
2153 case ORDERED: return CC0 | CC1 | CC2;
2154 case UNEQ: return CC0 | CC3;
2155 case UNLT: return CC1 | CC3;
2156 case UNGT: return CC2 | CC3;
2157 case UNLE: return CC0 | CC1 | CC3;
2158 case UNGE: return CC0 | CC2 | CC3;
2159 case LTGT: return CC1 | CC2;
2160 default: return -1;
2162 break;
2164 case E_CCSRmode:
2165 switch (GET_CODE (code))
2167 case EQ: return CC0;
2168 case NE: return CC2 | CC1 | CC3;
2169 case LT: return CC2;
2170 case GT: return CC1;
2171 case LE: return CC0 | CC2;
2172 case GE: return CC0 | CC1;
2173 case UNORDERED: return CC3;
2174 case ORDERED: return CC0 | CC2 | CC1;
2175 case UNEQ: return CC0 | CC3;
2176 case UNLT: return CC2 | CC3;
2177 case UNGT: return CC1 | CC3;
2178 case UNLE: return CC0 | CC2 | CC3;
2179 case UNGE: return CC0 | CC1 | CC3;
2180 case LTGT: return CC2 | CC1;
2181 default: return -1;
2183 break;
2185 /* Vector comparison modes. */
2186 /* CC2 will never be set. It however is part of the negated
2187 masks. */
2188 case E_CCVIALLmode:
2189 switch (GET_CODE (code))
2191 case EQ:
2192 case GTU:
2193 case GT:
2194 case GE: return CC0;
2195 /* The inverted modes are in fact *any* modes. */
2196 case NE:
2197 case LEU:
2198 case LE:
2199 case LT: return CC3 | CC1 | CC2;
2200 default: return -1;
2203 case E_CCVIANYmode:
2204 switch (GET_CODE (code))
2206 case EQ:
2207 case GTU:
2208 case GT:
2209 case GE: return CC0 | CC1;
2210 /* The inverted modes are in fact *all* modes. */
2211 case NE:
2212 case LEU:
2213 case LE:
2214 case LT: return CC3 | CC2;
2215 default: return -1;
2217 case E_CCVFALLmode:
2218 switch (GET_CODE (code))
2220 case EQ:
2221 case GT:
2222 case GE: return CC0;
2223 /* The inverted modes are in fact *any* modes. */
2224 case NE:
2225 case UNLE:
2226 case UNLT: return CC3 | CC1 | CC2;
2227 default: return -1;
2230 case E_CCVFANYmode:
2231 switch (GET_CODE (code))
2233 case EQ:
2234 case GT:
2235 case GE: return CC0 | CC1;
2236 /* The inverted modes are in fact *all* modes. */
2237 case NE:
2238 case UNLE:
2239 case UNLT: return CC3 | CC2;
2240 default: return -1;
2243 case E_CCRAWmode:
2244 switch (GET_CODE (code))
2246 case EQ:
2247 return INTVAL (XEXP (code, 1));
2248 case NE:
2249 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2250 default:
2251 gcc_unreachable ();
2254 default:
2255 return -1;
2260 /* Return branch condition mask to implement a compare and branch
2261 specified by CODE. Return -1 for invalid comparisons. */
2264 s390_compare_and_branch_condition_mask (rtx code)
2266 const int CC0 = 1 << 3;
2267 const int CC1 = 1 << 2;
2268 const int CC2 = 1 << 1;
2270 switch (GET_CODE (code))
2272 case EQ:
2273 return CC0;
2274 case NE:
2275 return CC1 | CC2;
2276 case LT:
2277 case LTU:
2278 return CC1;
2279 case GT:
2280 case GTU:
2281 return CC2;
2282 case LE:
2283 case LEU:
2284 return CC0 | CC1;
2285 case GE:
2286 case GEU:
2287 return CC0 | CC2;
2288 default:
2289 gcc_unreachable ();
2291 return -1;
2294 /* If INV is false, return assembler mnemonic string to implement
2295 a branch specified by CODE. If INV is true, return mnemonic
2296 for the corresponding inverted branch. */
2298 static const char *
2299 s390_branch_condition_mnemonic (rtx code, int inv)
2301 int mask;
2303 static const char *const mnemonic[16] =
2305 NULL, "o", "h", "nle",
2306 "l", "nhe", "lh", "ne",
2307 "e", "nlh", "he", "nl",
2308 "le", "nh", "no", NULL
2311 if (GET_CODE (XEXP (code, 0)) == REG
2312 && REGNO (XEXP (code, 0)) == CC_REGNUM
2313 && (XEXP (code, 1) == const0_rtx
2314 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2315 && CONST_INT_P (XEXP (code, 1)))))
2316 mask = s390_branch_condition_mask (code);
2317 else
2318 mask = s390_compare_and_branch_condition_mask (code);
2320 gcc_assert (mask >= 0);
2322 if (inv)
2323 mask ^= 15;
2325 gcc_assert (mask >= 1 && mask <= 14);
2327 return mnemonic[mask];
2330 /* Return the part of op which has a value different from def.
2331 The size of the part is determined by mode.
2332 Use this function only if you already know that op really
2333 contains such a part. */
2335 unsigned HOST_WIDE_INT
2336 s390_extract_part (rtx op, machine_mode mode, int def)
2338 unsigned HOST_WIDE_INT value = 0;
2339 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2340 int part_bits = GET_MODE_BITSIZE (mode);
2341 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2342 int i;
2344 for (i = 0; i < max_parts; i++)
2346 if (i == 0)
2347 value = UINTVAL (op);
2348 else
2349 value >>= part_bits;
2351 if ((value & part_mask) != (def & part_mask))
2352 return value & part_mask;
2355 gcc_unreachable ();
2358 /* If OP is an integer constant of mode MODE with exactly one
2359 part of mode PART_MODE unequal to DEF, return the number of that
2360 part. Otherwise, return -1. */
2363 s390_single_part (rtx op,
2364 machine_mode mode,
2365 machine_mode part_mode,
2366 int def)
2368 unsigned HOST_WIDE_INT value = 0;
2369 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2370 unsigned HOST_WIDE_INT part_mask
2371 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2372 int i, part = -1;
2374 if (GET_CODE (op) != CONST_INT)
2375 return -1;
2377 for (i = 0; i < n_parts; i++)
2379 if (i == 0)
2380 value = UINTVAL (op);
2381 else
2382 value >>= GET_MODE_BITSIZE (part_mode);
2384 if ((value & part_mask) != (def & part_mask))
2386 if (part != -1)
2387 return -1;
2388 else
2389 part = i;
2392 return part == -1 ? -1 : n_parts - 1 - part;
2395 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2396 bits and no other bits are set in (the lower SIZE bits of) IN.
2398 PSTART and PEND can be used to obtain the start and end
2399 position (inclusive) of the bitfield relative to 64
2400 bits. *PSTART / *PEND gives the position of the first/last bit
2401 of the bitfield counting from the highest order bit starting
2402 with zero. */
2404 bool
2405 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2406 int *pstart, int *pend)
2408 int start;
2409 int end = -1;
2410 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2411 int highbit = HOST_BITS_PER_WIDE_INT - size;
2412 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2414 gcc_assert (!!pstart == !!pend);
2415 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2416 if (end == -1)
2418 /* Look for the rightmost bit of a contiguous range of ones. */
2419 if (bitmask & in)
2420 /* Found it. */
2421 end = start;
2423 else
2425 /* Look for the firt zero bit after the range of ones. */
2426 if (! (bitmask & in))
2427 /* Found it. */
2428 break;
2430 /* We're one past the last one-bit. */
2431 start++;
2433 if (end == -1)
2434 /* No one bits found. */
2435 return false;
2437 if (start > highbit)
2439 unsigned HOST_WIDE_INT mask;
2441 /* Calculate a mask for all bits beyond the contiguous bits. */
2442 mask = ((~HOST_WIDE_INT_0U >> highbit)
2443 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2444 if (mask & in)
2445 /* There are more bits set beyond the first range of one bits. */
2446 return false;
2449 if (pstart)
2451 *pstart = start;
2452 *pend = end;
2455 return true;
2458 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2459 if ~IN contains a contiguous bitfield. In that case, *END is <
2460 *START.
2462 If WRAP_P is true, a bitmask that wraps around is also tested.
2463 When a wraparoud occurs *START is greater than *END (in
2464 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2465 part of the range. If WRAP_P is false, no wraparound is
2466 tested. */
2468 bool
2469 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2470 int size, int *start, int *end)
2472 int bs = HOST_BITS_PER_WIDE_INT;
2473 bool b;
2475 gcc_assert (!!start == !!end);
2476 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2477 /* This cannot be expressed as a contiguous bitmask. Exit early because
2478 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2479 a valid bitmask. */
2480 return false;
2481 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2482 if (b)
2483 return true;
2484 if (! wrap_p)
2485 return false;
2486 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2487 if (b && start)
2489 int s = *start;
2490 int e = *end;
2492 gcc_assert (s >= 1);
2493 *start = ((e + 1) & (bs - 1));
2494 *end = ((s - 1 + bs) & (bs - 1));
2497 return b;
2500 /* Return true if OP contains the same contiguous bitfield in *all*
2501 its elements. START and END can be used to obtain the start and
2502 end position of the bitfield.
2504 START/STOP give the position of the first/last bit of the bitfield
2505 counting from the lowest order bit starting with zero. In order to
2506 use these values for S/390 instructions this has to be converted to
2507 "bits big endian" style. */
2509 bool
2510 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2512 unsigned HOST_WIDE_INT mask;
2513 int size;
2514 rtx elt;
2515 bool b;
2517 /* Handle floats by bitcasting them to ints. */
2518 op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
2520 gcc_assert (!!start == !!end);
2521 if (!const_vec_duplicate_p (op, &elt)
2522 || !CONST_INT_P (elt))
2523 return false;
2525 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2527 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2528 if (size > 64)
2529 return false;
2531 mask = UINTVAL (elt);
2533 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2534 if (b)
2536 if (start)
2538 *start -= (HOST_BITS_PER_WIDE_INT - size);
2539 *end -= (HOST_BITS_PER_WIDE_INT - size);
2541 return true;
2543 else
2544 return false;
2547 /* Return true if C consists only of byte chunks being either 0 or
2548 0xff. If MASK is !=NULL a byte mask is generated which is
2549 appropriate for the vector generate byte mask instruction. */
2551 bool
2552 s390_bytemask_vector_p (rtx op, unsigned *mask)
2554 int i;
2555 unsigned tmp_mask = 0;
2556 int nunit, unit_size;
2558 if (!VECTOR_MODE_P (GET_MODE (op))
2559 || GET_CODE (op) != CONST_VECTOR
2560 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2561 return false;
2563 nunit = GET_MODE_NUNITS (GET_MODE (op));
2564 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2566 for (i = 0; i < nunit; i++)
2568 unsigned HOST_WIDE_INT c;
2569 int j;
2571 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2572 return false;
2574 c = UINTVAL (XVECEXP (op, 0, i));
2575 for (j = 0; j < unit_size; j++)
2577 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2578 return false;
2579 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2580 c = c >> BITS_PER_UNIT;
2584 if (mask != NULL)
2585 *mask = tmp_mask;
2587 return true;
2590 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2591 equivalent to a shift followed by the AND. In particular, CONTIG
2592 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2593 for ROTL indicate a rotate to the right. */
2595 bool
2596 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2598 int start, end;
2599 bool ok;
2601 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2602 gcc_assert (ok);
2604 if (rotl >= 0)
2605 return (64 - end >= rotl);
2606 else
2608 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2609 DIMode. */
2610 rotl = -rotl + (64 - bitsize);
2611 return (start >= rotl);
2615 /* Check whether we can (and want to) split a double-word
2616 move in mode MODE from SRC to DST into two single-word
2617 moves, moving the subword FIRST_SUBWORD first. */
2619 bool
2620 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2622 /* Floating point and vector registers cannot be split. */
2623 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2624 return false;
2626 /* Non-offsettable memory references cannot be split. */
2627 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2628 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2629 return false;
2631 /* Moving the first subword must not clobber a register
2632 needed to move the second subword. */
2633 if (register_operand (dst, mode))
2635 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2636 if (reg_overlap_mentioned_p (subreg, src))
2637 return false;
2640 return true;
2643 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2644 and [MEM2, MEM2 + SIZE] do overlap and false
2645 otherwise. */
2647 bool
2648 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2650 rtx addr1, addr2, addr_delta;
2651 HOST_WIDE_INT delta;
2653 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2654 return true;
2656 if (size == 0)
2657 return false;
2659 addr1 = XEXP (mem1, 0);
2660 addr2 = XEXP (mem2, 0);
2662 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2664 /* This overlapping check is used by peepholes merging memory block operations.
2665 Overlapping operations would otherwise be recognized by the S/390 hardware
2666 and would fall back to a slower implementation. Allowing overlapping
2667 operations would lead to slow code but not to wrong code. Therefore we are
2668 somewhat optimistic if we cannot prove that the memory blocks are
2669 overlapping.
2670 That's why we return false here although this may accept operations on
2671 overlapping memory areas. */
2672 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2673 return false;
2675 delta = INTVAL (addr_delta);
2677 if (delta == 0
2678 || (delta > 0 && delta < size)
2679 || (delta < 0 && -delta < size))
2680 return true;
2682 return false;
2685 /* Check whether the address of memory reference MEM2 equals exactly
2686 the address of memory reference MEM1 plus DELTA. Return true if
2687 we can prove this to be the case, false otherwise. */
2689 bool
2690 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2692 rtx addr1, addr2, addr_delta;
2694 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2695 return false;
2697 addr1 = XEXP (mem1, 0);
2698 addr2 = XEXP (mem2, 0);
2700 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2701 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2702 return false;
2704 return true;
2707 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2709 void
2710 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2711 rtx *operands)
2713 machine_mode wmode = mode;
2714 rtx dst = operands[0];
2715 rtx src1 = operands[1];
2716 rtx src2 = operands[2];
2717 rtx op, clob, tem;
2719 /* If we cannot handle the operation directly, use a temp register. */
2720 if (!s390_logical_operator_ok_p (operands))
2721 dst = gen_reg_rtx (mode);
2723 /* QImode and HImode patterns make sense only if we have a destination
2724 in memory. Otherwise perform the operation in SImode. */
2725 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2726 wmode = SImode;
2728 /* Widen operands if required. */
2729 if (mode != wmode)
2731 if (GET_CODE (dst) == SUBREG
2732 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2733 dst = tem;
2734 else if (REG_P (dst))
2735 dst = gen_rtx_SUBREG (wmode, dst, 0);
2736 else
2737 dst = gen_reg_rtx (wmode);
2739 if (GET_CODE (src1) == SUBREG
2740 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2741 src1 = tem;
2742 else if (GET_MODE (src1) != VOIDmode)
2743 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2745 if (GET_CODE (src2) == SUBREG
2746 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2747 src2 = tem;
2748 else if (GET_MODE (src2) != VOIDmode)
2749 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2752 /* Emit the instruction. */
2753 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2754 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2755 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2757 /* Fix up the destination if needed. */
2758 if (dst != operands[0])
2759 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2762 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2764 bool
2765 s390_logical_operator_ok_p (rtx *operands)
2767 /* If the destination operand is in memory, it needs to coincide
2768 with one of the source operands. After reload, it has to be
2769 the first source operand. */
2770 if (GET_CODE (operands[0]) == MEM)
2771 return rtx_equal_p (operands[0], operands[1])
2772 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2774 return true;
2777 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2778 operand IMMOP to switch from SS to SI type instructions. */
2780 void
2781 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2783 int def = code == AND ? -1 : 0;
2784 HOST_WIDE_INT mask;
2785 int part;
2787 gcc_assert (GET_CODE (*memop) == MEM);
2788 gcc_assert (!MEM_VOLATILE_P (*memop));
2790 mask = s390_extract_part (*immop, QImode, def);
2791 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2792 gcc_assert (part >= 0);
2794 *memop = adjust_address (*memop, QImode, part);
2795 *immop = gen_int_mode (mask, QImode);
2799 /* How to allocate a 'struct machine_function'. */
2801 static struct machine_function *
2802 s390_init_machine_status (void)
2804 return ggc_cleared_alloc<machine_function> ();
2807 /* Map for smallest class containing reg regno. */
2809 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2810 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2811 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2812 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2813 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2814 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2815 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2816 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2817 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2818 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2819 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2820 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2821 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2822 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2823 VEC_REGS, VEC_REGS /* 52 */
2826 /* Return attribute type of insn. */
2828 static enum attr_type
2829 s390_safe_attr_type (rtx_insn *insn)
2831 if (recog_memoized (insn) >= 0)
2832 return get_attr_type (insn);
2833 else
2834 return TYPE_NONE;
2837 /* Return attribute relative_long of insn. */
2839 static bool
2840 s390_safe_relative_long_p (rtx_insn *insn)
2842 if (recog_memoized (insn) >= 0)
2843 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2844 else
2845 return false;
2848 /* Return true if DISP is a valid short displacement. */
2850 static bool
2851 s390_short_displacement (rtx disp)
2853 /* No displacement is OK. */
2854 if (!disp)
2855 return true;
2857 /* Without the long displacement facility we don't need to
2858 distingiush between long and short displacement. */
2859 if (!TARGET_LONG_DISPLACEMENT)
2860 return true;
2862 /* Integer displacement in range. */
2863 if (GET_CODE (disp) == CONST_INT)
2864 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2866 /* GOT offset is not OK, the GOT can be large. */
2867 if (GET_CODE (disp) == CONST
2868 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2869 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2870 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2871 return false;
2873 /* All other symbolic constants are literal pool references,
2874 which are OK as the literal pool must be small. */
2875 if (GET_CODE (disp) == CONST)
2876 return true;
2878 return false;
2881 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2882 If successful, also determines the
2883 following characteristics of `ref': `is_ptr' - whether it can be an
2884 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2885 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2886 considered a literal pool pointer for purposes of avoiding two different
2887 literal pool pointers per insn during or after reload (`B' constraint). */
2888 static bool
2889 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2890 bool *is_base_ptr, bool *is_pool_ptr)
2892 if (!*ref)
2893 return true;
2895 if (GET_CODE (*ref) == UNSPEC)
2896 switch (XINT (*ref, 1))
2898 case UNSPEC_LTREF:
2899 if (!*disp)
2900 *disp = gen_rtx_UNSPEC (Pmode,
2901 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2902 UNSPEC_LTREL_OFFSET);
2903 else
2904 return false;
2906 *ref = XVECEXP (*ref, 0, 1);
2907 break;
2909 default:
2910 return false;
2913 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2914 return false;
2916 if (REGNO (*ref) == STACK_POINTER_REGNUM
2917 || REGNO (*ref) == FRAME_POINTER_REGNUM
2918 || ((reload_completed || reload_in_progress)
2919 && frame_pointer_needed
2920 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2921 || REGNO (*ref) == ARG_POINTER_REGNUM
2922 || (flag_pic
2923 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2924 *is_ptr = *is_base_ptr = true;
2926 if ((reload_completed || reload_in_progress)
2927 && *ref == cfun->machine->base_reg)
2928 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2930 return true;
2933 /* Decompose a RTL expression ADDR for a memory address into
2934 its components, returned in OUT.
2936 Returns false if ADDR is not a valid memory address, true
2937 otherwise. If OUT is NULL, don't return the components,
2938 but check for validity only.
2940 Note: Only addresses in canonical form are recognized.
2941 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2942 canonical form so that they will be recognized. */
2944 static int
2945 s390_decompose_address (rtx addr, struct s390_address *out)
2947 HOST_WIDE_INT offset = 0;
2948 rtx base = NULL_RTX;
2949 rtx indx = NULL_RTX;
2950 rtx disp = NULL_RTX;
2951 rtx orig_disp;
2952 bool pointer = false;
2953 bool base_ptr = false;
2954 bool indx_ptr = false;
2955 bool literal_pool = false;
2957 /* We may need to substitute the literal pool base register into the address
2958 below. However, at this point we do not know which register is going to
2959 be used as base, so we substitute the arg pointer register. This is going
2960 to be treated as holding a pointer below -- it shouldn't be used for any
2961 other purpose. */
2962 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2964 /* Decompose address into base + index + displacement. */
2966 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2967 base = addr;
2969 else if (GET_CODE (addr) == PLUS)
2971 rtx op0 = XEXP (addr, 0);
2972 rtx op1 = XEXP (addr, 1);
2973 enum rtx_code code0 = GET_CODE (op0);
2974 enum rtx_code code1 = GET_CODE (op1);
2976 if (code0 == REG || code0 == UNSPEC)
2978 if (code1 == REG || code1 == UNSPEC)
2980 indx = op0; /* index + base */
2981 base = op1;
2984 else
2986 base = op0; /* base + displacement */
2987 disp = op1;
2991 else if (code0 == PLUS)
2993 indx = XEXP (op0, 0); /* index + base + disp */
2994 base = XEXP (op0, 1);
2995 disp = op1;
2998 else
3000 return false;
3004 else
3005 disp = addr; /* displacement */
3007 /* Extract integer part of displacement. */
3008 orig_disp = disp;
3009 if (disp)
3011 if (GET_CODE (disp) == CONST_INT)
3013 offset = INTVAL (disp);
3014 disp = NULL_RTX;
3016 else if (GET_CODE (disp) == CONST
3017 && GET_CODE (XEXP (disp, 0)) == PLUS
3018 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3020 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
3021 disp = XEXP (XEXP (disp, 0), 0);
3025 /* Strip off CONST here to avoid special case tests later. */
3026 if (disp && GET_CODE (disp) == CONST)
3027 disp = XEXP (disp, 0);
3029 /* We can convert literal pool addresses to
3030 displacements by basing them off the base register. */
3031 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
3033 if (base || indx)
3034 return false;
3036 base = fake_pool_base, literal_pool = true;
3038 /* Mark up the displacement. */
3039 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
3040 UNSPEC_LTREL_OFFSET);
3043 /* Validate base register. */
3044 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
3045 &literal_pool))
3046 return false;
3048 /* Validate index register. */
3049 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3050 &literal_pool))
3051 return false;
3053 /* Prefer to use pointer as base, not index. */
3054 if (base && indx && !base_ptr
3055 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3057 rtx tmp = base;
3058 base = indx;
3059 indx = tmp;
3062 /* Validate displacement. */
3063 if (!disp)
3065 /* If virtual registers are involved, the displacement will change later
3066 anyway as the virtual registers get eliminated. This could make a
3067 valid displacement invalid, but it is more likely to make an invalid
3068 displacement valid, because we sometimes access the register save area
3069 via negative offsets to one of those registers.
3070 Thus we don't check the displacement for validity here. If after
3071 elimination the displacement turns out to be invalid after all,
3072 this is fixed up by reload in any case. */
3073 /* LRA maintains always displacements up to date and we need to
3074 know the displacement is right during all LRA not only at the
3075 final elimination. */
3076 if (lra_in_progress
3077 || (base != arg_pointer_rtx
3078 && indx != arg_pointer_rtx
3079 && base != return_address_pointer_rtx
3080 && indx != return_address_pointer_rtx
3081 && base != frame_pointer_rtx
3082 && indx != frame_pointer_rtx
3083 && base != virtual_stack_vars_rtx
3084 && indx != virtual_stack_vars_rtx))
3085 if (!DISP_IN_RANGE (offset))
3086 return false;
3088 else
3090 /* All the special cases are pointers. */
3091 pointer = true;
3093 /* In the small-PIC case, the linker converts @GOT
3094 and @GOTNTPOFF offsets to possible displacements. */
3095 if (GET_CODE (disp) == UNSPEC
3096 && (XINT (disp, 1) == UNSPEC_GOT
3097 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3098 && flag_pic == 1)
3103 /* Accept pool label offsets. */
3104 else if (GET_CODE (disp) == UNSPEC
3105 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3108 /* Accept literal pool references. */
3109 else if (GET_CODE (disp) == UNSPEC
3110 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3112 /* In case CSE pulled a non literal pool reference out of
3113 the pool we have to reject the address. This is
3114 especially important when loading the GOT pointer on non
3115 zarch CPUs. In this case the literal pool contains an lt
3116 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3117 will most likely exceed the displacement. */
3118 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3119 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3120 return false;
3122 orig_disp = gen_rtx_CONST (Pmode, disp);
3123 if (offset)
3125 /* If we have an offset, make sure it does not
3126 exceed the size of the constant pool entry.
3127 Otherwise we might generate an out-of-range
3128 displacement for the base register form. */
3129 rtx sym = XVECEXP (disp, 0, 0);
3130 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3131 return false;
3133 orig_disp = plus_constant (Pmode, orig_disp, offset);
3137 else
3138 return false;
3141 if (!base && !indx)
3142 pointer = true;
3144 if (out)
3146 out->base = base;
3147 out->indx = indx;
3148 out->disp = orig_disp;
3149 out->pointer = pointer;
3150 out->literal_pool = literal_pool;
3153 return true;
3156 /* Decompose a RTL expression OP for an address style operand into its
3157 components, and return the base register in BASE and the offset in
3158 OFFSET. While OP looks like an address it is never supposed to be
3159 used as such.
3161 Return true if OP is a valid address operand, false if not. */
3163 bool
3164 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3165 HOST_WIDE_INT *offset)
3167 rtx off = NULL_RTX;
3169 /* We can have an integer constant, an address register,
3170 or a sum of the two. */
3171 if (CONST_SCALAR_INT_P (op))
3173 off = op;
3174 op = NULL_RTX;
3176 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3178 off = XEXP (op, 1);
3179 op = XEXP (op, 0);
3181 while (op && GET_CODE (op) == SUBREG)
3182 op = SUBREG_REG (op);
3184 if (op && GET_CODE (op) != REG)
3185 return false;
3187 if (offset)
3189 if (off == NULL_RTX)
3190 *offset = 0;
3191 else if (CONST_INT_P (off))
3192 *offset = INTVAL (off);
3193 else if (CONST_WIDE_INT_P (off))
3194 /* The offset will anyway be cut down to 12 bits so take just
3195 the lowest order chunk of the wide int. */
3196 *offset = CONST_WIDE_INT_ELT (off, 0);
3197 else
3198 gcc_unreachable ();
3200 if (base)
3201 *base = op;
3203 return true;
3206 /* Check that OP is a valid shift count operand.
3207 It should be of the following structure:
3208 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3209 where subreg, and and plus are optional.
3211 If IMPLICIT_MASK is > 0 and OP contains and
3212 (AND ... immediate)
3213 it is checked whether IMPLICIT_MASK and the immediate match.
3214 Otherwise, no checking is performed.
3216 bool
3217 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3219 /* Strip subreg. */
3220 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3221 op = XEXP (op, 0);
3223 /* Check for an and with proper constant. */
3224 if (GET_CODE (op) == AND)
3226 rtx op1 = XEXP (op, 0);
3227 rtx imm = XEXP (op, 1);
3229 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3230 op1 = XEXP (op1, 0);
3232 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3233 return false;
3235 if (!immediate_operand (imm, GET_MODE (imm)))
3236 return false;
3238 HOST_WIDE_INT val = INTVAL (imm);
3239 if (implicit_mask > 0
3240 && (val & implicit_mask) != implicit_mask)
3241 return false;
3243 op = op1;
3246 /* Check the rest. */
3247 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3250 /* Return true if CODE is a valid address without index. */
3252 bool
3253 s390_legitimate_address_without_index_p (rtx op)
3255 struct s390_address addr;
3257 if (!s390_decompose_address (XEXP (op, 0), &addr))
3258 return false;
3259 if (addr.indx)
3260 return false;
3262 return true;
3266 /* Return TRUE if ADDR is an operand valid for a load/store relative
3267 instruction. Be aware that the alignment of the operand needs to
3268 be checked separately.
3269 Valid addresses are single references or a sum of a reference and a
3270 constant integer. Return these parts in SYMREF and ADDEND. You can
3271 pass NULL in REF and/or ADDEND if you are not interested in these
3272 values. */
3274 static bool
3275 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3277 HOST_WIDE_INT tmpaddend = 0;
3279 if (GET_CODE (addr) == CONST)
3280 addr = XEXP (addr, 0);
3282 if (GET_CODE (addr) == PLUS)
3284 if (!CONST_INT_P (XEXP (addr, 1)))
3285 return false;
3287 tmpaddend = INTVAL (XEXP (addr, 1));
3288 addr = XEXP (addr, 0);
3291 if (GET_CODE (addr) == SYMBOL_REF
3292 || (GET_CODE (addr) == UNSPEC
3293 && (XINT (addr, 1) == UNSPEC_GOTENT
3294 || XINT (addr, 1) == UNSPEC_PLT31)))
3296 if (symref)
3297 *symref = addr;
3298 if (addend)
3299 *addend = tmpaddend;
3301 return true;
3303 return false;
3306 /* Return true if the address in OP is valid for constraint letter C
3307 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3308 pool MEMs should be accepted. Only the Q, R, S, T constraint
3309 letters are allowed for C. */
3311 static int
3312 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3314 rtx symref;
3315 struct s390_address addr;
3316 bool decomposed = false;
3318 if (!address_operand (op, GET_MODE (op)))
3319 return 0;
3321 /* This check makes sure that no symbolic address (except literal
3322 pool references) are accepted by the R or T constraints. */
3323 if (s390_loadrelative_operand_p (op, &symref, NULL)
3324 && (!lit_pool_ok
3325 || !SYMBOL_REF_P (symref)
3326 || !CONSTANT_POOL_ADDRESS_P (symref)))
3327 return 0;
3329 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3330 if (!lit_pool_ok)
3332 if (!s390_decompose_address (op, &addr))
3333 return 0;
3334 if (addr.literal_pool)
3335 return 0;
3336 decomposed = true;
3339 /* With reload, we sometimes get intermediate address forms that are
3340 actually invalid as-is, but we need to accept them in the most
3341 generic cases below ('R' or 'T'), since reload will in fact fix
3342 them up. LRA behaves differently here; we never see such forms,
3343 but on the other hand, we need to strictly reject every invalid
3344 address form. After both reload and LRA invalid address forms
3345 must be rejected, because nothing will fix them up later. Perform
3346 this check right up front. */
3347 if (lra_in_progress || reload_completed)
3349 if (!decomposed && !s390_decompose_address (op, &addr))
3350 return 0;
3351 decomposed = true;
3354 switch (c)
3356 case 'Q': /* no index short displacement */
3357 if (!decomposed && !s390_decompose_address (op, &addr))
3358 return 0;
3359 if (addr.indx)
3360 return 0;
3361 if (!s390_short_displacement (addr.disp))
3362 return 0;
3363 break;
3365 case 'R': /* with index short displacement */
3366 if (TARGET_LONG_DISPLACEMENT)
3368 if (!decomposed && !s390_decompose_address (op, &addr))
3369 return 0;
3370 if (!s390_short_displacement (addr.disp))
3371 return 0;
3373 /* Any invalid address here will be fixed up by reload,
3374 so accept it for the most generic constraint. */
3375 break;
3377 case 'S': /* no index long displacement */
3378 if (!decomposed && !s390_decompose_address (op, &addr))
3379 return 0;
3380 if (addr.indx)
3381 return 0;
3382 break;
3384 case 'T': /* with index long displacement */
3385 /* Any invalid address here will be fixed up by reload,
3386 so accept it for the most generic constraint. */
3387 break;
3389 default:
3390 return 0;
3392 return 1;
3396 /* Evaluates constraint strings described by the regular expression
3397 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3398 the constraint given in STR, or 0 else. */
3401 s390_mem_constraint (const char *str, rtx op)
3403 char c = str[0];
3405 switch (c)
3407 case 'A':
3408 /* Check for offsettable variants of memory constraints. */
3409 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3410 return 0;
3411 if ((reload_completed || reload_in_progress)
3412 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3413 return 0;
3414 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3415 case 'B':
3416 /* Check for non-literal-pool variants of memory constraints. */
3417 if (!MEM_P (op))
3418 return 0;
3419 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3420 case 'Q':
3421 case 'R':
3422 case 'S':
3423 case 'T':
3424 if (GET_CODE (op) != MEM)
3425 return 0;
3426 return s390_check_qrst_address (c, XEXP (op, 0), true);
3427 case 'Y':
3428 /* Simply check for the basic form of a shift count. Reload will
3429 take care of making sure we have a proper base register. */
3430 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3431 return 0;
3432 break;
3433 case 'Z':
3434 return s390_check_qrst_address (str[1], op, true);
3435 default:
3436 return 0;
3438 return 1;
3442 /* Evaluates constraint strings starting with letter O. Input
3443 parameter C is the second letter following the "O" in the constraint
3444 string. Returns 1 if VALUE meets the respective constraint and 0
3445 otherwise. */
3448 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3450 if (!TARGET_EXTIMM)
3451 return 0;
3453 switch (c)
3455 case 's':
3456 return trunc_int_for_mode (value, SImode) == value;
3458 case 'p':
3459 return value == 0
3460 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3462 case 'n':
3463 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3465 default:
3466 gcc_unreachable ();
3471 /* Evaluates constraint strings starting with letter N. Parameter STR
3472 contains the letters following letter "N" in the constraint string.
3473 Returns true if VALUE matches the constraint. */
3476 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3478 machine_mode mode, part_mode;
3479 int def;
3480 int part, part_goal;
3483 if (str[0] == 'x')
3484 part_goal = -1;
3485 else
3486 part_goal = str[0] - '0';
3488 switch (str[1])
3490 case 'Q':
3491 part_mode = QImode;
3492 break;
3493 case 'H':
3494 part_mode = HImode;
3495 break;
3496 case 'S':
3497 part_mode = SImode;
3498 break;
3499 default:
3500 return 0;
3503 switch (str[2])
3505 case 'H':
3506 mode = HImode;
3507 break;
3508 case 'S':
3509 mode = SImode;
3510 break;
3511 case 'D':
3512 mode = DImode;
3513 break;
3514 default:
3515 return 0;
3518 switch (str[3])
3520 case '0':
3521 def = 0;
3522 break;
3523 case 'F':
3524 def = -1;
3525 break;
3526 default:
3527 return 0;
3530 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3531 return 0;
3533 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3534 if (part < 0)
3535 return 0;
3536 if (part_goal != -1 && part_goal != part)
3537 return 0;
3539 return 1;
3543 /* Returns true if the input parameter VALUE is a float zero. */
3546 s390_float_const_zero_p (rtx value)
3548 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3549 && value == CONST0_RTX (GET_MODE (value)));
3552 /* Implement TARGET_REGISTER_MOVE_COST. */
3554 static int
3555 s390_register_move_cost (machine_mode mode,
3556 reg_class_t from, reg_class_t to)
3558 /* On s390, copy between fprs and gprs is expensive. */
3560 /* It becomes somewhat faster having ldgr/lgdr. */
3561 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3563 /* ldgr is single cycle. */
3564 if (reg_classes_intersect_p (from, GENERAL_REGS)
3565 && reg_classes_intersect_p (to, FP_REGS))
3566 return 1;
3567 /* lgdr needs 3 cycles. */
3568 if (reg_classes_intersect_p (to, GENERAL_REGS)
3569 && reg_classes_intersect_p (from, FP_REGS))
3570 return 3;
3573 /* Otherwise copying is done via memory. */
3574 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3575 && reg_classes_intersect_p (to, FP_REGS))
3576 || (reg_classes_intersect_p (from, FP_REGS)
3577 && reg_classes_intersect_p (to, GENERAL_REGS)))
3578 return 10;
3580 /* We usually do not want to copy via CC. */
3581 if (reg_classes_intersect_p (from, CC_REGS)
3582 || reg_classes_intersect_p (to, CC_REGS))
3583 return 5;
3585 return 1;
3588 /* Implement TARGET_MEMORY_MOVE_COST. */
3590 static int
3591 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3592 reg_class_t rclass ATTRIBUTE_UNUSED,
3593 bool in ATTRIBUTE_UNUSED)
3595 return 2;
3598 /* Compute a (partial) cost for rtx X. Return true if the complete
3599 cost has been computed, and false if subexpressions should be
3600 scanned. In either case, *TOTAL contains the cost result. The
3601 initial value of *TOTAL is the default value computed by
3602 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3603 code of the superexpression of x. */
3605 static bool
3606 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3607 int opno ATTRIBUTE_UNUSED,
3608 int *total, bool speed ATTRIBUTE_UNUSED)
3610 int code = GET_CODE (x);
3611 switch (code)
3613 case CONST:
3614 case CONST_INT:
3615 case LABEL_REF:
3616 case SYMBOL_REF:
3617 case CONST_DOUBLE:
3618 case CONST_WIDE_INT:
3619 case MEM:
3620 *total = 0;
3621 return true;
3623 case SET:
3625 /* Without this a conditional move instruction would be
3626 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3627 comparison operator). That's a bit pessimistic. */
3629 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3630 return false;
3632 rtx cond = XEXP (SET_SRC (x), 0);
3634 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3635 return false;
3637 /* It is going to be a load/store on condition. Make it
3638 slightly more expensive than a normal load. */
3639 *total = COSTS_N_INSNS (1) + 1;
3641 rtx dst = SET_DEST (x);
3642 rtx then = XEXP (SET_SRC (x), 1);
3643 rtx els = XEXP (SET_SRC (x), 2);
3645 /* It is a real IF-THEN-ELSE. An additional move will be
3646 needed to implement that. */
3647 if (!TARGET_Z15
3648 && reload_completed
3649 && !rtx_equal_p (dst, then)
3650 && !rtx_equal_p (dst, els))
3651 *total += COSTS_N_INSNS (1) / 2;
3653 /* A minor penalty for constants we cannot directly handle. */
3654 if ((CONST_INT_P (then) || CONST_INT_P (els))
3655 && (!TARGET_Z13 || MEM_P (dst)
3656 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3657 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3658 *total += COSTS_N_INSNS (1) / 2;
3660 /* A store on condition can only handle register src operands. */
3661 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3662 *total += COSTS_N_INSNS (1) / 2;
3664 return true;
3666 case IOR:
3668 /* nnrk, nngrk */
3669 if (TARGET_Z15
3670 && (mode == SImode || mode == DImode)
3671 && GET_CODE (XEXP (x, 0)) == NOT
3672 && GET_CODE (XEXP (x, 1)) == NOT)
3674 *total = COSTS_N_INSNS (1);
3675 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3676 *total += 1;
3677 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3678 *total += 1;
3679 return true;
3682 /* risbg */
3683 if (GET_CODE (XEXP (x, 0)) == AND
3684 && GET_CODE (XEXP (x, 1)) == ASHIFT
3685 && REG_P (XEXP (XEXP (x, 0), 0))
3686 && REG_P (XEXP (XEXP (x, 1), 0))
3687 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3688 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3689 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3690 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3692 *total = COSTS_N_INSNS (2);
3693 return true;
3696 /* ~AND on a 128 bit mode. This can be done using a vector
3697 instruction. */
3698 if (TARGET_VXE
3699 && GET_CODE (XEXP (x, 0)) == NOT
3700 && GET_CODE (XEXP (x, 1)) == NOT
3701 && REG_P (XEXP (XEXP (x, 0), 0))
3702 && REG_P (XEXP (XEXP (x, 1), 0))
3703 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3704 && s390_hard_regno_mode_ok (VR0_REGNUM,
3705 GET_MODE (XEXP (XEXP (x, 0), 0))))
3707 *total = COSTS_N_INSNS (1);
3708 return true;
3711 *total = COSTS_N_INSNS (1);
3712 return false;
3714 case AND:
3715 /* nork, nogrk */
3716 if (TARGET_Z15
3717 && (mode == SImode || mode == DImode)
3718 && GET_CODE (XEXP (x, 0)) == NOT
3719 && GET_CODE (XEXP (x, 1)) == NOT)
3721 *total = COSTS_N_INSNS (1);
3722 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3723 *total += 1;
3724 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3725 *total += 1;
3726 return true;
3728 /* fallthrough */
3729 case ASHIFT:
3730 case ASHIFTRT:
3731 case LSHIFTRT:
3732 case ROTATE:
3733 case ROTATERT:
3734 case XOR:
3735 case NEG:
3736 case NOT:
3737 case PLUS:
3738 case MINUS:
3739 *total = COSTS_N_INSNS (1);
3740 return false;
3742 case MULT:
3743 switch (mode)
3745 case E_SImode:
3747 rtx left = XEXP (x, 0);
3748 rtx right = XEXP (x, 1);
3749 if (GET_CODE (right) == CONST_INT
3750 && CONST_OK_FOR_K (INTVAL (right)))
3751 *total = s390_cost->mhi;
3752 else if (GET_CODE (left) == SIGN_EXTEND)
3753 *total = s390_cost->mh;
3754 else
3755 *total = s390_cost->ms; /* msr, ms, msy */
3756 break;
3758 case E_DImode:
3760 rtx left = XEXP (x, 0);
3761 rtx right = XEXP (x, 1);
3762 if (TARGET_ZARCH)
3764 if (GET_CODE (right) == CONST_INT
3765 && CONST_OK_FOR_K (INTVAL (right)))
3766 *total = s390_cost->mghi;
3767 else if (GET_CODE (left) == SIGN_EXTEND)
3768 *total = s390_cost->msgf;
3769 else
3770 *total = s390_cost->msg; /* msgr, msg */
3772 else /* TARGET_31BIT */
3774 if (GET_CODE (left) == SIGN_EXTEND
3775 && GET_CODE (right) == SIGN_EXTEND)
3776 /* mulsidi case: mr, m */
3777 *total = s390_cost->m;
3778 else if (GET_CODE (left) == ZERO_EXTEND
3779 && GET_CODE (right) == ZERO_EXTEND)
3780 /* umulsidi case: ml, mlr */
3781 *total = s390_cost->ml;
3782 else
3783 /* Complex calculation is required. */
3784 *total = COSTS_N_INSNS (40);
3786 break;
3788 case E_SFmode:
3789 case E_DFmode:
3790 *total = s390_cost->mult_df;
3791 break;
3792 case E_TFmode:
3793 *total = s390_cost->mxbr;
3794 break;
3795 default:
3796 return false;
3798 return false;
3800 case FMA:
3801 switch (mode)
3803 case E_DFmode:
3804 *total = s390_cost->madbr;
3805 break;
3806 case E_SFmode:
3807 *total = s390_cost->maebr;
3808 break;
3809 default:
3810 return false;
3812 /* Negate in the third argument is free: FMSUB. */
3813 if (GET_CODE (XEXP (x, 2)) == NEG)
3815 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3816 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3817 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3818 return true;
3820 return false;
3822 case UDIV:
3823 case UMOD:
3824 if (mode == TImode) /* 128 bit division */
3825 *total = s390_cost->dlgr;
3826 else if (mode == DImode)
3828 rtx right = XEXP (x, 1);
3829 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3830 *total = s390_cost->dlr;
3831 else /* 64 by 64 bit division */
3832 *total = s390_cost->dlgr;
3834 else if (mode == SImode) /* 32 bit division */
3835 *total = s390_cost->dlr;
3836 return false;
3838 case DIV:
3839 case MOD:
3840 if (mode == DImode)
3842 rtx right = XEXP (x, 1);
3843 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3844 if (TARGET_ZARCH)
3845 *total = s390_cost->dsgfr;
3846 else
3847 *total = s390_cost->dr;
3848 else /* 64 by 64 bit division */
3849 *total = s390_cost->dsgr;
3851 else if (mode == SImode) /* 32 bit division */
3852 *total = s390_cost->dlr;
3853 else if (mode == SFmode)
3855 *total = s390_cost->debr;
3857 else if (mode == DFmode)
3859 *total = s390_cost->ddbr;
3861 else if (mode == TFmode)
3863 *total = s390_cost->dxbr;
3865 return false;
3867 case SQRT:
3868 if (mode == SFmode)
3869 *total = s390_cost->sqebr;
3870 else if (mode == DFmode)
3871 *total = s390_cost->sqdbr;
3872 else /* TFmode */
3873 *total = s390_cost->sqxbr;
3874 return false;
3876 case SIGN_EXTEND:
3877 case ZERO_EXTEND:
3878 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3879 || outer_code == PLUS || outer_code == MINUS
3880 || outer_code == COMPARE)
3881 *total = 0;
3882 return false;
3884 case COMPARE:
3885 *total = COSTS_N_INSNS (1);
3887 /* nxrk, nxgrk ~(a^b)==0 */
3888 if (TARGET_Z15
3889 && GET_CODE (XEXP (x, 0)) == NOT
3890 && XEXP (x, 1) == const0_rtx
3891 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3892 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3893 && mode == CCZmode)
3895 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3896 *total += 1;
3897 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3898 *total += 1;
3899 return true;
3902 /* nnrk, nngrk, nork, nogrk */
3903 if (TARGET_Z15
3904 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3905 && XEXP (x, 1) == const0_rtx
3906 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3907 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3908 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3909 && mode == CCZmode)
3911 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3912 *total += 1;
3913 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3914 *total += 1;
3915 return true;
3918 if (GET_CODE (XEXP (x, 0)) == AND
3919 && GET_CODE (XEXP (x, 1)) == CONST_INT
3920 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3922 rtx op0 = XEXP (XEXP (x, 0), 0);
3923 rtx op1 = XEXP (XEXP (x, 0), 1);
3924 rtx op2 = XEXP (x, 1);
3926 if (memory_operand (op0, GET_MODE (op0))
3927 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3928 return true;
3929 if (register_operand (op0, GET_MODE (op0))
3930 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3931 return true;
3933 return false;
3935 default:
3936 return false;
3940 /* Return the cost of an address rtx ADDR. */
3942 static int
3943 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3944 addr_space_t as ATTRIBUTE_UNUSED,
3945 bool speed ATTRIBUTE_UNUSED)
3947 struct s390_address ad;
3948 if (!s390_decompose_address (addr, &ad))
3949 return 1000;
3951 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3954 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3955 static int
3956 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3957 tree vectype,
3958 int misalign ATTRIBUTE_UNUSED)
3960 switch (type_of_cost)
3962 case scalar_stmt:
3963 case scalar_load:
3964 case scalar_store:
3965 case vector_stmt:
3966 case vector_load:
3967 case vector_store:
3968 case vector_gather_load:
3969 case vector_scatter_store:
3970 case vec_to_scalar:
3971 case scalar_to_vec:
3972 case cond_branch_not_taken:
3973 case vec_perm:
3974 case vec_promote_demote:
3975 case unaligned_load:
3976 case unaligned_store:
3977 return 1;
3979 case cond_branch_taken:
3980 return 3;
3982 case vec_construct:
3983 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3985 default:
3986 gcc_unreachable ();
3990 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3991 otherwise return 0. */
3994 tls_symbolic_operand (rtx op)
3996 if (GET_CODE (op) != SYMBOL_REF)
3997 return 0;
3998 return SYMBOL_REF_TLS_MODEL (op);
4001 /* Split DImode access register reference REG (on 64-bit) into its constituent
4002 low and high parts, and store them into LO and HI. Note that gen_lowpart/
4003 gen_highpart cannot be used as they assume all registers are word-sized,
4004 while our access registers have only half that size. */
4006 void
4007 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
4009 gcc_assert (TARGET_64BIT);
4010 gcc_assert (ACCESS_REG_P (reg));
4011 gcc_assert (GET_MODE (reg) == DImode);
4012 gcc_assert (!(REGNO (reg) & 1));
4014 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
4015 *hi = gen_rtx_REG (SImode, REGNO (reg));
4018 /* Return true if OP contains a symbol reference */
4020 bool
4021 symbolic_reference_mentioned_p (rtx op)
4023 const char *fmt;
4024 int i;
4026 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4027 return 1;
4029 fmt = GET_RTX_FORMAT (GET_CODE (op));
4030 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4032 if (fmt[i] == 'E')
4034 int j;
4036 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4037 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4038 return 1;
4041 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4042 return 1;
4045 return 0;
4048 /* Return true if OP contains a reference to a thread-local symbol. */
4050 bool
4051 tls_symbolic_reference_mentioned_p (rtx op)
4053 const char *fmt;
4054 int i;
4056 if (GET_CODE (op) == SYMBOL_REF)
4057 return tls_symbolic_operand (op);
4059 fmt = GET_RTX_FORMAT (GET_CODE (op));
4060 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4062 if (fmt[i] == 'E')
4064 int j;
4066 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4067 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4068 return true;
4071 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4072 return true;
4075 return false;
4079 /* Return true if OP is a legitimate general operand when
4080 generating PIC code. It is given that flag_pic is on
4081 and that OP satisfies CONSTANT_P. */
4084 legitimate_pic_operand_p (rtx op)
4086 /* Accept all non-symbolic constants. */
4087 if (!SYMBOLIC_CONST (op))
4088 return 1;
4090 /* Accept addresses that can be expressed relative to (pc). */
4091 if (larl_operand (op, VOIDmode))
4092 return 1;
4094 /* Reject everything else; must be handled
4095 via emit_symbolic_move. */
4096 return 0;
4099 /* Returns true if the constant value OP is a legitimate general operand.
4100 It is given that OP satisfies CONSTANT_P. */
4102 static bool
4103 s390_legitimate_constant_p (machine_mode mode, rtx op)
4105 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4107 if (GET_MODE_SIZE (mode) != 16)
4108 return 0;
4110 if (!satisfies_constraint_j00 (op)
4111 && !satisfies_constraint_jm1 (op)
4112 && !satisfies_constraint_jKK (op)
4113 && !satisfies_constraint_jxx (op)
4114 && !satisfies_constraint_jyy (op))
4115 return 0;
4118 /* Accept all non-symbolic constants. */
4119 if (!SYMBOLIC_CONST (op))
4120 return 1;
4122 /* Accept immediate LARL operands. */
4123 if (larl_operand (op, mode))
4124 return 1;
4126 /* Thread-local symbols are never legal constants. This is
4127 so that emit_call knows that computing such addresses
4128 might require a function call. */
4129 if (TLS_SYMBOLIC_CONST (op))
4130 return 0;
4132 /* In the PIC case, symbolic constants must *not* be
4133 forced into the literal pool. We accept them here,
4134 so that they will be handled by emit_symbolic_move. */
4135 if (flag_pic)
4136 return 1;
4138 /* All remaining non-PIC symbolic constants are
4139 forced into the literal pool. */
4140 return 0;
4143 /* Determine if it's legal to put X into the constant pool. This
4144 is not possible if X contains the address of a symbol that is
4145 not constant (TLS) or not known at final link time (PIC). */
4147 static bool
4148 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4150 switch (GET_CODE (x))
4152 case CONST_INT:
4153 case CONST_DOUBLE:
4154 case CONST_WIDE_INT:
4155 case CONST_VECTOR:
4156 /* Accept all non-symbolic constants. */
4157 return false;
4159 case NEG:
4160 /* Accept an unary '-' only on scalar numeric constants. */
4161 switch (GET_CODE (XEXP (x, 0)))
4163 case CONST_INT:
4164 case CONST_DOUBLE:
4165 case CONST_WIDE_INT:
4166 return false;
4167 default:
4168 return true;
4171 case LABEL_REF:
4172 /* Labels are OK iff we are non-PIC. */
4173 return flag_pic != 0;
4175 case SYMBOL_REF:
4176 /* 'Naked' TLS symbol references are never OK,
4177 non-TLS symbols are OK iff we are non-PIC. */
4178 if (tls_symbolic_operand (x))
4179 return true;
4180 else
4181 return flag_pic != 0;
4183 case CONST:
4184 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4185 case PLUS:
4186 case MINUS:
4187 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4188 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4190 case UNSPEC:
4191 switch (XINT (x, 1))
4193 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4194 case UNSPEC_LTREL_OFFSET:
4195 case UNSPEC_GOT:
4196 case UNSPEC_GOTOFF:
4197 case UNSPEC_PLTOFF:
4198 case UNSPEC_TLSGD:
4199 case UNSPEC_TLSLDM:
4200 case UNSPEC_NTPOFF:
4201 case UNSPEC_DTPOFF:
4202 case UNSPEC_GOTNTPOFF:
4203 case UNSPEC_INDNTPOFF:
4204 return false;
4206 /* If the literal pool shares the code section, be put
4207 execute template placeholders into the pool as well. */
4208 case UNSPEC_INSN:
4209 default:
4210 return true;
4212 break;
4214 default:
4215 gcc_unreachable ();
4219 /* Returns true if the constant value OP is a legitimate general
4220 operand during and after reload. The difference to
4221 legitimate_constant_p is that this function will not accept
4222 a constant that would need to be forced to the literal pool
4223 before it can be used as operand.
4224 This function accepts all constants which can be loaded directly
4225 into a GPR. */
4227 bool
4228 legitimate_reload_constant_p (rtx op)
4230 /* Accept la(y) operands. */
4231 if (GET_CODE (op) == CONST_INT
4232 && DISP_IN_RANGE (INTVAL (op)))
4233 return true;
4235 /* Accept l(g)hi/l(g)fi operands. */
4236 if (GET_CODE (op) == CONST_INT
4237 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4238 return true;
4240 /* Accept lliXX operands. */
4241 if (TARGET_ZARCH
4242 && GET_CODE (op) == CONST_INT
4243 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4244 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4245 return true;
4247 if (TARGET_EXTIMM
4248 && GET_CODE (op) == CONST_INT
4249 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4250 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4251 return true;
4253 /* Accept larl operands. */
4254 if (larl_operand (op, VOIDmode))
4255 return true;
4257 /* Accept floating-point zero operands that fit into a single GPR. */
4258 if (GET_CODE (op) == CONST_DOUBLE
4259 && s390_float_const_zero_p (op)
4260 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4261 return true;
4263 /* Accept double-word operands that can be split. */
4264 if (GET_CODE (op) == CONST_WIDE_INT
4265 || (GET_CODE (op) == CONST_INT
4266 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4268 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4269 rtx hi = operand_subword (op, 0, 0, dword_mode);
4270 rtx lo = operand_subword (op, 1, 0, dword_mode);
4271 return legitimate_reload_constant_p (hi)
4272 && legitimate_reload_constant_p (lo);
4275 /* Everything else cannot be handled without reload. */
4276 return false;
4279 /* Returns true if the constant value OP is a legitimate fp operand
4280 during and after reload.
4281 This function accepts all constants which can be loaded directly
4282 into an FPR. */
4284 static bool
4285 legitimate_reload_fp_constant_p (rtx op)
4287 /* Accept floating-point zero operands if the load zero instruction
4288 can be used. Prior to z196 the load fp zero instruction caused a
4289 performance penalty if the result is used as BFP number. */
4290 if (TARGET_Z196
4291 && GET_CODE (op) == CONST_DOUBLE
4292 && s390_float_const_zero_p (op))
4293 return true;
4295 return false;
4298 /* Returns true if the constant value OP is a legitimate vector operand
4299 during and after reload.
4300 This function accepts all constants which can be loaded directly
4301 into an VR. */
4303 static bool
4304 legitimate_reload_vector_constant_p (rtx op)
4306 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4307 && (satisfies_constraint_j00 (op)
4308 || satisfies_constraint_jm1 (op)
4309 || satisfies_constraint_jKK (op)
4310 || satisfies_constraint_jxx (op)
4311 || satisfies_constraint_jyy (op)))
4312 return true;
4314 return false;
4317 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4318 return the class of reg to actually use. */
4320 static reg_class_t
4321 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4323 switch (GET_CODE (op))
4325 /* Constants we cannot reload into general registers
4326 must be forced into the literal pool. */
4327 case CONST_VECTOR:
4328 case CONST_DOUBLE:
4329 case CONST_INT:
4330 case CONST_WIDE_INT:
4331 if (reg_class_subset_p (GENERAL_REGS, rclass)
4332 && legitimate_reload_constant_p (op))
4333 return GENERAL_REGS;
4334 else if (reg_class_subset_p (ADDR_REGS, rclass)
4335 && legitimate_reload_constant_p (op))
4336 return ADDR_REGS;
4337 else if (reg_class_subset_p (FP_REGS, rclass)
4338 && legitimate_reload_fp_constant_p (op))
4339 return FP_REGS;
4340 else if (reg_class_subset_p (VEC_REGS, rclass)
4341 && legitimate_reload_vector_constant_p (op))
4342 return VEC_REGS;
4344 return NO_REGS;
4346 /* If a symbolic constant or a PLUS is reloaded,
4347 it is most likely being used as an address, so
4348 prefer ADDR_REGS. If 'class' is not a superset
4349 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4350 case CONST:
4351 /* Symrefs cannot be pushed into the literal pool with -fPIC
4352 so we *MUST NOT* return NO_REGS for these cases
4353 (s390_cannot_force_const_mem will return true).
4355 On the other hand we MUST return NO_REGS for symrefs with
4356 invalid addend which might have been pushed to the literal
4357 pool (no -fPIC). Usually we would expect them to be
4358 handled via secondary reload but this does not happen if
4359 they are used as literal pool slot replacement in reload
4360 inheritance (see emit_input_reload_insns). */
4361 if (GET_CODE (XEXP (op, 0)) == PLUS
4362 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4363 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4365 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4366 return ADDR_REGS;
4367 else
4368 return NO_REGS;
4370 /* fallthrough */
4371 case LABEL_REF:
4372 case SYMBOL_REF:
4373 if (!legitimate_reload_constant_p (op))
4374 return NO_REGS;
4375 /* fallthrough */
4376 case PLUS:
4377 /* load address will be used. */
4378 if (reg_class_subset_p (ADDR_REGS, rclass))
4379 return ADDR_REGS;
4380 else
4381 return NO_REGS;
4383 default:
4384 break;
4387 return rclass;
4390 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4391 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4392 aligned. */
4394 bool
4395 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4397 HOST_WIDE_INT addend;
4398 rtx symref;
4400 /* The "required alignment" might be 0 (e.g. for certain structs
4401 accessed via BLKmode). Early abort in this case, as well as when
4402 an alignment > 8 is required. */
4403 if (alignment < 2 || alignment > 8)
4404 return false;
4406 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4407 return false;
4409 if (addend & (alignment - 1))
4410 return false;
4412 if (GET_CODE (symref) == SYMBOL_REF)
4414 /* s390_encode_section_info is not called for anchors, since they don't
4415 have corresponding VAR_DECLs. Therefore, we cannot rely on
4416 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4417 if (SYMBOL_REF_ANCHOR_P (symref))
4419 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4420 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4421 / BITS_PER_UNIT);
4423 gcc_assert (block_offset >= 0);
4424 return ((block_offset & (alignment - 1)) == 0
4425 && block_alignment >= alignment);
4428 /* We have load-relative instructions for 2-byte, 4-byte, and
4429 8-byte alignment so allow only these. */
4430 switch (alignment)
4432 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4433 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4434 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4435 default: return false;
4439 if (GET_CODE (symref) == UNSPEC
4440 && alignment <= UNITS_PER_LONG)
4441 return true;
4443 return false;
4446 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4447 operand SCRATCH is used to reload the even part of the address and
4448 adding one. */
4450 void
4451 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4453 HOST_WIDE_INT addend;
4454 rtx symref;
4456 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4457 gcc_unreachable ();
4459 if (!(addend & 1))
4460 /* Easy case. The addend is even so larl will do fine. */
4461 emit_move_insn (reg, addr);
4462 else
4464 /* We can leave the scratch register untouched if the target
4465 register is a valid base register. */
4466 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4467 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4468 scratch = reg;
4470 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4471 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4473 if (addend != 1)
4474 emit_move_insn (scratch,
4475 gen_rtx_CONST (Pmode,
4476 gen_rtx_PLUS (Pmode, symref,
4477 GEN_INT (addend - 1))));
4478 else
4479 emit_move_insn (scratch, symref);
4481 /* Increment the address using la in order to avoid clobbering cc. */
4482 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4486 /* Generate what is necessary to move between REG and MEM using
4487 SCRATCH. The direction is given by TOMEM. */
4489 void
4490 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4492 /* Reload might have pulled a constant out of the literal pool.
4493 Force it back in. */
4494 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4495 || GET_CODE (mem) == CONST_WIDE_INT
4496 || GET_CODE (mem) == CONST_VECTOR
4497 || GET_CODE (mem) == CONST)
4498 mem = force_const_mem (GET_MODE (reg), mem);
4500 gcc_assert (MEM_P (mem));
4502 /* For a load from memory we can leave the scratch register
4503 untouched if the target register is a valid base register. */
4504 if (!tomem
4505 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4506 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4507 && GET_MODE (reg) == GET_MODE (scratch))
4508 scratch = reg;
4510 /* Load address into scratch register. Since we can't have a
4511 secondary reload for a secondary reload we have to cover the case
4512 where larl would need a secondary reload here as well. */
4513 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4515 /* Now we can use a standard load/store to do the move. */
4516 if (tomem)
4517 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4518 else
4519 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4522 /* Inform reload about cases where moving X with a mode MODE to a register in
4523 RCLASS requires an extra scratch or immediate register. Return the class
4524 needed for the immediate register. */
4526 static reg_class_t
4527 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4528 machine_mode mode, secondary_reload_info *sri)
4530 enum reg_class rclass = (enum reg_class) rclass_i;
4532 /* Intermediate register needed. */
4533 if (reg_classes_intersect_p (CC_REGS, rclass))
4534 return GENERAL_REGS;
4536 if (TARGET_VX)
4538 /* The vst/vl vector move instructions allow only for short
4539 displacements. */
4540 if (MEM_P (x)
4541 && GET_CODE (XEXP (x, 0)) == PLUS
4542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4543 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4544 && reg_class_subset_p (rclass, VEC_REGS)
4545 && (!reg_class_subset_p (rclass, FP_REGS)
4546 || (GET_MODE_SIZE (mode) > 8
4547 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4549 if (in_p)
4550 sri->icode = (TARGET_64BIT ?
4551 CODE_FOR_reloaddi_la_in :
4552 CODE_FOR_reloadsi_la_in);
4553 else
4554 sri->icode = (TARGET_64BIT ?
4555 CODE_FOR_reloaddi_la_out :
4556 CODE_FOR_reloadsi_la_out);
4560 if (TARGET_Z10)
4562 HOST_WIDE_INT offset;
4563 rtx symref;
4565 /* On z10 several optimizer steps may generate larl operands with
4566 an odd addend. */
4567 if (in_p
4568 && s390_loadrelative_operand_p (x, &symref, &offset)
4569 && mode == Pmode
4570 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4571 && (offset & 1) == 1)
4572 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4573 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4575 /* Handle all the (mem (symref)) accesses we cannot use the z10
4576 instructions for. */
4577 if (MEM_P (x)
4578 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4579 && (mode == QImode
4580 || !reg_class_subset_p (rclass, GENERAL_REGS)
4581 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4582 || !s390_check_symref_alignment (XEXP (x, 0),
4583 GET_MODE_SIZE (mode))))
4585 #define __SECONDARY_RELOAD_CASE(M,m) \
4586 case E_##M##mode: \
4587 if (TARGET_64BIT) \
4588 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4589 CODE_FOR_reload##m##di_tomem_z10; \
4590 else \
4591 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4592 CODE_FOR_reload##m##si_tomem_z10; \
4593 break;
4595 switch (GET_MODE (x))
4597 __SECONDARY_RELOAD_CASE (QI, qi);
4598 __SECONDARY_RELOAD_CASE (HI, hi);
4599 __SECONDARY_RELOAD_CASE (SI, si);
4600 __SECONDARY_RELOAD_CASE (DI, di);
4601 __SECONDARY_RELOAD_CASE (TI, ti);
4602 __SECONDARY_RELOAD_CASE (SF, sf);
4603 __SECONDARY_RELOAD_CASE (DF, df);
4604 __SECONDARY_RELOAD_CASE (TF, tf);
4605 __SECONDARY_RELOAD_CASE (SD, sd);
4606 __SECONDARY_RELOAD_CASE (DD, dd);
4607 __SECONDARY_RELOAD_CASE (TD, td);
4608 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4609 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4610 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4611 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4612 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4613 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4614 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4615 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4616 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4617 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4618 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4619 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4620 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4621 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4622 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4623 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4624 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4625 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4626 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4627 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4628 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4629 default:
4630 gcc_unreachable ();
4632 #undef __SECONDARY_RELOAD_CASE
4636 /* We need a scratch register when loading a PLUS expression which
4637 is not a legitimate operand of the LOAD ADDRESS instruction. */
4638 /* LRA can deal with transformation of plus op very well -- so we
4639 don't need to prompt LRA in this case. */
4640 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4641 sri->icode = (TARGET_64BIT ?
4642 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4644 /* Performing a multiword move from or to memory we have to make sure the
4645 second chunk in memory is addressable without causing a displacement
4646 overflow. If that would be the case we calculate the address in
4647 a scratch register. */
4648 if (MEM_P (x)
4649 && GET_CODE (XEXP (x, 0)) == PLUS
4650 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4651 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4652 + GET_MODE_SIZE (mode) - 1))
4654 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4655 in a s_operand address since we may fallback to lm/stm. So we only
4656 have to care about overflows in the b+i+d case. */
4657 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4658 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4660 /* For FP_REGS no lm/stm is available so this check is triggered
4661 for displacement overflows in b+i+d and b+d like addresses. */
4662 || (reg_classes_intersect_p (FP_REGS, rclass)
4663 && s390_class_max_nregs (FP_REGS, mode) > 1))
4665 if (in_p)
4666 sri->icode = (TARGET_64BIT ?
4667 CODE_FOR_reloaddi_la_in :
4668 CODE_FOR_reloadsi_la_in);
4669 else
4670 sri->icode = (TARGET_64BIT ?
4671 CODE_FOR_reloaddi_la_out :
4672 CODE_FOR_reloadsi_la_out);
4676 /* A scratch address register is needed when a symbolic constant is
4677 copied to r0 compiling with -fPIC. In other cases the target
4678 register might be used as temporary (see legitimize_pic_address). */
4679 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4680 sri->icode = (TARGET_64BIT ?
4681 CODE_FOR_reloaddi_PIC_addr :
4682 CODE_FOR_reloadsi_PIC_addr);
4684 /* Either scratch or no register needed. */
4685 return NO_REGS;
4688 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4690 We need secondary memory to move data between GPRs and FPRs.
4692 - With DFP the ldgr lgdr instructions are available. Due to the
4693 different alignment we cannot use them for SFmode. For 31 bit a
4694 64 bit value in GPR would be a register pair so here we still
4695 need to go via memory.
4697 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4698 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4699 in full VRs so as before also on z13 we do these moves via
4700 memory.
4702 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4704 static bool
4705 s390_secondary_memory_needed (machine_mode mode,
4706 reg_class_t class1, reg_class_t class2)
4708 return (((reg_classes_intersect_p (class1, VEC_REGS)
4709 && reg_classes_intersect_p (class2, GENERAL_REGS))
4710 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4711 && reg_classes_intersect_p (class2, VEC_REGS)))
4712 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4713 || GET_MODE_SIZE (mode) != 8)
4714 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4715 && GET_MODE_SIZE (mode) > 8)));
4718 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4720 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4721 because the movsi and movsf patterns don't handle r/f moves. */
4723 static machine_mode
4724 s390_secondary_memory_needed_mode (machine_mode mode)
4726 if (GET_MODE_BITSIZE (mode) < 32)
4727 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4728 return mode;
4731 /* Generate code to load SRC, which is PLUS that is not a
4732 legitimate operand for the LA instruction, into TARGET.
4733 SCRATCH may be used as scratch register. */
4735 void
4736 s390_expand_plus_operand (rtx target, rtx src,
4737 rtx scratch)
4739 rtx sum1, sum2;
4740 struct s390_address ad;
4742 /* src must be a PLUS; get its two operands. */
4743 gcc_assert (GET_CODE (src) == PLUS);
4744 gcc_assert (GET_MODE (src) == Pmode);
4746 /* Check if any of the two operands is already scheduled
4747 for replacement by reload. This can happen e.g. when
4748 float registers occur in an address. */
4749 sum1 = find_replacement (&XEXP (src, 0));
4750 sum2 = find_replacement (&XEXP (src, 1));
4751 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4753 /* If the address is already strictly valid, there's nothing to do. */
4754 if (!s390_decompose_address (src, &ad)
4755 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4756 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4758 /* Otherwise, one of the operands cannot be an address register;
4759 we reload its value into the scratch register. */
4760 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4762 emit_move_insn (scratch, sum1);
4763 sum1 = scratch;
4765 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4767 emit_move_insn (scratch, sum2);
4768 sum2 = scratch;
4771 /* According to the way these invalid addresses are generated
4772 in reload.c, it should never happen (at least on s390) that
4773 *neither* of the PLUS components, after find_replacements
4774 was applied, is an address register. */
4775 if (sum1 == scratch && sum2 == scratch)
4777 debug_rtx (src);
4778 gcc_unreachable ();
4781 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4784 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4785 is only ever performed on addresses, so we can mark the
4786 sum as legitimate for LA in any case. */
4787 s390_load_address (target, src);
4791 /* Return true if ADDR is a valid memory address.
4792 STRICT specifies whether strict register checking applies. */
4794 static bool
4795 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4797 struct s390_address ad;
4799 if (TARGET_Z10
4800 && larl_operand (addr, VOIDmode)
4801 && (mode == VOIDmode
4802 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4803 return true;
4805 if (!s390_decompose_address (addr, &ad))
4806 return false;
4808 /* The vector memory instructions only support short displacements.
4809 Reject invalid displacements early to prevent plenty of lay
4810 instructions to be generated later which then cannot be merged
4811 properly. */
4812 if (TARGET_VX
4813 && VECTOR_MODE_P (mode)
4814 && ad.disp != NULL_RTX
4815 && CONST_INT_P (ad.disp)
4816 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4817 return false;
4819 if (strict)
4821 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4822 return false;
4824 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4825 return false;
4827 else
4829 if (ad.base
4830 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4831 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4832 return false;
4834 if (ad.indx
4835 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4836 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4837 return false;
4839 return true;
4842 /* Return true if OP is a valid operand for the LA instruction.
4843 In 31-bit, we need to prove that the result is used as an
4844 address, as LA performs only a 31-bit addition. */
4846 bool
4847 legitimate_la_operand_p (rtx op)
4849 struct s390_address addr;
4850 if (!s390_decompose_address (op, &addr))
4851 return false;
4853 return (TARGET_64BIT || addr.pointer);
4856 /* Return true if it is valid *and* preferable to use LA to
4857 compute the sum of OP1 and OP2. */
4859 bool
4860 preferred_la_operand_p (rtx op1, rtx op2)
4862 struct s390_address addr;
4864 if (op2 != const0_rtx)
4865 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4867 if (!s390_decompose_address (op1, &addr))
4868 return false;
4869 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4870 return false;
4871 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4872 return false;
4874 /* Avoid LA instructions with index (and base) register on z196 or
4875 later; it is preferable to use regular add instructions when
4876 possible. Starting with zEC12 the la with index register is
4877 "uncracked" again but still slower than a regular add. */
4878 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4879 return false;
4881 if (!TARGET_64BIT && !addr.pointer)
4882 return false;
4884 if (addr.pointer)
4885 return true;
4887 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4888 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4889 return true;
4891 return false;
4894 /* Emit a forced load-address operation to load SRC into DST.
4895 This will use the LOAD ADDRESS instruction even in situations
4896 where legitimate_la_operand_p (SRC) returns false. */
4898 void
4899 s390_load_address (rtx dst, rtx src)
4901 if (TARGET_64BIT)
4902 emit_move_insn (dst, src);
4903 else
4904 emit_insn (gen_force_la_31 (dst, src));
4907 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4909 bool
4910 s390_rel_address_ok_p (rtx symbol_ref)
4912 tree decl;
4914 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4915 return true;
4917 decl = SYMBOL_REF_DECL (symbol_ref);
4919 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4920 return (s390_pic_data_is_text_relative
4921 || (decl
4922 && TREE_CODE (decl) == FUNCTION_DECL));
4924 return false;
4927 /* Return a legitimate reference for ORIG (an address) using the
4928 register REG. If REG is 0, a new pseudo is generated.
4930 There are two types of references that must be handled:
4932 1. Global data references must load the address from the GOT, via
4933 the PIC reg. An insn is emitted to do this load, and the reg is
4934 returned.
4936 2. Static data references, constant pool addresses, and code labels
4937 compute the address as an offset from the GOT, whose base is in
4938 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4939 differentiate them from global data objects. The returned
4940 address is the PIC reg + an unspec constant.
4942 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4943 reg also appears in the address. */
4946 legitimize_pic_address (rtx orig, rtx reg)
4948 rtx addr = orig;
4949 rtx addend = const0_rtx;
4950 rtx new_rtx = orig;
4952 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4954 if (GET_CODE (addr) == CONST)
4955 addr = XEXP (addr, 0);
4957 if (GET_CODE (addr) == PLUS)
4959 addend = XEXP (addr, 1);
4960 addr = XEXP (addr, 0);
4963 if ((GET_CODE (addr) == LABEL_REF
4964 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4965 || (GET_CODE (addr) == UNSPEC &&
4966 (XINT (addr, 1) == UNSPEC_GOTENT
4967 || XINT (addr, 1) == UNSPEC_PLT31)))
4968 && GET_CODE (addend) == CONST_INT)
4970 /* This can be locally addressed. */
4972 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4973 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4974 gen_rtx_CONST (Pmode, addr) : addr);
4976 if (larl_operand (const_addr, VOIDmode)
4977 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4978 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4980 if (INTVAL (addend) & 1)
4982 /* LARL can't handle odd offsets, so emit a pair of LARL
4983 and LA. */
4984 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4986 if (!DISP_IN_RANGE (INTVAL (addend)))
4988 HOST_WIDE_INT even = INTVAL (addend) - 1;
4989 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4990 addr = gen_rtx_CONST (Pmode, addr);
4991 addend = const1_rtx;
4994 emit_move_insn (temp, addr);
4995 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4997 if (reg != 0)
4999 s390_load_address (reg, new_rtx);
5000 new_rtx = reg;
5003 else
5005 /* If the offset is even, we can just use LARL. This
5006 will happen automatically. */
5009 else
5011 /* No larl - Access local symbols relative to the GOT. */
5013 rtx temp = reg? reg : gen_reg_rtx (Pmode);
5015 if (reload_in_progress || reload_completed)
5016 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5018 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5019 if (addend != const0_rtx)
5020 addr = gen_rtx_PLUS (Pmode, addr, addend);
5021 addr = gen_rtx_CONST (Pmode, addr);
5022 addr = force_const_mem (Pmode, addr);
5023 emit_move_insn (temp, addr);
5025 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5026 if (reg != 0)
5028 s390_load_address (reg, new_rtx);
5029 new_rtx = reg;
5033 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
5035 /* A non-local symbol reference without addend.
5037 The symbol ref is wrapped into an UNSPEC to make sure the
5038 proper operand modifier (@GOT or @GOTENT) will be emitted.
5039 This will tell the linker to put the symbol into the GOT.
5041 Additionally the code dereferencing the GOT slot is emitted here.
5043 An addend to the symref needs to be added afterwards.
5044 legitimize_pic_address calls itself recursively to handle
5045 that case. So no need to do it here. */
5047 if (reg == 0)
5048 reg = gen_reg_rtx (Pmode);
5050 if (TARGET_Z10)
5052 /* Use load relative if possible.
5053 lgrl <target>, sym@GOTENT */
5054 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5055 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5056 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
5058 emit_move_insn (reg, new_rtx);
5059 new_rtx = reg;
5061 else if (flag_pic == 1)
5063 /* Assume GOT offset is a valid displacement operand (< 4k
5064 or < 512k with z990). This is handled the same way in
5065 both 31- and 64-bit code (@GOT).
5066 lg <target>, sym@GOT(r12) */
5068 if (reload_in_progress || reload_completed)
5069 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5071 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5072 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5073 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5074 new_rtx = gen_const_mem (Pmode, new_rtx);
5075 emit_move_insn (reg, new_rtx);
5076 new_rtx = reg;
5078 else
5080 /* If the GOT offset might be >= 4k, we determine the position
5081 of the GOT entry via a PC-relative LARL (@GOTENT).
5082 larl temp, sym@GOTENT
5083 lg <target>, 0(temp) */
5085 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5087 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5088 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5090 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5091 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5092 emit_move_insn (temp, new_rtx);
5093 new_rtx = gen_const_mem (Pmode, temp);
5094 emit_move_insn (reg, new_rtx);
5096 new_rtx = reg;
5099 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5101 gcc_assert (XVECLEN (addr, 0) == 1);
5102 switch (XINT (addr, 1))
5104 /* These address symbols (or PLT slots) relative to the GOT
5105 (not GOT slots!). In general this will exceed the
5106 displacement range so these value belong into the literal
5107 pool. */
5108 case UNSPEC_GOTOFF:
5109 case UNSPEC_PLTOFF:
5110 new_rtx = force_const_mem (Pmode, orig);
5111 break;
5113 /* For -fPIC the GOT size might exceed the displacement
5114 range so make sure the value is in the literal pool. */
5115 case UNSPEC_GOT:
5116 if (flag_pic == 2)
5117 new_rtx = force_const_mem (Pmode, orig);
5118 break;
5120 /* For @GOTENT larl is used. This is handled like local
5121 symbol refs. */
5122 case UNSPEC_GOTENT:
5123 gcc_unreachable ();
5124 break;
5126 /* For @PLT larl is used. This is handled like local
5127 symbol refs. */
5128 case UNSPEC_PLT31:
5129 gcc_unreachable ();
5130 break;
5132 /* Everything else cannot happen. */
5133 default:
5134 gcc_unreachable ();
5137 else if (addend != const0_rtx)
5139 /* Otherwise, compute the sum. */
5141 rtx base = legitimize_pic_address (addr, reg);
5142 new_rtx = legitimize_pic_address (addend,
5143 base == reg ? NULL_RTX : reg);
5144 if (GET_CODE (new_rtx) == CONST_INT)
5145 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5146 else
5148 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5150 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5151 new_rtx = XEXP (new_rtx, 1);
5153 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5156 if (GET_CODE (new_rtx) == CONST)
5157 new_rtx = XEXP (new_rtx, 0);
5158 new_rtx = force_operand (new_rtx, 0);
5161 return new_rtx;
5164 /* Load the thread pointer into a register. */
5167 s390_get_thread_pointer (void)
5169 rtx tp = gen_reg_rtx (Pmode);
5171 emit_insn (gen_get_thread_pointer (Pmode, tp));
5173 mark_reg_pointer (tp, BITS_PER_WORD);
5175 return tp;
5178 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5179 in s390_tls_symbol which always refers to __tls_get_offset.
5180 The returned offset is written to RESULT_REG and an USE rtx is
5181 generated for TLS_CALL. */
5183 static GTY(()) rtx s390_tls_symbol;
5185 static void
5186 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5188 rtx insn;
5190 if (!flag_pic)
5191 emit_insn (s390_load_got ());
5193 if (!s390_tls_symbol)
5195 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5196 SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION;
5199 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5200 gen_rtx_REG (Pmode, RETURN_REGNUM));
5202 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5203 RTL_CONST_CALL_P (insn) = 1;
5206 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5207 this (thread-local) address. REG may be used as temporary. */
5209 static rtx
5210 legitimize_tls_address (rtx addr, rtx reg)
5212 rtx new_rtx, tls_call, temp, base, r2;
5213 rtx_insn *insn;
5215 if (GET_CODE (addr) == SYMBOL_REF)
5216 switch (tls_symbolic_operand (addr))
5218 case TLS_MODEL_GLOBAL_DYNAMIC:
5219 start_sequence ();
5220 r2 = gen_rtx_REG (Pmode, 2);
5221 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5222 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5223 new_rtx = force_const_mem (Pmode, new_rtx);
5224 emit_move_insn (r2, new_rtx);
5225 s390_emit_tls_call_insn (r2, tls_call);
5226 insn = get_insns ();
5227 end_sequence ();
5229 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5230 temp = gen_reg_rtx (Pmode);
5231 emit_libcall_block (insn, temp, r2, new_rtx);
5233 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5234 if (reg != 0)
5236 s390_load_address (reg, new_rtx);
5237 new_rtx = reg;
5239 break;
5241 case TLS_MODEL_LOCAL_DYNAMIC:
5242 start_sequence ();
5243 r2 = gen_rtx_REG (Pmode, 2);
5244 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5245 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5246 new_rtx = force_const_mem (Pmode, new_rtx);
5247 emit_move_insn (r2, new_rtx);
5248 s390_emit_tls_call_insn (r2, tls_call);
5249 insn = get_insns ();
5250 end_sequence ();
5252 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5253 temp = gen_reg_rtx (Pmode);
5254 emit_libcall_block (insn, temp, r2, new_rtx);
5256 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5257 base = gen_reg_rtx (Pmode);
5258 s390_load_address (base, new_rtx);
5260 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5261 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5262 new_rtx = force_const_mem (Pmode, new_rtx);
5263 temp = gen_reg_rtx (Pmode);
5264 emit_move_insn (temp, new_rtx);
5266 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5267 if (reg != 0)
5269 s390_load_address (reg, new_rtx);
5270 new_rtx = reg;
5272 break;
5274 case TLS_MODEL_INITIAL_EXEC:
5275 if (flag_pic == 1)
5277 /* Assume GOT offset < 4k. This is handled the same way
5278 in both 31- and 64-bit code. */
5280 if (reload_in_progress || reload_completed)
5281 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5285 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5286 new_rtx = gen_const_mem (Pmode, new_rtx);
5287 temp = gen_reg_rtx (Pmode);
5288 emit_move_insn (temp, new_rtx);
5290 else
5292 /* If the GOT offset might be >= 4k, we determine the position
5293 of the GOT entry via a PC-relative LARL. */
5295 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5296 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5297 temp = gen_reg_rtx (Pmode);
5298 emit_move_insn (temp, new_rtx);
5300 new_rtx = gen_const_mem (Pmode, temp);
5301 temp = gen_reg_rtx (Pmode);
5302 emit_move_insn (temp, new_rtx);
5305 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5306 if (reg != 0)
5308 s390_load_address (reg, new_rtx);
5309 new_rtx = reg;
5311 break;
5313 case TLS_MODEL_LOCAL_EXEC:
5314 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5315 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5316 new_rtx = force_const_mem (Pmode, new_rtx);
5317 temp = gen_reg_rtx (Pmode);
5318 emit_move_insn (temp, new_rtx);
5320 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5321 if (reg != 0)
5323 s390_load_address (reg, new_rtx);
5324 new_rtx = reg;
5326 break;
5328 default:
5329 gcc_unreachable ();
5332 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5334 switch (XINT (XEXP (addr, 0), 1))
5336 case UNSPEC_NTPOFF:
5337 case UNSPEC_INDNTPOFF:
5338 new_rtx = addr;
5339 break;
5341 default:
5342 gcc_unreachable ();
5346 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5347 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5349 new_rtx = XEXP (XEXP (addr, 0), 0);
5350 if (GET_CODE (new_rtx) != SYMBOL_REF)
5351 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5353 new_rtx = legitimize_tls_address (new_rtx, reg);
5354 new_rtx = plus_constant (Pmode, new_rtx,
5355 INTVAL (XEXP (XEXP (addr, 0), 1)));
5356 new_rtx = force_operand (new_rtx, 0);
5359 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5360 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
5362 new_rtx = XEXP (XEXP (addr, 0), 0);
5363 if (GET_CODE (new_rtx) != SYMBOL_REF)
5364 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5366 new_rtx = legitimize_tls_address (new_rtx, reg);
5367 new_rtx = gen_rtx_NEG (Pmode, new_rtx);
5368 new_rtx = force_operand (new_rtx, 0);
5371 else
5372 gcc_unreachable (); /* for now ... */
5374 return new_rtx;
5377 /* Emit insns making the address in operands[1] valid for a standard
5378 move to operands[0]. operands[1] is replaced by an address which
5379 should be used instead of the former RTX to emit the move
5380 pattern. */
5382 void
5383 emit_symbolic_move (rtx *operands)
5385 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5387 if (GET_CODE (operands[0]) == MEM)
5388 operands[1] = force_reg (Pmode, operands[1]);
5389 else if (TLS_SYMBOLIC_CONST (operands[1]))
5390 operands[1] = legitimize_tls_address (operands[1], temp);
5391 else if (flag_pic)
5392 operands[1] = legitimize_pic_address (operands[1], temp);
5395 /* Try machine-dependent ways of modifying an illegitimate address X
5396 to be legitimate. If we find one, return the new, valid address.
5398 OLDX is the address as it was before break_out_memory_refs was called.
5399 In some cases it is useful to look at this to decide what needs to be done.
5401 MODE is the mode of the operand pointed to by X.
5403 When -fpic is used, special handling is needed for symbolic references.
5404 See comments by legitimize_pic_address for details. */
5406 static rtx
5407 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5408 machine_mode mode ATTRIBUTE_UNUSED)
5410 rtx constant_term = const0_rtx;
5412 if (TLS_SYMBOLIC_CONST (x))
5414 x = legitimize_tls_address (x, 0);
5416 if (s390_legitimate_address_p (mode, x, FALSE))
5417 return x;
5419 else if (GET_CODE (x) == PLUS
5420 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5421 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5423 return x;
5425 else if (flag_pic)
5427 if (SYMBOLIC_CONST (x)
5428 || (GET_CODE (x) == PLUS
5429 && (SYMBOLIC_CONST (XEXP (x, 0))
5430 || SYMBOLIC_CONST (XEXP (x, 1)))))
5431 x = legitimize_pic_address (x, 0);
5433 if (s390_legitimate_address_p (mode, x, FALSE))
5434 return x;
5437 x = eliminate_constant_term (x, &constant_term);
5439 /* Optimize loading of large displacements by splitting them
5440 into the multiple of 4K and the rest; this allows the
5441 former to be CSE'd if possible.
5443 Don't do this if the displacement is added to a register
5444 pointing into the stack frame, as the offsets will
5445 change later anyway. */
5447 if (GET_CODE (constant_term) == CONST_INT
5448 && !TARGET_LONG_DISPLACEMENT
5449 && !DISP_IN_RANGE (INTVAL (constant_term))
5450 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5452 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5453 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5455 rtx temp = gen_reg_rtx (Pmode);
5456 rtx val = force_operand (GEN_INT (upper), temp);
5457 if (val != temp)
5458 emit_move_insn (temp, val);
5460 x = gen_rtx_PLUS (Pmode, x, temp);
5461 constant_term = GEN_INT (lower);
5464 if (GET_CODE (x) == PLUS)
5466 if (GET_CODE (XEXP (x, 0)) == REG)
5468 rtx temp = gen_reg_rtx (Pmode);
5469 rtx val = force_operand (XEXP (x, 1), temp);
5470 if (val != temp)
5471 emit_move_insn (temp, val);
5473 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5476 else if (GET_CODE (XEXP (x, 1)) == REG)
5478 rtx temp = gen_reg_rtx (Pmode);
5479 rtx val = force_operand (XEXP (x, 0), temp);
5480 if (val != temp)
5481 emit_move_insn (temp, val);
5483 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5487 if (constant_term != const0_rtx)
5488 x = gen_rtx_PLUS (Pmode, x, constant_term);
5490 return x;
5493 /* Try a machine-dependent way of reloading an illegitimate address AD
5494 operand. If we find one, push the reload and return the new address.
5496 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5497 and TYPE is the reload type of the current reload. */
5500 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5501 int opnum, int type)
5503 if (!optimize || TARGET_LONG_DISPLACEMENT)
5504 return NULL_RTX;
5506 if (GET_CODE (ad) == PLUS)
5508 rtx tem = simplify_binary_operation (PLUS, Pmode,
5509 XEXP (ad, 0), XEXP (ad, 1));
5510 if (tem)
5511 ad = tem;
5514 if (GET_CODE (ad) == PLUS
5515 && GET_CODE (XEXP (ad, 0)) == REG
5516 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5517 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5519 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5520 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5521 rtx cst, tem, new_rtx;
5523 cst = GEN_INT (upper);
5524 if (!legitimate_reload_constant_p (cst))
5525 cst = force_const_mem (Pmode, cst);
5527 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5528 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5530 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5531 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5532 opnum, (enum reload_type) type);
5533 return new_rtx;
5536 return NULL_RTX;
5539 /* Emit code to move LEN bytes from DST to SRC. */
5541 bool
5542 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5544 /* When tuning for z10 or higher we rely on the Glibc functions to
5545 do the right thing. Only for constant lengths below 64k we will
5546 generate inline code. */
5547 if (s390_tune >= PROCESSOR_2097_Z10
5548 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5549 return false;
5551 /* Expand memcpy for constant length operands without a loop if it
5552 is shorter that way.
5554 With a constant length argument a
5555 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5556 if (GET_CODE (len) == CONST_INT
5557 && INTVAL (len) >= 0
5558 && INTVAL (len) <= 256 * 6
5559 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5561 HOST_WIDE_INT o, l;
5563 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5565 rtx newdst = adjust_address (dst, BLKmode, o);
5566 rtx newsrc = adjust_address (src, BLKmode, o);
5567 emit_insn (gen_cpymem_short (newdst, newsrc,
5568 GEN_INT (l > 256 ? 255 : l - 1)));
5572 else if (TARGET_MVCLE)
5574 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5577 else
5579 rtx dst_addr, src_addr, count, blocks, temp;
5580 rtx_code_label *loop_start_label = gen_label_rtx ();
5581 rtx_code_label *loop_end_label = gen_label_rtx ();
5582 rtx_code_label *end_label = gen_label_rtx ();
5583 machine_mode mode;
5585 mode = GET_MODE (len);
5586 if (mode == VOIDmode)
5587 mode = Pmode;
5589 dst_addr = gen_reg_rtx (Pmode);
5590 src_addr = gen_reg_rtx (Pmode);
5591 count = gen_reg_rtx (mode);
5592 blocks = gen_reg_rtx (mode);
5594 convert_move (count, len, 1);
5595 emit_cmp_and_jump_insns (count, const0_rtx,
5596 EQ, NULL_RTX, mode, 1, end_label);
5598 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5599 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5600 dst = change_address (dst, VOIDmode, dst_addr);
5601 src = change_address (src, VOIDmode, src_addr);
5603 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5604 OPTAB_DIRECT);
5605 if (temp != count)
5606 emit_move_insn (count, temp);
5608 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5609 OPTAB_DIRECT);
5610 if (temp != blocks)
5611 emit_move_insn (blocks, temp);
5613 emit_cmp_and_jump_insns (blocks, const0_rtx,
5614 EQ, NULL_RTX, mode, 1, loop_end_label);
5616 emit_label (loop_start_label);
5618 if (TARGET_Z10
5619 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5621 rtx prefetch;
5623 /* Issue a read prefetch for the +3 cache line. */
5624 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5625 const0_rtx, const0_rtx);
5626 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5627 emit_insn (prefetch);
5629 /* Issue a write prefetch for the +3 cache line. */
5630 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5631 const1_rtx, const0_rtx);
5632 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5633 emit_insn (prefetch);
5636 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5637 s390_load_address (dst_addr,
5638 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5639 s390_load_address (src_addr,
5640 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5642 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5643 OPTAB_DIRECT);
5644 if (temp != blocks)
5645 emit_move_insn (blocks, temp);
5647 emit_cmp_and_jump_insns (blocks, const0_rtx,
5648 EQ, NULL_RTX, mode, 1, loop_end_label);
5650 emit_jump (loop_start_label);
5651 emit_label (loop_end_label);
5653 emit_insn (gen_cpymem_short (dst, src,
5654 convert_to_mode (Pmode, count, 1)));
5655 emit_label (end_label);
5657 return true;
5660 /* Emit code to set LEN bytes at DST to VAL.
5661 Make use of clrmem if VAL is zero. */
5663 void
5664 s390_expand_setmem (rtx dst, rtx len, rtx val)
5666 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5667 return;
5669 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5671 /* Expand setmem/clrmem for a constant length operand without a
5672 loop if it will be shorter that way.
5673 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5674 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5675 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5676 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5677 if (GET_CODE (len) == CONST_INT
5678 && ((val == const0_rtx
5679 && (INTVAL (len) <= 256 * 4
5680 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5681 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5682 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5684 HOST_WIDE_INT o, l;
5686 if (val == const0_rtx)
5687 /* clrmem: emit 256 byte blockwise XCs. */
5688 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5690 rtx newdst = adjust_address (dst, BLKmode, o);
5691 emit_insn (gen_clrmem_short (newdst,
5692 GEN_INT (l > 256 ? 255 : l - 1)));
5694 else
5695 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5696 setting first byte to val and using a 256 byte mvc with one
5697 byte overlap to propagate the byte. */
5698 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5700 rtx newdst = adjust_address (dst, BLKmode, o);
5701 emit_move_insn (adjust_address (dst, QImode, o), val);
5702 if (l > 1)
5704 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5705 emit_insn (gen_cpymem_short (newdstp1, newdst,
5706 GEN_INT (l > 257 ? 255 : l - 2)));
5711 else if (TARGET_MVCLE)
5713 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5714 if (TARGET_64BIT)
5715 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5716 val));
5717 else
5718 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5719 val));
5722 else
5724 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5725 rtx_code_label *loop_start_label = gen_label_rtx ();
5726 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5727 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5728 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5729 machine_mode mode;
5731 mode = GET_MODE (len);
5732 if (mode == VOIDmode)
5733 mode = Pmode;
5735 dst_addr = gen_reg_rtx (Pmode);
5736 count = gen_reg_rtx (mode);
5737 blocks = gen_reg_rtx (mode);
5739 convert_move (count, len, 1);
5740 emit_cmp_and_jump_insns (count, const0_rtx,
5741 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5742 profile_probability::very_unlikely ());
5744 /* We need to make a copy of the target address since memset is
5745 supposed to return it unmodified. We have to make it here
5746 already since the new reg is used at onebyte_end_label. */
5747 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5748 dst = change_address (dst, VOIDmode, dst_addr);
5750 if (val != const0_rtx)
5752 /* When using the overlapping mvc the original target
5753 address is only accessed as single byte entity (even by
5754 the mvc reading this value). */
5755 set_mem_size (dst, 1);
5756 dstp1 = adjust_address (dst, VOIDmode, 1);
5757 emit_cmp_and_jump_insns (count,
5758 const1_rtx, EQ, NULL_RTX, mode, 1,
5759 onebyte_end_label,
5760 profile_probability::very_unlikely ());
5763 /* There is one unconditional (mvi+mvc)/xc after the loop
5764 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5765 or one (xc) here leaves this number of bytes to be handled by
5766 it. */
5767 temp = expand_binop (mode, add_optab, count,
5768 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5769 count, 1, OPTAB_DIRECT);
5770 if (temp != count)
5771 emit_move_insn (count, temp);
5773 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5774 OPTAB_DIRECT);
5775 if (temp != blocks)
5776 emit_move_insn (blocks, temp);
5778 emit_cmp_and_jump_insns (blocks, const0_rtx,
5779 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5781 emit_jump (loop_start_label);
5783 if (val != const0_rtx)
5785 /* The 1 byte != 0 special case. Not handled efficiently
5786 since we require two jumps for that. However, this
5787 should be very rare. */
5788 emit_label (onebyte_end_label);
5789 emit_move_insn (adjust_address (dst, QImode, 0), val);
5790 emit_jump (zerobyte_end_label);
5793 emit_label (loop_start_label);
5795 if (TARGET_SETMEM_PFD (val, len))
5797 /* Issue a write prefetch. */
5798 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5799 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5800 const1_rtx, const0_rtx);
5801 emit_insn (prefetch);
5802 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5805 if (val == const0_rtx)
5806 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5807 else
5809 /* Set the first byte in the block to the value and use an
5810 overlapping mvc for the block. */
5811 emit_move_insn (adjust_address (dst, QImode, 0), val);
5812 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5814 s390_load_address (dst_addr,
5815 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5817 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5818 OPTAB_DIRECT);
5819 if (temp != blocks)
5820 emit_move_insn (blocks, temp);
5822 emit_cmp_and_jump_insns (blocks, const0_rtx,
5823 NE, NULL_RTX, mode, 1, loop_start_label);
5825 emit_label (restbyte_end_label);
5827 if (val == const0_rtx)
5828 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5829 else
5831 /* Set the first byte in the block to the value and use an
5832 overlapping mvc for the block. */
5833 emit_move_insn (adjust_address (dst, QImode, 0), val);
5834 /* execute only uses the lowest 8 bits of count that's
5835 exactly what we need here. */
5836 emit_insn (gen_cpymem_short (dstp1, dst,
5837 convert_to_mode (Pmode, count, 1)));
5840 emit_label (zerobyte_end_label);
5844 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5845 and return the result in TARGET. */
5847 bool
5848 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5850 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5851 rtx tmp;
5853 /* When tuning for z10 or higher we rely on the Glibc functions to
5854 do the right thing. Only for constant lengths below 64k we will
5855 generate inline code. */
5856 if (s390_tune >= PROCESSOR_2097_Z10
5857 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5858 return false;
5860 /* As the result of CMPINT is inverted compared to what we need,
5861 we have to swap the operands. */
5862 tmp = op0; op0 = op1; op1 = tmp;
5864 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5866 if (INTVAL (len) > 0)
5868 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5869 emit_insn (gen_cmpint (target, ccreg));
5871 else
5872 emit_move_insn (target, const0_rtx);
5874 else if (TARGET_MVCLE)
5876 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5877 emit_insn (gen_cmpint (target, ccreg));
5879 else
5881 rtx addr0, addr1, count, blocks, temp;
5882 rtx_code_label *loop_start_label = gen_label_rtx ();
5883 rtx_code_label *loop_end_label = gen_label_rtx ();
5884 rtx_code_label *end_label = gen_label_rtx ();
5885 machine_mode mode;
5887 mode = GET_MODE (len);
5888 if (mode == VOIDmode)
5889 mode = Pmode;
5891 addr0 = gen_reg_rtx (Pmode);
5892 addr1 = gen_reg_rtx (Pmode);
5893 count = gen_reg_rtx (mode);
5894 blocks = gen_reg_rtx (mode);
5896 convert_move (count, len, 1);
5897 emit_cmp_and_jump_insns (count, const0_rtx,
5898 EQ, NULL_RTX, mode, 1, end_label);
5900 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5901 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5902 op0 = change_address (op0, VOIDmode, addr0);
5903 op1 = change_address (op1, VOIDmode, addr1);
5905 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5906 OPTAB_DIRECT);
5907 if (temp != count)
5908 emit_move_insn (count, temp);
5910 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5911 OPTAB_DIRECT);
5912 if (temp != blocks)
5913 emit_move_insn (blocks, temp);
5915 emit_cmp_and_jump_insns (blocks, const0_rtx,
5916 EQ, NULL_RTX, mode, 1, loop_end_label);
5918 emit_label (loop_start_label);
5920 if (TARGET_Z10
5921 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5923 rtx prefetch;
5925 /* Issue a read prefetch for the +2 cache line of operand 1. */
5926 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5927 const0_rtx, const0_rtx);
5928 emit_insn (prefetch);
5929 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5931 /* Issue a read prefetch for the +2 cache line of operand 2. */
5932 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5933 const0_rtx, const0_rtx);
5934 emit_insn (prefetch);
5935 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5938 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5939 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5940 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5941 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5942 temp = gen_rtx_SET (pc_rtx, temp);
5943 emit_jump_insn (temp);
5945 s390_load_address (addr0,
5946 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5947 s390_load_address (addr1,
5948 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5950 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5951 OPTAB_DIRECT);
5952 if (temp != blocks)
5953 emit_move_insn (blocks, temp);
5955 emit_cmp_and_jump_insns (blocks, const0_rtx,
5956 EQ, NULL_RTX, mode, 1, loop_end_label);
5958 emit_jump (loop_start_label);
5959 emit_label (loop_end_label);
5961 emit_insn (gen_cmpmem_short (op0, op1,
5962 convert_to_mode (Pmode, count, 1)));
5963 emit_label (end_label);
5965 emit_insn (gen_cmpint (target, ccreg));
5967 return true;
5970 /* Emit a conditional jump to LABEL for condition code mask MASK using
5971 comparsion operator COMPARISON. Return the emitted jump insn. */
5973 static rtx_insn *
5974 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5976 rtx temp;
5978 gcc_assert (comparison == EQ || comparison == NE);
5979 gcc_assert (mask > 0 && mask < 15);
5981 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5982 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5983 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5984 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5985 temp = gen_rtx_SET (pc_rtx, temp);
5986 return emit_jump_insn (temp);
5989 /* Emit the instructions to implement strlen of STRING and store the
5990 result in TARGET. The string has the known ALIGNMENT. This
5991 version uses vector instructions and is therefore not appropriate
5992 for targets prior to z13. */
5994 void
5995 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5997 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5998 rtx str_reg = gen_reg_rtx (V16QImode);
5999 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
6000 rtx str_idx_reg = gen_reg_rtx (Pmode);
6001 rtx result_reg = gen_reg_rtx (V16QImode);
6002 rtx is_aligned_label = gen_label_rtx ();
6003 rtx into_loop_label = NULL_RTX;
6004 rtx loop_start_label = gen_label_rtx ();
6005 rtx temp;
6006 rtx len = gen_reg_rtx (QImode);
6007 rtx cond;
6008 rtx mem;
6010 s390_load_address (str_addr_base_reg, XEXP (string, 0));
6011 emit_move_insn (str_idx_reg, const0_rtx);
6013 if (INTVAL (alignment) < 16)
6015 /* Check whether the address happens to be aligned properly so
6016 jump directly to the aligned loop. */
6017 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
6018 str_addr_base_reg, GEN_INT (15)),
6019 const0_rtx, EQ, NULL_RTX,
6020 Pmode, 1, is_aligned_label);
6022 temp = gen_reg_rtx (Pmode);
6023 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
6024 GEN_INT (15), temp, 1, OPTAB_DIRECT);
6025 gcc_assert (REG_P (temp));
6026 highest_index_to_load_reg =
6027 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
6028 highest_index_to_load_reg, 1, OPTAB_DIRECT);
6029 gcc_assert (REG_P (highest_index_to_load_reg));
6030 emit_insn (gen_vllv16qi (str_reg,
6031 convert_to_mode (SImode, highest_index_to_load_reg, 1),
6032 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
6034 into_loop_label = gen_label_rtx ();
6035 s390_emit_jump (into_loop_label, NULL_RTX);
6036 emit_barrier ();
6039 emit_label (is_aligned_label);
6040 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
6042 /* Reaching this point we are only performing 16 bytes aligned
6043 loads. */
6044 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
6046 emit_label (loop_start_label);
6047 LABEL_NUSES (loop_start_label) = 1;
6049 /* Load 16 bytes of the string into VR. */
6050 mem = gen_rtx_MEM (V16QImode,
6051 gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
6052 set_mem_align (mem, 128);
6053 emit_move_insn (str_reg, mem);
6054 if (into_loop_label != NULL_RTX)
6056 emit_label (into_loop_label);
6057 LABEL_NUSES (into_loop_label) = 1;
6060 /* Increment string index by 16 bytes. */
6061 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
6062 str_idx_reg, 1, OPTAB_DIRECT);
6064 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
6065 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6067 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
6068 REG_BR_PROB,
6069 profile_probability::very_likely ().to_reg_br_prob_note ());
6070 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
6072 /* If the string pointer wasn't aligned we have loaded less then 16
6073 bytes and the remaining bytes got filled with zeros (by vll).
6074 Now we have to check whether the resulting index lies within the
6075 bytes actually part of the string. */
6077 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
6078 highest_index_to_load_reg);
6079 s390_load_address (highest_index_to_load_reg,
6080 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6081 const1_rtx));
6082 if (TARGET_64BIT)
6083 emit_insn (gen_movdicc (str_idx_reg, cond,
6084 highest_index_to_load_reg, str_idx_reg));
6085 else
6086 emit_insn (gen_movsicc (str_idx_reg, cond,
6087 highest_index_to_load_reg, str_idx_reg));
6089 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6090 profile_probability::very_unlikely ());
6092 expand_binop (Pmode, add_optab, str_idx_reg,
6093 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6094 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6095 here. */
6096 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6097 convert_to_mode (Pmode, len, 1),
6098 target, 1, OPTAB_DIRECT);
6099 if (temp != target)
6100 emit_move_insn (target, temp);
6103 void
6104 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6106 rtx temp = gen_reg_rtx (Pmode);
6107 rtx src_addr = XEXP (src, 0);
6108 rtx dst_addr = XEXP (dst, 0);
6109 rtx src_addr_reg = gen_reg_rtx (Pmode);
6110 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6111 rtx offset = gen_reg_rtx (Pmode);
6112 rtx vsrc = gen_reg_rtx (V16QImode);
6113 rtx vpos = gen_reg_rtx (V16QImode);
6114 rtx loadlen = gen_reg_rtx (SImode);
6115 rtx gpos_qi = gen_reg_rtx(QImode);
6116 rtx gpos = gen_reg_rtx (SImode);
6117 rtx done_label = gen_label_rtx ();
6118 rtx loop_label = gen_label_rtx ();
6119 rtx exit_label = gen_label_rtx ();
6120 rtx full_label = gen_label_rtx ();
6122 /* Perform a quick check for string ending on the first up to 16
6123 bytes and exit early if successful. */
6125 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6126 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6127 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6128 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6129 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6130 /* gpos is the byte index if a zero was found and 16 otherwise.
6131 So if it is lower than the loaded bytes we have a hit. */
6132 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6133 full_label);
6134 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6136 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6137 1, OPTAB_DIRECT);
6138 emit_jump (exit_label);
6139 emit_barrier ();
6141 emit_label (full_label);
6142 LABEL_NUSES (full_label) = 1;
6144 /* Calculate `offset' so that src + offset points to the last byte
6145 before 16 byte alignment. */
6147 /* temp = src_addr & 0xf */
6148 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6149 1, OPTAB_DIRECT);
6151 /* offset = 0xf - temp */
6152 emit_move_insn (offset, GEN_INT (15));
6153 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6154 1, OPTAB_DIRECT);
6156 /* Store `offset' bytes in the dstination string. The quick check
6157 has loaded at least `offset' bytes into vsrc. */
6159 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6161 /* Advance to the next byte to be loaded. */
6162 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6163 1, OPTAB_DIRECT);
6165 /* Make sure the addresses are single regs which can be used as a
6166 base. */
6167 emit_move_insn (src_addr_reg, src_addr);
6168 emit_move_insn (dst_addr_reg, dst_addr);
6170 /* MAIN LOOP */
6172 emit_label (loop_label);
6173 LABEL_NUSES (loop_label) = 1;
6175 emit_move_insn (vsrc,
6176 gen_rtx_MEM (V16QImode,
6177 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6179 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6180 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6181 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6182 REG_BR_PROB, profile_probability::very_unlikely ()
6183 .to_reg_br_prob_note ());
6185 emit_move_insn (gen_rtx_MEM (V16QImode,
6186 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6187 vsrc);
6188 /* offset += 16 */
6189 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6190 offset, 1, OPTAB_DIRECT);
6192 emit_jump (loop_label);
6193 emit_barrier ();
6195 /* REGULAR EXIT */
6197 /* We are done. Add the offset of the zero character to the dst_addr
6198 pointer to get the result. */
6200 emit_label (done_label);
6201 LABEL_NUSES (done_label) = 1;
6203 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6204 1, OPTAB_DIRECT);
6206 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6207 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6209 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6211 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6212 1, OPTAB_DIRECT);
6214 /* EARLY EXIT */
6216 emit_label (exit_label);
6217 LABEL_NUSES (exit_label) = 1;
6221 /* Expand conditional increment or decrement using alc/slb instructions.
6222 Should generate code setting DST to either SRC or SRC + INCREMENT,
6223 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6224 Returns true if successful, false otherwise.
6226 That makes it possible to implement some if-constructs without jumps e.g.:
6227 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6228 unsigned int a, b, c;
6229 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6230 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6231 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6232 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6234 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6235 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6236 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6237 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6238 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6240 bool
6241 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6242 rtx dst, rtx src, rtx increment)
6244 machine_mode cmp_mode;
6245 machine_mode cc_mode;
6246 rtx op_res;
6247 rtx insn;
6248 rtvec p;
6249 int ret;
6251 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6252 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6253 cmp_mode = SImode;
6254 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6255 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6256 cmp_mode = DImode;
6257 else
6258 return false;
6260 /* Try ADD LOGICAL WITH CARRY. */
6261 if (increment == const1_rtx)
6263 /* Determine CC mode to use. */
6264 if (cmp_code == EQ || cmp_code == NE)
6266 if (cmp_op1 != const0_rtx)
6268 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6269 NULL_RTX, 0, OPTAB_WIDEN);
6270 cmp_op1 = const0_rtx;
6273 cmp_code = cmp_code == EQ ? LEU : GTU;
6276 if (cmp_code == LTU || cmp_code == LEU)
6278 rtx tem = cmp_op0;
6279 cmp_op0 = cmp_op1;
6280 cmp_op1 = tem;
6281 cmp_code = swap_condition (cmp_code);
6284 switch (cmp_code)
6286 case GTU:
6287 cc_mode = CCUmode;
6288 break;
6290 case GEU:
6291 cc_mode = CCL3mode;
6292 break;
6294 default:
6295 return false;
6298 /* Emit comparison instruction pattern. */
6299 if (!register_operand (cmp_op0, cmp_mode))
6300 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6302 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6303 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6304 /* We use insn_invalid_p here to add clobbers if required. */
6305 ret = insn_invalid_p (emit_insn (insn), false);
6306 gcc_assert (!ret);
6308 /* Emit ALC instruction pattern. */
6309 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6310 gen_rtx_REG (cc_mode, CC_REGNUM),
6311 const0_rtx);
6313 if (src != const0_rtx)
6315 if (!register_operand (src, GET_MODE (dst)))
6316 src = force_reg (GET_MODE (dst), src);
6318 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6319 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6322 p = rtvec_alloc (2);
6323 RTVEC_ELT (p, 0) =
6324 gen_rtx_SET (dst, op_res);
6325 RTVEC_ELT (p, 1) =
6326 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6327 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6329 return true;
6332 /* Try SUBTRACT LOGICAL WITH BORROW. */
6333 if (increment == constm1_rtx)
6335 /* Determine CC mode to use. */
6336 if (cmp_code == EQ || cmp_code == NE)
6338 if (cmp_op1 != const0_rtx)
6340 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6341 NULL_RTX, 0, OPTAB_WIDEN);
6342 cmp_op1 = const0_rtx;
6345 cmp_code = cmp_code == EQ ? LEU : GTU;
6348 if (cmp_code == GTU || cmp_code == GEU)
6350 rtx tem = cmp_op0;
6351 cmp_op0 = cmp_op1;
6352 cmp_op1 = tem;
6353 cmp_code = swap_condition (cmp_code);
6356 switch (cmp_code)
6358 case LEU:
6359 cc_mode = CCUmode;
6360 break;
6362 case LTU:
6363 cc_mode = CCL3mode;
6364 break;
6366 default:
6367 return false;
6370 /* Emit comparison instruction pattern. */
6371 if (!register_operand (cmp_op0, cmp_mode))
6372 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6374 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6375 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6376 /* We use insn_invalid_p here to add clobbers if required. */
6377 ret = insn_invalid_p (emit_insn (insn), false);
6378 gcc_assert (!ret);
6380 /* Emit SLB instruction pattern. */
6381 if (!register_operand (src, GET_MODE (dst)))
6382 src = force_reg (GET_MODE (dst), src);
6384 op_res = gen_rtx_MINUS (GET_MODE (dst),
6385 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6386 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6387 gen_rtx_REG (cc_mode, CC_REGNUM),
6388 const0_rtx));
6389 p = rtvec_alloc (2);
6390 RTVEC_ELT (p, 0) =
6391 gen_rtx_SET (dst, op_res);
6392 RTVEC_ELT (p, 1) =
6393 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6394 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6396 return true;
6399 return false;
6402 /* Expand code for the insv template. Return true if successful. */
6404 bool
6405 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6407 int bitsize = INTVAL (op1);
6408 int bitpos = INTVAL (op2);
6409 machine_mode mode = GET_MODE (dest);
6410 machine_mode smode;
6411 int smode_bsize, mode_bsize;
6412 rtx op, clobber;
6414 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6415 return false;
6417 /* Generate INSERT IMMEDIATE (IILL et al). */
6418 /* (set (ze (reg)) (const_int)). */
6419 if (TARGET_ZARCH
6420 && register_operand (dest, word_mode)
6421 && (bitpos % 16) == 0
6422 && (bitsize % 16) == 0
6423 && const_int_operand (src, VOIDmode))
6425 HOST_WIDE_INT val = INTVAL (src);
6426 int regpos = bitpos + bitsize;
6428 while (regpos > bitpos)
6430 machine_mode putmode;
6431 int putsize;
6433 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6434 putmode = SImode;
6435 else
6436 putmode = HImode;
6438 putsize = GET_MODE_BITSIZE (putmode);
6439 regpos -= putsize;
6440 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6441 GEN_INT (putsize),
6442 GEN_INT (regpos)),
6443 gen_int_mode (val, putmode));
6444 val >>= putsize;
6446 gcc_assert (regpos == bitpos);
6447 return true;
6450 smode = smallest_int_mode_for_size (bitsize);
6451 smode_bsize = GET_MODE_BITSIZE (smode);
6452 mode_bsize = GET_MODE_BITSIZE (mode);
6454 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6455 if (bitpos == 0
6456 && (bitsize % BITS_PER_UNIT) == 0
6457 && MEM_P (dest)
6458 && (register_operand (src, word_mode)
6459 || const_int_operand (src, VOIDmode)))
6461 /* Emit standard pattern if possible. */
6462 if (smode_bsize == bitsize)
6464 emit_move_insn (adjust_address (dest, smode, 0),
6465 gen_lowpart (smode, src));
6466 return true;
6469 /* (set (ze (mem)) (const_int)). */
6470 else if (const_int_operand (src, VOIDmode))
6472 int size = bitsize / BITS_PER_UNIT;
6473 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6474 BLKmode,
6475 UNITS_PER_WORD - size);
6477 dest = adjust_address (dest, BLKmode, 0);
6478 set_mem_size (dest, size);
6479 s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6480 return true;
6483 /* (set (ze (mem)) (reg)). */
6484 else if (register_operand (src, word_mode))
6486 if (bitsize <= 32)
6487 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6488 const0_rtx), src);
6489 else
6491 /* Emit st,stcmh sequence. */
6492 int stcmh_width = bitsize - 32;
6493 int size = stcmh_width / BITS_PER_UNIT;
6495 emit_move_insn (adjust_address (dest, SImode, size),
6496 gen_lowpart (SImode, src));
6497 set_mem_size (dest, size);
6498 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6499 GEN_INT (stcmh_width),
6500 const0_rtx),
6501 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6503 return true;
6507 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6508 if ((bitpos % BITS_PER_UNIT) == 0
6509 && (bitsize % BITS_PER_UNIT) == 0
6510 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6511 && MEM_P (src)
6512 && (mode == DImode || mode == SImode)
6513 && register_operand (dest, mode))
6515 /* Emit a strict_low_part pattern if possible. */
6516 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6518 rtx low_dest = gen_lowpart (smode, dest);
6519 rtx low_src = gen_lowpart (smode, src);
6521 switch (smode)
6523 case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
6524 case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
6525 case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
6526 default: break;
6530 /* ??? There are more powerful versions of ICM that are not
6531 completely represented in the md file. */
6534 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6535 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6537 machine_mode mode_s = GET_MODE (src);
6539 if (CONSTANT_P (src))
6541 /* For constant zero values the representation with AND
6542 appears to be folded in more situations than the (set
6543 (zero_extract) ...).
6544 We only do this when the start and end of the bitfield
6545 remain in the same SImode chunk. That way nihf or nilf
6546 can be used.
6547 The AND patterns might still generate a risbg for this. */
6548 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6549 return false;
6550 else
6551 src = force_reg (mode, src);
6553 else if (mode_s != mode)
6555 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6556 src = force_reg (mode_s, src);
6557 src = gen_lowpart (mode, src);
6560 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6561 op = gen_rtx_SET (op, src);
6563 if (!TARGET_ZEC12)
6565 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6566 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6568 emit_insn (op);
6570 return true;
6573 return false;
6576 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6577 register that holds VAL of mode MODE shifted by COUNT bits. */
6579 static inline rtx
6580 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6582 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6583 NULL_RTX, 1, OPTAB_DIRECT);
6584 return expand_simple_binop (SImode, ASHIFT, val, count,
6585 NULL_RTX, 1, OPTAB_DIRECT);
6588 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6589 the result in TARGET. */
6591 void
6592 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6593 rtx cmp_op1, rtx cmp_op2)
6595 machine_mode mode = GET_MODE (target);
6596 bool neg_p = false, swap_p = false;
6597 rtx tmp;
6599 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6601 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
6602 switch (cond)
6604 /* NE a != b -> !(a == b) */
6605 case NE: cond = EQ; neg_p = true; break;
6606 case UNGT:
6607 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6608 return;
6609 case UNGE:
6610 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6611 return;
6612 case LE: cond = GE; swap_p = true; break;
6613 /* UNLE: (a u<= b) -> (b u>= a). */
6614 case UNLE:
6615 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6616 return;
6617 /* LT: a < b -> b > a */
6618 case LT: cond = GT; swap_p = true; break;
6619 /* UNLT: (a u< b) -> (b u> a). */
6620 case UNLT:
6621 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6622 return;
6623 case UNEQ:
6624 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6625 return;
6626 case LTGT:
6627 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6628 return;
6629 case ORDERED:
6630 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6631 return;
6632 case UNORDERED:
6633 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6634 return;
6635 default: break;
6638 else
6640 /* Turn x < 0 into x >> (bits per element - 1) */
6641 if (cond == LT && cmp_op2 == CONST0_RTX (mode))
6643 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
6644 rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
6645 GEN_INT (shift), target,
6646 0, OPTAB_DIRECT);
6647 if (res != target)
6648 emit_move_insn (target, res);
6649 return;
6651 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
6653 switch (cond)
6655 /* NE: a != b -> !(a == b) */
6656 case NE: cond = EQ; neg_p = true; break;
6657 /* GE: a >= b -> !(b > a) */
6658 case GE: cond = GT; neg_p = true; swap_p = true; break;
6659 /* GEU: a >= b -> !(b > a) */
6660 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6661 /* LE: a <= b -> !(a > b) */
6662 case LE: cond = GT; neg_p = true; break;
6663 /* LEU: a <= b -> !(a > b) */
6664 case LEU: cond = GTU; neg_p = true; break;
6665 /* LT: a < b -> b > a */
6666 case LT: cond = GT; swap_p = true; break;
6667 /* LTU: a < b -> b > a */
6668 case LTU: cond = GTU; swap_p = true; break;
6669 default: break;
6673 if (swap_p)
6675 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6678 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6679 mode,
6680 cmp_op1, cmp_op2)));
6681 if (neg_p)
6682 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6685 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6686 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6687 elements in CMP1 and CMP2 fulfill the comparison.
6688 This function is only used to emit patterns for the vx builtins and
6689 therefore only handles comparison codes required by the
6690 builtins. */
6691 void
6692 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6693 rtx cmp1, rtx cmp2, bool all_p)
6695 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6696 rtx tmp_reg = gen_reg_rtx (SImode);
6697 bool swap_p = false;
6699 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6701 switch (code)
6703 case EQ:
6704 case NE:
6705 cc_producer_mode = CCVEQmode;
6706 break;
6707 case GE:
6708 case LT:
6709 code = swap_condition (code);
6710 swap_p = true;
6711 /* fallthrough */
6712 case GT:
6713 case LE:
6714 cc_producer_mode = CCVIHmode;
6715 break;
6716 case GEU:
6717 case LTU:
6718 code = swap_condition (code);
6719 swap_p = true;
6720 /* fallthrough */
6721 case GTU:
6722 case LEU:
6723 cc_producer_mode = CCVIHUmode;
6724 break;
6725 default:
6726 gcc_unreachable ();
6729 scratch_mode = GET_MODE (cmp1);
6730 /* These codes represent inverted CC interpretations. Inverting
6731 an ALL CC mode results in an ANY CC mode and the other way
6732 around. Invert the all_p flag here to compensate for
6733 that. */
6734 if (code == NE || code == LE || code == LEU)
6735 all_p = !all_p;
6737 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6739 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6741 bool inv_p = false;
6743 switch (code)
6745 case EQ: cc_producer_mode = CCVEQmode; break;
6746 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6747 case GT: cc_producer_mode = CCVFHmode; break;
6748 case GE: cc_producer_mode = CCVFHEmode; break;
6749 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6750 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6751 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6752 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6753 default: gcc_unreachable ();
6755 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6757 if (inv_p)
6758 all_p = !all_p;
6760 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6762 else
6763 gcc_unreachable ();
6765 if (swap_p)
6767 rtx tmp = cmp2;
6768 cmp2 = cmp1;
6769 cmp1 = tmp;
6772 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6773 gen_rtvec (2, gen_rtx_SET (
6774 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6775 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6776 gen_rtx_CLOBBER (VOIDmode,
6777 gen_rtx_SCRATCH (scratch_mode)))));
6778 emit_move_insn (target, const0_rtx);
6779 emit_move_insn (tmp_reg, const1_rtx);
6781 emit_move_insn (target,
6782 gen_rtx_IF_THEN_ELSE (SImode,
6783 gen_rtx_fmt_ee (code, VOIDmode,
6784 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6785 const0_rtx),
6786 tmp_reg, target));
6789 /* Invert the comparison CODE applied to a CC mode. This is only safe
6790 if we know whether there result was created by a floating point
6791 compare or not. For the CCV modes this is encoded as part of the
6792 mode. */
6793 enum rtx_code
6794 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6796 /* Reversal of FP compares takes care -- an ordered compare
6797 becomes an unordered compare and vice versa. */
6798 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6799 return reverse_condition_maybe_unordered (code);
6800 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6801 return reverse_condition (code);
6802 else
6803 gcc_unreachable ();
6806 /* Generate a vector comparison expression loading either elements of
6807 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6808 and CMP_OP2. */
6810 void
6811 s390_expand_vcond (rtx target, rtx then, rtx els,
6812 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6814 rtx tmp;
6815 machine_mode result_mode;
6816 rtx result_target;
6818 machine_mode target_mode = GET_MODE (target);
6819 machine_mode cmp_mode = GET_MODE (cmp_op1);
6820 rtx op = (cond == LT) ? els : then;
6822 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6823 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6824 for short and byte (x >> 15 and x >> 7 respectively). */
6825 if ((cond == LT || cond == GE)
6826 && target_mode == cmp_mode
6827 && cmp_op2 == CONST0_RTX (cmp_mode)
6828 && op == CONST0_RTX (target_mode)
6829 && s390_vector_mode_supported_p (target_mode)
6830 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6832 rtx negop = (cond == LT) ? then : els;
6834 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6836 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6837 if (negop == CONST1_RTX (target_mode))
6839 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6840 GEN_INT (shift), target,
6841 1, OPTAB_DIRECT);
6842 if (res != target)
6843 emit_move_insn (target, res);
6844 return;
6847 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6848 else if (all_ones_operand (negop, target_mode))
6850 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6851 GEN_INT (shift), target,
6852 0, OPTAB_DIRECT);
6853 if (res != target)
6854 emit_move_insn (target, res);
6855 return;
6859 /* We always use an integral type vector to hold the comparison
6860 result. */
6861 result_mode = related_int_vector_mode (cmp_mode).require ();
6862 result_target = gen_reg_rtx (result_mode);
6864 /* We allow vector immediates as comparison operands that
6865 can be handled by the optimization above but not by the
6866 following code. Hence, force them into registers here. */
6867 if (!REG_P (cmp_op1))
6868 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6870 s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
6872 /* If the results are supposed to be either -1 or 0 we are done
6873 since this is what our compare instructions generate anyway. */
6874 if (all_ones_operand (then, GET_MODE (then))
6875 && const0_operand (els, GET_MODE (els)))
6877 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6878 result_target, 0));
6879 return;
6882 /* Otherwise we will do a vsel afterwards. */
6883 /* This gets triggered e.g.
6884 with gcc.c-torture/compile/pr53410-1.c */
6885 if (!REG_P (then))
6886 then = force_reg (target_mode, then);
6888 if (!REG_P (els))
6889 els = force_reg (target_mode, els);
6891 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6892 result_target,
6893 CONST0_RTX (result_mode));
6895 /* We compared the result against zero above so we have to swap then
6896 and els here. */
6897 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6899 gcc_assert (target_mode == GET_MODE (then));
6900 emit_insn (gen_rtx_SET (target, tmp));
6903 /* Emit the RTX necessary to initialize the vector TARGET with values
6904 in VALS. */
6905 void
6906 s390_expand_vec_init (rtx target, rtx vals)
6908 machine_mode mode = GET_MODE (target);
6909 machine_mode inner_mode = GET_MODE_INNER (mode);
6910 int n_elts = GET_MODE_NUNITS (mode);
6911 bool all_same = true, all_regs = true, all_const_int = true;
6912 rtx x;
6913 int i;
6915 for (i = 0; i < n_elts; ++i)
6917 x = XVECEXP (vals, 0, i);
6919 if (!CONST_INT_P (x))
6920 all_const_int = false;
6922 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6923 all_same = false;
6925 if (!REG_P (x))
6926 all_regs = false;
6929 /* Use vector gen mask or vector gen byte mask if possible. */
6930 if (all_same && all_const_int)
6932 rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6933 if (XVECEXP (vals, 0, 0) == const0_rtx
6934 || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
6935 || s390_bytemask_vector_p (vec, NULL))
6937 emit_insn (gen_rtx_SET (target, vec));
6938 return;
6942 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6943 if (all_same)
6945 rtx elem = XVECEXP (vals, 0, 0);
6947 /* vec_splats accepts general_operand as source. */
6948 if (!general_operand (elem, GET_MODE (elem)))
6949 elem = force_reg (inner_mode, elem);
6951 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6952 return;
6955 if (all_regs
6956 && REG_P (target)
6957 && n_elts == 2
6958 && GET_MODE_SIZE (inner_mode) == 8)
6960 /* Use vector load pair. */
6961 emit_insn (gen_rtx_SET (target,
6962 gen_rtx_VEC_CONCAT (mode,
6963 XVECEXP (vals, 0, 0),
6964 XVECEXP (vals, 0, 1))));
6965 return;
6968 /* Use vector load logical element and zero. */
6969 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6971 bool found = true;
6973 x = XVECEXP (vals, 0, 0);
6974 if (memory_operand (x, inner_mode))
6976 for (i = 1; i < n_elts; ++i)
6977 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6979 if (found)
6981 machine_mode half_mode = (inner_mode == SFmode
6982 ? V2SFmode : V2SImode);
6983 emit_insn (gen_rtx_SET (target,
6984 gen_rtx_VEC_CONCAT (mode,
6985 gen_rtx_VEC_CONCAT (half_mode,
6987 const0_rtx),
6988 gen_rtx_VEC_CONCAT (half_mode,
6989 const0_rtx,
6990 const0_rtx))));
6991 return;
6996 /* We are about to set the vector elements one by one. Zero out the
6997 full register first in order to help the data flow framework to
6998 detect it as full VR set. */
6999 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
7001 /* Unfortunately the vec_init expander is not allowed to fail. So
7002 we have to implement the fallback ourselves. */
7003 for (i = 0; i < n_elts; i++)
7005 rtx elem = XVECEXP (vals, 0, i);
7006 if (!general_operand (elem, GET_MODE (elem)))
7007 elem = force_reg (inner_mode, elem);
7009 emit_insn (gen_rtx_SET (target,
7010 gen_rtx_UNSPEC (mode,
7011 gen_rtvec (3, elem,
7012 GEN_INT (i), target),
7013 UNSPEC_VEC_SET)));
7017 /* Emit a vector constant that contains 1s in each element's sign bit position
7018 and 0s in other positions. MODE is the desired constant's mode. */
7019 extern rtx
7020 s390_build_signbit_mask (machine_mode mode)
7022 if (mode == TFmode && TARGET_VXE)
7024 wide_int mask_val = wi::set_bit_in_zero (127, 128);
7025 rtx mask = immed_wide_int_const (mask_val, TImode);
7026 return gen_lowpart (TFmode, mask);
7029 /* Generate the integral element mask value. */
7030 machine_mode inner_mode = GET_MODE_INNER (mode);
7031 int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
7032 wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
7034 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
7035 value to the desired mode. */
7036 machine_mode int_mode = related_int_vector_mode (mode).require ();
7037 rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
7038 mask = gen_lowpart (inner_mode, mask);
7040 /* Emit the vector mask rtx by mode the element mask rtx. */
7041 int nunits = GET_MODE_NUNITS (mode);
7042 rtvec v = rtvec_alloc (nunits);
7043 for (int i = 0; i < nunits; i++)
7044 RTVEC_ELT (v, i) = mask;
7045 return gen_rtx_CONST_VECTOR (mode, v);
7048 /* Structure to hold the initial parameters for a compare_and_swap operation
7049 in HImode and QImode. */
7051 struct alignment_context
7053 rtx memsi; /* SI aligned memory location. */
7054 rtx shift; /* Bit offset with regard to lsb. */
7055 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
7056 rtx modemaski; /* ~modemask */
7057 bool aligned; /* True if memory is aligned, false else. */
7060 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
7061 structure AC for transparent simplifying, if the memory alignment is known
7062 to be at least 32bit. MEM is the memory location for the actual operation
7063 and MODE its mode. */
7065 static void
7066 init_alignment_context (struct alignment_context *ac, rtx mem,
7067 machine_mode mode)
7069 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
7070 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
7072 if (ac->aligned)
7073 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
7074 else
7076 /* Alignment is unknown. */
7077 rtx byteoffset, addr, align;
7079 /* Force the address into a register. */
7080 addr = force_reg (Pmode, XEXP (mem, 0));
7082 /* Align it to SImode. */
7083 align = expand_simple_binop (Pmode, AND, addr,
7084 GEN_INT (-GET_MODE_SIZE (SImode)),
7085 NULL_RTX, 1, OPTAB_DIRECT);
7086 /* Generate MEM. */
7087 ac->memsi = gen_rtx_MEM (SImode, align);
7088 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
7089 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
7090 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
7092 /* Calculate shiftcount. */
7093 byteoffset = expand_simple_binop (Pmode, AND, addr,
7094 GEN_INT (GET_MODE_SIZE (SImode) - 1),
7095 NULL_RTX, 1, OPTAB_DIRECT);
7096 /* As we already have some offset, evaluate the remaining distance. */
7097 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
7098 NULL_RTX, 1, OPTAB_DIRECT);
7101 /* Shift is the byte count, but we need the bitcount. */
7102 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
7103 NULL_RTX, 1, OPTAB_DIRECT);
7105 /* Calculate masks. */
7106 ac->modemask = expand_simple_binop (SImode, ASHIFT,
7107 GEN_INT (GET_MODE_MASK (mode)),
7108 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
7109 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
7110 NULL_RTX, 1);
7113 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7114 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7115 perform the merge in SEQ2. */
7117 static rtx
7118 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
7119 machine_mode mode, rtx val, rtx ins)
7121 rtx tmp;
7123 if (ac->aligned)
7125 start_sequence ();
7126 tmp = copy_to_mode_reg (SImode, val);
7127 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7128 const0_rtx, ins))
7130 *seq1 = NULL;
7131 *seq2 = get_insns ();
7132 end_sequence ();
7133 return tmp;
7135 end_sequence ();
7138 /* Failed to use insv. Generate a two part shift and mask. */
7139 start_sequence ();
7140 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7141 *seq1 = get_insns ();
7142 end_sequence ();
7144 start_sequence ();
7145 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7146 *seq2 = get_insns ();
7147 end_sequence ();
7149 return tmp;
7152 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7153 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7154 value to set if CMP == MEM. */
7156 static void
7157 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7158 rtx cmp, rtx new_rtx, bool is_weak)
7160 struct alignment_context ac;
7161 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7162 rtx res = gen_reg_rtx (SImode);
7163 rtx_code_label *csloop = NULL, *csend = NULL;
7165 gcc_assert (MEM_P (mem));
7167 init_alignment_context (&ac, mem, mode);
7169 /* Load full word. Subsequent loads are performed by CS. */
7170 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7171 NULL_RTX, 1, OPTAB_DIRECT);
7173 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7174 possible, we try to use insv to make this happen efficiently. If
7175 that fails we'll generate code both inside and outside the loop. */
7176 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7177 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7179 if (seq0)
7180 emit_insn (seq0);
7181 if (seq1)
7182 emit_insn (seq1);
7184 /* Start CS loop. */
7185 if (!is_weak)
7187 /* Begin assuming success. */
7188 emit_move_insn (btarget, const1_rtx);
7190 csloop = gen_label_rtx ();
7191 csend = gen_label_rtx ();
7192 emit_label (csloop);
7195 /* val = "<mem>00..0<mem>"
7196 * cmp = "00..0<cmp>00..0"
7197 * new = "00..0<new>00..0"
7200 emit_insn (seq2);
7201 emit_insn (seq3);
7203 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7204 if (is_weak)
7205 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7206 else
7208 rtx tmp;
7210 /* Jump to end if we're done (likely?). */
7211 s390_emit_jump (csend, cc);
7213 /* Check for changes outside mode, and loop internal if so.
7214 Arrange the moves so that the compare is adjacent to the
7215 branch so that we can generate CRJ. */
7216 tmp = copy_to_reg (val);
7217 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7218 1, OPTAB_DIRECT);
7219 cc = s390_emit_compare (NE, val, tmp);
7220 s390_emit_jump (csloop, cc);
7222 /* Failed. */
7223 emit_move_insn (btarget, const0_rtx);
7224 emit_label (csend);
7227 /* Return the correct part of the bitfield. */
7228 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7229 NULL_RTX, 1, OPTAB_DIRECT), 1);
7232 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7233 static void
7234 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7235 rtx cmp, rtx new_rtx, bool is_weak)
7237 rtx output = vtarget;
7238 rtx_code_label *skip_cs_label = NULL;
7239 bool do_const_opt = false;
7241 if (!register_operand (output, mode))
7242 output = gen_reg_rtx (mode);
7244 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7245 with the constant first and skip the compare_and_swap because its very
7246 expensive and likely to fail anyway.
7247 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7248 cause spurious in that case.
7249 Note 2: It may be useful to do this also for non-constant INPUT.
7250 Note 3: Currently only targets with "load on condition" are supported
7251 (z196 and newer). */
7253 if (TARGET_Z196
7254 && (mode == SImode || mode == DImode))
7255 do_const_opt = (is_weak && CONST_INT_P (cmp));
7257 if (do_const_opt)
7259 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7261 skip_cs_label = gen_label_rtx ();
7262 emit_move_insn (btarget, const0_rtx);
7263 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7265 rtvec lt = rtvec_alloc (2);
7267 /* Load-and-test + conditional jump. */
7268 RTVEC_ELT (lt, 0)
7269 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7270 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7271 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7273 else
7275 emit_move_insn (output, mem);
7276 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7278 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7279 add_reg_br_prob_note (get_last_insn (),
7280 profile_probability::very_unlikely ());
7281 /* If the jump is not taken, OUTPUT is the expected value. */
7282 cmp = output;
7283 /* Reload newval to a register manually, *after* the compare and jump
7284 above. Otherwise Reload might place it before the jump. */
7286 else
7287 cmp = force_reg (mode, cmp);
7288 new_rtx = force_reg (mode, new_rtx);
7289 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7290 (do_const_opt) ? CCZmode : CCZ1mode);
7291 if (skip_cs_label != NULL)
7292 emit_label (skip_cs_label);
7294 /* We deliberately accept non-register operands in the predicate
7295 to ensure the write back to the output operand happens *before*
7296 the store-flags code below. This makes it easier for combine
7297 to merge the store-flags code with a potential test-and-branch
7298 pattern following (immediately!) afterwards. */
7299 if (output != vtarget)
7300 emit_move_insn (vtarget, output);
7302 if (do_const_opt)
7304 rtx cc, cond, ite;
7306 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7307 btarget has already been initialized with 0 above. */
7308 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7309 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7310 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7311 emit_insn (gen_rtx_SET (btarget, ite));
7313 else
7315 rtx cc, cond;
7317 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7318 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7319 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7323 /* Expand an atomic compare and swap operation. MEM is the memory location,
7324 CMP the old value to compare MEM with and NEW_RTX the value to set if
7325 CMP == MEM. */
7327 void
7328 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7329 rtx cmp, rtx new_rtx, bool is_weak)
7331 switch (mode)
7333 case E_TImode:
7334 case E_DImode:
7335 case E_SImode:
7336 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7337 break;
7338 case E_HImode:
7339 case E_QImode:
7340 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7341 break;
7342 default:
7343 gcc_unreachable ();
7347 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7348 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7349 of MEM. */
7351 void
7352 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7354 machine_mode mode = GET_MODE (mem);
7355 rtx_code_label *csloop;
7357 if (TARGET_Z196
7358 && (mode == DImode || mode == SImode)
7359 && CONST_INT_P (input) && INTVAL (input) == 0)
7361 emit_move_insn (output, const0_rtx);
7362 if (mode == DImode)
7363 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7364 else
7365 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7366 return;
7369 input = force_reg (mode, input);
7370 emit_move_insn (output, mem);
7371 csloop = gen_label_rtx ();
7372 emit_label (csloop);
7373 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7374 input, CCZ1mode));
7377 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7378 and VAL the value to play with. If AFTER is true then store the value
7379 MEM holds after the operation, if AFTER is false then store the value MEM
7380 holds before the operation. If TARGET is zero then discard that value, else
7381 store it to TARGET. */
7383 void
7384 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7385 rtx target, rtx mem, rtx val, bool after)
7387 struct alignment_context ac;
7388 rtx cmp;
7389 rtx new_rtx = gen_reg_rtx (SImode);
7390 rtx orig = gen_reg_rtx (SImode);
7391 rtx_code_label *csloop = gen_label_rtx ();
7393 gcc_assert (!target || register_operand (target, VOIDmode));
7394 gcc_assert (MEM_P (mem));
7396 init_alignment_context (&ac, mem, mode);
7398 /* Shift val to the correct bit positions.
7399 Preserve "icm", but prevent "ex icm". */
7400 if (!(ac.aligned && code == SET && MEM_P (val)))
7401 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7403 /* Further preparation insns. */
7404 if (code == PLUS || code == MINUS)
7405 emit_move_insn (orig, val);
7406 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7407 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7408 NULL_RTX, 1, OPTAB_DIRECT);
7410 /* Load full word. Subsequent loads are performed by CS. */
7411 cmp = force_reg (SImode, ac.memsi);
7413 /* Start CS loop. */
7414 emit_label (csloop);
7415 emit_move_insn (new_rtx, cmp);
7417 /* Patch new with val at correct position. */
7418 switch (code)
7420 case PLUS:
7421 case MINUS:
7422 val = expand_simple_binop (SImode, code, new_rtx, orig,
7423 NULL_RTX, 1, OPTAB_DIRECT);
7424 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7425 NULL_RTX, 1, OPTAB_DIRECT);
7426 /* FALLTHRU */
7427 case SET:
7428 if (ac.aligned && MEM_P (val))
7429 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7430 0, 0, SImode, val, false);
7431 else
7433 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7434 NULL_RTX, 1, OPTAB_DIRECT);
7435 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7436 NULL_RTX, 1, OPTAB_DIRECT);
7438 break;
7439 case AND:
7440 case IOR:
7441 case XOR:
7442 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7443 NULL_RTX, 1, OPTAB_DIRECT);
7444 break;
7445 case MULT: /* NAND */
7446 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7447 NULL_RTX, 1, OPTAB_DIRECT);
7448 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7449 NULL_RTX, 1, OPTAB_DIRECT);
7450 break;
7451 default:
7452 gcc_unreachable ();
7455 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7456 ac.memsi, cmp, new_rtx,
7457 CCZ1mode));
7459 /* Return the correct part of the bitfield. */
7460 if (target)
7461 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7462 after ? new_rtx : cmp, ac.shift,
7463 NULL_RTX, 1, OPTAB_DIRECT), 1);
7466 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7467 We need to emit DTP-relative relocations. */
7469 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7471 static void
7472 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7474 switch (size)
7476 case 4:
7477 fputs ("\t.long\t", file);
7478 break;
7479 case 8:
7480 fputs ("\t.quad\t", file);
7481 break;
7482 default:
7483 gcc_unreachable ();
7485 output_addr_const (file, x);
7486 fputs ("@DTPOFF", file);
7489 /* Return the proper mode for REGNO being represented in the dwarf
7490 unwind table. */
7491 machine_mode
7492 s390_dwarf_frame_reg_mode (int regno)
7494 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7496 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7497 if (GENERAL_REGNO_P (regno))
7498 save_mode = Pmode;
7500 /* The rightmost 64 bits of vector registers are call-clobbered. */
7501 if (GET_MODE_SIZE (save_mode) > 8)
7502 save_mode = DImode;
7504 return save_mode;
7507 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7508 /* Implement TARGET_MANGLE_TYPE. */
7510 static const char *
7511 s390_mangle_type (const_tree type)
7513 type = TYPE_MAIN_VARIANT (type);
7515 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7516 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7517 return NULL;
7519 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7520 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7521 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7522 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7524 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7525 && TARGET_LONG_DOUBLE_128)
7526 return "g";
7528 /* For all other types, use normal C++ mangling. */
7529 return NULL;
7531 #endif
7533 /* In the name of slightly smaller debug output, and to cater to
7534 general assembler lossage, recognize various UNSPEC sequences
7535 and turn them back into a direct symbol reference. */
7537 static rtx
7538 s390_delegitimize_address (rtx orig_x)
7540 rtx x, y;
7542 orig_x = delegitimize_mem_from_attrs (orig_x);
7543 x = orig_x;
7545 /* Extract the symbol ref from:
7546 (plus:SI (reg:SI 12 %r12)
7547 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7548 UNSPEC_GOTOFF/PLTOFF)))
7550 (plus:SI (reg:SI 12 %r12)
7551 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7552 UNSPEC_GOTOFF/PLTOFF)
7553 (const_int 4 [0x4])))) */
7554 if (GET_CODE (x) == PLUS
7555 && REG_P (XEXP (x, 0))
7556 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7557 && GET_CODE (XEXP (x, 1)) == CONST)
7559 HOST_WIDE_INT offset = 0;
7561 /* The const operand. */
7562 y = XEXP (XEXP (x, 1), 0);
7564 if (GET_CODE (y) == PLUS
7565 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7567 offset = INTVAL (XEXP (y, 1));
7568 y = XEXP (y, 0);
7571 if (GET_CODE (y) == UNSPEC
7572 && (XINT (y, 1) == UNSPEC_GOTOFF
7573 || XINT (y, 1) == UNSPEC_PLTOFF))
7574 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7577 if (GET_CODE (x) != MEM)
7578 return orig_x;
7580 x = XEXP (x, 0);
7581 if (GET_CODE (x) == PLUS
7582 && GET_CODE (XEXP (x, 1)) == CONST
7583 && GET_CODE (XEXP (x, 0)) == REG
7584 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7586 y = XEXP (XEXP (x, 1), 0);
7587 if (GET_CODE (y) == UNSPEC
7588 && XINT (y, 1) == UNSPEC_GOT)
7589 y = XVECEXP (y, 0, 0);
7590 else
7591 return orig_x;
7593 else if (GET_CODE (x) == CONST)
7595 /* Extract the symbol ref from:
7596 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7597 UNSPEC_PLT/GOTENT))) */
7599 y = XEXP (x, 0);
7600 if (GET_CODE (y) == UNSPEC
7601 && (XINT (y, 1) == UNSPEC_GOTENT
7602 || XINT (y, 1) == UNSPEC_PLT31))
7603 y = XVECEXP (y, 0, 0);
7604 else
7605 return orig_x;
7607 else
7608 return orig_x;
7610 if (GET_MODE (orig_x) != Pmode)
7612 if (GET_MODE (orig_x) == BLKmode)
7613 return orig_x;
7614 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7615 if (y == NULL_RTX)
7616 return orig_x;
7618 return y;
7621 /* Output operand OP to stdio stream FILE.
7622 OP is an address (register + offset) which is not used to address data;
7623 instead the rightmost bits are interpreted as the value. */
7625 static void
7626 print_addrstyle_operand (FILE *file, rtx op)
7628 HOST_WIDE_INT offset;
7629 rtx base;
7631 /* Extract base register and offset. */
7632 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7633 gcc_unreachable ();
7635 /* Sanity check. */
7636 if (base)
7638 gcc_assert (GET_CODE (base) == REG);
7639 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7640 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7643 /* Offsets are constricted to twelve bits. */
7644 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7645 if (base)
7646 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7649 /* Print the shift count operand OP to FILE.
7650 OP is an address-style operand in a form which
7651 s390_valid_shift_count permits. Subregs and no-op
7652 and-masking of the operand are stripped. */
7654 static void
7655 print_shift_count_operand (FILE *file, rtx op)
7657 /* No checking of the and mask required here. */
7658 if (!s390_valid_shift_count (op, 0))
7659 gcc_unreachable ();
7661 while (op && GET_CODE (op) == SUBREG)
7662 op = SUBREG_REG (op);
7664 if (GET_CODE (op) == AND)
7665 op = XEXP (op, 0);
7667 print_addrstyle_operand (file, op);
7670 /* Assigns the number of NOP halfwords to be emitted before and after the
7671 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7672 If hotpatching is disabled for the function, the values are set to zero.
7675 static void
7676 s390_function_num_hotpatch_hw (tree decl,
7677 int *hw_before,
7678 int *hw_after)
7680 tree attr;
7682 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7684 /* Handle the arguments of the hotpatch attribute. The values
7685 specified via attribute might override the cmdline argument
7686 values. */
7687 if (attr)
7689 tree args = TREE_VALUE (attr);
7691 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7692 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7694 else
7696 /* Use the values specified by the cmdline arguments. */
7697 *hw_before = s390_hotpatch_hw_before_label;
7698 *hw_after = s390_hotpatch_hw_after_label;
7702 /* Write the current .machine and .machinemode specification to the assembler
7703 file. */
7705 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7706 static void
7707 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7709 fprintf (asm_out_file, "\t.machinemode %s\n",
7710 (TARGET_ZARCH) ? "zarch" : "esa");
7711 fprintf (asm_out_file, "\t.machine \"%s",
7712 processor_table[s390_arch].binutils_name);
7713 if (S390_USE_ARCHITECTURE_MODIFIERS)
7715 int cpu_flags;
7717 cpu_flags = processor_flags_table[(int) s390_arch];
7718 if (TARGET_HTM && !(cpu_flags & PF_TX))
7719 fprintf (asm_out_file, "+htm");
7720 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7721 fprintf (asm_out_file, "+nohtm");
7722 if (TARGET_VX && !(cpu_flags & PF_VX))
7723 fprintf (asm_out_file, "+vx");
7724 else if (!TARGET_VX && (cpu_flags & PF_VX))
7725 fprintf (asm_out_file, "+novx");
7727 fprintf (asm_out_file, "\"\n");
7730 /* Write an extra function header before the very start of the function. */
7732 void
7733 s390_asm_output_function_prefix (FILE *asm_out_file,
7734 const char *fnname ATTRIBUTE_UNUSED)
7736 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7737 return;
7738 /* Since only the function specific options are saved but not the indications
7739 which options are set, it's too much work here to figure out which options
7740 have actually changed. Thus, generate .machine and .machinemode whenever a
7741 function has the target attribute or pragma. */
7742 fprintf (asm_out_file, "\t.machinemode push\n");
7743 fprintf (asm_out_file, "\t.machine push\n");
7744 s390_asm_output_machine_for_arch (asm_out_file);
7747 /* Write an extra function footer after the very end of the function. */
7749 void
7750 s390_asm_declare_function_size (FILE *asm_out_file,
7751 const char *fnname, tree decl)
7753 if (!flag_inhibit_size_directive)
7754 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7755 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7756 return;
7757 fprintf (asm_out_file, "\t.machine pop\n");
7758 fprintf (asm_out_file, "\t.machinemode pop\n");
7760 #endif
7762 /* Write the extra assembler code needed to declare a function properly. */
7764 void
7765 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7766 tree decl)
7768 int hw_before, hw_after;
7770 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7771 if (hw_before > 0)
7773 unsigned int function_alignment;
7774 int i;
7776 /* Add a trampoline code area before the function label and initialize it
7777 with two-byte nop instructions. This area can be overwritten with code
7778 that jumps to a patched version of the function. */
7779 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7780 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7781 hw_before);
7782 for (i = 1; i < hw_before; i++)
7783 fputs ("\tnopr\t%r0\n", asm_out_file);
7785 /* Note: The function label must be aligned so that (a) the bytes of the
7786 following nop do not cross a cacheline boundary, and (b) a jump address
7787 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7788 stored directly before the label without crossing a cacheline
7789 boundary. All this is necessary to make sure the trampoline code can
7790 be changed atomically.
7791 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7792 if there are NOPs before the function label, the alignment is placed
7793 before them. So it is necessary to duplicate the alignment after the
7794 NOPs. */
7795 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7796 if (! DECL_USER_ALIGN (decl))
7797 function_alignment
7798 = MAX (function_alignment,
7799 (unsigned int) align_functions.levels[0].get_value ());
7800 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7801 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7804 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7806 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7807 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7808 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7809 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7810 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7811 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7812 s390_warn_framesize);
7813 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7814 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7815 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7816 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7817 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7818 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7819 TARGET_PACKED_STACK);
7820 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7821 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7822 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7823 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7824 s390_warn_dynamicstack_p);
7826 ASM_OUTPUT_LABEL (asm_out_file, fname);
7827 if (hw_after > 0)
7828 asm_fprintf (asm_out_file,
7829 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7830 hw_after);
7833 /* Output machine-dependent UNSPECs occurring in address constant X
7834 in assembler syntax to stdio stream FILE. Returns true if the
7835 constant X could be recognized, false otherwise. */
7837 static bool
7838 s390_output_addr_const_extra (FILE *file, rtx x)
7840 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7841 switch (XINT (x, 1))
7843 case UNSPEC_GOTENT:
7844 output_addr_const (file, XVECEXP (x, 0, 0));
7845 fprintf (file, "@GOTENT");
7846 return true;
7847 case UNSPEC_GOT:
7848 output_addr_const (file, XVECEXP (x, 0, 0));
7849 fprintf (file, "@GOT");
7850 return true;
7851 case UNSPEC_GOTOFF:
7852 output_addr_const (file, XVECEXP (x, 0, 0));
7853 fprintf (file, "@GOTOFF");
7854 return true;
7855 case UNSPEC_PLT31:
7856 output_addr_const (file, XVECEXP (x, 0, 0));
7857 fprintf (file, "@PLT");
7858 return true;
7859 case UNSPEC_PLTOFF:
7860 output_addr_const (file, XVECEXP (x, 0, 0));
7861 fprintf (file, "@PLTOFF");
7862 return true;
7863 case UNSPEC_TLSGD:
7864 output_addr_const (file, XVECEXP (x, 0, 0));
7865 fprintf (file, "@TLSGD");
7866 return true;
7867 case UNSPEC_TLSLDM:
7868 assemble_name (file, get_some_local_dynamic_name ());
7869 fprintf (file, "@TLSLDM");
7870 return true;
7871 case UNSPEC_DTPOFF:
7872 output_addr_const (file, XVECEXP (x, 0, 0));
7873 fprintf (file, "@DTPOFF");
7874 return true;
7875 case UNSPEC_NTPOFF:
7876 output_addr_const (file, XVECEXP (x, 0, 0));
7877 fprintf (file, "@NTPOFF");
7878 return true;
7879 case UNSPEC_GOTNTPOFF:
7880 output_addr_const (file, XVECEXP (x, 0, 0));
7881 fprintf (file, "@GOTNTPOFF");
7882 return true;
7883 case UNSPEC_INDNTPOFF:
7884 output_addr_const (file, XVECEXP (x, 0, 0));
7885 fprintf (file, "@INDNTPOFF");
7886 return true;
7889 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7890 switch (XINT (x, 1))
7892 case UNSPEC_POOL_OFFSET:
7893 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7894 output_addr_const (file, x);
7895 return true;
7897 return false;
7900 /* Output address operand ADDR in assembler syntax to
7901 stdio stream FILE. */
7903 void
7904 print_operand_address (FILE *file, rtx addr)
7906 struct s390_address ad;
7907 memset (&ad, 0, sizeof (s390_address));
7909 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7911 if (!TARGET_Z10)
7913 output_operand_lossage ("symbolic memory references are "
7914 "only supported on z10 or later");
7915 return;
7917 output_addr_const (file, addr);
7918 return;
7921 if (!s390_decompose_address (addr, &ad)
7922 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7923 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7924 output_operand_lossage ("cannot decompose address");
7926 if (ad.disp)
7927 output_addr_const (file, ad.disp);
7928 else
7929 fprintf (file, "0");
7931 if (ad.base && ad.indx)
7932 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7933 reg_names[REGNO (ad.base)]);
7934 else if (ad.base)
7935 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7938 /* Output operand X in assembler syntax to stdio stream FILE.
7939 CODE specified the format flag. The following format flags
7940 are recognized:
7942 'A': On z14 or higher: If operand is a mem print the alignment
7943 hint usable with vl/vst prefixed by a comma.
7944 'C': print opcode suffix for branch condition.
7945 'D': print opcode suffix for inverse branch condition.
7946 'E': print opcode suffix for branch on index instruction.
7947 'G': print the size of the operand in bytes.
7948 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7949 'K': print @PLT suffix for call targets and load address values.
7950 'M': print the second word of a TImode operand.
7951 'N': print the second word of a DImode operand.
7952 'O': print only the displacement of a memory reference or address.
7953 'R': print only the base register of a memory reference or address.
7954 'S': print S-type memory reference (base+displacement).
7955 'Y': print address style operand without index (e.g. shift count or setmem
7956 operand).
7958 'b': print integer X as if it's an unsigned byte.
7959 'c': print integer X as if it's an signed byte.
7960 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7961 'f': "end" contiguous bitmask X in SImode.
7962 'h': print integer X as if it's a signed halfword.
7963 'i': print the first nonzero HImode part of X.
7964 'j': print the first HImode part unequal to -1 of X.
7965 'k': print the first nonzero SImode part of X.
7966 'm': print the first SImode part unequal to -1 of X.
7967 'o': print integer X as if it's an unsigned 32bit word.
7968 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7969 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7970 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7971 'x': print integer X as if it's an unsigned halfword.
7972 'v': print register number as vector register (v1 instead of f1).
7973 'V': print the second word of a TFmode operand as vector register.
7976 void
7977 print_operand (FILE *file, rtx x, int code)
7979 HOST_WIDE_INT ival;
7981 switch (code)
7983 case 'A':
7984 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7986 if (MEM_ALIGN (x) >= 128)
7987 fprintf (file, ",4");
7988 else if (MEM_ALIGN (x) == 64)
7989 fprintf (file, ",3");
7991 return;
7992 case 'C':
7993 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7994 return;
7996 case 'D':
7997 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7998 return;
8000 case 'E':
8001 if (GET_CODE (x) == LE)
8002 fprintf (file, "l");
8003 else if (GET_CODE (x) == GT)
8004 fprintf (file, "h");
8005 else
8006 output_operand_lossage ("invalid comparison operator "
8007 "for 'E' output modifier");
8008 return;
8010 case 'J':
8011 if (GET_CODE (x) == SYMBOL_REF)
8013 fprintf (file, "%s", ":tls_load:");
8014 output_addr_const (file, x);
8016 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
8018 fprintf (file, "%s", ":tls_gdcall:");
8019 output_addr_const (file, XVECEXP (x, 0, 0));
8021 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
8023 fprintf (file, "%s", ":tls_ldcall:");
8024 const char *name = get_some_local_dynamic_name ();
8025 gcc_assert (name);
8026 assemble_name (file, name);
8028 else
8029 output_operand_lossage ("invalid reference for 'J' output modifier");
8030 return;
8032 case 'G':
8033 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
8034 return;
8036 case 'O':
8038 struct s390_address ad;
8039 int ret;
8041 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8043 if (!ret
8044 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8045 || ad.indx)
8047 output_operand_lossage ("invalid address for 'O' output modifier");
8048 return;
8051 if (ad.disp)
8052 output_addr_const (file, ad.disp);
8053 else
8054 fprintf (file, "0");
8056 return;
8058 case 'R':
8060 struct s390_address ad;
8061 int ret;
8063 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8065 if (!ret
8066 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8067 || ad.indx)
8069 output_operand_lossage ("invalid address for 'R' output modifier");
8070 return;
8073 if (ad.base)
8074 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
8075 else
8076 fprintf (file, "0");
8078 return;
8080 case 'S':
8082 struct s390_address ad;
8083 int ret;
8085 if (!MEM_P (x))
8087 output_operand_lossage ("memory reference expected for "
8088 "'S' output modifier");
8089 return;
8091 ret = s390_decompose_address (XEXP (x, 0), &ad);
8093 if (!ret
8094 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8095 || ad.indx)
8097 output_operand_lossage ("invalid address for 'S' output modifier");
8098 return;
8101 if (ad.disp)
8102 output_addr_const (file, ad.disp);
8103 else
8104 fprintf (file, "0");
8106 if (ad.base)
8107 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
8109 return;
8111 case 'N':
8112 if (GET_CODE (x) == REG)
8113 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8114 else if (GET_CODE (x) == MEM)
8115 x = change_address (x, VOIDmode,
8116 plus_constant (Pmode, XEXP (x, 0), 4));
8117 else
8118 output_operand_lossage ("register or memory expression expected "
8119 "for 'N' output modifier");
8120 break;
8122 case 'M':
8123 if (GET_CODE (x) == REG)
8124 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8125 else if (GET_CODE (x) == MEM)
8126 x = change_address (x, VOIDmode,
8127 plus_constant (Pmode, XEXP (x, 0), 8));
8128 else
8129 output_operand_lossage ("register or memory expression expected "
8130 "for 'M' output modifier");
8131 break;
8133 case 'Y':
8134 print_shift_count_operand (file, x);
8135 return;
8137 case 'K':
8138 /* Append @PLT to both local and non-local symbols in order to support
8139 Linux Kernel livepatching: patches contain individual functions and
8140 are loaded further than 2G away from vmlinux, and therefore they must
8141 call even static functions via PLT. ld will optimize @PLT away for
8142 normal code, and keep it for patches.
8144 Do not indiscriminately add @PLT in 31-bit mode due to the %r12
8145 restriction, use UNSPEC_PLT31 instead.
8147 @PLT only makes sense for functions, data is taken care of by
8148 -mno-pic-data-is-text-relative.
8150 Adding @PLT interferes with handling of weak symbols in non-PIC code,
8151 since their addresses are loaded with larl, which then always produces
8152 a non-NULL result, so skip them here as well. */
8153 if (TARGET_64BIT
8154 && GET_CODE (x) == SYMBOL_REF
8155 && SYMBOL_REF_FUNCTION_P (x)
8156 && !(SYMBOL_REF_WEAK (x) && !flag_pic))
8157 fprintf (file, "@PLT");
8158 return;
8161 switch (GET_CODE (x))
8163 case REG:
8164 /* Print FP regs as fx instead of vx when they are accessed
8165 through non-vector mode. */
8166 if ((code == 'v' || code == 'V')
8167 || VECTOR_NOFP_REG_P (x)
8168 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8169 || (VECTOR_REG_P (x)
8170 && (GET_MODE_SIZE (GET_MODE (x)) /
8171 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8172 fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
8173 else
8174 fprintf (file, "%s", reg_names[REGNO (x)]);
8175 break;
8177 case MEM:
8178 output_address (GET_MODE (x), XEXP (x, 0));
8179 break;
8181 case CONST:
8182 case CODE_LABEL:
8183 case LABEL_REF:
8184 case SYMBOL_REF:
8185 output_addr_const (file, x);
8186 break;
8188 case CONST_INT:
8189 ival = INTVAL (x);
8190 switch (code)
8192 case 0:
8193 break;
8194 case 'b':
8195 ival &= 0xff;
8196 break;
8197 case 'c':
8198 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8199 break;
8200 case 'x':
8201 ival &= 0xffff;
8202 break;
8203 case 'h':
8204 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8205 break;
8206 case 'i':
8207 ival = s390_extract_part (x, HImode, 0);
8208 break;
8209 case 'j':
8210 ival = s390_extract_part (x, HImode, -1);
8211 break;
8212 case 'k':
8213 ival = s390_extract_part (x, SImode, 0);
8214 break;
8215 case 'm':
8216 ival = s390_extract_part (x, SImode, -1);
8217 break;
8218 case 'o':
8219 ival &= 0xffffffff;
8220 break;
8221 case 'e': case 'f':
8222 case 's': case 't':
8224 int start, end;
8225 int len;
8226 bool ok;
8228 len = (code == 's' || code == 'e' ? 64 : 32);
8229 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8230 gcc_assert (ok);
8231 if (code == 's' || code == 't')
8232 ival = start;
8233 else
8234 ival = end;
8236 break;
8237 default:
8238 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8240 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8241 break;
8243 case CONST_WIDE_INT:
8244 if (code == 'b')
8245 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8246 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8247 else if (code == 'x')
8248 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8249 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8250 else if (code == 'h')
8251 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8252 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8253 else
8255 if (code == 0)
8256 output_operand_lossage ("invalid constant - try using "
8257 "an output modifier");
8258 else
8259 output_operand_lossage ("invalid constant for output modifier '%c'",
8260 code);
8262 break;
8263 case CONST_VECTOR:
8264 switch (code)
8266 case 'h':
8267 gcc_assert (const_vec_duplicate_p (x));
8268 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8269 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8270 break;
8271 case 'e':
8272 case 's':
8274 int start, end;
8275 bool ok;
8277 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8278 gcc_assert (ok);
8279 ival = (code == 's') ? start : end;
8280 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8282 break;
8283 case 't':
8285 unsigned mask;
8286 bool ok = s390_bytemask_vector_p (x, &mask);
8287 gcc_assert (ok);
8288 fprintf (file, "%u", mask);
8290 break;
8292 default:
8293 output_operand_lossage ("invalid constant vector for output "
8294 "modifier '%c'", code);
8296 break;
8298 default:
8299 if (code == 0)
8300 output_operand_lossage ("invalid expression - try using "
8301 "an output modifier");
8302 else
8303 output_operand_lossage ("invalid expression for output "
8304 "modifier '%c'", code);
8305 break;
8309 /* Target hook for assembling integer objects. We need to define it
8310 here to work a round a bug in some versions of GAS, which couldn't
8311 handle values smaller than INT_MIN when printed in decimal. */
8313 static bool
8314 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8316 if (size == 8 && aligned_p
8317 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8319 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8320 INTVAL (x));
8321 return true;
8323 return default_assemble_integer (x, size, aligned_p);
8326 /* Returns true if register REGNO is used for forming
8327 a memory address in expression X. */
8329 static bool
8330 reg_used_in_mem_p (int regno, rtx x)
8332 enum rtx_code code = GET_CODE (x);
8333 int i, j;
8334 const char *fmt;
8336 if (code == MEM)
8338 if (refers_to_regno_p (regno, XEXP (x, 0)))
8339 return true;
8341 else if (code == SET
8342 && GET_CODE (SET_DEST (x)) == PC)
8344 if (refers_to_regno_p (regno, SET_SRC (x)))
8345 return true;
8348 fmt = GET_RTX_FORMAT (code);
8349 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8351 if (fmt[i] == 'e'
8352 && reg_used_in_mem_p (regno, XEXP (x, i)))
8353 return true;
8355 else if (fmt[i] == 'E')
8356 for (j = 0; j < XVECLEN (x, i); j++)
8357 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8358 return true;
8360 return false;
8363 /* Returns true if expression DEP_RTX sets an address register
8364 used by instruction INSN to address memory. */
8366 static bool
8367 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8369 rtx target, pat;
8371 if (NONJUMP_INSN_P (dep_rtx))
8372 dep_rtx = PATTERN (dep_rtx);
8374 if (GET_CODE (dep_rtx) == SET)
8376 target = SET_DEST (dep_rtx);
8377 if (GET_CODE (target) == STRICT_LOW_PART)
8378 target = XEXP (target, 0);
8379 while (GET_CODE (target) == SUBREG)
8380 target = SUBREG_REG (target);
8382 if (GET_CODE (target) == REG)
8384 int regno = REGNO (target);
8386 if (s390_safe_attr_type (insn) == TYPE_LA)
8388 pat = PATTERN (insn);
8389 if (GET_CODE (pat) == PARALLEL)
8391 gcc_assert (XVECLEN (pat, 0) == 2);
8392 pat = XVECEXP (pat, 0, 0);
8394 gcc_assert (GET_CODE (pat) == SET);
8395 return refers_to_regno_p (regno, SET_SRC (pat));
8397 else if (get_attr_atype (insn) == ATYPE_AGEN)
8398 return reg_used_in_mem_p (regno, PATTERN (insn));
8401 return false;
8404 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8407 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8409 rtx dep_rtx = PATTERN (dep_insn);
8410 int i;
8412 if (GET_CODE (dep_rtx) == SET
8413 && addr_generation_dependency_p (dep_rtx, insn))
8414 return 1;
8415 else if (GET_CODE (dep_rtx) == PARALLEL)
8417 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8419 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8420 return 1;
8423 return 0;
8427 /* A C statement (sans semicolon) to update the integer scheduling priority
8428 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8429 reduce the priority to execute INSN later. Do not define this macro if
8430 you do not need to adjust the scheduling priorities of insns.
8432 A STD instruction should be scheduled earlier,
8433 in order to use the bypass. */
8434 static int
8435 s390_adjust_priority (rtx_insn *insn, int priority)
8437 if (! INSN_P (insn))
8438 return priority;
8440 if (s390_tune <= PROCESSOR_2064_Z900)
8441 return priority;
8443 switch (s390_safe_attr_type (insn))
8445 case TYPE_FSTOREDF:
8446 case TYPE_FSTORESF:
8447 priority = priority << 3;
8448 break;
8449 case TYPE_STORE:
8450 case TYPE_STM:
8451 priority = priority << 1;
8452 break;
8453 default:
8454 break;
8456 return priority;
8460 /* The number of instructions that can be issued per cycle. */
8462 static int
8463 s390_issue_rate (void)
8465 switch (s390_tune)
8467 case PROCESSOR_2084_Z990:
8468 case PROCESSOR_2094_Z9_109:
8469 case PROCESSOR_2094_Z9_EC:
8470 case PROCESSOR_2817_Z196:
8471 return 3;
8472 case PROCESSOR_2097_Z10:
8473 return 2;
8474 case PROCESSOR_2064_Z900:
8475 /* Starting with EC12 we use the sched_reorder hook to take care
8476 of instruction dispatch constraints. The algorithm only
8477 picks the best instruction and assumes only a single
8478 instruction gets issued per cycle. */
8479 case PROCESSOR_2827_ZEC12:
8480 case PROCESSOR_2964_Z13:
8481 case PROCESSOR_3906_Z14:
8482 case PROCESSOR_ARCH14:
8483 default:
8484 return 1;
8488 static int
8489 s390_first_cycle_multipass_dfa_lookahead (void)
8491 return 4;
8494 static void
8495 annotate_constant_pool_refs_1 (rtx *x)
8497 int i, j;
8498 const char *fmt;
8500 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8501 || !CONSTANT_POOL_ADDRESS_P (*x));
8503 /* Literal pool references can only occur inside a MEM ... */
8504 if (GET_CODE (*x) == MEM)
8506 rtx memref = XEXP (*x, 0);
8508 if (GET_CODE (memref) == SYMBOL_REF
8509 && CONSTANT_POOL_ADDRESS_P (memref))
8511 rtx base = cfun->machine->base_reg;
8512 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8513 UNSPEC_LTREF);
8515 *x = replace_equiv_address (*x, addr);
8516 return;
8519 if (GET_CODE (memref) == CONST
8520 && GET_CODE (XEXP (memref, 0)) == PLUS
8521 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8522 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8523 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8525 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8526 rtx sym = XEXP (XEXP (memref, 0), 0);
8527 rtx base = cfun->machine->base_reg;
8528 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8529 UNSPEC_LTREF);
8531 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8532 return;
8536 /* ... or a load-address type pattern. */
8537 if (GET_CODE (*x) == SET)
8539 rtx addrref = SET_SRC (*x);
8541 if (GET_CODE (addrref) == SYMBOL_REF
8542 && CONSTANT_POOL_ADDRESS_P (addrref))
8544 rtx base = cfun->machine->base_reg;
8545 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8546 UNSPEC_LTREF);
8548 SET_SRC (*x) = addr;
8549 return;
8552 if (GET_CODE (addrref) == CONST
8553 && GET_CODE (XEXP (addrref, 0)) == PLUS
8554 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8555 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8556 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8558 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8559 rtx sym = XEXP (XEXP (addrref, 0), 0);
8560 rtx base = cfun->machine->base_reg;
8561 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8562 UNSPEC_LTREF);
8564 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8565 return;
8569 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8570 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8572 if (fmt[i] == 'e')
8574 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8576 else if (fmt[i] == 'E')
8578 for (j = 0; j < XVECLEN (*x, i); j++)
8579 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8584 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8585 Fix up MEMs as required.
8586 Skip insns which support relative addressing, because they do not use a base
8587 register. */
8589 static void
8590 annotate_constant_pool_refs (rtx_insn *insn)
8592 if (s390_safe_relative_long_p (insn))
8593 return;
8594 annotate_constant_pool_refs_1 (&PATTERN (insn));
8597 static void
8598 find_constant_pool_ref_1 (rtx x, rtx *ref)
8600 int i, j;
8601 const char *fmt;
8603 /* Likewise POOL_ENTRY insns. */
8604 if (GET_CODE (x) == UNSPEC_VOLATILE
8605 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8606 return;
8608 gcc_assert (GET_CODE (x) != SYMBOL_REF
8609 || !CONSTANT_POOL_ADDRESS_P (x));
8611 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8613 rtx sym = XVECEXP (x, 0, 0);
8614 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8615 && CONSTANT_POOL_ADDRESS_P (sym));
8617 if (*ref == NULL_RTX)
8618 *ref = sym;
8619 else
8620 gcc_assert (*ref == sym);
8622 return;
8625 fmt = GET_RTX_FORMAT (GET_CODE (x));
8626 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8628 if (fmt[i] == 'e')
8630 find_constant_pool_ref_1 (XEXP (x, i), ref);
8632 else if (fmt[i] == 'E')
8634 for (j = 0; j < XVECLEN (x, i); j++)
8635 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8640 /* Find an annotated literal pool symbol referenced in INSN,
8641 and store it at REF. Will abort if INSN contains references to
8642 more than one such pool symbol; multiple references to the same
8643 symbol are allowed, however.
8645 The rtx pointed to by REF must be initialized to NULL_RTX
8646 by the caller before calling this routine.
8648 Skip insns which support relative addressing, because they do not use a base
8649 register. */
8651 static void
8652 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8654 if (s390_safe_relative_long_p (insn))
8655 return;
8656 find_constant_pool_ref_1 (PATTERN (insn), ref);
8659 static void
8660 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8662 int i, j;
8663 const char *fmt;
8665 gcc_assert (*x != ref);
8667 if (GET_CODE (*x) == UNSPEC
8668 && XINT (*x, 1) == UNSPEC_LTREF
8669 && XVECEXP (*x, 0, 0) == ref)
8671 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8672 return;
8675 if (GET_CODE (*x) == PLUS
8676 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8677 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8678 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8679 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8681 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8682 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8683 return;
8686 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8687 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8689 if (fmt[i] == 'e')
8691 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8693 else if (fmt[i] == 'E')
8695 for (j = 0; j < XVECLEN (*x, i); j++)
8696 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8701 /* Replace every reference to the annotated literal pool
8702 symbol REF in INSN by its base plus OFFSET.
8703 Skip insns which support relative addressing, because they do not use a base
8704 register. */
8706 static void
8707 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8709 if (s390_safe_relative_long_p (insn))
8710 return;
8711 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8714 /* We keep a list of constants which we have to add to internal
8715 constant tables in the middle of large functions. */
8717 static machine_mode constant_modes[] =
8719 TFmode, FPRX2mode, TImode, TDmode,
8720 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8721 V4SFmode, V2DFmode, V1TFmode,
8722 DFmode, DImode, DDmode,
8723 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8724 SFmode, SImode, SDmode,
8725 V4QImode, V2HImode, V1SImode, V1SFmode,
8726 HImode,
8727 V2QImode, V1HImode,
8728 QImode,
8729 V1QImode
8731 #define NR_C_MODES (sizeof (constant_modes) / sizeof (constant_modes[0]))
8733 struct constant
8735 struct constant *next;
8736 rtx value;
8737 rtx_code_label *label;
8740 struct constant_pool
8742 struct constant_pool *next;
8743 rtx_insn *first_insn;
8744 rtx_insn *pool_insn;
8745 bitmap insns;
8746 rtx_insn *emit_pool_after;
8748 struct constant *constants[NR_C_MODES];
8749 struct constant *execute;
8750 rtx_code_label *label;
8751 int size;
8754 /* Allocate new constant_pool structure. */
8756 static struct constant_pool *
8757 s390_alloc_pool (void)
8759 struct constant_pool *pool;
8760 size_t i;
8762 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8763 pool->next = NULL;
8764 for (i = 0; i < NR_C_MODES; i++)
8765 pool->constants[i] = NULL;
8767 pool->execute = NULL;
8768 pool->label = gen_label_rtx ();
8769 pool->first_insn = NULL;
8770 pool->pool_insn = NULL;
8771 pool->insns = BITMAP_ALLOC (NULL);
8772 pool->size = 0;
8773 pool->emit_pool_after = NULL;
8775 return pool;
8778 /* Create new constant pool covering instructions starting at INSN
8779 and chain it to the end of POOL_LIST. */
8781 static struct constant_pool *
8782 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8784 struct constant_pool *pool, **prev;
8786 pool = s390_alloc_pool ();
8787 pool->first_insn = insn;
8789 for (prev = pool_list; *prev; prev = &(*prev)->next)
8791 *prev = pool;
8793 return pool;
8796 /* End range of instructions covered by POOL at INSN and emit
8797 placeholder insn representing the pool. */
8799 static void
8800 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8802 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8804 if (!insn)
8805 insn = get_last_insn ();
8807 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8808 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8811 /* Add INSN to the list of insns covered by POOL. */
8813 static void
8814 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8816 bitmap_set_bit (pool->insns, INSN_UID (insn));
8819 /* Return pool out of POOL_LIST that covers INSN. */
8821 static struct constant_pool *
8822 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8824 struct constant_pool *pool;
8826 for (pool = pool_list; pool; pool = pool->next)
8827 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8828 break;
8830 return pool;
8833 /* Add constant VAL of mode MODE to the constant pool POOL. */
8835 static void
8836 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8838 struct constant *c;
8839 size_t i;
8841 for (i = 0; i < NR_C_MODES; i++)
8842 if (constant_modes[i] == mode)
8843 break;
8844 gcc_assert (i != NR_C_MODES);
8846 for (c = pool->constants[i]; c != NULL; c = c->next)
8847 if (rtx_equal_p (val, c->value))
8848 break;
8850 if (c == NULL)
8852 c = (struct constant *) xmalloc (sizeof *c);
8853 c->value = val;
8854 c->label = gen_label_rtx ();
8855 c->next = pool->constants[i];
8856 pool->constants[i] = c;
8857 pool->size += GET_MODE_SIZE (mode);
8861 /* Return an rtx that represents the offset of X from the start of
8862 pool POOL. */
8864 static rtx
8865 s390_pool_offset (struct constant_pool *pool, rtx x)
8867 rtx label;
8869 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8870 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8871 UNSPEC_POOL_OFFSET);
8872 return gen_rtx_CONST (GET_MODE (x), x);
8875 /* Find constant VAL of mode MODE in the constant pool POOL.
8876 Return an RTX describing the distance from the start of
8877 the pool to the location of the new constant. */
8879 static rtx
8880 s390_find_constant (struct constant_pool *pool, rtx val,
8881 machine_mode mode)
8883 struct constant *c;
8884 size_t i;
8886 for (i = 0; i < NR_C_MODES; i++)
8887 if (constant_modes[i] == mode)
8888 break;
8889 gcc_assert (i != NR_C_MODES);
8891 for (c = pool->constants[i]; c != NULL; c = c->next)
8892 if (rtx_equal_p (val, c->value))
8893 break;
8895 gcc_assert (c);
8897 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8900 /* Check whether INSN is an execute. Return the label_ref to its
8901 execute target template if so, NULL_RTX otherwise. */
8903 static rtx
8904 s390_execute_label (rtx insn)
8906 if (INSN_P (insn)
8907 && GET_CODE (PATTERN (insn)) == PARALLEL
8908 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8909 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8910 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8912 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8913 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8914 else
8916 gcc_assert (JUMP_P (insn));
8917 /* For jump insns as execute target:
8918 - There is one operand less in the parallel (the
8919 modification register of the execute is always 0).
8920 - The execute target label is wrapped into an
8921 if_then_else in order to hide it from jump analysis. */
8922 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8926 return NULL_RTX;
8929 /* Find execute target for INSN in the constant pool POOL.
8930 Return an RTX describing the distance from the start of
8931 the pool to the location of the execute target. */
8933 static rtx
8934 s390_find_execute (struct constant_pool *pool, rtx insn)
8936 struct constant *c;
8938 for (c = pool->execute; c != NULL; c = c->next)
8939 if (INSN_UID (insn) == INSN_UID (c->value))
8940 break;
8942 gcc_assert (c);
8944 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8947 /* For an execute INSN, extract the execute target template. */
8949 static rtx
8950 s390_execute_target (rtx insn)
8952 rtx pattern = PATTERN (insn);
8953 gcc_assert (s390_execute_label (insn));
8955 if (XVECLEN (pattern, 0) == 2)
8957 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8959 else
8961 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8962 int i;
8964 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8965 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8967 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8970 return pattern;
8973 /* Indicate that INSN cannot be duplicated. This is the case for
8974 execute insns that carry a unique label. */
8976 static bool
8977 s390_cannot_copy_insn_p (rtx_insn *insn)
8979 rtx label = s390_execute_label (insn);
8980 return label && label != const0_rtx;
8983 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8984 do not emit the pool base label. */
8986 static void
8987 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8989 struct constant *c;
8990 rtx_insn *insn = pool->pool_insn;
8991 size_t i;
8993 /* Switch to rodata section. */
8994 insn = emit_insn_after (gen_pool_section_start (), insn);
8995 INSN_ADDRESSES_NEW (insn, -1);
8997 /* Ensure minimum pool alignment. */
8998 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8999 INSN_ADDRESSES_NEW (insn, -1);
9001 /* Emit pool base label. */
9002 if (!remote_label)
9004 insn = emit_label_after (pool->label, insn);
9005 INSN_ADDRESSES_NEW (insn, -1);
9008 /* Dump constants in descending alignment requirement order,
9009 ensuring proper alignment for every constant. */
9010 for (i = 0; i < NR_C_MODES; i++)
9011 for (c = pool->constants[i]; c; c = c->next)
9013 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
9014 rtx value = copy_rtx (c->value);
9015 if (GET_CODE (value) == CONST
9016 && GET_CODE (XEXP (value, 0)) == UNSPEC
9017 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
9018 && XVECLEN (XEXP (value, 0), 0) == 1)
9019 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
9021 insn = emit_label_after (c->label, insn);
9022 INSN_ADDRESSES_NEW (insn, -1);
9024 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
9025 gen_rtvec (1, value),
9026 UNSPECV_POOL_ENTRY);
9027 insn = emit_insn_after (value, insn);
9028 INSN_ADDRESSES_NEW (insn, -1);
9031 /* Ensure minimum alignment for instructions. */
9032 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
9033 INSN_ADDRESSES_NEW (insn, -1);
9035 /* Output in-pool execute template insns. */
9036 for (c = pool->execute; c; c = c->next)
9038 insn = emit_label_after (c->label, insn);
9039 INSN_ADDRESSES_NEW (insn, -1);
9041 insn = emit_insn_after (s390_execute_target (c->value), insn);
9042 INSN_ADDRESSES_NEW (insn, -1);
9045 /* Switch back to previous section. */
9046 insn = emit_insn_after (gen_pool_section_end (), insn);
9047 INSN_ADDRESSES_NEW (insn, -1);
9049 insn = emit_barrier_after (insn);
9050 INSN_ADDRESSES_NEW (insn, -1);
9052 /* Remove placeholder insn. */
9053 remove_insn (pool->pool_insn);
9056 /* Free all memory used by POOL. */
9058 static void
9059 s390_free_pool (struct constant_pool *pool)
9061 struct constant *c, *next;
9062 size_t i;
9064 for (i = 0; i < NR_C_MODES; i++)
9065 for (c = pool->constants[i]; c; c = next)
9067 next = c->next;
9068 free (c);
9071 for (c = pool->execute; c; c = next)
9073 next = c->next;
9074 free (c);
9077 BITMAP_FREE (pool->insns);
9078 free (pool);
9082 /* Collect main literal pool. Return NULL on overflow. */
9084 static struct constant_pool *
9085 s390_mainpool_start (void)
9087 struct constant_pool *pool;
9088 rtx_insn *insn;
9090 pool = s390_alloc_pool ();
9092 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9094 if (NONJUMP_INSN_P (insn)
9095 && GET_CODE (PATTERN (insn)) == SET
9096 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
9097 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
9099 /* There might be two main_pool instructions if base_reg
9100 is call-clobbered; one for shrink-wrapped code and one
9101 for the rest. We want to keep the first. */
9102 if (pool->pool_insn)
9104 insn = PREV_INSN (insn);
9105 delete_insn (NEXT_INSN (insn));
9106 continue;
9108 pool->pool_insn = insn;
9111 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9113 rtx pool_ref = NULL_RTX;
9114 find_constant_pool_ref (insn, &pool_ref);
9115 if (pool_ref)
9117 rtx constant = get_pool_constant (pool_ref);
9118 machine_mode mode = get_pool_mode (pool_ref);
9119 s390_add_constant (pool, constant, mode);
9123 /* If hot/cold partitioning is enabled we have to make sure that
9124 the literal pool is emitted in the same section where the
9125 initialization of the literal pool base pointer takes place.
9126 emit_pool_after is only used in the non-overflow case on non
9127 Z cpus where we can emit the literal pool at the end of the
9128 function body within the text section. */
9129 if (NOTE_P (insn)
9130 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9131 && !pool->emit_pool_after)
9132 pool->emit_pool_after = PREV_INSN (insn);
9135 gcc_assert (pool->pool_insn || pool->size == 0);
9137 if (pool->size >= 4096)
9139 /* We're going to chunkify the pool, so remove the main
9140 pool placeholder insn. */
9141 remove_insn (pool->pool_insn);
9143 s390_free_pool (pool);
9144 pool = NULL;
9147 /* If the functions ends with the section where the literal pool
9148 should be emitted set the marker to its end. */
9149 if (pool && !pool->emit_pool_after)
9150 pool->emit_pool_after = get_last_insn ();
9152 return pool;
9155 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9156 Modify the current function to output the pool constants as well as
9157 the pool register setup instruction. */
9159 static void
9160 s390_mainpool_finish (struct constant_pool *pool)
9162 rtx base_reg = cfun->machine->base_reg;
9163 rtx set;
9164 rtx_insn *insn;
9166 /* If the pool is empty, we're done. */
9167 if (pool->size == 0)
9169 /* We don't actually need a base register after all. */
9170 cfun->machine->base_reg = NULL_RTX;
9172 if (pool->pool_insn)
9173 remove_insn (pool->pool_insn);
9174 s390_free_pool (pool);
9175 return;
9178 /* We need correct insn addresses. */
9179 shorten_branches (get_insns ());
9181 /* Use a LARL to load the pool register. The pool is
9182 located in the .rodata section, so we emit it after the function. */
9183 set = gen_main_base_64 (base_reg, pool->label);
9184 insn = emit_insn_after (set, pool->pool_insn);
9185 INSN_ADDRESSES_NEW (insn, -1);
9186 remove_insn (pool->pool_insn);
9188 insn = get_last_insn ();
9189 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9190 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9192 s390_dump_pool (pool, 0);
9194 /* Replace all literal pool references. */
9196 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9198 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9200 rtx addr, pool_ref = NULL_RTX;
9201 find_constant_pool_ref (insn, &pool_ref);
9202 if (pool_ref)
9204 if (s390_execute_label (insn))
9205 addr = s390_find_execute (pool, insn);
9206 else
9207 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9208 get_pool_mode (pool_ref));
9210 replace_constant_pool_ref (insn, pool_ref, addr);
9211 INSN_CODE (insn) = -1;
9217 /* Free the pool. */
9218 s390_free_pool (pool);
9221 /* Chunkify the literal pool. */
9223 #define S390_POOL_CHUNK_MIN 0xc00
9224 #define S390_POOL_CHUNK_MAX 0xe00
9226 static struct constant_pool *
9227 s390_chunkify_start (void)
9229 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9230 bitmap far_labels;
9231 rtx_insn *insn;
9233 /* We need correct insn addresses. */
9235 shorten_branches (get_insns ());
9237 /* Scan all insns and move literals to pool chunks. */
9239 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9241 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9243 rtx pool_ref = NULL_RTX;
9244 find_constant_pool_ref (insn, &pool_ref);
9245 if (pool_ref)
9247 rtx constant = get_pool_constant (pool_ref);
9248 machine_mode mode = get_pool_mode (pool_ref);
9250 if (!curr_pool)
9251 curr_pool = s390_start_pool (&pool_list, insn);
9253 s390_add_constant (curr_pool, constant, mode);
9254 s390_add_pool_insn (curr_pool, insn);
9258 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9260 if (curr_pool)
9261 s390_add_pool_insn (curr_pool, insn);
9264 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9265 continue;
9267 if (!curr_pool
9268 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9269 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9270 continue;
9272 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9273 continue;
9275 s390_end_pool (curr_pool, NULL);
9276 curr_pool = NULL;
9279 if (curr_pool)
9280 s390_end_pool (curr_pool, NULL);
9282 /* Find all labels that are branched into
9283 from an insn belonging to a different chunk. */
9285 far_labels = BITMAP_ALLOC (NULL);
9287 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9289 rtx_jump_table_data *table;
9291 /* Labels marked with LABEL_PRESERVE_P can be target
9292 of non-local jumps, so we have to mark them.
9293 The same holds for named labels.
9295 Don't do that, however, if it is the label before
9296 a jump table. */
9298 if (LABEL_P (insn)
9299 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9301 rtx_insn *vec_insn = NEXT_INSN (insn);
9302 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9303 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9305 /* Check potential targets in a table jump (casesi_jump). */
9306 else if (tablejump_p (insn, NULL, &table))
9308 rtx vec_pat = PATTERN (table);
9309 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9311 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9313 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9315 if (s390_find_pool (pool_list, label)
9316 != s390_find_pool (pool_list, insn))
9317 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9320 /* If we have a direct jump (conditional or unconditional),
9321 check all potential targets. */
9322 else if (JUMP_P (insn))
9324 rtx pat = PATTERN (insn);
9326 if (GET_CODE (pat) == PARALLEL)
9327 pat = XVECEXP (pat, 0, 0);
9329 if (GET_CODE (pat) == SET)
9331 rtx label = JUMP_LABEL (insn);
9332 if (label && !ANY_RETURN_P (label))
9334 if (s390_find_pool (pool_list, label)
9335 != s390_find_pool (pool_list, insn))
9336 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9342 /* Insert base register reload insns before every pool. */
9344 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9346 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9347 curr_pool->label);
9348 rtx_insn *insn = curr_pool->first_insn;
9349 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9352 /* Insert base register reload insns at every far label. */
9354 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9355 if (LABEL_P (insn)
9356 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9358 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9359 if (pool)
9361 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9362 pool->label);
9363 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9368 BITMAP_FREE (far_labels);
9371 /* Recompute insn addresses. */
9373 init_insn_lengths ();
9374 shorten_branches (get_insns ());
9376 return pool_list;
9379 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9380 After we have decided to use this list, finish implementing
9381 all changes to the current function as required. */
9383 static void
9384 s390_chunkify_finish (struct constant_pool *pool_list)
9386 struct constant_pool *curr_pool = NULL;
9387 rtx_insn *insn;
9390 /* Replace all literal pool references. */
9392 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9394 curr_pool = s390_find_pool (pool_list, insn);
9395 if (!curr_pool)
9396 continue;
9398 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9400 rtx addr, pool_ref = NULL_RTX;
9401 find_constant_pool_ref (insn, &pool_ref);
9402 if (pool_ref)
9404 if (s390_execute_label (insn))
9405 addr = s390_find_execute (curr_pool, insn);
9406 else
9407 addr = s390_find_constant (curr_pool,
9408 get_pool_constant (pool_ref),
9409 get_pool_mode (pool_ref));
9411 replace_constant_pool_ref (insn, pool_ref, addr);
9412 INSN_CODE (insn) = -1;
9417 /* Dump out all literal pools. */
9419 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9420 s390_dump_pool (curr_pool, 0);
9422 /* Free pool list. */
9424 while (pool_list)
9426 struct constant_pool *next = pool_list->next;
9427 s390_free_pool (pool_list);
9428 pool_list = next;
9432 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9434 void
9435 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9437 switch (GET_MODE_CLASS (mode))
9439 case MODE_FLOAT:
9440 case MODE_DECIMAL_FLOAT:
9441 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9443 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9444 as_a <scalar_float_mode> (mode), align);
9445 break;
9447 case MODE_INT:
9448 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9449 mark_symbol_refs_as_used (exp);
9450 break;
9452 case MODE_VECTOR_INT:
9453 case MODE_VECTOR_FLOAT:
9455 int i;
9456 machine_mode inner_mode;
9457 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9459 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9460 for (i = 0; i < XVECLEN (exp, 0); i++)
9461 s390_output_pool_entry (XVECEXP (exp, 0, i),
9462 inner_mode,
9463 i == 0
9464 ? align
9465 : GET_MODE_BITSIZE (inner_mode));
9467 break;
9469 default:
9470 gcc_unreachable ();
9474 /* Return true if MEM refers to an integer constant in the literal pool. If
9475 VAL is not nullptr, then also fill it with the constant's value. */
9477 bool
9478 s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
9480 /* Try to match the following:
9481 - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
9482 - (mem (symbol_ref)). */
9484 if (!MEM_P (mem))
9485 return false;
9487 rtx addr = XEXP (mem, 0);
9488 rtx sym;
9489 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
9490 sym = XVECEXP (addr, 0, 0);
9491 else
9492 sym = addr;
9494 if (!SYMBOL_REF_P (sym) || !CONSTANT_POOL_ADDRESS_P (sym))
9495 return false;
9497 rtx val_rtx = get_pool_constant (sym);
9498 if (!CONST_INT_P (val_rtx))
9499 return false;
9501 if (val != nullptr)
9502 *val = INTVAL (val_rtx);
9503 return true;
9506 /* Return an RTL expression representing the value of the return address
9507 for the frame COUNT steps up from the current frame. FRAME is the
9508 frame pointer of that frame. */
9511 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9513 int offset;
9514 rtx addr;
9516 /* Without backchain, we fail for all but the current frame. */
9518 if (!TARGET_BACKCHAIN && count > 0)
9519 return NULL_RTX;
9521 /* For the current frame, we need to make sure the initial
9522 value of RETURN_REGNUM is actually saved. */
9524 if (count == 0)
9525 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9527 if (TARGET_PACKED_STACK)
9528 offset = -2 * UNITS_PER_LONG;
9529 else
9530 offset = RETURN_REGNUM * UNITS_PER_LONG;
9532 addr = plus_constant (Pmode, frame, offset);
9533 addr = memory_address (Pmode, addr);
9534 return gen_rtx_MEM (Pmode, addr);
9537 /* Return an RTL expression representing the back chain stored in
9538 the current stack frame. */
9541 s390_back_chain_rtx (void)
9543 rtx chain;
9545 gcc_assert (TARGET_BACKCHAIN);
9547 if (TARGET_PACKED_STACK)
9548 chain = plus_constant (Pmode, stack_pointer_rtx,
9549 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9550 else
9551 chain = stack_pointer_rtx;
9553 chain = gen_rtx_MEM (Pmode, chain);
9554 return chain;
9557 /* Find first call clobbered register unused in a function.
9558 This could be used as base register in a leaf function
9559 or for holding the return address before epilogue. */
9561 static int
9562 find_unused_clobbered_reg (void)
9564 int i;
9565 for (i = 0; i < 6; i++)
9566 if (!df_regs_ever_live_p (i))
9567 return i;
9568 return 0;
9572 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9573 clobbered hard regs in SETREG. */
9575 static void
9576 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9578 char *regs_ever_clobbered = (char *)data;
9579 unsigned int i, regno;
9580 machine_mode mode = GET_MODE (setreg);
9582 if (GET_CODE (setreg) == SUBREG)
9584 rtx inner = SUBREG_REG (setreg);
9585 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9586 return;
9587 regno = subreg_regno (setreg);
9589 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9590 regno = REGNO (setreg);
9591 else
9592 return;
9594 for (i = regno;
9595 i < end_hard_regno (mode, regno);
9596 i++)
9597 regs_ever_clobbered[i] = 1;
9600 /* Walks through all basic blocks of the current function looking
9601 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9602 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9603 each of those regs. */
9605 static void
9606 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9608 basic_block cur_bb;
9609 rtx_insn *cur_insn;
9610 unsigned int i;
9612 memset (regs_ever_clobbered, 0, 32);
9614 /* For non-leaf functions we have to consider all call clobbered regs to be
9615 clobbered. */
9616 if (!crtl->is_leaf)
9618 for (i = 0; i < 32; i++)
9619 regs_ever_clobbered[i] = call_used_regs[i];
9622 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9623 this work is done by liveness analysis (mark_regs_live_at_end).
9624 Special care is needed for functions containing landing pads. Landing pads
9625 may use the eh registers, but the code which sets these registers is not
9626 contained in that function. Hence s390_regs_ever_clobbered is not able to
9627 deal with this automatically. */
9628 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9629 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9630 if (crtl->calls_eh_return
9631 || (cfun->machine->has_landing_pad_p
9632 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9633 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9635 /* For nonlocal gotos all call-saved registers have to be saved.
9636 This flag is also set for the unwinding code in libgcc.
9637 See expand_builtin_unwind_init. For regs_ever_live this is done by
9638 reload. */
9639 if (crtl->saves_all_registers)
9640 for (i = 0; i < 32; i++)
9641 if (!call_used_regs[i])
9642 regs_ever_clobbered[i] = 1;
9644 FOR_EACH_BB_FN (cur_bb, cfun)
9646 FOR_BB_INSNS (cur_bb, cur_insn)
9648 rtx pat;
9650 if (!INSN_P (cur_insn))
9651 continue;
9653 pat = PATTERN (cur_insn);
9655 /* Ignore GPR restore insns. */
9656 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9658 if (GET_CODE (pat) == SET
9659 && GENERAL_REG_P (SET_DEST (pat)))
9661 /* lgdr */
9662 if (GET_MODE (SET_SRC (pat)) == DImode
9663 && FP_REG_P (SET_SRC (pat)))
9664 continue;
9666 /* l / lg */
9667 if (GET_CODE (SET_SRC (pat)) == MEM)
9668 continue;
9671 /* lm / lmg */
9672 if (GET_CODE (pat) == PARALLEL
9673 && load_multiple_operation (pat, VOIDmode))
9674 continue;
9677 note_stores (cur_insn,
9678 s390_reg_clobbered_rtx,
9679 regs_ever_clobbered);
9684 /* Determine the frame area which actually has to be accessed
9685 in the function epilogue. The values are stored at the
9686 given pointers AREA_BOTTOM (address of the lowest used stack
9687 address) and AREA_TOP (address of the first item which does
9688 not belong to the stack frame). */
9690 static void
9691 s390_frame_area (int *area_bottom, int *area_top)
9693 int b, t;
9695 b = INT_MAX;
9696 t = INT_MIN;
9698 if (cfun_frame_layout.first_restore_gpr != -1)
9700 b = (cfun_frame_layout.gprs_offset
9701 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9702 t = b + (cfun_frame_layout.last_restore_gpr
9703 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9706 if (TARGET_64BIT && cfun_save_high_fprs_p)
9708 b = MIN (b, cfun_frame_layout.f8_offset);
9709 t = MAX (t, (cfun_frame_layout.f8_offset
9710 + cfun_frame_layout.high_fprs * 8));
9713 if (!TARGET_64BIT)
9715 if (cfun_fpr_save_p (FPR4_REGNUM))
9717 b = MIN (b, cfun_frame_layout.f4_offset);
9718 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9720 if (cfun_fpr_save_p (FPR6_REGNUM))
9722 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9723 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9726 *area_bottom = b;
9727 *area_top = t;
9729 /* Update gpr_save_slots in the frame layout trying to make use of
9730 FPRs as GPR save slots.
9731 This is a helper routine of s390_register_info. */
9733 static void
9734 s390_register_info_gprtofpr ()
9736 int save_reg_slot = FPR0_REGNUM;
9737 int i, j;
9739 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9740 return;
9742 /* builtin_eh_return needs to be able to modify the return address
9743 on the stack. It could also adjust the FPR save slot instead but
9744 is it worth the trouble?! */
9745 if (crtl->calls_eh_return)
9746 return;
9748 for (i = 15; i >= 6; i--)
9750 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9751 continue;
9753 /* Advance to the next FP register which can be used as a
9754 GPR save slot. */
9755 while ((!call_used_regs[save_reg_slot]
9756 || df_regs_ever_live_p (save_reg_slot)
9757 || cfun_fpr_save_p (save_reg_slot))
9758 && FP_REGNO_P (save_reg_slot))
9759 save_reg_slot++;
9760 if (!FP_REGNO_P (save_reg_slot))
9762 /* We only want to use ldgr/lgdr if we can get rid of
9763 stm/lm entirely. So undo the gpr slot allocation in
9764 case we ran out of FPR save slots. */
9765 for (j = 6; j <= 15; j++)
9766 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9767 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9768 break;
9770 cfun_gpr_save_slot (i) = save_reg_slot++;
9774 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9775 stdarg.
9776 This is a helper routine for s390_register_info. */
9778 static void
9779 s390_register_info_stdarg_fpr ()
9781 int i;
9782 int min_fpr;
9783 int max_fpr;
9785 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9786 f0-f4 for 64 bit. */
9787 if (!cfun->stdarg
9788 || !TARGET_HARD_FLOAT
9789 || !cfun->va_list_fpr_size
9790 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9791 return;
9793 min_fpr = crtl->args.info.fprs;
9794 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9795 if (max_fpr >= FP_ARG_NUM_REG)
9796 max_fpr = FP_ARG_NUM_REG - 1;
9798 /* FPR argument regs start at f0. */
9799 min_fpr += FPR0_REGNUM;
9800 max_fpr += FPR0_REGNUM;
9802 for (i = min_fpr; i <= max_fpr; i++)
9803 cfun_set_fpr_save (i);
9806 /* Reserve the GPR save slots for GPRs which need to be saved due to
9807 stdarg.
9808 This is a helper routine for s390_register_info. */
9810 static void
9811 s390_register_info_stdarg_gpr ()
9813 int i;
9814 int min_gpr;
9815 int max_gpr;
9817 if (!cfun->stdarg
9818 || !cfun->va_list_gpr_size
9819 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9820 return;
9822 min_gpr = crtl->args.info.gprs;
9823 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9824 if (max_gpr >= GP_ARG_NUM_REG)
9825 max_gpr = GP_ARG_NUM_REG - 1;
9827 /* GPR argument regs start at r2. */
9828 min_gpr += GPR2_REGNUM;
9829 max_gpr += GPR2_REGNUM;
9831 /* If r6 was supposed to be saved into an FPR and now needs to go to
9832 the stack for vararg we have to adjust the restore range to make
9833 sure that the restore is done from stack as well. */
9834 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9835 && min_gpr <= GPR6_REGNUM
9836 && max_gpr >= GPR6_REGNUM)
9838 if (cfun_frame_layout.first_restore_gpr == -1
9839 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9840 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9841 if (cfun_frame_layout.last_restore_gpr == -1
9842 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9843 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9846 if (cfun_frame_layout.first_save_gpr == -1
9847 || cfun_frame_layout.first_save_gpr > min_gpr)
9848 cfun_frame_layout.first_save_gpr = min_gpr;
9850 if (cfun_frame_layout.last_save_gpr == -1
9851 || cfun_frame_layout.last_save_gpr < max_gpr)
9852 cfun_frame_layout.last_save_gpr = max_gpr;
9854 for (i = min_gpr; i <= max_gpr; i++)
9855 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9858 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9859 prologue and epilogue. */
9861 static void
9862 s390_register_info_set_ranges ()
9864 int i, j;
9866 /* Find the first and the last save slot supposed to use the stack
9867 to set the restore range.
9868 Vararg regs might be marked as save to stack but only the
9869 call-saved regs really need restoring (i.e. r6). This code
9870 assumes that the vararg regs have not yet been recorded in
9871 cfun_gpr_save_slot. */
9872 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9873 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9874 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9875 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9876 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9877 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9880 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9881 for registers which need to be saved in function prologue.
9882 This function can be used until the insns emitted for save/restore
9883 of the regs are visible in the RTL stream. */
9885 static void
9886 s390_register_info ()
9888 int i;
9889 char clobbered_regs[32];
9891 gcc_assert (!epilogue_completed);
9893 if (reload_completed)
9894 /* After reload we rely on our own routine to determine which
9895 registers need saving. */
9896 s390_regs_ever_clobbered (clobbered_regs);
9897 else
9898 /* During reload we use regs_ever_live as a base since reload
9899 does changes in there which we otherwise would not be aware
9900 of. */
9901 for (i = 0; i < 32; i++)
9902 clobbered_regs[i] = df_regs_ever_live_p (i);
9904 for (i = 0; i < 32; i++)
9905 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9907 /* Mark the call-saved FPRs which need to be saved.
9908 This needs to be done before checking the special GPRs since the
9909 stack pointer usage depends on whether high FPRs have to be saved
9910 or not. */
9911 cfun_frame_layout.fpr_bitmap = 0;
9912 cfun_frame_layout.high_fprs = 0;
9913 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9914 if (clobbered_regs[i] && !call_used_regs[i])
9916 cfun_set_fpr_save (i);
9917 if (i >= FPR8_REGNUM)
9918 cfun_frame_layout.high_fprs++;
9921 /* Register 12 is used for GOT address, but also as temp in prologue
9922 for split-stack stdarg functions (unless r14 is available). */
9923 clobbered_regs[12]
9924 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9925 || (flag_split_stack && cfun->stdarg
9926 && (crtl->is_leaf || TARGET_TPF_PROFILING
9927 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9929 clobbered_regs[BASE_REGNUM]
9930 |= (cfun->machine->base_reg
9931 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9933 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9934 |= !!frame_pointer_needed;
9936 /* On pre z900 machines this might take until machine dependent
9937 reorg to decide.
9938 save_return_addr_p will only be set on non-zarch machines so
9939 there is no risk that r14 goes into an FPR instead of a stack
9940 slot. */
9941 clobbered_regs[RETURN_REGNUM]
9942 |= (!crtl->is_leaf
9943 || TARGET_TPF_PROFILING
9944 || cfun_frame_layout.save_return_addr_p
9945 || crtl->calls_eh_return);
9947 clobbered_regs[STACK_POINTER_REGNUM]
9948 |= (!crtl->is_leaf
9949 || TARGET_TPF_PROFILING
9950 || cfun_save_high_fprs_p
9951 || get_frame_size () > 0
9952 || (reload_completed && cfun_frame_layout.frame_size > 0)
9953 || cfun->calls_alloca);
9955 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9957 for (i = 6; i < 16; i++)
9958 if (clobbered_regs[i])
9959 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9961 s390_register_info_stdarg_fpr ();
9962 s390_register_info_gprtofpr ();
9963 s390_register_info_set_ranges ();
9964 /* stdarg functions might need to save GPRs 2 to 6. This might
9965 override the GPR->FPR save decision made by
9966 s390_register_info_gprtofpr for r6 since vararg regs must go to
9967 the stack. */
9968 s390_register_info_stdarg_gpr ();
9971 /* Return true if REGNO is a global register, but not one
9972 of the special ones that need to be saved/restored in anyway. */
9974 static inline bool
9975 global_not_special_regno_p (int regno)
9977 return (global_regs[regno]
9978 /* These registers are special and need to be
9979 restored in any case. */
9980 && !(regno == STACK_POINTER_REGNUM
9981 || regno == RETURN_REGNUM
9982 || regno == BASE_REGNUM
9983 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9986 /* This function is called by s390_optimize_prologue in order to get
9987 rid of unnecessary GPR save/restore instructions. The register info
9988 for the GPRs is re-computed and the ranges are re-calculated. */
9990 static void
9991 s390_optimize_register_info ()
9993 char clobbered_regs[32];
9994 int i;
9996 gcc_assert (epilogue_completed);
9998 s390_regs_ever_clobbered (clobbered_regs);
10000 /* Global registers do not need to be saved and restored unless it
10001 is one of our special regs. (r12, r13, r14, or r15). */
10002 for (i = 0; i < 32; i++)
10003 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
10005 /* There is still special treatment needed for cases invisible to
10006 s390_regs_ever_clobbered. */
10007 clobbered_regs[RETURN_REGNUM]
10008 |= (TARGET_TPF_PROFILING
10009 /* When expanding builtin_return_addr in ESA mode we do not
10010 know whether r14 will later be needed as scratch reg when
10011 doing branch splitting. So the builtin always accesses the
10012 r14 save slot and we need to stick to the save/restore
10013 decision for r14 even if it turns out that it didn't get
10014 clobbered. */
10015 || cfun_frame_layout.save_return_addr_p
10016 || crtl->calls_eh_return);
10018 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10020 for (i = 6; i < 16; i++)
10021 if (!clobbered_regs[i])
10022 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10024 s390_register_info_set_ranges ();
10025 s390_register_info_stdarg_gpr ();
10028 /* Fill cfun->machine with info about frame of current function. */
10030 static void
10031 s390_frame_info (void)
10033 HOST_WIDE_INT lowest_offset;
10035 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10036 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10038 /* The va_arg builtin uses a constant distance of 16 *
10039 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10040 pointer. So even if we are going to save the stack pointer in an
10041 FPR we need the stack space in order to keep the offsets
10042 correct. */
10043 if (cfun->stdarg && cfun_save_arg_fprs_p)
10045 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10047 if (cfun_frame_layout.first_save_gpr_slot == -1)
10048 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10051 cfun_frame_layout.frame_size = get_frame_size ();
10052 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10053 fatal_error (input_location,
10054 "total size of local variables exceeds architecture limit");
10056 if (!TARGET_PACKED_STACK)
10058 /* Fixed stack layout. */
10059 cfun_frame_layout.backchain_offset = 0;
10060 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10061 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10062 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10063 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10064 * UNITS_PER_LONG);
10066 else if (TARGET_BACKCHAIN)
10068 /* Kernel stack layout - packed stack, backchain, no float */
10069 gcc_assert (TARGET_SOFT_FLOAT);
10070 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10071 - UNITS_PER_LONG);
10073 /* The distance between the backchain and the return address
10074 save slot must not change. So we always need a slot for the
10075 stack pointer which resides in between. */
10076 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10078 cfun_frame_layout.gprs_offset
10079 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10081 /* FPRs will not be saved. Nevertheless pick sane values to
10082 keep area calculations valid. */
10083 cfun_frame_layout.f0_offset =
10084 cfun_frame_layout.f4_offset =
10085 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10087 else
10089 int num_fprs;
10091 /* Packed stack layout without backchain. */
10093 /* With stdarg FPRs need their dedicated slots. */
10094 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10095 : (cfun_fpr_save_p (FPR4_REGNUM) +
10096 cfun_fpr_save_p (FPR6_REGNUM)));
10097 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10099 num_fprs = (cfun->stdarg ? 2
10100 : (cfun_fpr_save_p (FPR0_REGNUM)
10101 + cfun_fpr_save_p (FPR2_REGNUM)));
10102 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10104 cfun_frame_layout.gprs_offset
10105 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10107 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10108 - cfun_frame_layout.high_fprs * 8);
10111 if (cfun_save_high_fprs_p)
10112 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10114 if (!crtl->is_leaf)
10115 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10117 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10118 sized area at the bottom of the stack. This is required also for
10119 leaf functions. When GCC generates a local stack reference it
10120 will always add STACK_POINTER_OFFSET to all these references. */
10121 if (crtl->is_leaf
10122 && !TARGET_TPF_PROFILING
10123 && cfun_frame_layout.frame_size == 0
10124 && !cfun->calls_alloca)
10125 return;
10127 /* Calculate the number of bytes we have used in our own register
10128 save area. With the packed stack layout we can re-use the
10129 remaining bytes for normal stack elements. */
10131 if (TARGET_PACKED_STACK)
10132 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10133 cfun_frame_layout.f4_offset),
10134 cfun_frame_layout.gprs_offset);
10135 else
10136 lowest_offset = 0;
10138 if (TARGET_BACKCHAIN)
10139 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10141 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10143 /* If under 31 bit an odd number of gprs has to be saved we have to
10144 adjust the frame size to sustain 8 byte alignment of stack
10145 frames. */
10146 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10147 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10148 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10151 /* Generate frame layout. Fills in register and frame data for the current
10152 function in cfun->machine. This routine can be called multiple times;
10153 it will re-do the complete frame layout every time. */
10155 static void
10156 s390_init_frame_layout (void)
10158 HOST_WIDE_INT frame_size;
10159 int base_used;
10161 /* After LRA the frame layout is supposed to be read-only and should
10162 not be re-computed. */
10163 if (reload_completed)
10164 return;
10168 frame_size = cfun_frame_layout.frame_size;
10170 /* Try to predict whether we'll need the base register. */
10171 base_used = crtl->uses_const_pool
10172 || (!DISP_IN_RANGE (frame_size)
10173 && !CONST_OK_FOR_K (frame_size));
10175 /* Decide which register to use as literal pool base. In small
10176 leaf functions, try to use an unused call-clobbered register
10177 as base register to avoid save/restore overhead. */
10178 if (!base_used)
10179 cfun->machine->base_reg = NULL_RTX;
10180 else
10182 int br = 0;
10184 if (crtl->is_leaf)
10185 /* Prefer r5 (most likely to be free). */
10186 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10188 cfun->machine->base_reg =
10189 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10192 s390_register_info ();
10193 s390_frame_info ();
10195 while (frame_size != cfun_frame_layout.frame_size);
10198 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10199 the TX is nonescaping. A transaction is considered escaping if
10200 there is at least one path from tbegin returning CC0 to the
10201 function exit block without an tend.
10203 The check so far has some limitations:
10204 - only single tbegin/tend BBs are supported
10205 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10206 - when CC is copied to a GPR and the CC0 check is done with the GPR
10207 this is not supported
10210 static void
10211 s390_optimize_nonescaping_tx (void)
10213 const unsigned int CC0 = 1 << 3;
10214 basic_block tbegin_bb = NULL;
10215 basic_block tend_bb = NULL;
10216 basic_block bb;
10217 rtx_insn *insn;
10218 bool result = true;
10219 int bb_index;
10220 rtx_insn *tbegin_insn = NULL;
10222 if (!cfun->machine->tbegin_p)
10223 return;
10225 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10227 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10229 if (!bb)
10230 continue;
10232 FOR_BB_INSNS (bb, insn)
10234 rtx ite, cc, pat, target;
10235 unsigned HOST_WIDE_INT mask;
10237 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10238 continue;
10240 pat = PATTERN (insn);
10242 if (GET_CODE (pat) == PARALLEL)
10243 pat = XVECEXP (pat, 0, 0);
10245 if (GET_CODE (pat) != SET
10246 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10247 continue;
10249 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10251 rtx_insn *tmp;
10253 tbegin_insn = insn;
10255 /* Just return if the tbegin doesn't have clobbers. */
10256 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10257 return;
10259 if (tbegin_bb != NULL)
10260 return;
10262 /* Find the next conditional jump. */
10263 for (tmp = NEXT_INSN (insn);
10264 tmp != NULL_RTX;
10265 tmp = NEXT_INSN (tmp))
10267 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10268 return;
10269 if (!JUMP_P (tmp))
10270 continue;
10272 ite = SET_SRC (PATTERN (tmp));
10273 if (GET_CODE (ite) != IF_THEN_ELSE)
10274 continue;
10276 cc = XEXP (XEXP (ite, 0), 0);
10277 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10278 || GET_MODE (cc) != CCRAWmode
10279 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10280 return;
10282 if (bb->succs->length () != 2)
10283 return;
10285 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10286 if (GET_CODE (XEXP (ite, 0)) == NE)
10287 mask ^= 0xf;
10289 if (mask == CC0)
10290 target = XEXP (ite, 1);
10291 else if (mask == (CC0 ^ 0xf))
10292 target = XEXP (ite, 2);
10293 else
10294 return;
10297 edge_iterator ei;
10298 edge e1, e2;
10300 ei = ei_start (bb->succs);
10301 e1 = ei_safe_edge (ei);
10302 ei_next (&ei);
10303 e2 = ei_safe_edge (ei);
10305 if (e2->flags & EDGE_FALLTHRU)
10307 e2 = e1;
10308 e1 = ei_safe_edge (ei);
10311 if (!(e1->flags & EDGE_FALLTHRU))
10312 return;
10314 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10316 if (tmp == BB_END (bb))
10317 break;
10321 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10323 if (tend_bb != NULL)
10324 return;
10325 tend_bb = bb;
10330 /* Either we successfully remove the FPR clobbers here or we are not
10331 able to do anything for this TX. Both cases don't qualify for
10332 another look. */
10333 cfun->machine->tbegin_p = false;
10335 if (tbegin_bb == NULL || tend_bb == NULL)
10336 return;
10338 calculate_dominance_info (CDI_POST_DOMINATORS);
10339 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10340 free_dominance_info (CDI_POST_DOMINATORS);
10342 if (!result)
10343 return;
10345 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10346 gen_rtvec (2,
10347 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10348 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10349 INSN_CODE (tbegin_insn) = -1;
10350 df_insn_rescan (tbegin_insn);
10352 return;
10355 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10356 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10358 static unsigned int
10359 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10361 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10364 /* Implement TARGET_HARD_REGNO_MODE_OK.
10366 Integer modes <= word size fit into any GPR.
10367 Integer modes > word size fit into successive GPRs, starting with
10368 an even-numbered register.
10369 SImode and DImode fit into FPRs as well.
10371 Floating point modes <= word size fit into any FPR or GPR.
10372 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10373 into any FPR, or an even-odd GPR pair.
10374 TFmode fits only into an even-odd FPR pair.
10376 Complex floating point modes fit either into two FPRs, or into
10377 successive GPRs (again starting with an even number).
10378 TCmode fits only into two successive even-odd FPR pairs.
10380 Condition code modes fit only into the CC register. */
10382 static bool
10383 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10385 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10386 return false;
10388 switch (REGNO_REG_CLASS (regno))
10390 case VEC_REGS:
10391 return ((GET_MODE_CLASS (mode) == MODE_INT
10392 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10393 || mode == DFmode
10394 || (TARGET_VXE && mode == SFmode)
10395 || s390_vector_mode_supported_p (mode));
10396 break;
10397 case FP_REGS:
10398 if (TARGET_VX
10399 && ((GET_MODE_CLASS (mode) == MODE_INT
10400 && s390_class_max_nregs (FP_REGS, mode) == 1)
10401 || mode == DFmode
10402 || s390_vector_mode_supported_p (mode)))
10403 return true;
10405 if (REGNO_PAIR_OK (regno, mode))
10407 if (mode == SImode || mode == DImode)
10408 return true;
10410 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10411 return true;
10413 break;
10414 case ADDR_REGS:
10415 if (FRAME_REGNO_P (regno) && mode == Pmode)
10416 return true;
10418 /* fallthrough */
10419 case GENERAL_REGS:
10420 if (REGNO_PAIR_OK (regno, mode))
10422 if (TARGET_ZARCH
10423 || (mode != TFmode && mode != TCmode && mode != TDmode))
10424 return true;
10426 break;
10427 case CC_REGS:
10428 if (GET_MODE_CLASS (mode) == MODE_CC)
10429 return true;
10430 break;
10431 case ACCESS_REGS:
10432 if (REGNO_PAIR_OK (regno, mode))
10434 if (mode == SImode || mode == Pmode)
10435 return true;
10437 break;
10438 default:
10439 return false;
10442 return false;
10445 /* Implement TARGET_MODES_TIEABLE_P. */
10447 static bool
10448 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10450 return ((mode1 == SFmode || mode1 == DFmode)
10451 == (mode2 == SFmode || mode2 == DFmode));
10454 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10456 bool
10457 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10459 /* Once we've decided upon a register to use as base register, it must
10460 no longer be used for any other purpose. */
10461 if (cfun->machine->base_reg)
10462 if (REGNO (cfun->machine->base_reg) == old_reg
10463 || REGNO (cfun->machine->base_reg) == new_reg)
10464 return false;
10466 /* Prevent regrename from using call-saved regs which haven't
10467 actually been saved. This is necessary since regrename assumes
10468 the backend save/restore decisions are based on
10469 df_regs_ever_live. Since we have our own routine we have to tell
10470 regrename manually about it. */
10471 if (GENERAL_REGNO_P (new_reg)
10472 && !call_used_regs[new_reg]
10473 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10474 return false;
10476 return true;
10479 /* Return nonzero if register REGNO can be used as a scratch register
10480 in peephole2. */
10482 static bool
10483 s390_hard_regno_scratch_ok (unsigned int regno)
10485 /* See s390_hard_regno_rename_ok. */
10486 if (GENERAL_REGNO_P (regno)
10487 && !call_used_regs[regno]
10488 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10489 return false;
10491 return true;
10494 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10495 code that runs in z/Architecture mode, but conforms to the 31-bit
10496 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10497 bytes are saved across calls, however. */
10499 static bool
10500 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10501 machine_mode mode)
10503 /* For r12 we know that the only bits we actually care about are
10504 preserved across function calls. Since r12 is a fixed reg all
10505 accesses to r12 are generated by the backend.
10507 This workaround is necessary until gcse implements proper
10508 tracking of partially clobbered registers. */
10509 if (!TARGET_64BIT
10510 && TARGET_ZARCH
10511 && GET_MODE_SIZE (mode) > 4
10512 && (!flag_pic || regno != PIC_OFFSET_TABLE_REGNUM)
10513 && ((regno >= 6 && regno <= 15) || regno == 32))
10514 return true;
10516 if (TARGET_VX
10517 && GET_MODE_SIZE (mode) > 8
10518 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10519 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10520 return true;
10522 return false;
10525 /* Maximum number of registers to represent a value of mode MODE
10526 in a register of class RCLASS. */
10529 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10531 int reg_size;
10532 bool reg_pair_required_p = false;
10534 switch (rclass)
10536 case FP_REGS:
10537 case VEC_REGS:
10538 reg_size = TARGET_VX ? 16 : 8;
10540 /* TF and TD modes would fit into a VR but we put them into a
10541 register pair since we do not have 128bit FP instructions on
10542 full VRs. */
10543 if (TARGET_VX
10544 && SCALAR_FLOAT_MODE_P (mode)
10545 && GET_MODE_SIZE (mode) >= 16
10546 && !(TARGET_VXE && mode == TFmode))
10547 reg_pair_required_p = true;
10549 /* Even if complex types would fit into a single FPR/VR we force
10550 them into a register pair to deal with the parts more easily.
10551 (FIXME: What about complex ints?) */
10552 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10553 reg_pair_required_p = true;
10554 break;
10555 case ACCESS_REGS:
10556 reg_size = 4;
10557 break;
10558 default:
10559 reg_size = UNITS_PER_WORD;
10560 break;
10563 if (reg_pair_required_p)
10564 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10566 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10569 /* Return nonzero if mode M describes a 128-bit float in a floating point
10570 register pair. */
10572 static bool
10573 s390_is_fpr128 (machine_mode m)
10575 return m == FPRX2mode || (!TARGET_VXE && m == TFmode);
10578 /* Return nonzero if mode M describes a 128-bit float in a vector
10579 register. */
10581 static bool
10582 s390_is_vr128 (machine_mode m)
10584 return m == V1TFmode || (TARGET_VXE && m == TFmode);
10587 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10589 static bool
10590 s390_can_change_mode_class (machine_mode from_mode,
10591 machine_mode to_mode,
10592 reg_class_t rclass)
10594 machine_mode small_mode;
10595 machine_mode big_mode;
10597 /* 128-bit values have different representations in floating point and
10598 vector registers. */
10599 if (reg_classes_intersect_p (VEC_REGS, rclass)
10600 && ((s390_is_fpr128 (from_mode) && s390_is_vr128 (to_mode))
10601 || (s390_is_vr128 (from_mode) && s390_is_fpr128 (to_mode))))
10602 return false;
10604 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10605 return true;
10607 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10609 small_mode = from_mode;
10610 big_mode = to_mode;
10612 else
10614 small_mode = to_mode;
10615 big_mode = from_mode;
10618 /* Values residing in VRs are little-endian style. All modes are
10619 placed left-aligned in an VR. This means that we cannot allow
10620 switching between modes with differing sizes. Also if the vector
10621 facility is available we still place TFmode values in VR register
10622 pairs, since the only instructions we have operating on TFmodes
10623 only deal with register pairs. Therefore we have to allow DFmode
10624 subregs of TFmodes to enable the TFmode splitters. */
10625 if (reg_classes_intersect_p (VEC_REGS, rclass)
10626 && (GET_MODE_SIZE (small_mode) < 8
10627 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10628 return false;
10630 /* Likewise for access registers, since they have only half the
10631 word size on 64-bit. */
10632 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10633 return false;
10635 return true;
10638 /* Return true if we use LRA instead of reload pass. */
10639 static bool
10640 s390_lra_p (void)
10642 return s390_lra_flag;
10645 /* Return true if register FROM can be eliminated via register TO. */
10647 static bool
10648 s390_can_eliminate (const int from, const int to)
10650 /* We have not marked the base register as fixed.
10651 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10652 If a function requires the base register, we say here that this
10653 elimination cannot be performed. This will cause reload to free
10654 up the base register (as if it were fixed). On the other hand,
10655 if the current function does *not* require the base register, we
10656 say here the elimination succeeds, which in turn allows reload
10657 to allocate the base register for any other purpose. */
10658 if (from == BASE_REGNUM && to == BASE_REGNUM)
10660 s390_init_frame_layout ();
10661 return cfun->machine->base_reg == NULL_RTX;
10664 /* Everything else must point into the stack frame. */
10665 gcc_assert (to == STACK_POINTER_REGNUM
10666 || to == HARD_FRAME_POINTER_REGNUM);
10668 gcc_assert (from == FRAME_POINTER_REGNUM
10669 || from == ARG_POINTER_REGNUM
10670 || from == RETURN_ADDRESS_POINTER_REGNUM);
10672 /* Make sure we actually saved the return address. */
10673 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10674 if (!crtl->calls_eh_return
10675 && !cfun->stdarg
10676 && !cfun_frame_layout.save_return_addr_p)
10677 return false;
10679 return true;
10682 /* Return offset between register FROM and TO initially after prolog. */
10684 HOST_WIDE_INT
10685 s390_initial_elimination_offset (int from, int to)
10687 HOST_WIDE_INT offset;
10689 /* ??? Why are we called for non-eliminable pairs? */
10690 if (!s390_can_eliminate (from, to))
10691 return 0;
10693 switch (from)
10695 case FRAME_POINTER_REGNUM:
10696 offset = (get_frame_size()
10697 + STACK_POINTER_OFFSET
10698 + crtl->outgoing_args_size);
10699 break;
10701 case ARG_POINTER_REGNUM:
10702 s390_init_frame_layout ();
10703 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10704 break;
10706 case RETURN_ADDRESS_POINTER_REGNUM:
10707 s390_init_frame_layout ();
10709 if (cfun_frame_layout.first_save_gpr_slot == -1)
10711 /* If it turns out that for stdarg nothing went into the reg
10712 save area we also do not need the return address
10713 pointer. */
10714 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10715 return 0;
10717 gcc_unreachable ();
10720 /* In order to make the following work it is not necessary for
10721 r14 to have a save slot. It is sufficient if one other GPR
10722 got one. Since the GPRs are always stored without gaps we
10723 are able to calculate where the r14 save slot would
10724 reside. */
10725 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10726 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10727 UNITS_PER_LONG);
10728 break;
10730 case BASE_REGNUM:
10731 offset = 0;
10732 break;
10734 default:
10735 gcc_unreachable ();
10738 return offset;
10741 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10742 to register BASE. Return generated insn. */
10744 static rtx
10745 save_fpr (rtx base, int offset, int regnum)
10747 rtx addr;
10748 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10750 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10751 set_mem_alias_set (addr, get_varargs_alias_set ());
10752 else
10753 set_mem_alias_set (addr, get_frame_alias_set ());
10755 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10758 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10759 to register BASE. Return generated insn. */
10761 static rtx
10762 restore_fpr (rtx base, int offset, int regnum)
10764 rtx addr;
10765 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10766 set_mem_alias_set (addr, get_frame_alias_set ());
10768 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10771 /* Generate insn to save registers FIRST to LAST into
10772 the register save area located at offset OFFSET
10773 relative to register BASE. */
10775 static rtx
10776 save_gprs (rtx base, int offset, int first, int last)
10778 rtx addr, insn, note;
10779 int i;
10781 addr = plus_constant (Pmode, base, offset);
10782 addr = gen_rtx_MEM (Pmode, addr);
10784 set_mem_alias_set (addr, get_frame_alias_set ());
10786 /* Special-case single register. */
10787 if (first == last)
10789 if (TARGET_64BIT)
10790 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10791 else
10792 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10794 if (!global_not_special_regno_p (first))
10795 RTX_FRAME_RELATED_P (insn) = 1;
10796 return insn;
10800 insn = gen_store_multiple (addr,
10801 gen_rtx_REG (Pmode, first),
10802 GEN_INT (last - first + 1));
10804 if (first <= 6 && cfun->stdarg)
10805 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10807 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10809 if (first + i <= 6)
10810 set_mem_alias_set (mem, get_varargs_alias_set ());
10813 /* We need to set the FRAME_RELATED flag on all SETs
10814 inside the store-multiple pattern.
10816 However, we must not emit DWARF records for registers 2..5
10817 if they are stored for use by variable arguments ...
10819 ??? Unfortunately, it is not enough to simply not the
10820 FRAME_RELATED flags for those SETs, because the first SET
10821 of the PARALLEL is always treated as if it had the flag
10822 set, even if it does not. Therefore we emit a new pattern
10823 without those registers as REG_FRAME_RELATED_EXPR note. */
10825 if (first >= 6 && !global_not_special_regno_p (first))
10827 rtx pat = PATTERN (insn);
10829 for (i = 0; i < XVECLEN (pat, 0); i++)
10830 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10831 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10832 0, i)))))
10833 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10835 RTX_FRAME_RELATED_P (insn) = 1;
10837 else if (last >= 6)
10839 int start;
10841 for (start = first >= 6 ? first : 6; start <= last; start++)
10842 if (!global_not_special_regno_p (start))
10843 break;
10845 if (start > last)
10846 return insn;
10848 addr = plus_constant (Pmode, base,
10849 offset + (start - first) * UNITS_PER_LONG);
10851 if (start == last)
10853 if (TARGET_64BIT)
10854 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10855 gen_rtx_REG (Pmode, start));
10856 else
10857 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10858 gen_rtx_REG (Pmode, start));
10859 note = PATTERN (note);
10861 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10862 RTX_FRAME_RELATED_P (insn) = 1;
10864 return insn;
10867 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10868 gen_rtx_REG (Pmode, start),
10869 GEN_INT (last - start + 1));
10870 note = PATTERN (note);
10872 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10874 for (i = 0; i < XVECLEN (note, 0); i++)
10875 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10876 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10877 0, i)))))
10878 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10880 RTX_FRAME_RELATED_P (insn) = 1;
10883 return insn;
10886 /* Generate insn to restore registers FIRST to LAST from
10887 the register save area located at offset OFFSET
10888 relative to register BASE. */
10890 static rtx
10891 restore_gprs (rtx base, int offset, int first, int last)
10893 rtx addr, insn;
10895 addr = plus_constant (Pmode, base, offset);
10896 addr = gen_rtx_MEM (Pmode, addr);
10897 set_mem_alias_set (addr, get_frame_alias_set ());
10899 /* Special-case single register. */
10900 if (first == last)
10902 if (TARGET_64BIT)
10903 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10904 else
10905 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10907 RTX_FRAME_RELATED_P (insn) = 1;
10908 return insn;
10911 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10912 addr,
10913 GEN_INT (last - first + 1));
10914 RTX_FRAME_RELATED_P (insn) = 1;
10915 return insn;
10918 /* Return insn sequence to load the GOT register. */
10920 rtx_insn *
10921 s390_load_got (void)
10923 rtx_insn *insns;
10925 /* We cannot use pic_offset_table_rtx here since we use this
10926 function also for non-pic if __tls_get_offset is called and in
10927 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10928 aren't usable. */
10929 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10931 start_sequence ();
10933 emit_move_insn (got_rtx, s390_got_symbol ());
10935 insns = get_insns ();
10936 end_sequence ();
10937 return insns;
10940 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10941 and the change to the stack pointer. */
10943 static void
10944 s390_emit_stack_tie (void)
10946 rtx mem = gen_frame_mem (BLKmode,
10947 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10949 emit_insn (gen_stack_tie (mem));
10952 /* Copy GPRS into FPR save slots. */
10954 static void
10955 s390_save_gprs_to_fprs (void)
10957 int i;
10959 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10960 return;
10962 for (i = 6; i < 16; i++)
10964 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10966 rtx_insn *insn =
10967 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10968 gen_rtx_REG (DImode, i));
10969 RTX_FRAME_RELATED_P (insn) = 1;
10970 /* This prevents dwarf2cfi from interpreting the set. Doing
10971 so it might emit def_cfa_register infos setting an FPR as
10972 new CFA. */
10973 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10978 /* Restore GPRs from FPR save slots. */
10980 static void
10981 s390_restore_gprs_from_fprs (void)
10983 int i;
10985 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10986 return;
10988 /* Restore the GPRs starting with the stack pointer. That way the
10989 stack pointer already has its original value when it comes to
10990 restoring the hard frame pointer. So we can set the cfa reg back
10991 to the stack pointer. */
10992 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10994 rtx_insn *insn;
10996 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10997 continue;
10999 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11001 if (i == STACK_POINTER_REGNUM)
11002 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11003 else
11004 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11006 df_set_regs_ever_live (i, true);
11007 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11009 /* If either the stack pointer or the frame pointer get restored
11010 set the CFA value to its value at function start. Doing this
11011 for the frame pointer results in .cfi_def_cfa_register 15
11012 what is ok since if the stack pointer got modified it has
11013 been restored already. */
11014 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
11015 add_reg_note (insn, REG_CFA_DEF_CFA,
11016 plus_constant (Pmode, stack_pointer_rtx,
11017 STACK_POINTER_OFFSET));
11018 RTX_FRAME_RELATED_P (insn) = 1;
11023 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11024 generation. */
11026 namespace {
11028 const pass_data pass_data_s390_early_mach =
11030 RTL_PASS, /* type */
11031 "early_mach", /* name */
11032 OPTGROUP_NONE, /* optinfo_flags */
11033 TV_MACH_DEP, /* tv_id */
11034 0, /* properties_required */
11035 0, /* properties_provided */
11036 0, /* properties_destroyed */
11037 0, /* todo_flags_start */
11038 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11041 class pass_s390_early_mach : public rtl_opt_pass
11043 public:
11044 pass_s390_early_mach (gcc::context *ctxt)
11045 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11048 /* opt_pass methods: */
11049 virtual unsigned int execute (function *);
11051 }; // class pass_s390_early_mach
11053 unsigned int
11054 pass_s390_early_mach::execute (function *fun)
11056 rtx_insn *insn;
11058 /* Try to get rid of the FPR clobbers. */
11059 s390_optimize_nonescaping_tx ();
11061 /* Re-compute register info. */
11062 s390_register_info ();
11064 /* If we're using a base register, ensure that it is always valid for
11065 the first non-prologue instruction. */
11066 if (fun->machine->base_reg)
11067 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11069 /* Annotate all constant pool references to let the scheduler know
11070 they implicitly use the base register. */
11071 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11072 if (INSN_P (insn))
11074 annotate_constant_pool_refs (insn);
11075 df_insn_rescan (insn);
11077 return 0;
11080 } // anon namespace
11082 rtl_opt_pass *
11083 make_pass_s390_early_mach (gcc::context *ctxt)
11085 return new pass_s390_early_mach (ctxt);
11088 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11089 - push too big immediates to the literal pool and annotate the refs
11090 - emit frame related notes for stack pointer changes. */
11092 static rtx
11093 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11095 rtx_insn *insn;
11096 rtx orig_offset = offset;
11098 gcc_assert (REG_P (target));
11099 gcc_assert (REG_P (reg));
11100 gcc_assert (CONST_INT_P (offset));
11102 if (offset == const0_rtx) /* lr/lgr */
11104 insn = emit_move_insn (target, reg);
11106 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
11108 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11109 offset));
11111 else
11113 if (!satisfies_constraint_K (offset) /* ahi/aghi */
11114 && (!TARGET_EXTIMM
11115 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
11116 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11117 offset = force_const_mem (Pmode, offset);
11119 if (target != reg)
11121 insn = emit_move_insn (target, reg);
11122 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11125 insn = emit_insn (gen_add2_insn (target, offset));
11127 if (!CONST_INT_P (offset))
11129 annotate_constant_pool_refs (insn);
11131 if (frame_related_p)
11132 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11133 gen_rtx_SET (target,
11134 gen_rtx_PLUS (Pmode, target,
11135 orig_offset)));
11139 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11141 /* If this is a stack adjustment and we are generating a stack clash
11142 prologue, then add a REG_STACK_CHECK note to signal that this insn
11143 should be left alone. */
11144 if (flag_stack_clash_protection && target == stack_pointer_rtx)
11145 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11147 return insn;
11150 /* Emit a compare instruction with a volatile memory access as stack
11151 probe. It does not waste store tags and does not clobber any
11152 registers apart from the condition code. */
11153 static void
11154 s390_emit_stack_probe (rtx addr)
11156 rtx mem = gen_rtx_MEM (word_mode, addr);
11157 MEM_VOLATILE_P (mem) = 1;
11158 emit_insn (gen_probe_stack (mem));
11161 /* Use a runtime loop if we have to emit more probes than this. */
11162 #define MIN_UNROLL_PROBES 3
11164 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11165 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11166 probe relative to the stack pointer.
11168 Note that SIZE is negative.
11170 The return value is true if TEMP_REG has been clobbered. */
11171 static bool
11172 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11173 rtx temp_reg)
11175 bool temp_reg_clobbered_p = false;
11176 HOST_WIDE_INT probe_interval
11177 = 1 << param_stack_clash_protection_probe_interval;
11178 HOST_WIDE_INT guard_size
11179 = 1 << param_stack_clash_protection_guard_size;
11181 if (flag_stack_clash_protection)
11183 if (last_probe_offset + -INTVAL (size) < guard_size)
11184 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11185 else
11187 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11188 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11189 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11190 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11192 if (num_probes < MIN_UNROLL_PROBES)
11194 /* Emit unrolled probe statements. */
11196 for (unsigned int i = 0; i < num_probes; i++)
11198 s390_prologue_plus_offset (stack_pointer_rtx,
11199 stack_pointer_rtx,
11200 GEN_INT (-probe_interval), true);
11201 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11202 stack_pointer_rtx,
11203 offset));
11205 if (num_probes > 0)
11206 last_probe_offset = INTVAL (offset);
11207 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11209 else
11211 /* Emit a loop probing the pages. */
11213 rtx_code_label *loop_start_label = gen_label_rtx ();
11215 /* From now on temp_reg will be the CFA register. */
11216 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11217 GEN_INT (-rounded_size), true);
11218 emit_label (loop_start_label);
11220 s390_prologue_plus_offset (stack_pointer_rtx,
11221 stack_pointer_rtx,
11222 GEN_INT (-probe_interval), false);
11223 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11224 stack_pointer_rtx,
11225 offset));
11226 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11227 GT, NULL_RTX,
11228 Pmode, 1, loop_start_label);
11230 /* Without this make_edges ICEes. */
11231 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11232 LABEL_NUSES (loop_start_label) = 1;
11234 /* That's going to be a NOP since stack pointer and
11235 temp_reg are supposed to be the same here. We just
11236 emit it to set the CFA reg back to r15. */
11237 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11238 const0_rtx, true);
11239 temp_reg_clobbered_p = true;
11240 last_probe_offset = INTVAL (offset);
11241 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11244 /* Handle any residual allocation request. */
11245 s390_prologue_plus_offset (stack_pointer_rtx,
11246 stack_pointer_rtx,
11247 GEN_INT (-residual), true);
11248 last_probe_offset += residual;
11249 if (last_probe_offset >= probe_interval)
11250 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11251 stack_pointer_rtx,
11252 GEN_INT (residual
11253 - UNITS_PER_LONG)));
11255 return temp_reg_clobbered_p;
11259 /* Subtract frame size from stack pointer. */
11260 s390_prologue_plus_offset (stack_pointer_rtx,
11261 stack_pointer_rtx,
11262 size, true);
11264 return temp_reg_clobbered_p;
11267 /* Expand the prologue into a bunch of separate insns. */
11269 void
11270 s390_emit_prologue (void)
11272 rtx insn, addr;
11273 rtx temp_reg;
11274 int i;
11275 int offset;
11276 int next_fpr = 0;
11278 /* Choose best register to use for temp use within prologue.
11279 TPF with profiling must avoid the register 14 - the tracing function
11280 needs the original contents of r14 to be preserved. */
11282 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11283 && !crtl->is_leaf
11284 && !TARGET_TPF_PROFILING)
11285 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11286 else if (flag_split_stack && cfun->stdarg)
11287 temp_reg = gen_rtx_REG (Pmode, 12);
11288 else
11289 temp_reg = gen_rtx_REG (Pmode, 1);
11291 /* When probing for stack-clash mitigation, we have to track the distance
11292 between the stack pointer and closest known reference.
11294 Most of the time we have to make a worst case assumption. The
11295 only exception is when TARGET_BACKCHAIN is active, in which case
11296 we know *sp (offset 0) was written. */
11297 HOST_WIDE_INT probe_interval
11298 = 1 << param_stack_clash_protection_probe_interval;
11299 HOST_WIDE_INT last_probe_offset
11300 = (TARGET_BACKCHAIN
11301 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11302 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11304 s390_save_gprs_to_fprs ();
11306 /* Save call saved gprs. */
11307 if (cfun_frame_layout.first_save_gpr != -1)
11309 insn = save_gprs (stack_pointer_rtx,
11310 cfun_frame_layout.gprs_offset +
11311 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11312 - cfun_frame_layout.first_save_gpr_slot),
11313 cfun_frame_layout.first_save_gpr,
11314 cfun_frame_layout.last_save_gpr);
11316 /* This is not 100% correct. If we have more than one register saved,
11317 then LAST_PROBE_OFFSET can move even closer to sp. */
11318 last_probe_offset
11319 = (cfun_frame_layout.gprs_offset +
11320 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11321 - cfun_frame_layout.first_save_gpr_slot));
11323 emit_insn (insn);
11326 /* Dummy insn to mark literal pool slot. */
11328 if (cfun->machine->base_reg)
11329 emit_insn (gen_main_pool (cfun->machine->base_reg));
11331 offset = cfun_frame_layout.f0_offset;
11333 /* Save f0 and f2. */
11334 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11336 if (cfun_fpr_save_p (i))
11338 save_fpr (stack_pointer_rtx, offset, i);
11339 if (offset < last_probe_offset)
11340 last_probe_offset = offset;
11341 offset += 8;
11343 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11344 offset += 8;
11347 /* Save f4 and f6. */
11348 offset = cfun_frame_layout.f4_offset;
11349 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11351 if (cfun_fpr_save_p (i))
11353 insn = save_fpr (stack_pointer_rtx, offset, i);
11354 if (offset < last_probe_offset)
11355 last_probe_offset = offset;
11356 offset += 8;
11358 /* If f4 and f6 are call clobbered they are saved due to
11359 stdargs and therefore are not frame related. */
11360 if (!call_used_regs[i])
11361 RTX_FRAME_RELATED_P (insn) = 1;
11363 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11364 offset += 8;
11367 if (TARGET_PACKED_STACK
11368 && cfun_save_high_fprs_p
11369 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11371 offset = (cfun_frame_layout.f8_offset
11372 + (cfun_frame_layout.high_fprs - 1) * 8);
11374 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11375 if (cfun_fpr_save_p (i))
11377 insn = save_fpr (stack_pointer_rtx, offset, i);
11378 if (offset < last_probe_offset)
11379 last_probe_offset = offset;
11381 RTX_FRAME_RELATED_P (insn) = 1;
11382 offset -= 8;
11384 if (offset >= cfun_frame_layout.f8_offset)
11385 next_fpr = i;
11388 if (!TARGET_PACKED_STACK)
11389 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11391 if (flag_stack_usage_info)
11392 current_function_static_stack_size = cfun_frame_layout.frame_size;
11394 /* Decrement stack pointer. */
11396 if (cfun_frame_layout.frame_size > 0)
11398 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11399 rtx_insn *stack_pointer_backup_loc;
11400 bool temp_reg_clobbered_p;
11402 if (s390_stack_size)
11404 HOST_WIDE_INT stack_guard;
11406 if (s390_stack_guard)
11407 stack_guard = s390_stack_guard;
11408 else
11410 /* If no value for stack guard is provided the smallest power of 2
11411 larger than the current frame size is chosen. */
11412 stack_guard = 1;
11413 while (stack_guard < cfun_frame_layout.frame_size)
11414 stack_guard <<= 1;
11417 if (cfun_frame_layout.frame_size >= s390_stack_size)
11419 warning (0, "frame size of function %qs is %wd"
11420 " bytes exceeding user provided stack limit of "
11421 "%d bytes. "
11422 "An unconditional trap is added.",
11423 current_function_name(), cfun_frame_layout.frame_size,
11424 s390_stack_size);
11425 emit_insn (gen_trap ());
11426 emit_barrier ();
11428 else
11430 /* stack_guard has to be smaller than s390_stack_size.
11431 Otherwise we would emit an AND with zero which would
11432 not match the test under mask pattern. */
11433 if (stack_guard >= s390_stack_size)
11435 warning (0, "frame size of function %qs is %wd"
11436 " bytes which is more than half the stack size. "
11437 "The dynamic check would not be reliable. "
11438 "No check emitted for this function.",
11439 current_function_name(),
11440 cfun_frame_layout.frame_size);
11442 else
11444 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11445 & ~(stack_guard - 1));
11447 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11448 GEN_INT (stack_check_mask));
11449 if (TARGET_64BIT)
11450 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11451 t, const0_rtx),
11452 t, const0_rtx, const0_rtx));
11453 else
11454 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11455 t, const0_rtx),
11456 t, const0_rtx, const0_rtx));
11461 if (s390_warn_framesize > 0
11462 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11463 warning (0, "frame size of %qs is %wd bytes",
11464 current_function_name (), cfun_frame_layout.frame_size);
11466 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11467 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11469 /* Save the location where we could backup the incoming stack
11470 pointer. */
11471 stack_pointer_backup_loc = get_last_insn ();
11473 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11474 temp_reg);
11476 if (TARGET_BACKCHAIN || next_fpr)
11478 if (temp_reg_clobbered_p)
11480 /* allocate_stack_space had to make use of temp_reg and
11481 we need it to hold a backup of the incoming stack
11482 pointer. Calculate back that value from the current
11483 stack pointer. */
11484 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11485 GEN_INT (cfun_frame_layout.frame_size),
11486 false);
11488 else
11490 /* allocate_stack_space didn't actually required
11491 temp_reg. Insert the stack pointer backup insn
11492 before the stack pointer decrement code - knowing now
11493 that the value will survive. */
11494 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11495 stack_pointer_backup_loc);
11499 /* Set backchain. */
11501 if (TARGET_BACKCHAIN)
11503 if (cfun_frame_layout.backchain_offset)
11504 addr = gen_rtx_MEM (Pmode,
11505 plus_constant (Pmode, stack_pointer_rtx,
11506 cfun_frame_layout.backchain_offset));
11507 else
11508 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11509 set_mem_alias_set (addr, get_frame_alias_set ());
11510 insn = emit_insn (gen_move_insn (addr, temp_reg));
11513 /* If we support non-call exceptions (e.g. for Java),
11514 we need to make sure the backchain pointer is set up
11515 before any possibly trapping memory access. */
11516 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11518 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11519 emit_clobber (addr);
11522 else if (flag_stack_clash_protection)
11523 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11525 /* Save fprs 8 - 15 (64 bit ABI). */
11527 if (cfun_save_high_fprs_p && next_fpr)
11529 /* If the stack might be accessed through a different register
11530 we have to make sure that the stack pointer decrement is not
11531 moved below the use of the stack slots. */
11532 s390_emit_stack_tie ();
11534 insn = emit_insn (gen_add2_insn (temp_reg,
11535 GEN_INT (cfun_frame_layout.f8_offset)));
11537 offset = 0;
11539 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11540 if (cfun_fpr_save_p (i))
11542 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11543 cfun_frame_layout.frame_size
11544 + cfun_frame_layout.f8_offset
11545 + offset);
11547 insn = save_fpr (temp_reg, offset, i);
11548 offset += 8;
11549 RTX_FRAME_RELATED_P (insn) = 1;
11550 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11551 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11552 gen_rtx_REG (DFmode, i)));
11556 /* Set frame pointer, if needed. */
11558 if (frame_pointer_needed)
11560 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11561 RTX_FRAME_RELATED_P (insn) = 1;
11564 /* Set up got pointer, if needed. */
11566 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11568 rtx_insn *insns = s390_load_got ();
11570 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11571 annotate_constant_pool_refs (insn);
11573 emit_insn (insns);
11576 #if TARGET_TPF != 0
11577 if (TARGET_TPF_PROFILING)
11579 /* Generate a BAS instruction to serve as a function entry
11580 intercept to facilitate the use of tracing algorithms located
11581 at the branch target. */
11582 emit_insn (gen_prologue_tpf (
11583 GEN_INT (s390_tpf_trace_hook_prologue_check),
11584 GEN_INT (s390_tpf_trace_hook_prologue_target)));
11586 /* Emit a blockage here so that all code lies between the
11587 profiling mechanisms. */
11588 emit_insn (gen_blockage ());
11590 #endif
11593 /* Expand the epilogue into a bunch of separate insns. */
11595 void
11596 s390_emit_epilogue (bool sibcall)
11598 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11599 int area_bottom, area_top, offset = 0;
11600 int next_offset;
11601 int i;
11603 #if TARGET_TPF != 0
11604 if (TARGET_TPF_PROFILING)
11606 /* Generate a BAS instruction to serve as a function entry
11607 intercept to facilitate the use of tracing algorithms located
11608 at the branch target. */
11610 /* Emit a blockage here so that all code lies between the
11611 profiling mechanisms. */
11612 emit_insn (gen_blockage ());
11614 emit_insn (gen_epilogue_tpf (
11615 GEN_INT (s390_tpf_trace_hook_epilogue_check),
11616 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11618 #endif
11620 /* Check whether to use frame or stack pointer for restore. */
11622 frame_pointer = (frame_pointer_needed
11623 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11625 s390_frame_area (&area_bottom, &area_top);
11627 /* Check whether we can access the register save area.
11628 If not, increment the frame pointer as required. */
11630 if (area_top <= area_bottom)
11632 /* Nothing to restore. */
11634 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11635 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11637 /* Area is in range. */
11638 offset = cfun_frame_layout.frame_size;
11640 else
11642 rtx_insn *insn;
11643 rtx frame_off, cfa;
11645 offset = area_bottom < 0 ? -area_bottom : 0;
11646 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11648 cfa = gen_rtx_SET (frame_pointer,
11649 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11650 if (DISP_IN_RANGE (INTVAL (frame_off)))
11652 rtx set;
11654 set = gen_rtx_SET (frame_pointer,
11655 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11656 insn = emit_insn (set);
11658 else
11660 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11661 frame_off = force_const_mem (Pmode, frame_off);
11663 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11664 annotate_constant_pool_refs (insn);
11666 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11667 RTX_FRAME_RELATED_P (insn) = 1;
11670 /* Restore call saved fprs. */
11672 if (TARGET_64BIT)
11674 if (cfun_save_high_fprs_p)
11676 next_offset = cfun_frame_layout.f8_offset;
11677 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11679 if (cfun_fpr_save_p (i))
11681 restore_fpr (frame_pointer,
11682 offset + next_offset, i);
11683 cfa_restores
11684 = alloc_reg_note (REG_CFA_RESTORE,
11685 gen_rtx_REG (DFmode, i), cfa_restores);
11686 next_offset += 8;
11692 else
11694 next_offset = cfun_frame_layout.f4_offset;
11695 /* f4, f6 */
11696 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11698 if (cfun_fpr_save_p (i))
11700 restore_fpr (frame_pointer,
11701 offset + next_offset, i);
11702 cfa_restores
11703 = alloc_reg_note (REG_CFA_RESTORE,
11704 gen_rtx_REG (DFmode, i), cfa_restores);
11705 next_offset += 8;
11707 else if (!TARGET_PACKED_STACK)
11708 next_offset += 8;
11713 /* Restore call saved gprs. */
11715 if (cfun_frame_layout.first_restore_gpr != -1)
11717 rtx insn, addr;
11718 int i;
11720 /* Check for global register and save them
11721 to stack location from where they get restored. */
11723 for (i = cfun_frame_layout.first_restore_gpr;
11724 i <= cfun_frame_layout.last_restore_gpr;
11725 i++)
11727 if (global_not_special_regno_p (i))
11729 addr = plus_constant (Pmode, frame_pointer,
11730 offset + cfun_frame_layout.gprs_offset
11731 + (i - cfun_frame_layout.first_save_gpr_slot)
11732 * UNITS_PER_LONG);
11733 addr = gen_rtx_MEM (Pmode, addr);
11734 set_mem_alias_set (addr, get_frame_alias_set ());
11735 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11737 else
11738 cfa_restores
11739 = alloc_reg_note (REG_CFA_RESTORE,
11740 gen_rtx_REG (Pmode, i), cfa_restores);
11743 /* Fetch return address from stack before load multiple,
11744 this will do good for scheduling.
11746 Only do this if we already decided that r14 needs to be
11747 saved to a stack slot. (And not just because r14 happens to
11748 be in between two GPRs which need saving.) Otherwise it
11749 would be difficult to take that decision back in
11750 s390_optimize_prologue.
11752 This optimization is only helpful on in-order machines. */
11753 if (! sibcall
11754 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11755 && s390_tune <= PROCESSOR_2097_Z10)
11757 int return_regnum = find_unused_clobbered_reg();
11758 if (!return_regnum
11759 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11760 && !TARGET_CPU_Z10
11761 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11763 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11764 return_regnum = 4;
11766 return_reg = gen_rtx_REG (Pmode, return_regnum);
11768 addr = plus_constant (Pmode, frame_pointer,
11769 offset + cfun_frame_layout.gprs_offset
11770 + (RETURN_REGNUM
11771 - cfun_frame_layout.first_save_gpr_slot)
11772 * UNITS_PER_LONG);
11773 addr = gen_rtx_MEM (Pmode, addr);
11774 set_mem_alias_set (addr, get_frame_alias_set ());
11775 emit_move_insn (return_reg, addr);
11777 /* Once we did that optimization we have to make sure
11778 s390_optimize_prologue does not try to remove the store
11779 of r14 since we will not be able to find the load issued
11780 here. */
11781 cfun_frame_layout.save_return_addr_p = true;
11784 insn = restore_gprs (frame_pointer,
11785 offset + cfun_frame_layout.gprs_offset
11786 + (cfun_frame_layout.first_restore_gpr
11787 - cfun_frame_layout.first_save_gpr_slot)
11788 * UNITS_PER_LONG,
11789 cfun_frame_layout.first_restore_gpr,
11790 cfun_frame_layout.last_restore_gpr);
11791 insn = emit_insn (insn);
11792 REG_NOTES (insn) = cfa_restores;
11793 add_reg_note (insn, REG_CFA_DEF_CFA,
11794 plus_constant (Pmode, stack_pointer_rtx,
11795 STACK_POINTER_OFFSET));
11796 RTX_FRAME_RELATED_P (insn) = 1;
11799 s390_restore_gprs_from_fprs ();
11801 if (! sibcall)
11803 if (!return_reg && !s390_can_use_return_insn ())
11804 /* We planned to emit (return), be we are not allowed to. */
11805 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11807 if (return_reg)
11808 /* Emit (return) and (use). */
11809 emit_jump_insn (gen_return_use (return_reg));
11810 else
11811 /* The fact that RETURN_REGNUM is used is already reflected by
11812 EPILOGUE_USES. Emit plain (return). */
11813 emit_jump_insn (gen_return ());
11817 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11819 static void
11820 s300_set_up_by_prologue (hard_reg_set_container *regs)
11822 if (cfun->machine->base_reg
11823 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11824 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11827 /* -fsplit-stack support. */
11829 /* A SYMBOL_REF for __morestack. */
11830 static GTY(()) rtx morestack_ref;
11832 /* When using -fsplit-stack, the allocation routines set a field in
11833 the TCB to the bottom of the stack plus this much space, measured
11834 in bytes. */
11836 #define SPLIT_STACK_AVAILABLE 1024
11838 /* Emit the parmblock for __morestack into .rodata section. It
11839 consists of 3 pointer size entries:
11840 - frame size
11841 - size of stack arguments
11842 - offset between parm block and __morestack return label */
11844 void
11845 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11846 rtx frame_size, rtx args_size)
11848 rtx ops[] = { parm_block, call_done };
11850 switch_to_section (targetm.asm_out.function_rodata_section
11851 (current_function_decl, false));
11853 if (TARGET_64BIT)
11854 output_asm_insn (".align\t8", NULL);
11855 else
11856 output_asm_insn (".align\t4", NULL);
11858 (*targetm.asm_out.internal_label) (asm_out_file, "L",
11859 CODE_LABEL_NUMBER (parm_block));
11860 if (TARGET_64BIT)
11862 output_asm_insn (".quad\t%0", &frame_size);
11863 output_asm_insn (".quad\t%0", &args_size);
11864 output_asm_insn (".quad\t%1-%0", ops);
11866 else
11868 output_asm_insn (".long\t%0", &frame_size);
11869 output_asm_insn (".long\t%0", &args_size);
11870 output_asm_insn (".long\t%1-%0", ops);
11873 switch_to_section (current_function_section ());
11876 /* Emit -fsplit-stack prologue, which goes before the regular function
11877 prologue. */
11879 void
11880 s390_expand_split_stack_prologue (void)
11882 rtx r1, guard, cc = NULL;
11883 rtx_insn *insn;
11884 /* Offset from thread pointer to __private_ss. */
11885 int psso = TARGET_64BIT ? 0x38 : 0x20;
11886 /* Pointer size in bytes. */
11887 /* Frame size and argument size - the two parameters to __morestack. */
11888 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11889 /* Align argument size to 8 bytes - simplifies __morestack code. */
11890 HOST_WIDE_INT args_size = crtl->args.size >= 0
11891 ? ((crtl->args.size + 7) & ~7)
11892 : 0;
11893 /* Label to be called by __morestack. */
11894 rtx_code_label *call_done = NULL;
11895 rtx_code_label *parm_base = NULL;
11896 rtx tmp;
11898 gcc_assert (flag_split_stack && reload_completed);
11900 r1 = gen_rtx_REG (Pmode, 1);
11902 /* If no stack frame will be allocated, don't do anything. */
11903 if (!frame_size)
11905 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11907 /* If va_start is used, just use r15. */
11908 emit_move_insn (r1,
11909 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11910 GEN_INT (STACK_POINTER_OFFSET)));
11913 return;
11916 if (morestack_ref == NULL_RTX)
11918 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11919 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11920 | SYMBOL_FLAG_FUNCTION);
11923 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11925 /* If frame_size will fit in an add instruction, do a stack space
11926 check, and only call __morestack if there's not enough space. */
11928 /* Get thread pointer. r1 is the only register we can always destroy - r0
11929 could contain a static chain (and cannot be used to address memory
11930 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11931 emit_insn (gen_get_thread_pointer (Pmode, r1));
11932 /* Aim at __private_ss. */
11933 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11935 /* If less that 1kiB used, skip addition and compare directly with
11936 __private_ss. */
11937 if (frame_size > SPLIT_STACK_AVAILABLE)
11939 emit_move_insn (r1, guard);
11940 if (TARGET_64BIT)
11941 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11942 else
11943 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11944 guard = r1;
11947 /* Compare the (maybe adjusted) guard with the stack pointer. */
11948 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11951 call_done = gen_label_rtx ();
11952 parm_base = gen_label_rtx ();
11953 LABEL_NUSES (parm_base)++;
11954 LABEL_NUSES (call_done)++;
11956 /* %r1 = litbase. */
11957 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11958 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11959 LABEL_NUSES (parm_base)++;
11961 /* Now, we need to call __morestack. It has very special calling
11962 conventions: it preserves param/return/static chain registers for
11963 calling main function body, and looks for its own parameters at %r1. */
11964 if (cc != NULL)
11965 tmp = gen_split_stack_cond_call (Pmode,
11966 morestack_ref,
11967 parm_base,
11968 call_done,
11969 GEN_INT (frame_size),
11970 GEN_INT (args_size),
11971 cc);
11972 else
11973 tmp = gen_split_stack_call (Pmode,
11974 morestack_ref,
11975 parm_base,
11976 call_done,
11977 GEN_INT (frame_size),
11978 GEN_INT (args_size));
11980 insn = emit_jump_insn (tmp);
11981 JUMP_LABEL (insn) = call_done;
11982 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11983 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11985 if (cc != NULL)
11987 /* Mark the jump as very unlikely to be taken. */
11988 add_reg_br_prob_note (insn,
11989 profile_probability::very_unlikely ());
11991 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11993 /* If va_start is used, and __morestack was not called, just use
11994 r15. */
11995 emit_move_insn (r1,
11996 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11997 GEN_INT (STACK_POINTER_OFFSET)));
12000 else
12002 emit_barrier ();
12005 /* __morestack will call us here. */
12007 emit_label (call_done);
12010 /* We may have to tell the dataflow pass that the split stack prologue
12011 is initializing a register. */
12013 static void
12014 s390_live_on_entry (bitmap regs)
12016 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12018 gcc_assert (flag_split_stack);
12019 bitmap_set_bit (regs, 1);
12023 /* Return true if the function can use simple_return to return outside
12024 of a shrink-wrapped region. At present shrink-wrapping is supported
12025 in all cases. */
12027 bool
12028 s390_can_use_simple_return_insn (void)
12030 return true;
12033 /* Return true if the epilogue is guaranteed to contain only a return
12034 instruction and if a direct return can therefore be used instead.
12035 One of the main advantages of using direct return instructions
12036 is that we can then use conditional returns. */
12038 bool
12039 s390_can_use_return_insn (void)
12041 int i;
12043 if (!reload_completed)
12044 return false;
12046 if (crtl->profile)
12047 return false;
12049 if (TARGET_TPF_PROFILING)
12050 return false;
12052 for (i = 0; i < 16; i++)
12053 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12054 return false;
12056 /* For 31 bit this is not covered by the frame_size check below
12057 since f4, f6 are saved in the register save area without needing
12058 additional stack space. */
12059 if (!TARGET_64BIT
12060 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12061 return false;
12063 if (cfun->machine->base_reg
12064 && !call_used_regs[REGNO (cfun->machine->base_reg)])
12065 return false;
12067 return cfun_frame_layout.frame_size == 0;
12070 /* The VX ABI differs for vararg functions. Therefore we need the
12071 prototype of the callee to be available when passing vector type
12072 values. */
12073 static const char *
12074 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12076 return ((TARGET_VX_ABI
12077 && typelist == 0
12078 && VECTOR_TYPE_P (TREE_TYPE (val))
12079 && (funcdecl == NULL_TREE
12080 || (TREE_CODE (funcdecl) == FUNCTION_DECL
12081 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12082 ? N_("vector argument passed to unprototyped function")
12083 : NULL);
12087 /* Return the size in bytes of a function argument of
12088 type TYPE and/or mode MODE. At least one of TYPE or
12089 MODE must be specified. */
12091 static int
12092 s390_function_arg_size (machine_mode mode, const_tree type)
12094 if (type)
12095 return int_size_in_bytes (type);
12097 /* No type info available for some library calls ... */
12098 if (mode != BLKmode)
12099 return GET_MODE_SIZE (mode);
12101 /* If we have neither type nor mode, abort */
12102 gcc_unreachable ();
12105 /* Return true if a function argument of type TYPE and mode MODE
12106 is to be passed in a vector register, if available. */
12108 bool
12109 s390_function_arg_vector (machine_mode mode, const_tree type)
12111 if (!TARGET_VX_ABI)
12112 return false;
12114 if (s390_function_arg_size (mode, type) > 16)
12115 return false;
12117 /* No type info available for some library calls ... */
12118 if (!type)
12119 return VECTOR_MODE_P (mode);
12121 /* The ABI says that record types with a single member are treated
12122 just like that member would be. */
12123 int empty_base_seen = 0;
12124 const_tree orig_type = type;
12125 while (TREE_CODE (type) == RECORD_TYPE)
12127 tree field, single = NULL_TREE;
12129 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12131 if (TREE_CODE (field) != FIELD_DECL)
12132 continue;
12134 if (DECL_FIELD_ABI_IGNORED (field))
12136 if (lookup_attribute ("no_unique_address",
12137 DECL_ATTRIBUTES (field)))
12138 empty_base_seen |= 2;
12139 else
12140 empty_base_seen |= 1;
12141 continue;
12144 if (single == NULL_TREE)
12145 single = TREE_TYPE (field);
12146 else
12147 return false;
12150 if (single == NULL_TREE)
12151 return false;
12152 else
12154 /* If the field declaration adds extra byte due to
12155 e.g. padding this is not accepted as vector type. */
12156 if (int_size_in_bytes (single) <= 0
12157 || int_size_in_bytes (single) != int_size_in_bytes (type))
12158 return false;
12159 type = single;
12163 if (!VECTOR_TYPE_P (type))
12164 return false;
12166 if (warn_psabi && empty_base_seen)
12168 static unsigned last_reported_type_uid;
12169 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12170 if (uid != last_reported_type_uid)
12172 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12173 last_reported_type_uid = uid;
12174 if (empty_base_seen & 1)
12175 inform (input_location,
12176 "parameter passing for argument of type %qT when C++17 "
12177 "is enabled changed to match C++14 %{in GCC 10.1%}",
12178 orig_type, url);
12179 else
12180 inform (input_location,
12181 "parameter passing for argument of type %qT with "
12182 "%<[[no_unique_address]]%> members changed "
12183 "%{in GCC 10.1%}", orig_type, url);
12186 return true;
12189 /* Return true if a function argument of type TYPE and mode MODE
12190 is to be passed in a floating-point register, if available. */
12192 static bool
12193 s390_function_arg_float (machine_mode mode, const_tree type)
12195 if (s390_function_arg_size (mode, type) > 8)
12196 return false;
12198 /* Soft-float changes the ABI: no floating-point registers are used. */
12199 if (TARGET_SOFT_FLOAT)
12200 return false;
12202 /* No type info available for some library calls ... */
12203 if (!type)
12204 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12206 /* The ABI says that record types with a single member are treated
12207 just like that member would be. */
12208 int empty_base_seen = 0;
12209 const_tree orig_type = type;
12210 while (TREE_CODE (type) == RECORD_TYPE)
12212 tree field, single = NULL_TREE;
12214 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12216 if (TREE_CODE (field) != FIELD_DECL)
12217 continue;
12218 if (DECL_FIELD_ABI_IGNORED (field))
12220 if (lookup_attribute ("no_unique_address",
12221 DECL_ATTRIBUTES (field)))
12222 empty_base_seen |= 2;
12223 else
12224 empty_base_seen |= 1;
12225 continue;
12228 if (single == NULL_TREE)
12229 single = TREE_TYPE (field);
12230 else
12231 return false;
12234 if (single == NULL_TREE)
12235 return false;
12236 else
12237 type = single;
12240 if (TREE_CODE (type) != REAL_TYPE)
12241 return false;
12243 if (warn_psabi && empty_base_seen)
12245 static unsigned last_reported_type_uid;
12246 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12247 if (uid != last_reported_type_uid)
12249 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12250 last_reported_type_uid = uid;
12251 if (empty_base_seen & 1)
12252 inform (input_location,
12253 "parameter passing for argument of type %qT when C++17 "
12254 "is enabled changed to match C++14 %{in GCC 10.1%}",
12255 orig_type, url);
12256 else
12257 inform (input_location,
12258 "parameter passing for argument of type %qT with "
12259 "%<[[no_unique_address]]%> members changed "
12260 "%{in GCC 10.1%}", orig_type, url);
12264 return true;
12267 /* Return true if a function argument of type TYPE and mode MODE
12268 is to be passed in an integer register, or a pair of integer
12269 registers, if available. */
12271 static bool
12272 s390_function_arg_integer (machine_mode mode, const_tree type)
12274 int size = s390_function_arg_size (mode, type);
12275 if (size > 8)
12276 return false;
12278 /* No type info available for some library calls ... */
12279 if (!type)
12280 return GET_MODE_CLASS (mode) == MODE_INT
12281 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12283 /* We accept small integral (and similar) types. */
12284 if (INTEGRAL_TYPE_P (type)
12285 || POINTER_TYPE_P (type)
12286 || TREE_CODE (type) == NULLPTR_TYPE
12287 || TREE_CODE (type) == OFFSET_TYPE
12288 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12289 return true;
12291 /* We also accept structs of size 1, 2, 4, 8 that are not
12292 passed in floating-point registers. */
12293 if (AGGREGATE_TYPE_P (type)
12294 && exact_log2 (size) >= 0
12295 && !s390_function_arg_float (mode, type))
12296 return true;
12298 return false;
12301 /* Return 1 if a function argument ARG is to be passed by reference.
12302 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12303 are passed by value, all other structures (and complex numbers) are
12304 passed by reference. */
12306 static bool
12307 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12309 int size = s390_function_arg_size (arg.mode, arg.type);
12311 if (s390_function_arg_vector (arg.mode, arg.type))
12312 return false;
12314 if (size > 8)
12315 return true;
12317 if (tree type = arg.type)
12319 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12320 return true;
12322 if (TREE_CODE (type) == COMPLEX_TYPE
12323 || TREE_CODE (type) == VECTOR_TYPE)
12324 return true;
12327 return false;
12330 /* Update the data in CUM to advance over argument ARG. */
12332 static void
12333 s390_function_arg_advance (cumulative_args_t cum_v,
12334 const function_arg_info &arg)
12336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12338 if (s390_function_arg_vector (arg.mode, arg.type))
12340 /* We are called for unnamed vector stdarg arguments which are
12341 passed on the stack. In this case this hook does not have to
12342 do anything since stack arguments are tracked by common
12343 code. */
12344 if (!arg.named)
12345 return;
12346 cum->vrs += 1;
12348 else if (s390_function_arg_float (arg.mode, arg.type))
12350 cum->fprs += 1;
12352 else if (s390_function_arg_integer (arg.mode, arg.type))
12354 int size = s390_function_arg_size (arg.mode, arg.type);
12355 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12357 else
12358 gcc_unreachable ();
12361 /* Define where to put the arguments to a function.
12362 Value is zero to push the argument on the stack,
12363 or a hard register in which to store the argument.
12365 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12366 the preceding args and about the function being called.
12367 ARG is a description of the argument.
12369 On S/390, we use general purpose registers 2 through 6 to
12370 pass integer, pointer, and certain structure arguments, and
12371 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12372 to pass floating point arguments. All remaining arguments
12373 are pushed to the stack. */
12375 static rtx
12376 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12378 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12380 if (!arg.named)
12381 s390_check_type_for_vector_abi (arg.type, true, false);
12383 if (s390_function_arg_vector (arg.mode, arg.type))
12385 /* Vector arguments being part of the ellipsis are passed on the
12386 stack. */
12387 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12388 return NULL_RTX;
12390 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12392 else if (s390_function_arg_float (arg.mode, arg.type))
12394 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12395 return NULL_RTX;
12396 else
12397 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12399 else if (s390_function_arg_integer (arg.mode, arg.type))
12401 int size = s390_function_arg_size (arg.mode, arg.type);
12402 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12404 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12405 return NULL_RTX;
12406 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12407 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12408 else if (n_gprs == 2)
12410 rtvec p = rtvec_alloc (2);
12412 RTVEC_ELT (p, 0)
12413 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12414 const0_rtx);
12415 RTVEC_ELT (p, 1)
12416 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12417 GEN_INT (4));
12419 return gen_rtx_PARALLEL (arg.mode, p);
12423 /* After the real arguments, expand_call calls us once again with an
12424 end marker. Whatever we return here is passed as operand 2 to the
12425 call expanders.
12427 We don't need this feature ... */
12428 else if (arg.end_marker_p ())
12429 return const0_rtx;
12431 gcc_unreachable ();
12434 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12435 left-justified when placed on the stack during parameter passing. */
12437 static pad_direction
12438 s390_function_arg_padding (machine_mode mode, const_tree type)
12440 if (s390_function_arg_vector (mode, type))
12441 return PAD_UPWARD;
12443 return default_function_arg_padding (mode, type);
12446 /* Return true if return values of type TYPE should be returned
12447 in a memory buffer whose address is passed by the caller as
12448 hidden first argument. */
12450 static bool
12451 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12453 /* We accept small integral (and similar) types. */
12454 if (INTEGRAL_TYPE_P (type)
12455 || POINTER_TYPE_P (type)
12456 || TREE_CODE (type) == OFFSET_TYPE
12457 || TREE_CODE (type) == REAL_TYPE)
12458 return int_size_in_bytes (type) > 8;
12460 /* vector types which fit into a VR. */
12461 if (TARGET_VX_ABI
12462 && VECTOR_TYPE_P (type)
12463 && int_size_in_bytes (type) <= 16)
12464 return false;
12466 /* Aggregates and similar constructs are always returned
12467 in memory. */
12468 if (AGGREGATE_TYPE_P (type)
12469 || TREE_CODE (type) == COMPLEX_TYPE
12470 || VECTOR_TYPE_P (type))
12471 return true;
12473 /* ??? We get called on all sorts of random stuff from
12474 aggregate_value_p. We can't abort, but it's not clear
12475 what's safe to return. Pretend it's a struct I guess. */
12476 return true;
12479 /* Function arguments and return values are promoted to word size. */
12481 static machine_mode
12482 s390_promote_function_mode (const_tree type, machine_mode mode,
12483 int *punsignedp,
12484 const_tree fntype ATTRIBUTE_UNUSED,
12485 int for_return ATTRIBUTE_UNUSED)
12487 if (INTEGRAL_MODE_P (mode)
12488 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12490 if (type != NULL_TREE && POINTER_TYPE_P (type))
12491 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12492 return Pmode;
12495 return mode;
12498 /* Define where to return a (scalar) value of type RET_TYPE.
12499 If RET_TYPE is null, define where to return a (scalar)
12500 value of mode MODE from a libcall. */
12502 static rtx
12503 s390_function_and_libcall_value (machine_mode mode,
12504 const_tree ret_type,
12505 const_tree fntype_or_decl,
12506 bool outgoing ATTRIBUTE_UNUSED)
12508 /* For vector return types it is important to use the RET_TYPE
12509 argument whenever available since the middle-end might have
12510 changed the mode to a scalar mode. */
12511 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12512 || (!ret_type && VECTOR_MODE_P (mode)));
12514 /* For normal functions perform the promotion as
12515 promote_function_mode would do. */
12516 if (ret_type)
12518 int unsignedp = TYPE_UNSIGNED (ret_type);
12519 mode = promote_function_mode (ret_type, mode, &unsignedp,
12520 fntype_or_decl, 1);
12523 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12524 || SCALAR_FLOAT_MODE_P (mode)
12525 || (TARGET_VX_ABI && vector_ret_type_p));
12526 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12528 if (TARGET_VX_ABI && vector_ret_type_p)
12529 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12530 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12531 return gen_rtx_REG (mode, 16);
12532 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12533 || UNITS_PER_LONG == UNITS_PER_WORD)
12534 return gen_rtx_REG (mode, 2);
12535 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12537 /* This case is triggered when returning a 64 bit value with
12538 -m31 -mzarch. Although the value would fit into a single
12539 register it has to be forced into a 32 bit register pair in
12540 order to match the ABI. */
12541 rtvec p = rtvec_alloc (2);
12543 RTVEC_ELT (p, 0)
12544 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12545 RTVEC_ELT (p, 1)
12546 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12548 return gen_rtx_PARALLEL (mode, p);
12551 gcc_unreachable ();
12554 /* Define where to return a scalar return value of type RET_TYPE. */
12556 static rtx
12557 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12558 bool outgoing)
12560 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12561 fn_decl_or_type, outgoing);
12564 /* Define where to return a scalar libcall return value of mode
12565 MODE. */
12567 static rtx
12568 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12570 return s390_function_and_libcall_value (mode, NULL_TREE,
12571 NULL_TREE, true);
12575 /* Create and return the va_list datatype.
12577 On S/390, va_list is an array type equivalent to
12579 typedef struct __va_list_tag
12581 long __gpr;
12582 long __fpr;
12583 void *__overflow_arg_area;
12584 void *__reg_save_area;
12585 } va_list[1];
12587 where __gpr and __fpr hold the number of general purpose
12588 or floating point arguments used up to now, respectively,
12589 __overflow_arg_area points to the stack location of the
12590 next argument passed on the stack, and __reg_save_area
12591 always points to the start of the register area in the
12592 call frame of the current function. The function prologue
12593 saves all registers used for argument passing into this
12594 area if the function uses variable arguments. */
12596 static tree
12597 s390_build_builtin_va_list (void)
12599 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12601 record = lang_hooks.types.make_type (RECORD_TYPE);
12603 type_decl =
12604 build_decl (BUILTINS_LOCATION,
12605 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12607 f_gpr = build_decl (BUILTINS_LOCATION,
12608 FIELD_DECL, get_identifier ("__gpr"),
12609 long_integer_type_node);
12610 f_fpr = build_decl (BUILTINS_LOCATION,
12611 FIELD_DECL, get_identifier ("__fpr"),
12612 long_integer_type_node);
12613 f_ovf = build_decl (BUILTINS_LOCATION,
12614 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12615 ptr_type_node);
12616 f_sav = build_decl (BUILTINS_LOCATION,
12617 FIELD_DECL, get_identifier ("__reg_save_area"),
12618 ptr_type_node);
12620 va_list_gpr_counter_field = f_gpr;
12621 va_list_fpr_counter_field = f_fpr;
12623 DECL_FIELD_CONTEXT (f_gpr) = record;
12624 DECL_FIELD_CONTEXT (f_fpr) = record;
12625 DECL_FIELD_CONTEXT (f_ovf) = record;
12626 DECL_FIELD_CONTEXT (f_sav) = record;
12628 TYPE_STUB_DECL (record) = type_decl;
12629 TYPE_NAME (record) = type_decl;
12630 TYPE_FIELDS (record) = f_gpr;
12631 DECL_CHAIN (f_gpr) = f_fpr;
12632 DECL_CHAIN (f_fpr) = f_ovf;
12633 DECL_CHAIN (f_ovf) = f_sav;
12635 layout_type (record);
12637 /* The correct type is an array type of one element. */
12638 return build_array_type (record, build_index_type (size_zero_node));
12641 /* Implement va_start by filling the va_list structure VALIST.
12642 STDARG_P is always true, and ignored.
12643 NEXTARG points to the first anonymous stack argument.
12645 The following global variables are used to initialize
12646 the va_list structure:
12648 crtl->args.info:
12649 holds number of gprs and fprs used for named arguments.
12650 crtl->args.arg_offset_rtx:
12651 holds the offset of the first anonymous stack argument
12652 (relative to the virtual arg pointer). */
12654 static void
12655 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12657 HOST_WIDE_INT n_gpr, n_fpr;
12658 int off;
12659 tree f_gpr, f_fpr, f_ovf, f_sav;
12660 tree gpr, fpr, ovf, sav, t;
12662 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12663 f_fpr = DECL_CHAIN (f_gpr);
12664 f_ovf = DECL_CHAIN (f_fpr);
12665 f_sav = DECL_CHAIN (f_ovf);
12667 valist = build_simple_mem_ref (valist);
12668 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12669 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12670 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12671 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12673 /* Count number of gp and fp argument registers used. */
12675 n_gpr = crtl->args.info.gprs;
12676 n_fpr = crtl->args.info.fprs;
12678 if (cfun->va_list_gpr_size)
12680 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12681 build_int_cst (NULL_TREE, n_gpr));
12682 TREE_SIDE_EFFECTS (t) = 1;
12683 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12686 if (cfun->va_list_fpr_size)
12688 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12689 build_int_cst (NULL_TREE, n_fpr));
12690 TREE_SIDE_EFFECTS (t) = 1;
12691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12694 if (flag_split_stack
12695 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12696 == NULL)
12697 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12699 rtx reg;
12700 rtx_insn *seq;
12702 reg = gen_reg_rtx (Pmode);
12703 cfun->machine->split_stack_varargs_pointer = reg;
12705 start_sequence ();
12706 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12707 seq = get_insns ();
12708 end_sequence ();
12710 push_topmost_sequence ();
12711 emit_insn_after (seq, entry_of_function ());
12712 pop_topmost_sequence ();
12715 /* Find the overflow area.
12716 FIXME: This currently is too pessimistic when the vector ABI is
12717 enabled. In that case we *always* set up the overflow area
12718 pointer. */
12719 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12720 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12721 || TARGET_VX_ABI)
12723 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12724 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12725 else
12726 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12728 off = INTVAL (crtl->args.arg_offset_rtx);
12729 off = off < 0 ? 0 : off;
12730 if (TARGET_DEBUG_ARG)
12731 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12732 (int)n_gpr, (int)n_fpr, off);
12734 t = fold_build_pointer_plus_hwi (t, off);
12736 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12737 TREE_SIDE_EFFECTS (t) = 1;
12738 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12741 /* Find the register save area. */
12742 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12743 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12745 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12746 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12748 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12749 TREE_SIDE_EFFECTS (t) = 1;
12750 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12754 /* Implement va_arg by updating the va_list structure
12755 VALIST as required to retrieve an argument of type
12756 TYPE, and returning that argument.
12758 Generates code equivalent to:
12760 if (integral value) {
12761 if (size <= 4 && args.gpr < 5 ||
12762 size > 4 && args.gpr < 4 )
12763 ret = args.reg_save_area[args.gpr+8]
12764 else
12765 ret = *args.overflow_arg_area++;
12766 } else if (vector value) {
12767 ret = *args.overflow_arg_area;
12768 args.overflow_arg_area += size / 8;
12769 } else if (float value) {
12770 if (args.fgpr < 2)
12771 ret = args.reg_save_area[args.fpr+64]
12772 else
12773 ret = *args.overflow_arg_area++;
12774 } else if (aggregate value) {
12775 if (args.gpr < 5)
12776 ret = *args.reg_save_area[args.gpr]
12777 else
12778 ret = **args.overflow_arg_area++;
12779 } */
12781 static tree
12782 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12783 gimple_seq *post_p ATTRIBUTE_UNUSED)
12785 tree f_gpr, f_fpr, f_ovf, f_sav;
12786 tree gpr, fpr, ovf, sav, reg, t, u;
12787 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12788 tree lab_false, lab_over = NULL_TREE;
12789 tree addr = create_tmp_var (ptr_type_node, "addr");
12790 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12791 a stack slot. */
12793 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12794 f_fpr = DECL_CHAIN (f_gpr);
12795 f_ovf = DECL_CHAIN (f_fpr);
12796 f_sav = DECL_CHAIN (f_ovf);
12798 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12799 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12800 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12802 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12803 both appear on a lhs. */
12804 valist = unshare_expr (valist);
12805 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12807 size = int_size_in_bytes (type);
12809 s390_check_type_for_vector_abi (type, true, false);
12811 if (pass_va_arg_by_reference (type))
12813 if (TARGET_DEBUG_ARG)
12815 fprintf (stderr, "va_arg: aggregate type");
12816 debug_tree (type);
12819 /* Aggregates are passed by reference. */
12820 indirect_p = 1;
12821 reg = gpr;
12822 n_reg = 1;
12824 /* kernel stack layout on 31 bit: It is assumed here that no padding
12825 will be added by s390_frame_info because for va_args always an even
12826 number of gprs has to be saved r15-r2 = 14 regs. */
12827 sav_ofs = 2 * UNITS_PER_LONG;
12828 sav_scale = UNITS_PER_LONG;
12829 size = UNITS_PER_LONG;
12830 max_reg = GP_ARG_NUM_REG - n_reg;
12831 left_align_p = false;
12833 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12835 if (TARGET_DEBUG_ARG)
12837 fprintf (stderr, "va_arg: vector type");
12838 debug_tree (type);
12841 indirect_p = 0;
12842 reg = NULL_TREE;
12843 n_reg = 0;
12844 sav_ofs = 0;
12845 sav_scale = 8;
12846 max_reg = 0;
12847 left_align_p = true;
12849 else if (s390_function_arg_float (TYPE_MODE (type), type))
12851 if (TARGET_DEBUG_ARG)
12853 fprintf (stderr, "va_arg: float type");
12854 debug_tree (type);
12857 /* FP args go in FP registers, if present. */
12858 indirect_p = 0;
12859 reg = fpr;
12860 n_reg = 1;
12861 sav_ofs = 16 * UNITS_PER_LONG;
12862 sav_scale = 8;
12863 max_reg = FP_ARG_NUM_REG - n_reg;
12864 left_align_p = false;
12866 else
12868 if (TARGET_DEBUG_ARG)
12870 fprintf (stderr, "va_arg: other type");
12871 debug_tree (type);
12874 /* Otherwise into GP registers. */
12875 indirect_p = 0;
12876 reg = gpr;
12877 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12879 /* kernel stack layout on 31 bit: It is assumed here that no padding
12880 will be added by s390_frame_info because for va_args always an even
12881 number of gprs has to be saved r15-r2 = 14 regs. */
12882 sav_ofs = 2 * UNITS_PER_LONG;
12884 if (size < UNITS_PER_LONG)
12885 sav_ofs += UNITS_PER_LONG - size;
12887 sav_scale = UNITS_PER_LONG;
12888 max_reg = GP_ARG_NUM_REG - n_reg;
12889 left_align_p = false;
12892 /* Pull the value out of the saved registers ... */
12894 if (reg != NULL_TREE)
12897 if (reg > ((typeof (reg))max_reg))
12898 goto lab_false;
12900 addr = sav + sav_ofs + reg * save_scale;
12902 goto lab_over;
12904 lab_false:
12907 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12908 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12910 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12911 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12912 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12913 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12914 gimplify_and_add (t, pre_p);
12916 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12917 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12918 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12919 t = fold_build_pointer_plus (t, u);
12921 gimplify_assign (addr, t, pre_p);
12923 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12925 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12928 /* ... Otherwise out of the overflow area. */
12930 t = ovf;
12931 if (size < UNITS_PER_LONG && !left_align_p)
12932 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12934 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12936 gimplify_assign (addr, t, pre_p);
12938 if (size < UNITS_PER_LONG && left_align_p)
12939 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12940 else
12941 t = fold_build_pointer_plus_hwi (t, size);
12943 gimplify_assign (ovf, t, pre_p);
12945 if (reg != NULL_TREE)
12946 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12949 /* Increment register save count. */
12951 if (n_reg > 0)
12953 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12954 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12955 gimplify_and_add (u, pre_p);
12958 if (indirect_p)
12960 t = build_pointer_type_for_mode (build_pointer_type (type),
12961 ptr_mode, true);
12962 addr = fold_convert (t, addr);
12963 addr = build_va_arg_indirect_ref (addr);
12965 else
12967 t = build_pointer_type_for_mode (type, ptr_mode, true);
12968 addr = fold_convert (t, addr);
12971 return build_va_arg_indirect_ref (addr);
12974 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12975 expanders.
12976 DEST - Register location where CC will be stored.
12977 TDB - Pointer to a 256 byte area where to store the transaction.
12978 diagnostic block. NULL if TDB is not needed.
12979 RETRY - Retry count value. If non-NULL a retry loop for CC2
12980 is emitted
12981 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12982 of the tbegin instruction pattern. */
12984 void
12985 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12987 rtx retry_plus_two = gen_reg_rtx (SImode);
12988 rtx retry_reg = gen_reg_rtx (SImode);
12989 rtx_code_label *retry_label = NULL;
12991 if (retry != NULL_RTX)
12993 emit_move_insn (retry_reg, retry);
12994 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12995 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12996 retry_label = gen_label_rtx ();
12997 emit_label (retry_label);
13000 if (clobber_fprs_p)
13002 if (TARGET_VX)
13003 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13004 tdb));
13005 else
13006 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13007 tdb));
13009 else
13010 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13011 tdb));
13013 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13014 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13015 CC_REGNUM)),
13016 UNSPEC_CC_TO_INT));
13017 if (retry != NULL_RTX)
13019 const int CC0 = 1 << 3;
13020 const int CC1 = 1 << 2;
13021 const int CC3 = 1 << 0;
13022 rtx jump;
13023 rtx count = gen_reg_rtx (SImode);
13024 rtx_code_label *leave_label = gen_label_rtx ();
13026 /* Exit for success and permanent failures. */
13027 jump = s390_emit_jump (leave_label,
13028 gen_rtx_EQ (VOIDmode,
13029 gen_rtx_REG (CCRAWmode, CC_REGNUM),
13030 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13031 LABEL_NUSES (leave_label) = 1;
13033 /* CC2 - transient failure. Perform retry with ppa. */
13034 emit_move_insn (count, retry_plus_two);
13035 emit_insn (gen_subsi3 (count, count, retry_reg));
13036 emit_insn (gen_tx_assist (count));
13037 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13038 retry_reg,
13039 retry_reg));
13040 JUMP_LABEL (jump) = retry_label;
13041 LABEL_NUSES (retry_label) = 1;
13042 emit_label (leave_label);
13047 /* Return the decl for the target specific builtin with the function
13048 code FCODE. */
13050 static tree
13051 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13053 if (fcode >= S390_BUILTIN_MAX)
13054 return error_mark_node;
13056 return s390_builtin_decls[fcode];
13059 /* We call mcount before the function prologue. So a profiled leaf
13060 function should stay a leaf function. */
13062 static bool
13063 s390_keep_leaf_when_profiled ()
13065 return true;
13068 /* Output assembly code for the trampoline template to
13069 stdio stream FILE.
13071 On S/390, we use gpr 1 internally in the trampoline code;
13072 gpr 0 is used to hold the static chain. */
13074 static void
13075 s390_asm_trampoline_template (FILE *file)
13077 rtx op[2];
13078 op[0] = gen_rtx_REG (Pmode, 0);
13079 op[1] = gen_rtx_REG (Pmode, 1);
13081 if (TARGET_64BIT)
13083 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13084 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
13085 output_asm_insn ("br\t%1", op); /* 2 byte */
13086 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13088 else
13090 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13091 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
13092 output_asm_insn ("br\t%1", op); /* 2 byte */
13093 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13097 /* Emit RTL insns to initialize the variable parts of a trampoline.
13098 FNADDR is an RTX for the address of the function's pure code.
13099 CXT is an RTX for the static chain value for the function. */
13101 static void
13102 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13104 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13105 rtx mem;
13107 emit_block_move (m_tramp, assemble_trampoline_template (),
13108 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13110 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13111 emit_move_insn (mem, cxt);
13112 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13113 emit_move_insn (mem, fnaddr);
13116 static void
13117 output_asm_nops (const char *user, int hw)
13119 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
13120 while (hw > 0)
13122 if (hw >= 3)
13124 output_asm_insn ("brcl\t0,0", NULL);
13125 hw -= 3;
13127 else if (hw >= 2)
13129 output_asm_insn ("bc\t0,0", NULL);
13130 hw -= 2;
13132 else
13134 output_asm_insn ("bcr\t0,0", NULL);
13135 hw -= 1;
13140 /* Output assembler code to FILE to call a profiler hook. */
13142 void
13143 s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
13145 rtx op[4];
13147 fprintf (file, "# function profiler \n");
13149 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13150 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13151 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13152 op[3] = GEN_INT (UNITS_PER_LONG);
13154 op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
13155 SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION;
13156 if (flag_pic && !TARGET_64BIT)
13158 op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31);
13159 op[2] = gen_rtx_CONST (Pmode, op[2]);
13162 if (flag_record_mcount)
13163 fprintf (file, "1:\n");
13165 if (flag_fentry)
13167 if (flag_nop_mcount)
13168 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13169 else if (cfun->static_chain_decl)
13170 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
13171 "with %<-mfentry%> on s390");
13172 else
13173 output_asm_insn ("brasl\t0,%2%K2", op);
13175 else if (TARGET_64BIT)
13177 if (flag_nop_mcount)
13178 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3 +
13179 /* lg */ 3);
13180 else
13182 output_asm_insn ("stg\t%0,%1", op);
13183 if (flag_dwarf2_cfi_asm)
13184 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13185 output_asm_insn ("brasl\t%0,%2%K2", op);
13186 output_asm_insn ("lg\t%0,%1", op);
13187 if (flag_dwarf2_cfi_asm)
13188 output_asm_insn (".cfi_restore\t%0", op);
13191 else
13193 if (flag_nop_mcount)
13194 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3 +
13195 /* l */ 2);
13196 else
13198 output_asm_insn ("st\t%0,%1", op);
13199 if (flag_dwarf2_cfi_asm)
13200 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13201 output_asm_insn ("brasl\t%0,%2%K2", op);
13202 output_asm_insn ("l\t%0,%1", op);
13203 if (flag_dwarf2_cfi_asm)
13204 output_asm_insn (".cfi_restore\t%0", op);
13208 if (flag_record_mcount)
13210 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13211 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13212 fprintf (file, "\t.previous\n");
13216 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13217 into its SYMBOL_REF_FLAGS. */
13219 static void
13220 s390_encode_section_info (tree decl, rtx rtl, int first)
13222 default_encode_section_info (decl, rtl, first);
13224 if (TREE_CODE (decl) == VAR_DECL)
13226 /* Store the alignment to be able to check if we can use
13227 a larl/load-relative instruction. We only handle the cases
13228 that can go wrong (i.e. no FUNC_DECLs). */
13229 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13230 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13231 else if (DECL_ALIGN (decl) % 32)
13232 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13233 else if (DECL_ALIGN (decl) % 64)
13234 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13237 /* Literal pool references don't have a decl so they are handled
13238 differently here. We rely on the information in the MEM_ALIGN
13239 entry to decide upon the alignment. */
13240 if (MEM_P (rtl)
13241 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13242 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13244 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13245 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13246 else if (MEM_ALIGN (rtl) % 32)
13247 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13248 else if (MEM_ALIGN (rtl) % 64)
13249 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13253 /* Output thunk to FILE that implements a C++ virtual function call (with
13254 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13255 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13256 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13257 relative to the resulting this pointer. */
13259 static void
13260 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13261 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13262 tree function)
13264 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13265 rtx op[10];
13266 int nonlocal = 0;
13268 assemble_start_function (thunk, fnname);
13269 /* Make sure unwind info is emitted for the thunk if needed. */
13270 final_start_function (emit_barrier (), file, 1);
13272 /* Operand 0 is the target function. */
13273 op[0] = XEXP (DECL_RTL (function), 0);
13274 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13276 nonlocal = 1;
13277 if (!TARGET_64BIT)
13279 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT);
13280 op[0] = gen_rtx_CONST (Pmode, op[0]);
13284 /* Operand 1 is the 'this' pointer. */
13285 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13286 op[1] = gen_rtx_REG (Pmode, 3);
13287 else
13288 op[1] = gen_rtx_REG (Pmode, 2);
13290 /* Operand 2 is the delta. */
13291 op[2] = GEN_INT (delta);
13293 /* Operand 3 is the vcall_offset. */
13294 op[3] = GEN_INT (vcall_offset);
13296 /* Operand 4 is the temporary register. */
13297 op[4] = gen_rtx_REG (Pmode, 1);
13299 /* Operands 5 to 8 can be used as labels. */
13300 op[5] = NULL_RTX;
13301 op[6] = NULL_RTX;
13302 op[7] = NULL_RTX;
13303 op[8] = NULL_RTX;
13305 /* Operand 9 can be used for temporary register. */
13306 op[9] = NULL_RTX;
13308 /* Generate code. */
13309 if (TARGET_64BIT)
13311 /* Setup literal pool pointer if required. */
13312 if ((!DISP_IN_RANGE (delta)
13313 && !CONST_OK_FOR_K (delta)
13314 && !CONST_OK_FOR_Os (delta))
13315 || (!DISP_IN_RANGE (vcall_offset)
13316 && !CONST_OK_FOR_K (vcall_offset)
13317 && !CONST_OK_FOR_Os (vcall_offset)))
13319 op[5] = gen_label_rtx ();
13320 output_asm_insn ("larl\t%4,%5", op);
13323 /* Add DELTA to this pointer. */
13324 if (delta)
13326 if (CONST_OK_FOR_J (delta))
13327 output_asm_insn ("la\t%1,%2(%1)", op);
13328 else if (DISP_IN_RANGE (delta))
13329 output_asm_insn ("lay\t%1,%2(%1)", op);
13330 else if (CONST_OK_FOR_K (delta))
13331 output_asm_insn ("aghi\t%1,%2", op);
13332 else if (CONST_OK_FOR_Os (delta))
13333 output_asm_insn ("agfi\t%1,%2", op);
13334 else
13336 op[6] = gen_label_rtx ();
13337 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13341 /* Perform vcall adjustment. */
13342 if (vcall_offset)
13344 if (DISP_IN_RANGE (vcall_offset))
13346 output_asm_insn ("lg\t%4,0(%1)", op);
13347 output_asm_insn ("ag\t%1,%3(%4)", op);
13349 else if (CONST_OK_FOR_K (vcall_offset))
13351 output_asm_insn ("lghi\t%4,%3", op);
13352 output_asm_insn ("ag\t%4,0(%1)", op);
13353 output_asm_insn ("ag\t%1,0(%4)", op);
13355 else if (CONST_OK_FOR_Os (vcall_offset))
13357 output_asm_insn ("lgfi\t%4,%3", op);
13358 output_asm_insn ("ag\t%4,0(%1)", op);
13359 output_asm_insn ("ag\t%1,0(%4)", op);
13361 else
13363 op[7] = gen_label_rtx ();
13364 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13365 output_asm_insn ("ag\t%4,0(%1)", op);
13366 output_asm_insn ("ag\t%1,0(%4)", op);
13370 /* Jump to target. */
13371 output_asm_insn ("jg\t%0%K0", op);
13373 /* Output literal pool if required. */
13374 if (op[5])
13376 output_asm_insn (".align\t4", op);
13377 targetm.asm_out.internal_label (file, "L",
13378 CODE_LABEL_NUMBER (op[5]));
13380 if (op[6])
13382 targetm.asm_out.internal_label (file, "L",
13383 CODE_LABEL_NUMBER (op[6]));
13384 output_asm_insn (".long\t%2", op);
13386 if (op[7])
13388 targetm.asm_out.internal_label (file, "L",
13389 CODE_LABEL_NUMBER (op[7]));
13390 output_asm_insn (".long\t%3", op);
13393 else
13395 /* Setup base pointer if required. */
13396 if (!vcall_offset
13397 || (!DISP_IN_RANGE (delta)
13398 && !CONST_OK_FOR_K (delta)
13399 && !CONST_OK_FOR_Os (delta))
13400 || (!DISP_IN_RANGE (delta)
13401 && !CONST_OK_FOR_K (vcall_offset)
13402 && !CONST_OK_FOR_Os (vcall_offset)))
13404 op[5] = gen_label_rtx ();
13405 output_asm_insn ("basr\t%4,0", op);
13406 targetm.asm_out.internal_label (file, "L",
13407 CODE_LABEL_NUMBER (op[5]));
13410 /* Add DELTA to this pointer. */
13411 if (delta)
13413 if (CONST_OK_FOR_J (delta))
13414 output_asm_insn ("la\t%1,%2(%1)", op);
13415 else if (DISP_IN_RANGE (delta))
13416 output_asm_insn ("lay\t%1,%2(%1)", op);
13417 else if (CONST_OK_FOR_K (delta))
13418 output_asm_insn ("ahi\t%1,%2", op);
13419 else if (CONST_OK_FOR_Os (delta))
13420 output_asm_insn ("afi\t%1,%2", op);
13421 else
13423 op[6] = gen_label_rtx ();
13424 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13428 /* Perform vcall adjustment. */
13429 if (vcall_offset)
13431 if (CONST_OK_FOR_J (vcall_offset))
13433 output_asm_insn ("l\t%4,0(%1)", op);
13434 output_asm_insn ("a\t%1,%3(%4)", op);
13436 else if (DISP_IN_RANGE (vcall_offset))
13438 output_asm_insn ("l\t%4,0(%1)", op);
13439 output_asm_insn ("ay\t%1,%3(%4)", op);
13441 else if (CONST_OK_FOR_K (vcall_offset))
13443 output_asm_insn ("lhi\t%4,%3", op);
13444 output_asm_insn ("a\t%4,0(%1)", op);
13445 output_asm_insn ("a\t%1,0(%4)", op);
13447 else if (CONST_OK_FOR_Os (vcall_offset))
13449 output_asm_insn ("iilf\t%4,%3", op);
13450 output_asm_insn ("a\t%4,0(%1)", op);
13451 output_asm_insn ("a\t%1,0(%4)", op);
13453 else
13455 op[7] = gen_label_rtx ();
13456 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13457 output_asm_insn ("a\t%4,0(%1)", op);
13458 output_asm_insn ("a\t%1,0(%4)", op);
13461 /* We had to clobber the base pointer register.
13462 Re-setup the base pointer (with a different base). */
13463 op[5] = gen_label_rtx ();
13464 output_asm_insn ("basr\t%4,0", op);
13465 targetm.asm_out.internal_label (file, "L",
13466 CODE_LABEL_NUMBER (op[5]));
13469 /* Jump to target. */
13470 op[8] = gen_label_rtx ();
13472 if (!flag_pic)
13473 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13474 else if (!nonlocal)
13475 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13476 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13477 else if (flag_pic == 1)
13479 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13480 output_asm_insn ("l\t%4,%0(%4)", op);
13482 else if (flag_pic == 2)
13484 op[9] = gen_rtx_REG (Pmode, 0);
13485 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13486 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13487 output_asm_insn ("ar\t%4,%9", op);
13488 output_asm_insn ("l\t%4,0(%4)", op);
13491 output_asm_insn ("br\t%4", op);
13493 /* Output literal pool. */
13494 output_asm_insn (".align\t4", op);
13496 if (nonlocal && flag_pic == 2)
13497 output_asm_insn (".long\t%0", op);
13498 if (nonlocal)
13500 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13501 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13504 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13505 if (!flag_pic)
13506 output_asm_insn (".long\t%0", op);
13507 else
13508 output_asm_insn (".long\t%0-%5", op);
13510 if (op[6])
13512 targetm.asm_out.internal_label (file, "L",
13513 CODE_LABEL_NUMBER (op[6]));
13514 output_asm_insn (".long\t%2", op);
13516 if (op[7])
13518 targetm.asm_out.internal_label (file, "L",
13519 CODE_LABEL_NUMBER (op[7]));
13520 output_asm_insn (".long\t%3", op);
13523 final_end_function ();
13524 assemble_end_function (thunk, fnname);
13527 /* Output either an indirect jump or an indirect call
13528 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13529 using a branch trampoline disabling branch target prediction. */
13531 void
13532 s390_indirect_branch_via_thunk (unsigned int regno,
13533 unsigned int return_addr_regno,
13534 rtx comparison_operator,
13535 enum s390_indirect_branch_type type)
13537 enum s390_indirect_branch_option option;
13539 if (type == s390_indirect_branch_type_return)
13541 if (s390_return_addr_from_memory ())
13542 option = s390_opt_function_return_mem;
13543 else
13544 option = s390_opt_function_return_reg;
13546 else if (type == s390_indirect_branch_type_jump)
13547 option = s390_opt_indirect_branch_jump;
13548 else if (type == s390_indirect_branch_type_call)
13549 option = s390_opt_indirect_branch_call;
13550 else
13551 gcc_unreachable ();
13553 if (TARGET_INDIRECT_BRANCH_TABLE)
13555 char label[32];
13557 ASM_GENERATE_INTERNAL_LABEL (label,
13558 indirect_branch_table_label[option],
13559 indirect_branch_table_label_no[option]++);
13560 ASM_OUTPUT_LABEL (asm_out_file, label);
13563 if (return_addr_regno != INVALID_REGNUM)
13565 gcc_assert (comparison_operator == NULL_RTX);
13566 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13568 else
13570 fputs (" \tjg", asm_out_file);
13571 if (comparison_operator != NULL_RTX)
13572 print_operand (asm_out_file, comparison_operator, 'C');
13574 fputs ("\t", asm_out_file);
13577 if (TARGET_CPU_Z10)
13578 fprintf (asm_out_file,
13579 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13580 regno);
13581 else
13582 fprintf (asm_out_file,
13583 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13584 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13586 if ((option == s390_opt_indirect_branch_jump
13587 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13588 || (option == s390_opt_indirect_branch_call
13589 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13590 || (option == s390_opt_function_return_reg
13591 && cfun->machine->function_return_reg == indirect_branch_thunk)
13592 || (option == s390_opt_function_return_mem
13593 && cfun->machine->function_return_mem == indirect_branch_thunk))
13595 if (TARGET_CPU_Z10)
13596 indirect_branch_z10thunk_mask |= (1 << regno);
13597 else
13598 indirect_branch_prez10thunk_mask |= (1 << regno);
13602 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13603 either be an address register or a label pointing to the location
13604 of the jump instruction. */
13606 void
13607 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13609 if (TARGET_INDIRECT_BRANCH_TABLE)
13611 char label[32];
13613 ASM_GENERATE_INTERNAL_LABEL (label,
13614 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13615 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13616 ASM_OUTPUT_LABEL (asm_out_file, label);
13619 if (!TARGET_ZARCH)
13620 fputs ("\t.machinemode zarch\n", asm_out_file);
13622 if (REG_P (execute_target))
13623 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13624 else
13625 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13627 if (!TARGET_ZARCH)
13628 fputs ("\t.machinemode esa\n", asm_out_file);
13630 fputs ("0:\tj\t0b\n", asm_out_file);
13633 static bool
13634 s390_valid_pointer_mode (scalar_int_mode mode)
13636 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13639 /* Checks whether the given CALL_EXPR would use a caller
13640 saved register. This is used to decide whether sibling call
13641 optimization could be performed on the respective function
13642 call. */
13644 static bool
13645 s390_call_saved_register_used (tree call_expr)
13647 CUMULATIVE_ARGS cum_v;
13648 cumulative_args_t cum;
13649 tree parameter;
13650 rtx parm_rtx;
13651 int reg, i;
13653 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13654 cum = pack_cumulative_args (&cum_v);
13656 for (i = 0; i < call_expr_nargs (call_expr); i++)
13658 parameter = CALL_EXPR_ARG (call_expr, i);
13659 gcc_assert (parameter);
13661 /* For an undeclared variable passed as parameter we will get
13662 an ERROR_MARK node here. */
13663 if (TREE_CODE (parameter) == ERROR_MARK)
13664 return true;
13666 /* We assume that in the target function all parameters are
13667 named. This only has an impact on vector argument register
13668 usage none of which is call-saved. */
13669 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13670 apply_pass_by_reference_rules (&cum_v, arg);
13672 parm_rtx = s390_function_arg (cum, arg);
13674 s390_function_arg_advance (cum, arg);
13676 if (!parm_rtx)
13677 continue;
13679 if (REG_P (parm_rtx))
13681 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13682 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13683 return true;
13686 if (GET_CODE (parm_rtx) == PARALLEL)
13688 int i;
13690 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13692 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13694 gcc_assert (REG_P (r));
13696 for (reg = 0; reg < REG_NREGS (r); reg++)
13697 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13698 return true;
13703 return false;
13706 /* Return true if the given call expression can be
13707 turned into a sibling call.
13708 DECL holds the declaration of the function to be called whereas
13709 EXP is the call expression itself. */
13711 static bool
13712 s390_function_ok_for_sibcall (tree decl, tree exp)
13714 /* The TPF epilogue uses register 1. */
13715 if (TARGET_TPF_PROFILING)
13716 return false;
13718 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13719 which would have to be restored before the sibcall. */
13720 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13721 return false;
13723 /* The thunks for indirect branches require r1 if no exrl is
13724 available. r1 might not be available when doing a sibling
13725 call. */
13726 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13727 && !TARGET_CPU_Z10
13728 && !decl)
13729 return false;
13731 /* Register 6 on s390 is available as an argument register but unfortunately
13732 "caller saved". This makes functions needing this register for arguments
13733 not suitable for sibcalls. */
13734 return !s390_call_saved_register_used (exp);
13737 /* Return the fixed registers used for condition codes. */
13739 static bool
13740 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13742 *p1 = CC_REGNUM;
13743 *p2 = INVALID_REGNUM;
13745 return true;
13748 /* This function is used by the call expanders of the machine description.
13749 It emits the call insn itself together with the necessary operations
13750 to adjust the target address and returns the emitted insn.
13751 ADDR_LOCATION is the target address rtx
13752 TLS_CALL the location of the thread-local symbol
13753 RESULT_REG the register where the result of the call should be stored
13754 RETADDR_REG the register where the return address should be stored
13755 If this parameter is NULL_RTX the call is considered
13756 to be a sibling call. */
13758 rtx_insn *
13759 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13760 rtx retaddr_reg)
13762 bool plt31_call_p = false;
13763 rtx_insn *insn;
13764 rtx vec[4] = { NULL_RTX };
13765 int elts = 0;
13766 rtx *call = &vec[0];
13767 rtx *clobber_ret_reg = &vec[1];
13768 rtx *use = &vec[2];
13769 rtx *clobber_thunk_reg = &vec[3];
13770 int i;
13772 /* Direct function calls need special treatment. */
13773 if (GET_CODE (addr_location) == SYMBOL_REF)
13775 /* When calling a global routine in PIC mode, we must
13776 replace the symbol itself with the PLT stub. */
13777 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT)
13779 if (retaddr_reg != NULL_RTX)
13781 addr_location = gen_rtx_UNSPEC (Pmode,
13782 gen_rtvec (1, addr_location),
13783 UNSPEC_PLT31);
13784 addr_location = gen_rtx_CONST (Pmode, addr_location);
13785 plt31_call_p = true;
13787 else
13788 /* For -fpic code the PLT entries might use r12 which is
13789 call-saved. Therefore we cannot do a sibcall when
13790 calling directly using a symbol ref. When reaching
13791 this point we decided (in s390_function_ok_for_sibcall)
13792 to do a sibcall for a function pointer but one of the
13793 optimizers was able to get rid of the function pointer
13794 by propagating the symbol ref into the call. This
13795 optimization is illegal for S/390 so we turn the direct
13796 call into a indirect call again. */
13797 addr_location = force_reg (Pmode, addr_location);
13801 /* If it is already an indirect call or the code above moved the
13802 SYMBOL_REF to somewhere else make sure the address can be found in
13803 register 1. */
13804 if (retaddr_reg == NULL_RTX
13805 && GET_CODE (addr_location) != SYMBOL_REF
13806 && !plt31_call_p)
13808 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13809 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13812 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13813 && GET_CODE (addr_location) != SYMBOL_REF
13814 && !plt31_call_p)
13816 /* Indirect branch thunks require the target to be a single GPR. */
13817 addr_location = force_reg (Pmode, addr_location);
13819 /* Without exrl the indirect branch thunks need an additional
13820 register for larl;ex */
13821 if (!TARGET_CPU_Z10)
13823 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13824 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13828 addr_location = gen_rtx_MEM (QImode, addr_location);
13829 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13831 if (result_reg != NULL_RTX)
13832 *call = gen_rtx_SET (result_reg, *call);
13834 if (retaddr_reg != NULL_RTX)
13836 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13838 if (tls_call != NULL_RTX)
13839 *use = gen_rtx_USE (VOIDmode, tls_call);
13843 for (i = 0; i < 4; i++)
13844 if (vec[i] != NULL_RTX)
13845 elts++;
13847 if (elts > 1)
13849 rtvec v;
13850 int e = 0;
13852 v = rtvec_alloc (elts);
13853 for (i = 0; i < 4; i++)
13854 if (vec[i] != NULL_RTX)
13856 RTVEC_ELT (v, e) = vec[i];
13857 e++;
13860 *call = gen_rtx_PARALLEL (VOIDmode, v);
13863 insn = emit_call_insn (*call);
13865 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13866 if (plt31_call_p || tls_call != NULL_RTX)
13868 /* s390_function_ok_for_sibcall should
13869 have denied sibcalls in this case. */
13870 gcc_assert (retaddr_reg != NULL_RTX);
13871 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13873 return insn;
13876 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13878 static void
13879 s390_conditional_register_usage (void)
13881 int i;
13883 if (flag_pic)
13884 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13885 fixed_regs[BASE_REGNUM] = 0;
13886 fixed_regs[RETURN_REGNUM] = 0;
13887 if (TARGET_64BIT)
13889 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13890 call_used_regs[i] = 0;
13892 else
13894 call_used_regs[FPR4_REGNUM] = 0;
13895 call_used_regs[FPR6_REGNUM] = 0;
13898 if (TARGET_SOFT_FLOAT)
13900 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13901 fixed_regs[i] = 1;
13904 /* Disable v16 - v31 for non-vector target. */
13905 if (!TARGET_VX)
13907 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13908 fixed_regs[i] = call_used_regs[i] = 1;
13912 /* Corresponding function to eh_return expander. */
13914 static GTY(()) rtx s390_tpf_eh_return_symbol;
13915 void
13916 s390_emit_tpf_eh_return (rtx target)
13918 rtx_insn *insn;
13919 rtx reg, orig_ra;
13921 if (!s390_tpf_eh_return_symbol)
13923 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13924 SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION;
13927 reg = gen_rtx_REG (Pmode, 2);
13928 orig_ra = gen_rtx_REG (Pmode, 3);
13930 emit_move_insn (reg, target);
13931 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13932 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13933 gen_rtx_REG (Pmode, RETURN_REGNUM));
13934 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13935 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13937 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13940 /* Rework the prologue/epilogue to avoid saving/restoring
13941 registers unnecessarily. */
13943 static void
13944 s390_optimize_prologue (void)
13946 rtx_insn *insn, *new_insn, *next_insn;
13948 /* Do a final recompute of the frame-related data. */
13949 s390_optimize_register_info ();
13951 /* If all special registers are in fact used, there's nothing we
13952 can do, so no point in walking the insn list. */
13954 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13955 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13956 return;
13958 /* Search for prologue/epilogue insns and replace them. */
13959 for (insn = get_insns (); insn; insn = next_insn)
13961 int first, last, off;
13962 rtx set, base, offset;
13963 rtx pat;
13965 next_insn = NEXT_INSN (insn);
13967 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13968 continue;
13970 pat = PATTERN (insn);
13972 /* Remove ldgr/lgdr instructions used for saving and restore
13973 GPRs if possible. */
13974 if (TARGET_Z10)
13976 rtx tmp_pat = pat;
13978 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13979 tmp_pat = XVECEXP (pat, 0, 0);
13981 if (GET_CODE (tmp_pat) == SET
13982 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13983 && REG_P (SET_SRC (tmp_pat))
13984 && REG_P (SET_DEST (tmp_pat)))
13986 int src_regno = REGNO (SET_SRC (tmp_pat));
13987 int dest_regno = REGNO (SET_DEST (tmp_pat));
13988 int gpr_regno;
13989 int fpr_regno;
13991 if (!((GENERAL_REGNO_P (src_regno)
13992 && FP_REGNO_P (dest_regno))
13993 || (FP_REGNO_P (src_regno)
13994 && GENERAL_REGNO_P (dest_regno))))
13995 continue;
13997 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13998 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14000 /* GPR must be call-saved, FPR must be call-clobbered. */
14001 if (!call_used_regs[fpr_regno]
14002 || call_used_regs[gpr_regno])
14003 continue;
14005 /* It must not happen that what we once saved in an FPR now
14006 needs a stack slot. */
14007 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14009 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14011 remove_insn (insn);
14012 continue;
14017 if (GET_CODE (pat) == PARALLEL
14018 && store_multiple_operation (pat, VOIDmode))
14020 set = XVECEXP (pat, 0, 0);
14021 first = REGNO (SET_SRC (set));
14022 last = first + XVECLEN (pat, 0) - 1;
14023 offset = const0_rtx;
14024 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14025 off = INTVAL (offset);
14027 if (GET_CODE (base) != REG || off < 0)
14028 continue;
14029 if (cfun_frame_layout.first_save_gpr != -1
14030 && (cfun_frame_layout.first_save_gpr < first
14031 || cfun_frame_layout.last_save_gpr > last))
14032 continue;
14033 if (REGNO (base) != STACK_POINTER_REGNUM
14034 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14035 continue;
14036 if (first > BASE_REGNUM || last < BASE_REGNUM)
14037 continue;
14039 if (cfun_frame_layout.first_save_gpr != -1)
14041 rtx s_pat = save_gprs (base,
14042 off + (cfun_frame_layout.first_save_gpr
14043 - first) * UNITS_PER_LONG,
14044 cfun_frame_layout.first_save_gpr,
14045 cfun_frame_layout.last_save_gpr);
14046 new_insn = emit_insn_before (s_pat, insn);
14047 INSN_ADDRESSES_NEW (new_insn, -1);
14050 remove_insn (insn);
14051 continue;
14054 if (cfun_frame_layout.first_save_gpr == -1
14055 && GET_CODE (pat) == SET
14056 && GENERAL_REG_P (SET_SRC (pat))
14057 && GET_CODE (SET_DEST (pat)) == MEM)
14059 set = pat;
14060 first = REGNO (SET_SRC (set));
14061 offset = const0_rtx;
14062 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14063 off = INTVAL (offset);
14065 if (GET_CODE (base) != REG || off < 0)
14066 continue;
14067 if (REGNO (base) != STACK_POINTER_REGNUM
14068 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14069 continue;
14071 remove_insn (insn);
14072 continue;
14075 if (GET_CODE (pat) == PARALLEL
14076 && load_multiple_operation (pat, VOIDmode))
14078 set = XVECEXP (pat, 0, 0);
14079 first = REGNO (SET_DEST (set));
14080 last = first + XVECLEN (pat, 0) - 1;
14081 offset = const0_rtx;
14082 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14083 off = INTVAL (offset);
14085 if (GET_CODE (base) != REG || off < 0)
14086 continue;
14088 if (cfun_frame_layout.first_restore_gpr != -1
14089 && (cfun_frame_layout.first_restore_gpr < first
14090 || cfun_frame_layout.last_restore_gpr > last))
14091 continue;
14092 if (REGNO (base) != STACK_POINTER_REGNUM
14093 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14094 continue;
14095 if (first > BASE_REGNUM || last < BASE_REGNUM)
14096 continue;
14098 if (cfun_frame_layout.first_restore_gpr != -1)
14100 rtx rpat = restore_gprs (base,
14101 off + (cfun_frame_layout.first_restore_gpr
14102 - first) * UNITS_PER_LONG,
14103 cfun_frame_layout.first_restore_gpr,
14104 cfun_frame_layout.last_restore_gpr);
14106 /* Remove REG_CFA_RESTOREs for registers that we no
14107 longer need to save. */
14108 REG_NOTES (rpat) = REG_NOTES (insn);
14109 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14110 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14111 && ((int) REGNO (XEXP (*ptr, 0))
14112 < cfun_frame_layout.first_restore_gpr))
14113 *ptr = XEXP (*ptr, 1);
14114 else
14115 ptr = &XEXP (*ptr, 1);
14116 new_insn = emit_insn_before (rpat, insn);
14117 RTX_FRAME_RELATED_P (new_insn) = 1;
14118 INSN_ADDRESSES_NEW (new_insn, -1);
14121 remove_insn (insn);
14122 continue;
14125 if (cfun_frame_layout.first_restore_gpr == -1
14126 && GET_CODE (pat) == SET
14127 && GENERAL_REG_P (SET_DEST (pat))
14128 && GET_CODE (SET_SRC (pat)) == MEM)
14130 set = pat;
14131 first = REGNO (SET_DEST (set));
14132 offset = const0_rtx;
14133 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14134 off = INTVAL (offset);
14136 if (GET_CODE (base) != REG || off < 0)
14137 continue;
14139 if (REGNO (base) != STACK_POINTER_REGNUM
14140 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14141 continue;
14143 remove_insn (insn);
14144 continue;
14149 /* On z10 and later the dynamic branch prediction must see the
14150 backward jump within a certain windows. If not it falls back to
14151 the static prediction. This function rearranges the loop backward
14152 branch in a way which makes the static prediction always correct.
14153 The function returns true if it added an instruction. */
14154 static bool
14155 s390_fix_long_loop_prediction (rtx_insn *insn)
14157 rtx set = single_set (insn);
14158 rtx code_label, label_ref;
14159 rtx_insn *uncond_jump;
14160 rtx_insn *cur_insn;
14161 rtx tmp;
14162 int distance;
14164 /* This will exclude branch on count and branch on index patterns
14165 since these are correctly statically predicted.
14167 The additional check for a PARALLEL is required here since
14168 single_set might be != NULL for PARALLELs where the set of the
14169 iteration variable is dead. */
14170 if (GET_CODE (PATTERN (insn)) == PARALLEL
14171 || !set
14172 || SET_DEST (set) != pc_rtx
14173 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14174 return false;
14176 /* Skip conditional returns. */
14177 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14178 && XEXP (SET_SRC (set), 2) == pc_rtx)
14179 return false;
14181 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14182 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14184 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14186 code_label = XEXP (label_ref, 0);
14188 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14189 || INSN_ADDRESSES (INSN_UID (insn)) == -1
14190 || (INSN_ADDRESSES (INSN_UID (insn))
14191 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14192 return false;
14194 for (distance = 0, cur_insn = PREV_INSN (insn);
14195 distance < PREDICT_DISTANCE - 6;
14196 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14197 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14198 return false;
14200 rtx_code_label *new_label = gen_label_rtx ();
14201 uncond_jump = emit_jump_insn_after (
14202 gen_rtx_SET (pc_rtx,
14203 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14204 insn);
14205 emit_label_after (new_label, uncond_jump);
14207 tmp = XEXP (SET_SRC (set), 1);
14208 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14209 XEXP (SET_SRC (set), 2) = tmp;
14210 INSN_CODE (insn) = -1;
14212 XEXP (label_ref, 0) = new_label;
14213 JUMP_LABEL (insn) = new_label;
14214 JUMP_LABEL (uncond_jump) = code_label;
14216 return true;
14219 /* Returns 1 if INSN reads the value of REG for purposes not related
14220 to addressing of memory, and 0 otherwise. */
14221 static int
14222 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14224 return reg_referenced_p (reg, PATTERN (insn))
14225 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14228 /* Starting from INSN find_cond_jump looks downwards in the insn
14229 stream for a single jump insn which is the last user of the
14230 condition code set in INSN. */
14231 static rtx_insn *
14232 find_cond_jump (rtx_insn *insn)
14234 for (; insn; insn = NEXT_INSN (insn))
14236 rtx ite, cc;
14238 if (LABEL_P (insn))
14239 break;
14241 if (!JUMP_P (insn))
14243 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14244 break;
14245 continue;
14248 /* This will be triggered by a return. */
14249 if (GET_CODE (PATTERN (insn)) != SET)
14250 break;
14252 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14253 ite = SET_SRC (PATTERN (insn));
14255 if (GET_CODE (ite) != IF_THEN_ELSE)
14256 break;
14258 cc = XEXP (XEXP (ite, 0), 0);
14259 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14260 break;
14262 if (find_reg_note (insn, REG_DEAD, cc))
14263 return insn;
14264 break;
14267 return NULL;
14270 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14271 the semantics does not change. If NULL_RTX is passed as COND the
14272 function tries to find the conditional jump starting with INSN. */
14273 static void
14274 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14276 rtx tmp = *op0;
14278 if (cond == NULL_RTX)
14280 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14281 rtx set = jump ? single_set (jump) : NULL_RTX;
14283 if (set == NULL_RTX)
14284 return;
14286 cond = XEXP (SET_SRC (set), 0);
14289 *op0 = *op1;
14290 *op1 = tmp;
14291 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14294 /* On z10, instructions of the compare-and-branch family have the
14295 property to access the register occurring as second operand with
14296 its bits complemented. If such a compare is grouped with a second
14297 instruction that accesses the same register non-complemented, and
14298 if that register's value is delivered via a bypass, then the
14299 pipeline recycles, thereby causing significant performance decline.
14300 This function locates such situations and exchanges the two
14301 operands of the compare. The function return true whenever it
14302 added an insn. */
14303 static bool
14304 s390_z10_optimize_cmp (rtx_insn *insn)
14306 rtx_insn *prev_insn, *next_insn;
14307 bool insn_added_p = false;
14308 rtx cond, *op0, *op1;
14310 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14312 /* Handle compare and branch and branch on count
14313 instructions. */
14314 rtx pattern = single_set (insn);
14316 if (!pattern
14317 || SET_DEST (pattern) != pc_rtx
14318 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14319 return false;
14321 cond = XEXP (SET_SRC (pattern), 0);
14322 op0 = &XEXP (cond, 0);
14323 op1 = &XEXP (cond, 1);
14325 else if (GET_CODE (PATTERN (insn)) == SET)
14327 rtx src, dest;
14329 /* Handle normal compare instructions. */
14330 src = SET_SRC (PATTERN (insn));
14331 dest = SET_DEST (PATTERN (insn));
14333 if (!REG_P (dest)
14334 || !CC_REGNO_P (REGNO (dest))
14335 || GET_CODE (src) != COMPARE)
14336 return false;
14338 /* s390_swap_cmp will try to find the conditional
14339 jump when passing NULL_RTX as condition. */
14340 cond = NULL_RTX;
14341 op0 = &XEXP (src, 0);
14342 op1 = &XEXP (src, 1);
14344 else
14345 return false;
14347 if (!REG_P (*op0) || !REG_P (*op1))
14348 return false;
14350 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14351 return false;
14353 /* Swap the COMPARE arguments and its mask if there is a
14354 conflicting access in the previous insn. */
14355 prev_insn = prev_active_insn (insn);
14356 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14357 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14358 s390_swap_cmp (cond, op0, op1, insn);
14360 /* Check if there is a conflict with the next insn. If there
14361 was no conflict with the previous insn, then swap the
14362 COMPARE arguments and its mask. If we already swapped
14363 the operands, or if swapping them would cause a conflict
14364 with the previous insn, issue a NOP after the COMPARE in
14365 order to separate the two instuctions. */
14366 next_insn = next_active_insn (insn);
14367 if (next_insn != NULL_RTX && INSN_P (next_insn)
14368 && s390_non_addr_reg_read_p (*op1, next_insn))
14370 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14371 && s390_non_addr_reg_read_p (*op0, prev_insn))
14373 if (REGNO (*op1) == 0)
14374 emit_insn_after (gen_nop_lr1 (), insn);
14375 else
14376 emit_insn_after (gen_nop_lr0 (), insn);
14377 insn_added_p = true;
14379 else
14380 s390_swap_cmp (cond, op0, op1, insn);
14382 return insn_added_p;
14385 /* Number of INSNs to be scanned backward in the last BB of the loop
14386 and forward in the first BB of the loop. This usually should be a
14387 bit more than the number of INSNs which could go into one
14388 group. */
14389 #define S390_OSC_SCAN_INSN_NUM 5
14391 /* Scan LOOP for static OSC collisions and return true if a osc_break
14392 should be issued for this loop. */
14393 static bool
14394 s390_adjust_loop_scan_osc (struct loop* loop)
14397 HARD_REG_SET modregs, newregs;
14398 rtx_insn *insn, *store_insn = NULL;
14399 rtx set;
14400 struct s390_address addr_store, addr_load;
14401 subrtx_iterator::array_type array;
14402 int insn_count;
14404 CLEAR_HARD_REG_SET (modregs);
14406 insn_count = 0;
14407 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14409 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14410 continue;
14412 insn_count++;
14413 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14414 return false;
14416 find_all_hard_reg_sets (insn, &newregs, true);
14417 modregs |= newregs;
14419 set = single_set (insn);
14420 if (!set)
14421 continue;
14423 if (MEM_P (SET_DEST (set))
14424 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14426 store_insn = insn;
14427 break;
14431 if (store_insn == NULL_RTX)
14432 return false;
14434 insn_count = 0;
14435 FOR_BB_INSNS (loop->header, insn)
14437 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14438 continue;
14440 if (insn == store_insn)
14441 return false;
14443 insn_count++;
14444 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14445 return false;
14447 find_all_hard_reg_sets (insn, &newregs, true);
14448 modregs |= newregs;
14450 set = single_set (insn);
14451 if (!set)
14452 continue;
14454 /* An intermediate store disrupts static OSC checking
14455 anyway. */
14456 if (MEM_P (SET_DEST (set))
14457 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14458 return false;
14460 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14461 if (MEM_P (*iter)
14462 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14463 && rtx_equal_p (addr_load.base, addr_store.base)
14464 && rtx_equal_p (addr_load.indx, addr_store.indx)
14465 && rtx_equal_p (addr_load.disp, addr_store.disp))
14467 if ((addr_load.base != NULL_RTX
14468 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14469 || (addr_load.indx != NULL_RTX
14470 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14471 return true;
14474 return false;
14477 /* Look for adjustments which can be done on simple innermost
14478 loops. */
14479 static void
14480 s390_adjust_loops ()
14482 struct loop *loop = NULL;
14484 df_analyze ();
14485 compute_bb_for_insn ();
14487 /* Find the loops. */
14488 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14490 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14492 if (dump_file)
14494 flow_loop_dump (loop, dump_file, NULL, 0);
14495 fprintf (dump_file, ";; OSC loop scan Loop: ");
14497 if (loop->latch == NULL
14498 || pc_set (BB_END (loop->latch)) == NULL_RTX
14499 || !s390_adjust_loop_scan_osc (loop))
14501 if (dump_file)
14503 if (loop->latch == NULL)
14504 fprintf (dump_file, " muliple backward jumps\n");
14505 else
14507 fprintf (dump_file, " header insn: %d latch insn: %d ",
14508 INSN_UID (BB_HEAD (loop->header)),
14509 INSN_UID (BB_END (loop->latch)));
14510 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14511 fprintf (dump_file, " loop does not end with jump\n");
14512 else
14513 fprintf (dump_file, " not instrumented\n");
14517 else
14519 rtx_insn *new_insn;
14521 if (dump_file)
14522 fprintf (dump_file, " adding OSC break insn: ");
14523 new_insn = emit_insn_before (gen_osc_break (),
14524 BB_END (loop->latch));
14525 INSN_ADDRESSES_NEW (new_insn, -1);
14529 loop_optimizer_finalize ();
14531 df_finish_pass (false);
14534 /* Perform machine-dependent processing. */
14536 static void
14537 s390_reorg (void)
14539 struct constant_pool *pool;
14540 rtx_insn *insn;
14541 int hw_before, hw_after;
14543 if (s390_tune == PROCESSOR_2964_Z13)
14544 s390_adjust_loops ();
14546 /* Make sure all splits have been performed; splits after
14547 machine_dependent_reorg might confuse insn length counts. */
14548 split_all_insns_noflow ();
14550 /* Install the main literal pool and the associated base
14551 register load insns. The literal pool might be > 4096 bytes in
14552 size, so that some of its elements cannot be directly accessed.
14554 To fix this, we split the single literal pool into multiple
14555 pool chunks, reloading the pool base register at various
14556 points throughout the function to ensure it always points to
14557 the pool chunk the following code expects. */
14559 /* Collect the literal pool. */
14560 pool = s390_mainpool_start ();
14561 if (pool)
14563 /* Finish up literal pool related changes. */
14564 s390_mainpool_finish (pool);
14566 else
14568 /* If literal pool overflowed, chunkify it. */
14569 pool = s390_chunkify_start ();
14570 s390_chunkify_finish (pool);
14573 /* Generate out-of-pool execute target insns. */
14574 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14576 rtx label;
14577 rtx_insn *target;
14579 label = s390_execute_label (insn);
14580 if (!label)
14581 continue;
14583 gcc_assert (label != const0_rtx);
14585 target = emit_label (XEXP (label, 0));
14586 INSN_ADDRESSES_NEW (target, -1);
14588 if (JUMP_P (insn))
14590 target = emit_jump_insn (s390_execute_target (insn));
14591 /* This is important in order to keep a table jump
14592 pointing at the jump table label. Only this makes it
14593 being recognized as table jump. */
14594 JUMP_LABEL (target) = JUMP_LABEL (insn);
14596 else
14597 target = emit_insn (s390_execute_target (insn));
14598 INSN_ADDRESSES_NEW (target, -1);
14601 /* Try to optimize prologue and epilogue further. */
14602 s390_optimize_prologue ();
14604 /* Walk over the insns and do some >=z10 specific changes. */
14605 if (s390_tune >= PROCESSOR_2097_Z10)
14607 rtx_insn *insn;
14608 bool insn_added_p = false;
14610 /* The insn lengths and addresses have to be up to date for the
14611 following manipulations. */
14612 shorten_branches (get_insns ());
14614 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14616 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14617 continue;
14619 if (JUMP_P (insn))
14620 insn_added_p |= s390_fix_long_loop_prediction (insn);
14622 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14623 || GET_CODE (PATTERN (insn)) == SET)
14624 && s390_tune == PROCESSOR_2097_Z10)
14625 insn_added_p |= s390_z10_optimize_cmp (insn);
14628 /* Adjust branches if we added new instructions. */
14629 if (insn_added_p)
14630 shorten_branches (get_insns ());
14633 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14634 if (hw_after > 0)
14636 rtx_insn *insn;
14638 /* Insert NOPs for hotpatching. */
14639 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14640 /* Emit NOPs
14641 1. inside the area covered by debug information to allow setting
14642 breakpoints at the NOPs,
14643 2. before any insn which results in an asm instruction,
14644 3. before in-function labels to avoid jumping to the NOPs, for
14645 example as part of a loop,
14646 4. before any barrier in case the function is completely empty
14647 (__builtin_unreachable ()) and has neither internal labels nor
14648 active insns.
14650 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14651 break;
14652 /* Output a series of NOPs before the first active insn. */
14653 while (insn && hw_after > 0)
14655 if (hw_after >= 3)
14657 emit_insn_before (gen_nop_6_byte (), insn);
14658 hw_after -= 3;
14660 else if (hw_after >= 2)
14662 emit_insn_before (gen_nop_4_byte (), insn);
14663 hw_after -= 2;
14665 else
14667 emit_insn_before (gen_nop_2_byte (), insn);
14668 hw_after -= 1;
14674 /* Return true if INSN is a fp load insn writing register REGNO. */
14675 static inline bool
14676 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14678 rtx set;
14679 enum attr_type flag = s390_safe_attr_type (insn);
14681 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14682 return false;
14684 set = single_set (insn);
14686 if (set == NULL_RTX)
14687 return false;
14689 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14690 return false;
14692 if (REGNO (SET_DEST (set)) != regno)
14693 return false;
14695 return true;
14698 /* This value describes the distance to be avoided between an
14699 arithmetic fp instruction and an fp load writing the same register.
14700 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14701 fine but the exact value has to be avoided. Otherwise the FP
14702 pipeline will throw an exception causing a major penalty. */
14703 #define Z10_EARLYLOAD_DISTANCE 7
14705 /* Rearrange the ready list in order to avoid the situation described
14706 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14707 moved to the very end of the ready list. */
14708 static void
14709 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14711 unsigned int regno;
14712 int nready = *nready_p;
14713 rtx_insn *tmp;
14714 int i;
14715 rtx_insn *insn;
14716 rtx set;
14717 enum attr_type flag;
14718 int distance;
14720 /* Skip DISTANCE - 1 active insns. */
14721 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14722 distance > 0 && insn != NULL_RTX;
14723 distance--, insn = prev_active_insn (insn))
14724 if (CALL_P (insn) || JUMP_P (insn))
14725 return;
14727 if (insn == NULL_RTX)
14728 return;
14730 set = single_set (insn);
14732 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14733 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14734 return;
14736 flag = s390_safe_attr_type (insn);
14738 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14739 return;
14741 regno = REGNO (SET_DEST (set));
14742 i = nready - 1;
14744 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14745 i--;
14747 if (!i)
14748 return;
14750 tmp = ready[i];
14751 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14752 ready[0] = tmp;
14755 /* Returns TRUE if BB is entered via a fallthru edge and all other
14756 incoming edges are less than likely. */
14757 static bool
14758 s390_bb_fallthru_entry_likely (basic_block bb)
14760 edge e, fallthru_edge;
14761 edge_iterator ei;
14763 if (!bb)
14764 return false;
14766 fallthru_edge = find_fallthru_edge (bb->preds);
14767 if (!fallthru_edge)
14768 return false;
14770 FOR_EACH_EDGE (e, ei, bb->preds)
14771 if (e != fallthru_edge
14772 && e->probability >= profile_probability::likely ())
14773 return false;
14775 return true;
14778 struct s390_sched_state
14780 /* Number of insns in the group. */
14781 int group_state;
14782 /* Execution side of the group. */
14783 int side;
14784 /* Group can only hold two insns. */
14785 bool group_of_two;
14786 } s390_sched_state;
14788 static struct s390_sched_state sched_state = {0, 1, false};
14790 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14791 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14792 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14793 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14794 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14796 static unsigned int
14797 s390_get_sched_attrmask (rtx_insn *insn)
14799 unsigned int mask = 0;
14801 switch (s390_tune)
14803 case PROCESSOR_2827_ZEC12:
14804 if (get_attr_zEC12_cracked (insn))
14805 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14806 if (get_attr_zEC12_expanded (insn))
14807 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14808 if (get_attr_zEC12_endgroup (insn))
14809 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14810 if (get_attr_zEC12_groupalone (insn))
14811 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14812 break;
14813 case PROCESSOR_2964_Z13:
14814 if (get_attr_z13_cracked (insn))
14815 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14816 if (get_attr_z13_expanded (insn))
14817 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14818 if (get_attr_z13_endgroup (insn))
14819 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14820 if (get_attr_z13_groupalone (insn))
14821 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14822 if (get_attr_z13_groupoftwo (insn))
14823 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14824 break;
14825 case PROCESSOR_3906_Z14:
14826 if (get_attr_z14_cracked (insn))
14827 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14828 if (get_attr_z14_expanded (insn))
14829 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14830 if (get_attr_z14_endgroup (insn))
14831 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14832 if (get_attr_z14_groupalone (insn))
14833 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14834 if (get_attr_z14_groupoftwo (insn))
14835 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14836 break;
14837 case PROCESSOR_8561_Z15:
14838 case PROCESSOR_ARCH14:
14839 if (get_attr_z15_cracked (insn))
14840 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14841 if (get_attr_z15_expanded (insn))
14842 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14843 if (get_attr_z15_endgroup (insn))
14844 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14845 if (get_attr_z15_groupalone (insn))
14846 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14847 if (get_attr_z15_groupoftwo (insn))
14848 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14849 break;
14850 default:
14851 gcc_unreachable ();
14853 return mask;
14856 static unsigned int
14857 s390_get_unit_mask (rtx_insn *insn, int *units)
14859 unsigned int mask = 0;
14861 switch (s390_tune)
14863 case PROCESSOR_2964_Z13:
14864 *units = 4;
14865 if (get_attr_z13_unit_lsu (insn))
14866 mask |= 1 << 0;
14867 if (get_attr_z13_unit_fxa (insn))
14868 mask |= 1 << 1;
14869 if (get_attr_z13_unit_fxb (insn))
14870 mask |= 1 << 2;
14871 if (get_attr_z13_unit_vfu (insn))
14872 mask |= 1 << 3;
14873 break;
14874 case PROCESSOR_3906_Z14:
14875 *units = 4;
14876 if (get_attr_z14_unit_lsu (insn))
14877 mask |= 1 << 0;
14878 if (get_attr_z14_unit_fxa (insn))
14879 mask |= 1 << 1;
14880 if (get_attr_z14_unit_fxb (insn))
14881 mask |= 1 << 2;
14882 if (get_attr_z14_unit_vfu (insn))
14883 mask |= 1 << 3;
14884 break;
14885 case PROCESSOR_8561_Z15:
14886 case PROCESSOR_ARCH14:
14887 *units = 4;
14888 if (get_attr_z15_unit_lsu (insn))
14889 mask |= 1 << 0;
14890 if (get_attr_z15_unit_fxa (insn))
14891 mask |= 1 << 1;
14892 if (get_attr_z15_unit_fxb (insn))
14893 mask |= 1 << 2;
14894 if (get_attr_z15_unit_vfu (insn))
14895 mask |= 1 << 3;
14896 break;
14897 default:
14898 gcc_unreachable ();
14900 return mask;
14903 static bool
14904 s390_is_fpd (rtx_insn *insn)
14906 if (insn == NULL_RTX)
14907 return false;
14909 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14910 || get_attr_z15_unit_fpd (insn);
14913 static bool
14914 s390_is_fxd (rtx_insn *insn)
14916 if (insn == NULL_RTX)
14917 return false;
14919 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14920 || get_attr_z15_unit_fxd (insn);
14923 /* Returns TRUE if INSN is a long-running instruction. */
14924 static bool
14925 s390_is_longrunning (rtx_insn *insn)
14927 if (insn == NULL_RTX)
14928 return false;
14930 return s390_is_fxd (insn) || s390_is_fpd (insn);
14934 /* Return the scheduling score for INSN. The higher the score the
14935 better. The score is calculated from the OOO scheduling attributes
14936 of INSN and the scheduling state sched_state. */
14937 static int
14938 s390_sched_score (rtx_insn *insn)
14940 unsigned int mask = s390_get_sched_attrmask (insn);
14941 int score = 0;
14943 switch (sched_state.group_state)
14945 case 0:
14946 /* Try to put insns into the first slot which would otherwise
14947 break a group. */
14948 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14949 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14950 score += 5;
14951 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14952 score += 10;
14953 break;
14954 case 1:
14955 /* Prefer not cracked insns while trying to put together a
14956 group. */
14957 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14958 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14959 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14960 score += 10;
14961 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14962 score += 5;
14963 /* If we are in a group of two already, try to schedule another
14964 group-of-two insn to avoid shortening another group. */
14965 if (sched_state.group_of_two
14966 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14967 score += 15;
14968 break;
14969 case 2:
14970 /* Prefer not cracked insns while trying to put together a
14971 group. */
14972 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14973 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14974 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14975 score += 10;
14976 /* Prefer endgroup insns in the last slot. */
14977 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14978 score += 10;
14979 /* Try to avoid group-of-two insns in the last slot as they will
14980 shorten this group as well as the next one. */
14981 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14982 score = MAX (0, score - 15);
14983 break;
14986 if (s390_tune >= PROCESSOR_2964_Z13)
14988 int units, i;
14989 unsigned unit_mask, m = 1;
14991 unit_mask = s390_get_unit_mask (insn, &units);
14992 gcc_assert (units <= MAX_SCHED_UNITS);
14994 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14995 ago the last insn of this unit type got scheduled. This is
14996 supposed to help providing a proper instruction mix to the
14997 CPU. */
14998 for (i = 0; i < units; i++, m <<= 1)
14999 if (m & unit_mask)
15000 score += (last_scheduled_unit_distance[i][sched_state.side]
15001 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
15003 int other_side = 1 - sched_state.side;
15005 /* Try to delay long-running insns when side is busy. */
15006 if (s390_is_longrunning (insn))
15008 if (s390_is_fxd (insn))
15010 if (fxd_longrunning[sched_state.side]
15011 && fxd_longrunning[other_side]
15012 <= fxd_longrunning[sched_state.side])
15013 score = MAX (0, score - 10);
15015 else if (fxd_longrunning[other_side]
15016 >= fxd_longrunning[sched_state.side])
15017 score += 10;
15020 if (s390_is_fpd (insn))
15022 if (fpd_longrunning[sched_state.side]
15023 && fpd_longrunning[other_side]
15024 <= fpd_longrunning[sched_state.side])
15025 score = MAX (0, score - 10);
15027 else if (fpd_longrunning[other_side]
15028 >= fpd_longrunning[sched_state.side])
15029 score += 10;
15034 return score;
15037 /* This function is called via hook TARGET_SCHED_REORDER before
15038 issuing one insn from list READY which contains *NREADYP entries.
15039 For target z10 it reorders load instructions to avoid early load
15040 conflicts in the floating point pipeline */
15041 static int
15042 s390_sched_reorder (FILE *file, int verbose,
15043 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15045 if (s390_tune == PROCESSOR_2097_Z10
15046 && reload_completed
15047 && *nreadyp > 1)
15048 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15050 if (s390_tune >= PROCESSOR_2827_ZEC12
15051 && reload_completed
15052 && *nreadyp > 1)
15054 int i;
15055 int last_index = *nreadyp - 1;
15056 int max_index = -1;
15057 int max_score = -1;
15058 rtx_insn *tmp;
15060 /* Just move the insn with the highest score to the top (the
15061 end) of the list. A full sort is not needed since a conflict
15062 in the hazard recognition cannot happen. So the top insn in
15063 the ready list will always be taken. */
15064 for (i = last_index; i >= 0; i--)
15066 int score;
15068 if (recog_memoized (ready[i]) < 0)
15069 continue;
15071 score = s390_sched_score (ready[i]);
15072 if (score > max_score)
15074 max_score = score;
15075 max_index = i;
15079 if (max_index != -1)
15081 if (max_index != last_index)
15083 tmp = ready[max_index];
15084 ready[max_index] = ready[last_index];
15085 ready[last_index] = tmp;
15087 if (verbose > 5)
15088 fprintf (file,
15089 ";;\t\tBACKEND: move insn %d to the top of list\n",
15090 INSN_UID (ready[last_index]));
15092 else if (verbose > 5)
15093 fprintf (file,
15094 ";;\t\tBACKEND: best insn %d already on top\n",
15095 INSN_UID (ready[last_index]));
15098 if (verbose > 5)
15100 fprintf (file, "ready list ooo attributes - sched state: %d\n",
15101 sched_state.group_state);
15103 for (i = last_index; i >= 0; i--)
15105 unsigned int sched_mask;
15106 rtx_insn *insn = ready[i];
15108 if (recog_memoized (insn) < 0)
15109 continue;
15111 sched_mask = s390_get_sched_attrmask (insn);
15112 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15113 INSN_UID (insn),
15114 s390_sched_score (insn));
15115 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15116 ((M) & sched_mask) ? #ATTR : "");
15117 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15118 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15119 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15120 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15121 #undef PRINT_SCHED_ATTR
15122 if (s390_tune >= PROCESSOR_2964_Z13)
15124 unsigned int unit_mask, m = 1;
15125 int units, j;
15127 unit_mask = s390_get_unit_mask (insn, &units);
15128 fprintf (file, "(units:");
15129 for (j = 0; j < units; j++, m <<= 1)
15130 if (m & unit_mask)
15131 fprintf (file, " u%d", j);
15132 fprintf (file, ")");
15134 fprintf (file, "\n");
15139 return s390_issue_rate ();
15143 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15144 the scheduler has issued INSN. It stores the last issued insn into
15145 last_scheduled_insn in order to make it available for
15146 s390_sched_reorder. */
15147 static int
15148 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15150 last_scheduled_insn = insn;
15152 bool ends_group = false;
15154 if (s390_tune >= PROCESSOR_2827_ZEC12
15155 && reload_completed
15156 && recog_memoized (insn) >= 0)
15158 unsigned int mask = s390_get_sched_attrmask (insn);
15160 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15161 sched_state.group_of_two = true;
15163 /* If this is a group-of-two insn, we actually ended the last group
15164 and this insn is the first one of the new group. */
15165 if (sched_state.group_state == 2 && sched_state.group_of_two)
15167 sched_state.side = sched_state.side ? 0 : 1;
15168 sched_state.group_state = 0;
15171 /* Longrunning and side bookkeeping. */
15172 for (int i = 0; i < 2; i++)
15174 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
15175 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
15178 unsigned latency = insn_default_latency (insn);
15179 if (s390_is_longrunning (insn))
15181 if (s390_is_fxd (insn))
15182 fxd_longrunning[sched_state.side] = latency;
15183 else
15184 fpd_longrunning[sched_state.side] = latency;
15187 if (s390_tune >= PROCESSOR_2964_Z13)
15189 int units, i;
15190 unsigned unit_mask, m = 1;
15192 unit_mask = s390_get_unit_mask (insn, &units);
15193 gcc_assert (units <= MAX_SCHED_UNITS);
15195 for (i = 0; i < units; i++, m <<= 1)
15196 if (m & unit_mask)
15197 last_scheduled_unit_distance[i][sched_state.side] = 0;
15198 else if (last_scheduled_unit_distance[i][sched_state.side]
15199 < MAX_SCHED_MIX_DISTANCE)
15200 last_scheduled_unit_distance[i][sched_state.side]++;
15203 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15204 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15205 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
15206 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15208 sched_state.group_state = 0;
15209 ends_group = true;
15211 else
15213 switch (sched_state.group_state)
15215 case 0:
15216 sched_state.group_state++;
15217 break;
15218 case 1:
15219 sched_state.group_state++;
15220 if (sched_state.group_of_two)
15222 sched_state.group_state = 0;
15223 ends_group = true;
15225 break;
15226 case 2:
15227 sched_state.group_state++;
15228 ends_group = true;
15229 break;
15233 if (verbose > 5)
15235 unsigned int sched_mask;
15237 sched_mask = s390_get_sched_attrmask (insn);
15239 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15240 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15241 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15242 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15243 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15244 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15245 #undef PRINT_SCHED_ATTR
15247 if (s390_tune >= PROCESSOR_2964_Z13)
15249 unsigned int unit_mask, m = 1;
15250 int units, j;
15252 unit_mask = s390_get_unit_mask (insn, &units);
15253 fprintf (file, "(units:");
15254 for (j = 0; j < units; j++, m <<= 1)
15255 if (m & unit_mask)
15256 fprintf (file, " %d", j);
15257 fprintf (file, ")");
15259 fprintf (file, " sched state: %d\n", sched_state.group_state);
15261 if (s390_tune >= PROCESSOR_2964_Z13)
15263 int units, j;
15265 s390_get_unit_mask (insn, &units);
15267 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15268 for (j = 0; j < units; j++)
15269 fprintf (file, "%d:%d ", j,
15270 last_scheduled_unit_distance[j][sched_state.side]);
15271 fprintf (file, "\n");
15275 /* If this insn ended a group, the next will be on the other side. */
15276 if (ends_group)
15278 sched_state.group_state = 0;
15279 sched_state.side = sched_state.side ? 0 : 1;
15280 sched_state.group_of_two = false;
15284 if (GET_CODE (PATTERN (insn)) != USE
15285 && GET_CODE (PATTERN (insn)) != CLOBBER)
15286 return more - 1;
15287 else
15288 return more;
15291 static void
15292 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15293 int verbose ATTRIBUTE_UNUSED,
15294 int max_ready ATTRIBUTE_UNUSED)
15296 /* If the next basic block is most likely entered via a fallthru edge
15297 we keep the last sched state. Otherwise we start a new group.
15298 The scheduler traverses basic blocks in "instruction stream" ordering
15299 so if we see a fallthru edge here, sched_state will be of its
15300 source block.
15302 current_sched_info->prev_head is the insn before the first insn of the
15303 block of insns to be scheduled.
15305 rtx_insn *insn = current_sched_info->prev_head
15306 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15307 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15308 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15310 last_scheduled_insn = NULL;
15311 memset (last_scheduled_unit_distance, 0,
15312 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15313 sched_state.group_state = 0;
15314 sched_state.group_of_two = false;
15318 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15319 a new number struct loop *loop should be unrolled if tuned for cpus with
15320 a built-in stride prefetcher.
15321 The loop is analyzed for memory accesses by calling check_dpu for
15322 each rtx of the loop. Depending on the loop_depth and the amount of
15323 memory accesses a new number <=nunroll is returned to improve the
15324 behavior of the hardware prefetch unit. */
15325 static unsigned
15326 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15328 basic_block *bbs;
15329 rtx_insn *insn;
15330 unsigned i;
15331 unsigned mem_count = 0;
15333 if (s390_tune < PROCESSOR_2097_Z10)
15334 return nunroll;
15336 /* Count the number of memory references within the loop body. */
15337 bbs = get_loop_body (loop);
15338 subrtx_iterator::array_type array;
15339 for (i = 0; i < loop->num_nodes; i++)
15340 FOR_BB_INSNS (bbs[i], insn)
15341 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15343 rtx set;
15345 /* The runtime of small loops with memory block operations
15346 will be determined by the memory operation. Doing
15347 unrolling doesn't help here. Measurements to confirm
15348 this where only done on recent CPU levels. So better do
15349 not change anything for older CPUs. */
15350 if (s390_tune >= PROCESSOR_2964_Z13
15351 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15352 && ((set = single_set (insn)) != NULL_RTX)
15353 && ((GET_MODE (SET_DEST (set)) == BLKmode
15354 && (GET_MODE (SET_SRC (set)) == BLKmode
15355 || SET_SRC (set) == const0_rtx))
15356 || (GET_CODE (SET_SRC (set)) == COMPARE
15357 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15358 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15359 return 1;
15361 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15362 if (MEM_P (*iter))
15363 mem_count += 1;
15365 free (bbs);
15367 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15368 if (mem_count == 0)
15369 return nunroll;
15371 switch (loop_depth(loop))
15373 case 1:
15374 return MIN (nunroll, 28 / mem_count);
15375 case 2:
15376 return MIN (nunroll, 22 / mem_count);
15377 default:
15378 return MIN (nunroll, 16 / mem_count);
15382 /* Restore the current options. This is a hook function and also called
15383 internally. */
15385 static void
15386 s390_function_specific_restore (struct gcc_options *opts,
15387 struct gcc_options */* opts_set */,
15388 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15390 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15393 static void
15394 s390_default_align (struct gcc_options *opts)
15396 /* Set the default function alignment to 16 in order to get rid of
15397 some unwanted performance effects. */
15398 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15399 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15400 opts->x_str_align_functions = "16";
15403 static void
15404 s390_override_options_after_change (void)
15406 s390_default_align (&global_options);
15409 static void
15410 s390_option_override_internal (struct gcc_options *opts,
15411 struct gcc_options *opts_set)
15413 /* Architecture mode defaults according to ABI. */
15414 if (!(opts_set->x_target_flags & MASK_ZARCH))
15416 if (TARGET_64BIT)
15417 opts->x_target_flags |= MASK_ZARCH;
15418 else
15419 opts->x_target_flags &= ~MASK_ZARCH;
15422 /* Set the march default in case it hasn't been specified on cmdline. */
15423 if (!opts_set->x_s390_arch)
15424 opts->x_s390_arch = PROCESSOR_2064_Z900;
15426 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15428 /* Determine processor to tune for. */
15429 if (!opts_set->x_s390_tune)
15430 opts->x_s390_tune = opts->x_s390_arch;
15432 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15434 /* Sanity checks. */
15435 if (opts->x_s390_arch == PROCESSOR_NATIVE
15436 || opts->x_s390_tune == PROCESSOR_NATIVE)
15437 gcc_unreachable ();
15438 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15439 error ("64-bit ABI not supported in ESA/390 mode");
15441 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15442 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15443 || opts->x_s390_function_return == indirect_branch_thunk_inline
15444 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15445 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15446 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15448 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15450 if (!opts_set->x_s390_indirect_branch_call)
15451 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15453 if (!opts_set->x_s390_indirect_branch_jump)
15454 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15457 if (opts->x_s390_function_return != indirect_branch_keep)
15459 if (!opts_set->x_s390_function_return_reg)
15460 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15462 if (!opts_set->x_s390_function_return_mem)
15463 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15466 /* Enable hardware transactions if available and not explicitly
15467 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15468 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15470 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15471 opts->x_target_flags |= MASK_OPT_HTM;
15472 else
15473 opts->x_target_flags &= ~MASK_OPT_HTM;
15476 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15478 if (TARGET_OPT_VX_P (opts->x_target_flags))
15480 if (!TARGET_CPU_VX_P (opts))
15481 error ("hardware vector support not available on %s",
15482 processor_table[(int)opts->x_s390_arch].name);
15483 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15484 error ("hardware vector support not available with "
15485 "%<-msoft-float%>");
15488 else
15490 if (TARGET_CPU_VX_P (opts))
15491 /* Enable vector support if available and not explicitly disabled
15492 by user. E.g. with -m31 -march=z13 -mzarch */
15493 opts->x_target_flags |= MASK_OPT_VX;
15494 else
15495 opts->x_target_flags &= ~MASK_OPT_VX;
15498 /* Use hardware DFP if available and not explicitly disabled by
15499 user. E.g. with -m31 -march=z10 -mzarch */
15500 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15502 if (TARGET_DFP_P (opts))
15503 opts->x_target_flags |= MASK_HARD_DFP;
15504 else
15505 opts->x_target_flags &= ~MASK_HARD_DFP;
15508 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15510 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15512 if (!TARGET_CPU_DFP_P (opts))
15513 error ("hardware decimal floating point instructions"
15514 " not available on %s",
15515 processor_table[(int)opts->x_s390_arch].name);
15516 if (!TARGET_ZARCH_P (opts->x_target_flags))
15517 error ("hardware decimal floating point instructions"
15518 " not available in ESA/390 mode");
15520 else
15521 opts->x_target_flags &= ~MASK_HARD_DFP;
15524 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15525 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15527 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15528 && TARGET_HARD_DFP_P (opts->x_target_flags))
15529 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15530 "%<-msoft-float%>");
15532 opts->x_target_flags &= ~MASK_HARD_DFP;
15535 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15536 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15537 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15538 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15539 "supported in combination");
15541 if (opts->x_s390_stack_size)
15543 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15544 error ("stack size must be greater than the stack guard value");
15545 else if (opts->x_s390_stack_size > 1 << 16)
15546 error ("stack size must not be greater than 64k");
15548 else if (opts->x_s390_stack_guard)
15549 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15551 /* Our implementation of the stack probe requires the probe interval
15552 to be used as displacement in an address operand. The maximum
15553 probe interval currently is 64k. This would exceed short
15554 displacements. Trim that value down to 4k if that happens. This
15555 might result in too many probes being generated only on the
15556 oldest supported machine level z900. */
15557 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15558 param_stack_clash_protection_probe_interval = 12;
15560 #if TARGET_TPF != 0
15561 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15562 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15564 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15565 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15567 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15568 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15570 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15571 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15573 if (s390_tpf_trace_skip)
15575 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15576 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15578 #endif
15580 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15581 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15582 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15583 #endif
15585 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15587 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15588 100);
15589 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15590 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15591 2000);
15592 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15593 64);
15596 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15597 256);
15598 /* values for loop prefetching */
15599 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15600 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15601 /* s390 has more than 2 levels and the size is much larger. Since
15602 we are always running virtualized assume that we only get a small
15603 part of the caches above l1. */
15604 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15605 SET_OPTION_IF_UNSET (opts, opts_set,
15606 param_prefetch_min_insn_to_mem_ratio, 2);
15607 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15609 /* Use the alternative scheduling-pressure algorithm by default. */
15610 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15611 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15613 /* Set the default alignment. */
15614 s390_default_align (opts);
15616 /* Call target specific restore function to do post-init work. At the moment,
15617 this just sets opts->x_s390_cost_pointer. */
15618 s390_function_specific_restore (opts, opts_set, NULL);
15620 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15621 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
15622 not the case when the code runs before the prolog. */
15623 if (opts->x_flag_fentry && !TARGET_64BIT)
15624 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15627 static void
15628 s390_option_override (void)
15630 unsigned int i;
15631 cl_deferred_option *opt;
15632 vec<cl_deferred_option> *v =
15633 (vec<cl_deferred_option> *) s390_deferred_options;
15635 if (v)
15636 FOR_EACH_VEC_ELT (*v, i, opt)
15638 switch (opt->opt_index)
15640 case OPT_mhotpatch_:
15642 int val1;
15643 int val2;
15644 char *s = strtok (ASTRDUP (opt->arg), ",");
15645 char *t = strtok (NULL, "\0");
15647 if (t != NULL)
15649 val1 = integral_argument (s);
15650 val2 = integral_argument (t);
15652 else
15654 val1 = -1;
15655 val2 = -1;
15657 if (val1 == -1 || val2 == -1)
15659 /* argument is not a plain number */
15660 error ("arguments to %qs should be non-negative integers",
15661 "-mhotpatch=n,m");
15662 break;
15664 else if (val1 > s390_hotpatch_hw_max
15665 || val2 > s390_hotpatch_hw_max)
15667 error ("argument to %qs is too large (max. %d)",
15668 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15669 break;
15671 s390_hotpatch_hw_before_label = val1;
15672 s390_hotpatch_hw_after_label = val2;
15673 break;
15675 default:
15676 gcc_unreachable ();
15680 /* Set up function hooks. */
15681 init_machine_status = s390_init_machine_status;
15683 s390_option_override_internal (&global_options, &global_options_set);
15685 /* Save the initial options in case the user does function specific
15686 options. */
15687 target_option_default_node
15688 = build_target_option_node (&global_options, &global_options_set);
15689 target_option_current_node = target_option_default_node;
15691 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15692 requires the arch flags to be evaluated already. Since prefetching
15693 is beneficial on s390, we enable it if available. */
15694 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15695 flag_prefetch_loop_arrays = 1;
15697 if (!s390_pic_data_is_text_relative && !flag_pic)
15698 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15699 "%<-fpic%>/%<-fPIC%>");
15701 if (TARGET_TPF)
15703 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15704 debuggers do not yet support DWARF 3/4. */
15705 if (!global_options_set.x_dwarf_strict)
15706 dwarf_strict = 1;
15707 if (!global_options_set.x_dwarf_version)
15708 dwarf_version = 2;
15712 #if S390_USE_TARGET_ATTRIBUTE
15713 /* Inner function to process the attribute((target(...))), take an argument and
15714 set the current options from the argument. If we have a list, recursively go
15715 over the list. */
15717 static bool
15718 s390_valid_target_attribute_inner_p (tree args,
15719 struct gcc_options *opts,
15720 struct gcc_options *new_opts_set,
15721 bool force_pragma)
15723 char *next_optstr;
15724 bool ret = true;
15726 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15727 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15728 static const struct
15730 const char *string;
15731 size_t len;
15732 int opt;
15733 int has_arg;
15734 int only_as_pragma;
15735 } attrs[] = {
15736 /* enum options */
15737 S390_ATTRIB ("arch=", OPT_march_, 1),
15738 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15739 /* uinteger options */
15740 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15741 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15742 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15743 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15744 /* flag options */
15745 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15746 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15747 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15748 S390_ATTRIB ("htm", OPT_mhtm, 0),
15749 S390_ATTRIB ("vx", OPT_mvx, 0),
15750 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15751 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15752 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15753 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15754 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15755 /* boolean options */
15756 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15758 #undef S390_ATTRIB
15759 #undef S390_PRAGMA
15761 /* If this is a list, recurse to get the options. */
15762 if (TREE_CODE (args) == TREE_LIST)
15764 bool ret = true;
15765 int num_pragma_values;
15766 int i;
15768 /* Note: attribs.c:decl_attributes prepends the values from
15769 current_target_pragma to the list of target attributes. To determine
15770 whether we're looking at a value of the attribute or the pragma we
15771 assume that the first [list_length (current_target_pragma)] values in
15772 the list are the values from the pragma. */
15773 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15774 ? list_length (current_target_pragma) : 0;
15775 for (i = 0; args; args = TREE_CHAIN (args), i++)
15777 bool is_pragma;
15779 is_pragma = (force_pragma || i < num_pragma_values);
15780 if (TREE_VALUE (args)
15781 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15782 opts, new_opts_set,
15783 is_pragma))
15785 ret = false;
15788 return ret;
15791 else if (TREE_CODE (args) != STRING_CST)
15793 error ("attribute %<target%> argument not a string");
15794 return false;
15797 /* Handle multiple arguments separated by commas. */
15798 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15800 while (next_optstr && *next_optstr != '\0')
15802 char *p = next_optstr;
15803 char *orig_p = p;
15804 char *comma = strchr (next_optstr, ',');
15805 size_t len, opt_len;
15806 int opt;
15807 bool opt_set_p;
15808 char ch;
15809 unsigned i;
15810 int mask = 0;
15811 enum cl_var_type var_type;
15812 bool found;
15814 if (comma)
15816 *comma = '\0';
15817 len = comma - next_optstr;
15818 next_optstr = comma + 1;
15820 else
15822 len = strlen (p);
15823 next_optstr = NULL;
15826 /* Recognize no-xxx. */
15827 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15829 opt_set_p = false;
15830 p += 3;
15831 len -= 3;
15833 else
15834 opt_set_p = true;
15836 /* Find the option. */
15837 ch = *p;
15838 found = false;
15839 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15841 opt_len = attrs[i].len;
15842 if (ch == attrs[i].string[0]
15843 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15844 && memcmp (p, attrs[i].string, opt_len) == 0)
15846 opt = attrs[i].opt;
15847 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15848 continue;
15849 mask = cl_options[opt].var_value;
15850 var_type = cl_options[opt].var_type;
15851 found = true;
15852 break;
15856 /* Process the option. */
15857 if (!found)
15859 error ("attribute(target(\"%s\")) is unknown", orig_p);
15860 return false;
15862 else if (attrs[i].only_as_pragma && !force_pragma)
15864 /* Value is not allowed for the target attribute. */
15865 error ("value %qs is not supported by attribute %<target%>",
15866 attrs[i].string);
15867 return false;
15870 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15872 if (var_type == CLVC_BIT_CLEAR)
15873 opt_set_p = !opt_set_p;
15875 if (opt_set_p)
15876 opts->x_target_flags |= mask;
15877 else
15878 opts->x_target_flags &= ~mask;
15879 new_opts_set->x_target_flags |= mask;
15882 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15884 int value;
15886 if (cl_options[opt].cl_uinteger)
15888 /* Unsigned integer argument. Code based on the function
15889 decode_cmdline_option () in opts-common.c. */
15890 value = integral_argument (p + opt_len);
15892 else
15893 value = (opt_set_p) ? 1 : 0;
15895 if (value != -1)
15897 struct cl_decoded_option decoded;
15899 /* Value range check; only implemented for numeric and boolean
15900 options at the moment. */
15901 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15902 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15903 set_option (opts, new_opts_set, opt, value,
15904 p + opt_len, DK_UNSPECIFIED, input_location,
15905 global_dc);
15907 else
15909 error ("attribute(target(\"%s\")) is unknown", orig_p);
15910 ret = false;
15914 else if (cl_options[opt].var_type == CLVC_ENUM)
15916 bool arg_ok;
15917 int value;
15919 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15920 if (arg_ok)
15921 set_option (opts, new_opts_set, opt, value,
15922 p + opt_len, DK_UNSPECIFIED, input_location,
15923 global_dc);
15924 else
15926 error ("attribute(target(\"%s\")) is unknown", orig_p);
15927 ret = false;
15931 else
15932 gcc_unreachable ();
15934 return ret;
15937 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15939 tree
15940 s390_valid_target_attribute_tree (tree args,
15941 struct gcc_options *opts,
15942 const struct gcc_options *opts_set,
15943 bool force_pragma)
15945 tree t = NULL_TREE;
15946 struct gcc_options new_opts_set;
15948 memset (&new_opts_set, 0, sizeof (new_opts_set));
15950 /* Process each of the options on the chain. */
15951 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15952 force_pragma))
15953 return error_mark_node;
15955 /* If some option was set (even if it has not changed), rerun
15956 s390_option_override_internal, and then save the options away. */
15957 if (new_opts_set.x_target_flags
15958 || new_opts_set.x_s390_arch
15959 || new_opts_set.x_s390_tune
15960 || new_opts_set.x_s390_stack_guard
15961 || new_opts_set.x_s390_stack_size
15962 || new_opts_set.x_s390_branch_cost
15963 || new_opts_set.x_s390_warn_framesize
15964 || new_opts_set.x_s390_warn_dynamicstack_p)
15966 const unsigned char *src = (const unsigned char *)opts_set;
15967 unsigned char *dest = (unsigned char *)&new_opts_set;
15968 unsigned int i;
15970 /* Merge the original option flags into the new ones. */
15971 for (i = 0; i < sizeof(*opts_set); i++)
15972 dest[i] |= src[i];
15974 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15975 s390_option_override_internal (opts, &new_opts_set);
15976 /* Save the current options unless we are validating options for
15977 #pragma. */
15978 t = build_target_option_node (opts, &new_opts_set);
15980 return t;
15983 /* Hook to validate attribute((target("string"))). */
15985 static bool
15986 s390_valid_target_attribute_p (tree fndecl,
15987 tree ARG_UNUSED (name),
15988 tree args,
15989 int ARG_UNUSED (flags))
15991 struct gcc_options func_options, func_options_set;
15992 tree new_target, new_optimize;
15993 bool ret = true;
15995 /* attribute((target("default"))) does nothing, beyond
15996 affecting multi-versioning. */
15997 if (TREE_VALUE (args)
15998 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15999 && TREE_CHAIN (args) == NULL_TREE
16000 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
16001 return true;
16003 tree old_optimize
16004 = build_optimization_node (&global_options, &global_options_set);
16006 /* Get the optimization options of the current function. */
16007 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
16009 if (!func_optimize)
16010 func_optimize = old_optimize;
16012 /* Init func_options. */
16013 memset (&func_options, 0, sizeof (func_options));
16014 init_options_struct (&func_options, NULL);
16015 lang_hooks.init_options_struct (&func_options);
16016 memset (&func_options_set, 0, sizeof (func_options_set));
16018 cl_optimization_restore (&func_options, &func_options_set,
16019 TREE_OPTIMIZATION (func_optimize));
16021 /* Initialize func_options to the default before its target options can
16022 be set. */
16023 cl_target_option_restore (&func_options, &func_options_set,
16024 TREE_TARGET_OPTION (target_option_default_node));
16026 new_target = s390_valid_target_attribute_tree (args, &func_options,
16027 &global_options_set,
16028 (args ==
16029 current_target_pragma));
16030 new_optimize = build_optimization_node (&func_options, &func_options_set);
16031 if (new_target == error_mark_node)
16032 ret = false;
16033 else if (fndecl && new_target)
16035 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
16036 if (old_optimize != new_optimize)
16037 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
16039 return ret;
16042 /* Hook to determine if one function can safely inline another. */
16044 static bool
16045 s390_can_inline_p (tree caller, tree callee)
16047 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16048 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16050 if (!callee_tree)
16051 callee_tree = target_option_default_node;
16052 if (!caller_tree)
16053 caller_tree = target_option_default_node;
16054 if (callee_tree == caller_tree)
16055 return true;
16057 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16058 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16059 bool ret = true;
16061 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
16062 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
16063 ret = false;
16065 /* Don't inline functions to be compiled for a more recent arch into a
16066 function for an older arch. */
16067 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16068 ret = false;
16070 /* Inlining a hard float function into a soft float function is only
16071 allowed if the hard float function doesn't actually make use of
16072 floating point.
16074 We are called from FEs for multi-versioning call optimization, so
16075 beware of ipa_fn_summaries not available. */
16076 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
16077 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
16078 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
16079 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
16080 && (! ipa_fn_summaries
16081 || ipa_fn_summaries->get
16082 (cgraph_node::get (callee))->fp_expressions))
16083 ret = false;
16085 return ret;
16087 #endif
16089 /* Set VAL to correct enum value according to the indirect-branch or
16090 function-return attribute in ATTR. */
16092 static inline void
16093 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16095 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16096 if (strcmp (str, "keep") == 0)
16097 *val = indirect_branch_keep;
16098 else if (strcmp (str, "thunk") == 0)
16099 *val = indirect_branch_thunk;
16100 else if (strcmp (str, "thunk-inline") == 0)
16101 *val = indirect_branch_thunk_inline;
16102 else if (strcmp (str, "thunk-extern") == 0)
16103 *val = indirect_branch_thunk_extern;
16106 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
16107 from either the cmdline or the function attributes in
16108 cfun->machine. */
16110 static void
16111 s390_indirect_branch_settings (tree fndecl)
16113 tree attr;
16115 if (!fndecl)
16116 return;
16118 /* Initialize with the cmdline options and let the attributes
16119 override it. */
16120 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16121 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16123 cfun->machine->function_return_reg = s390_function_return_reg;
16124 cfun->machine->function_return_mem = s390_function_return_mem;
16126 if ((attr = lookup_attribute ("indirect_branch",
16127 DECL_ATTRIBUTES (fndecl))))
16129 s390_indirect_branch_attrvalue (attr,
16130 &cfun->machine->indirect_branch_jump);
16131 s390_indirect_branch_attrvalue (attr,
16132 &cfun->machine->indirect_branch_call);
16135 if ((attr = lookup_attribute ("indirect_branch_jump",
16136 DECL_ATTRIBUTES (fndecl))))
16137 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16139 if ((attr = lookup_attribute ("indirect_branch_call",
16140 DECL_ATTRIBUTES (fndecl))))
16141 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16143 if ((attr = lookup_attribute ("function_return",
16144 DECL_ATTRIBUTES (fndecl))))
16146 s390_indirect_branch_attrvalue (attr,
16147 &cfun->machine->function_return_reg);
16148 s390_indirect_branch_attrvalue (attr,
16149 &cfun->machine->function_return_mem);
16152 if ((attr = lookup_attribute ("function_return_reg",
16153 DECL_ATTRIBUTES (fndecl))))
16154 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16156 if ((attr = lookup_attribute ("function_return_mem",
16157 DECL_ATTRIBUTES (fndecl))))
16158 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16161 #if S390_USE_TARGET_ATTRIBUTE
16162 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
16163 cache. */
16165 void
16166 s390_activate_target_options (tree new_tree)
16168 cl_target_option_restore (&global_options, &global_options_set,
16169 TREE_TARGET_OPTION (new_tree));
16170 if (TREE_TARGET_GLOBALS (new_tree))
16171 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16172 else if (new_tree == target_option_default_node)
16173 restore_target_globals (&default_target_globals);
16174 else
16175 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16176 s390_previous_fndecl = NULL_TREE;
16178 #endif
16180 /* Establish appropriate back-end context for processing the function
16181 FNDECL. The argument might be NULL to indicate processing at top
16182 level, outside of any function scope. */
16183 static void
16184 s390_set_current_function (tree fndecl)
16186 #if S390_USE_TARGET_ATTRIBUTE
16187 /* Only change the context if the function changes. This hook is called
16188 several times in the course of compiling a function, and we don't want to
16189 slow things down too much or call target_reinit when it isn't safe. */
16190 if (fndecl == s390_previous_fndecl)
16192 s390_indirect_branch_settings (fndecl);
16193 return;
16196 tree old_tree;
16197 if (s390_previous_fndecl == NULL_TREE)
16198 old_tree = target_option_current_node;
16199 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16200 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16201 else
16202 old_tree = target_option_default_node;
16204 if (fndecl == NULL_TREE)
16206 if (old_tree != target_option_current_node)
16207 s390_activate_target_options (target_option_current_node);
16208 return;
16211 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16212 if (new_tree == NULL_TREE)
16213 new_tree = target_option_default_node;
16215 if (old_tree != new_tree)
16216 s390_activate_target_options (new_tree);
16217 s390_previous_fndecl = fndecl;
16218 #endif
16219 s390_indirect_branch_settings (fndecl);
16222 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16224 static bool
16225 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16226 unsigned int align ATTRIBUTE_UNUSED,
16227 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16228 bool speed_p ATTRIBUTE_UNUSED)
16230 return (size == 1 || size == 2
16231 || size == 4 || (TARGET_ZARCH && size == 8));
16234 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16236 static void
16237 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16239 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16240 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16241 tree call_efpc = build_call_expr (efpc, 0);
16242 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16244 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16245 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16246 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16247 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16248 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16249 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16251 /* Generates the equivalent of feholdexcept (&fenv_var)
16253 fenv_var = __builtin_s390_efpc ();
16254 __builtin_s390_sfpc (fenv_var & mask) */
16255 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16256 NULL_TREE, NULL_TREE);
16257 tree new_fpc
16258 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16259 build_int_cst (unsigned_type_node,
16260 ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16261 | FPC_EXCEPTION_MASK)));
16262 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16263 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16265 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16267 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16268 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16269 build_int_cst (unsigned_type_node,
16270 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16271 *clear = build_call_expr (sfpc, 1, new_fpc);
16273 /* Generates the equivalent of feupdateenv (fenv_var)
16275 old_fpc = __builtin_s390_efpc ();
16276 __builtin_s390_sfpc (fenv_var);
16277 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16279 old_fpc = create_tmp_var_raw (unsigned_type_node);
16280 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16281 NULL_TREE, NULL_TREE);
16283 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16285 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16286 build_int_cst (unsigned_type_node,
16287 FPC_FLAGS_MASK));
16288 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16289 build_int_cst (unsigned_type_node,
16290 FPC_FLAGS_SHIFT));
16291 tree atomic_feraiseexcept
16292 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16293 raise_old_except = build_call_expr (atomic_feraiseexcept,
16294 1, raise_old_except);
16296 *update = build2 (COMPOUND_EXPR, void_type_node,
16297 build2 (COMPOUND_EXPR, void_type_node,
16298 store_old_fpc, set_new_fpc),
16299 raise_old_except);
16301 #undef FPC_EXCEPTION_MASK
16302 #undef FPC_FLAGS_MASK
16303 #undef FPC_DXC_MASK
16304 #undef FPC_EXCEPTION_MASK_SHIFT
16305 #undef FPC_FLAGS_SHIFT
16306 #undef FPC_DXC_SHIFT
16309 /* Return the vector mode to be used for inner mode MODE when doing
16310 vectorization. */
16311 static machine_mode
16312 s390_preferred_simd_mode (scalar_mode mode)
16314 if (TARGET_VXE)
16315 switch (mode)
16317 case E_SFmode:
16318 return V4SFmode;
16319 default:;
16322 if (TARGET_VX)
16323 switch (mode)
16325 case E_DFmode:
16326 return V2DFmode;
16327 case E_DImode:
16328 return V2DImode;
16329 case E_SImode:
16330 return V4SImode;
16331 case E_HImode:
16332 return V8HImode;
16333 case E_QImode:
16334 return V16QImode;
16335 default:;
16337 return word_mode;
16340 /* Our hardware does not require vectors to be strictly aligned. */
16341 static bool
16342 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16343 const_tree type ATTRIBUTE_UNUSED,
16344 int misalignment ATTRIBUTE_UNUSED,
16345 bool is_packed ATTRIBUTE_UNUSED)
16347 if (TARGET_VX)
16348 return true;
16350 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16351 is_packed);
16354 /* The vector ABI requires vector types to be aligned on an 8 byte
16355 boundary (our stack alignment). However, we allow this to be
16356 overriden by the user, while this definitely breaks the ABI. */
16357 static HOST_WIDE_INT
16358 s390_vector_alignment (const_tree type)
16360 tree size = TYPE_SIZE (type);
16362 if (!TARGET_VX_ABI)
16363 return default_vector_alignment (type);
16365 if (TYPE_USER_ALIGN (type))
16366 return TYPE_ALIGN (type);
16368 if (tree_fits_uhwi_p (size)
16369 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16370 return tree_to_uhwi (size);
16372 return BIGGEST_ALIGNMENT;
16375 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
16376 LARL instruction. */
16378 static HOST_WIDE_INT
16379 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16381 return MAX (align, 16);
16384 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16385 /* Implement TARGET_ASM_FILE_START. */
16386 static void
16387 s390_asm_file_start (void)
16389 default_file_start ();
16390 s390_asm_output_machine_for_arch (asm_out_file);
16392 #endif
16394 /* Implement TARGET_ASM_FILE_END. */
16395 static void
16396 s390_asm_file_end (void)
16398 #ifdef HAVE_AS_GNU_ATTRIBUTE
16399 varpool_node *vnode;
16400 cgraph_node *cnode;
16402 FOR_EACH_VARIABLE (vnode)
16403 if (TREE_PUBLIC (vnode->decl))
16404 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16406 FOR_EACH_FUNCTION (cnode)
16407 if (TREE_PUBLIC (cnode->decl))
16408 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16411 if (s390_vector_abi != 0)
16412 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16413 s390_vector_abi);
16414 #endif
16415 file_end_indicate_exec_stack ();
16417 if (flag_split_stack)
16418 file_end_indicate_split_stack ();
16421 /* Return true if TYPE is a vector bool type. */
16422 static inline bool
16423 s390_vector_bool_type_p (const_tree type)
16425 return TYPE_VECTOR_OPAQUE (type);
16428 /* Return the diagnostic message string if the binary operation OP is
16429 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16430 static const char*
16431 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16433 bool bool1_p, bool2_p;
16434 bool plusminus_p;
16435 bool muldiv_p;
16436 bool compare_p;
16437 machine_mode mode1, mode2;
16439 if (!TARGET_ZVECTOR)
16440 return NULL;
16442 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16443 return NULL;
16445 bool1_p = s390_vector_bool_type_p (type1);
16446 bool2_p = s390_vector_bool_type_p (type2);
16448 /* Mixing signed and unsigned types is forbidden for all
16449 operators. */
16450 if (!bool1_p && !bool2_p
16451 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16452 return N_("types differ in signedness");
16454 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16455 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16456 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16457 || op == ROUND_DIV_EXPR);
16458 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16459 || op == EQ_EXPR || op == NE_EXPR);
16461 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16462 return N_("binary operator does not support two vector bool operands");
16464 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16465 return N_("binary operator does not support vector bool operand");
16467 mode1 = TYPE_MODE (type1);
16468 mode2 = TYPE_MODE (type2);
16470 if (bool1_p != bool2_p && plusminus_p
16471 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16472 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16473 return N_("binary operator does not support mixing vector "
16474 "bool with floating point vector operands");
16476 return NULL;
16479 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
16480 /* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with older
16481 glibc versions
16483 For historical reasons, float_t and double_t had been typedef'ed to
16484 double on s390, causing operations on float_t to operate in a higher
16485 precision than is necessary. However, it is not the case that SFmode
16486 operations have implicit excess precision, and we generate more optimal
16487 code if we let the compiler know no implicit extra precision is added.
16489 With a glibc with that "historic" definition, configure will enable this hook
16490 to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as implied
16491 by -std=cXY). That means when we are compiling with -fexcess-precision=fast,
16492 the value we set for FLT_EVAL_METHOD will be out of line with the actual
16493 precision of float_t.
16495 Newer versions of glibc will be modified to derive the definition of float_t
16496 from FLT_EVAL_METHOD on s390x, as on many other architectures. There,
16497 configure will disable this hook by default, so that we defer to the default
16498 of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef of float_t to
16499 float. Note that in that scenario, float_t and FLT_EVAL_METHOD will be in
16500 line independent of -fexcess-precision. */
16502 static enum flt_eval_method
16503 s390_excess_precision (enum excess_precision_type type)
16505 switch (type)
16507 case EXCESS_PRECISION_TYPE_IMPLICIT:
16508 case EXCESS_PRECISION_TYPE_FAST:
16509 /* The fastest type to promote to will always be the native type,
16510 whether that occurs with implicit excess precision or
16511 otherwise. */
16512 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16513 case EXCESS_PRECISION_TYPE_STANDARD:
16514 /* Otherwise, when we are in a standards compliant mode, to
16515 ensure consistency with the implementation in glibc, report that
16516 float is evaluated to the range and precision of double. */
16517 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16518 default:
16519 gcc_unreachable ();
16521 return FLT_EVAL_METHOD_UNPREDICTABLE;
16523 #endif
16525 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16527 static unsigned HOST_WIDE_INT
16528 s390_asan_shadow_offset (void)
16530 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16533 #ifdef HAVE_GAS_HIDDEN
16534 # define USE_HIDDEN_LINKONCE 1
16535 #else
16536 # define USE_HIDDEN_LINKONCE 0
16537 #endif
16539 /* Output an indirect branch trampoline for target register REGNO. */
16541 static void
16542 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16544 tree decl;
16545 char thunk_label[32];
16546 int i;
16548 if (z10_p)
16549 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16550 else
16551 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16552 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16554 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16555 get_identifier (thunk_label),
16556 build_function_type_list (void_type_node, NULL_TREE));
16557 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16558 NULL_TREE, void_type_node);
16559 TREE_PUBLIC (decl) = 1;
16560 TREE_STATIC (decl) = 1;
16561 DECL_IGNORED_P (decl) = 1;
16563 if (USE_HIDDEN_LINKONCE)
16565 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16567 targetm.asm_out.unique_section (decl, 0);
16568 switch_to_section (get_named_section (decl, NULL, 0));
16570 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16571 fputs ("\t.hidden\t", asm_out_file);
16572 assemble_name (asm_out_file, thunk_label);
16573 putc ('\n', asm_out_file);
16574 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16576 else
16578 switch_to_section (text_section);
16579 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16582 DECL_INITIAL (decl) = make_node (BLOCK);
16583 current_function_decl = decl;
16584 allocate_struct_function (decl, false);
16585 init_function_start (decl);
16586 cfun->is_thunk = true;
16587 first_function_block_is_cold = false;
16588 final_start_function (emit_barrier (), asm_out_file, 1);
16590 /* This makes CFI at least usable for indirect jumps.
16592 Stopping in the thunk: backtrace will point to the thunk target
16593 is if it was interrupted by a signal. For a call this means that
16594 the call chain will be: caller->callee->thunk */
16595 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16597 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16598 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16599 for (i = 0; i < FPR15_REGNUM; i++)
16600 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16603 if (z10_p)
16605 /* exrl 0,1f */
16607 /* We generate a thunk for z10 compiled code although z10 is
16608 currently not enabled. Tell the assembler to accept the
16609 instruction. */
16610 if (!TARGET_CPU_Z10)
16612 fputs ("\t.machine push\n", asm_out_file);
16613 fputs ("\t.machine z10\n", asm_out_file);
16615 /* We use exrl even if -mzarch hasn't been specified on the
16616 command line so we have to tell the assembler to accept
16617 it. */
16618 if (!TARGET_ZARCH)
16619 fputs ("\t.machinemode zarch\n", asm_out_file);
16621 fputs ("\texrl\t0,1f\n", asm_out_file);
16623 if (!TARGET_ZARCH)
16624 fputs ("\t.machinemode esa\n", asm_out_file);
16626 if (!TARGET_CPU_Z10)
16627 fputs ("\t.machine pop\n", asm_out_file);
16629 else
16631 /* larl %r1,1f */
16632 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16633 INDIRECT_BRANCH_THUNK_REGNUM);
16635 /* ex 0,0(%r1) */
16636 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16637 INDIRECT_BRANCH_THUNK_REGNUM);
16640 /* 0: j 0b */
16641 fputs ("0:\tj\t0b\n", asm_out_file);
16643 /* 1: br <regno> */
16644 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16646 final_end_function ();
16647 init_insn_lengths ();
16648 free_after_compilation (cfun);
16649 set_cfun (NULL);
16650 current_function_decl = NULL;
16653 /* Implement the asm.code_end target hook. */
16655 static void
16656 s390_code_end (void)
16658 int i;
16660 for (i = 1; i < 16; i++)
16662 if (indirect_branch_z10thunk_mask & (1 << i))
16663 s390_output_indirect_thunk_function (i, true);
16665 if (indirect_branch_prez10thunk_mask & (1 << i))
16666 s390_output_indirect_thunk_function (i, false);
16669 if (TARGET_INDIRECT_BRANCH_TABLE)
16671 int o;
16672 int i;
16674 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16676 if (indirect_branch_table_label_no[o] == 0)
16677 continue;
16679 switch_to_section (get_section (indirect_branch_table_name[o],
16681 NULL_TREE));
16682 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16684 char label_start[32];
16686 ASM_GENERATE_INTERNAL_LABEL (label_start,
16687 indirect_branch_table_label[o], i);
16689 fputs ("\t.long\t", asm_out_file);
16690 assemble_name_raw (asm_out_file, label_start);
16691 fputs ("-.\n", asm_out_file);
16693 switch_to_section (current_function_section ());
16698 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16700 unsigned int
16701 s390_case_values_threshold (void)
16703 /* Disabling branch prediction for indirect jumps makes jump tables
16704 much more expensive. */
16705 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16706 return 20;
16708 return default_case_values_threshold ();
16711 /* Evaluate the insns between HEAD and TAIL and do back-end to install
16712 back-end specific dependencies.
16714 Establish an ANTI dependency between r11 and r15 restores from FPRs
16715 to prevent the instructions scheduler from reordering them since
16716 this would break CFI. No further handling in the sched_reorder
16717 hook is required since the r11 and r15 restore will never appear in
16718 the same ready list with that change. */
16719 void
16720 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16722 if (!frame_pointer_needed || !epilogue_completed)
16723 return;
16725 while (head != tail && DEBUG_INSN_P (head))
16726 head = NEXT_INSN (head);
16728 rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16730 for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16732 rtx set = single_set (insn);
16733 if (!INSN_P (insn)
16734 || !RTX_FRAME_RELATED_P (insn)
16735 || set == NULL_RTX
16736 || !REG_P (SET_DEST (set))
16737 || !FP_REG_P (SET_SRC (set)))
16738 continue;
16740 if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16741 r11_restore = insn;
16743 if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16744 r15_restore = insn;
16747 if (r11_restore == NULL || r15_restore == NULL)
16748 return;
16749 add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16752 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16754 static unsigned HOST_WIDE_INT
16755 s390_shift_truncation_mask (machine_mode mode)
16757 return mode == DImode || mode == SImode ? 63 : 0;
16760 /* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
16761 modifiers. */
16763 static bool
16764 f_constraint_p (const char *constraint)
16766 bool seen_f_p = false;
16767 bool seen_v_p = false;
16769 for (size_t i = 0, c_len = strlen (constraint); i < c_len;
16770 i += CONSTRAINT_LEN (constraint[i], constraint + i))
16772 if (constraint[i] == 'f')
16773 seen_f_p = true;
16774 if (constraint[i] == 'v')
16775 seen_v_p = true;
16778 /* Treat "fv" constraints as "v", because LRA will choose the widest register
16779 * class. */
16780 return seen_f_p && !seen_v_p;
16783 /* Return TRUE iff X is a hard floating-point (and not a vector) register. */
16785 static bool
16786 s390_hard_fp_reg_p (rtx x)
16788 if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
16789 return false;
16791 tree decl = REG_EXPR (x);
16792 if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
16793 return false;
16795 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16797 return name[0] == '*' && name[1] == 'f';
16800 /* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
16801 constraints when long doubles are stored in vector registers. */
16803 static rtx_insn *
16804 s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
16805 vec<machine_mode> &input_modes,
16806 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
16807 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
16809 if (!TARGET_VXE)
16810 /* Long doubles are stored in FPR pairs - nothing to do. */
16811 return NULL;
16813 rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
16815 unsigned ninputs = inputs.length ();
16816 unsigned noutputs = outputs.length ();
16817 for (unsigned i = 0; i < noutputs; i++)
16819 if (GET_MODE (outputs[i]) != TFmode)
16820 /* Not a long double - nothing to do. */
16821 continue;
16822 const char *constraint = constraints[i];
16823 bool allows_mem, allows_reg, is_inout;
16824 bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
16825 &allows_mem, &allows_reg, &is_inout);
16826 gcc_assert (ok);
16827 if (!f_constraint_p (constraint))
16828 /* Long double with a constraint other than "=f" - nothing to do. */
16829 continue;
16830 gcc_assert (allows_reg);
16831 gcc_assert (!is_inout);
16832 /* Copy output value from a FPR pair into a vector register. */
16833 rtx fprx2;
16834 push_to_sequence2 (after_md_seq, after_md_end);
16835 if (s390_hard_fp_reg_p (outputs[i]))
16837 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
16838 /* The first half is already at the correct location, copy only the
16839 * second one. Use the UNSPEC pattern instead of the SUBREG one,
16840 * since s390_can_change_mode_class() rejects
16841 * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */
16842 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
16843 rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
16844 emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
16846 else
16848 fprx2 = gen_reg_rtx (FPRX2mode);
16849 emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
16851 after_md_seq = get_insns ();
16852 after_md_end = get_last_insn ();
16853 end_sequence ();
16854 outputs[i] = fprx2;
16857 for (unsigned i = 0; i < ninputs; i++)
16859 if (GET_MODE (inputs[i]) != TFmode)
16860 /* Not a long double - nothing to do. */
16861 continue;
16862 const char *constraint = constraints[noutputs + i];
16863 bool allows_mem, allows_reg;
16864 bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
16865 constraints.address (), &allows_mem,
16866 &allows_reg);
16867 gcc_assert (ok);
16868 if (!f_constraint_p (constraint))
16869 /* Long double with a constraint other than "f" (or "=f" for inout
16870 operands) - nothing to do. */
16871 continue;
16872 gcc_assert (allows_reg);
16873 /* Copy input value from a vector register into a FPR pair. */
16874 rtx fprx2;
16875 if (s390_hard_fp_reg_p (inputs[i]))
16877 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
16878 /* Copy only the second half. */
16879 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
16880 rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
16881 emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
16883 else
16885 fprx2 = gen_reg_rtx (FPRX2mode);
16886 emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
16888 inputs[i] = fprx2;
16889 input_modes[i] = FPRX2mode;
16892 return after_md_seq;
16895 /* Initialize GCC target structure. */
16897 #undef TARGET_ASM_ALIGNED_HI_OP
16898 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16899 #undef TARGET_ASM_ALIGNED_DI_OP
16900 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16901 #undef TARGET_ASM_INTEGER
16902 #define TARGET_ASM_INTEGER s390_assemble_integer
16904 #undef TARGET_ASM_OPEN_PAREN
16905 #define TARGET_ASM_OPEN_PAREN ""
16907 #undef TARGET_ASM_CLOSE_PAREN
16908 #define TARGET_ASM_CLOSE_PAREN ""
16910 #undef TARGET_OPTION_OVERRIDE
16911 #define TARGET_OPTION_OVERRIDE s390_option_override
16913 #ifdef TARGET_THREAD_SSP_OFFSET
16914 #undef TARGET_STACK_PROTECT_GUARD
16915 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16916 #endif
16918 #undef TARGET_ENCODE_SECTION_INFO
16919 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16921 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16922 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16924 #ifdef HAVE_AS_TLS
16925 #undef TARGET_HAVE_TLS
16926 #define TARGET_HAVE_TLS true
16927 #endif
16928 #undef TARGET_CANNOT_FORCE_CONST_MEM
16929 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16931 #undef TARGET_DELEGITIMIZE_ADDRESS
16932 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16934 #undef TARGET_LEGITIMIZE_ADDRESS
16935 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16937 #undef TARGET_RETURN_IN_MEMORY
16938 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16940 #undef TARGET_INIT_BUILTINS
16941 #define TARGET_INIT_BUILTINS s390_init_builtins
16942 #undef TARGET_EXPAND_BUILTIN
16943 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16944 #undef TARGET_BUILTIN_DECL
16945 #define TARGET_BUILTIN_DECL s390_builtin_decl
16947 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16948 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16950 #undef TARGET_ASM_OUTPUT_MI_THUNK
16951 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16952 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16953 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16955 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
16956 /* This hook is only needed to maintain the historic behavior with glibc
16957 versions that typedef float_t to double. */
16958 #undef TARGET_C_EXCESS_PRECISION
16959 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16960 #endif
16962 #undef TARGET_SCHED_ADJUST_PRIORITY
16963 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16964 #undef TARGET_SCHED_ISSUE_RATE
16965 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16966 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16967 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16969 #undef TARGET_SCHED_VARIABLE_ISSUE
16970 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16971 #undef TARGET_SCHED_REORDER
16972 #define TARGET_SCHED_REORDER s390_sched_reorder
16973 #undef TARGET_SCHED_INIT
16974 #define TARGET_SCHED_INIT s390_sched_init
16976 #undef TARGET_CANNOT_COPY_INSN_P
16977 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16978 #undef TARGET_RTX_COSTS
16979 #define TARGET_RTX_COSTS s390_rtx_costs
16980 #undef TARGET_ADDRESS_COST
16981 #define TARGET_ADDRESS_COST s390_address_cost
16982 #undef TARGET_REGISTER_MOVE_COST
16983 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16984 #undef TARGET_MEMORY_MOVE_COST
16985 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16986 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16987 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16988 s390_builtin_vectorization_cost
16990 #undef TARGET_MACHINE_DEPENDENT_REORG
16991 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16993 #undef TARGET_VALID_POINTER_MODE
16994 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16996 #undef TARGET_BUILD_BUILTIN_VA_LIST
16997 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16998 #undef TARGET_EXPAND_BUILTIN_VA_START
16999 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
17000 #undef TARGET_ASAN_SHADOW_OFFSET
17001 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
17002 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
17003 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
17005 #undef TARGET_PROMOTE_FUNCTION_MODE
17006 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
17007 #undef TARGET_PASS_BY_REFERENCE
17008 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
17010 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
17011 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
17013 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
17014 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
17015 #undef TARGET_FUNCTION_ARG
17016 #define TARGET_FUNCTION_ARG s390_function_arg
17017 #undef TARGET_FUNCTION_ARG_ADVANCE
17018 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
17019 #undef TARGET_FUNCTION_ARG_PADDING
17020 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
17021 #undef TARGET_FUNCTION_VALUE
17022 #define TARGET_FUNCTION_VALUE s390_function_value
17023 #undef TARGET_LIBCALL_VALUE
17024 #define TARGET_LIBCALL_VALUE s390_libcall_value
17025 #undef TARGET_STRICT_ARGUMENT_NAMING
17026 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
17028 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
17029 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
17031 #undef TARGET_FIXED_CONDITION_CODE_REGS
17032 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
17034 #undef TARGET_CC_MODES_COMPATIBLE
17035 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
17037 #undef TARGET_INVALID_WITHIN_DOLOOP
17038 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
17040 #ifdef HAVE_AS_TLS
17041 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
17042 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
17043 #endif
17045 #undef TARGET_DWARF_FRAME_REG_MODE
17046 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
17048 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
17049 #undef TARGET_MANGLE_TYPE
17050 #define TARGET_MANGLE_TYPE s390_mangle_type
17051 #endif
17053 #undef TARGET_SCALAR_MODE_SUPPORTED_P
17054 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
17056 #undef TARGET_VECTOR_MODE_SUPPORTED_P
17057 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
17059 #undef TARGET_PREFERRED_RELOAD_CLASS
17060 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
17062 #undef TARGET_SECONDARY_RELOAD
17063 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
17064 #undef TARGET_SECONDARY_MEMORY_NEEDED
17065 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
17066 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
17067 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
17069 #undef TARGET_LIBGCC_CMP_RETURN_MODE
17070 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
17072 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
17073 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
17075 #undef TARGET_LEGITIMATE_ADDRESS_P
17076 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
17078 #undef TARGET_LEGITIMATE_CONSTANT_P
17079 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
17081 #undef TARGET_LRA_P
17082 #define TARGET_LRA_P s390_lra_p
17084 #undef TARGET_CAN_ELIMINATE
17085 #define TARGET_CAN_ELIMINATE s390_can_eliminate
17087 #undef TARGET_CONDITIONAL_REGISTER_USAGE
17088 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
17090 #undef TARGET_LOOP_UNROLL_ADJUST
17091 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
17093 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
17094 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
17095 #undef TARGET_TRAMPOLINE_INIT
17096 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
17098 /* PR 79421 */
17099 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
17100 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
17102 #undef TARGET_UNWIND_WORD_MODE
17103 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
17105 #undef TARGET_CANONICALIZE_COMPARISON
17106 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
17108 #undef TARGET_HARD_REGNO_SCRATCH_OK
17109 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
17111 #undef TARGET_HARD_REGNO_NREGS
17112 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
17113 #undef TARGET_HARD_REGNO_MODE_OK
17114 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
17115 #undef TARGET_MODES_TIEABLE_P
17116 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
17118 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
17119 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
17120 s390_hard_regno_call_part_clobbered
17122 #undef TARGET_ATTRIBUTE_TABLE
17123 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
17125 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
17126 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
17128 #undef TARGET_SET_UP_BY_PROLOGUE
17129 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
17131 #undef TARGET_EXTRA_LIVE_ON_ENTRY
17132 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
17134 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
17135 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
17136 s390_use_by_pieces_infrastructure_p
17138 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
17139 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
17141 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
17142 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
17144 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
17145 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
17147 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
17148 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
17150 #undef TARGET_VECTOR_ALIGNMENT
17151 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
17153 #undef TARGET_INVALID_BINARY_OP
17154 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
17156 #ifdef HAVE_AS_MACHINE_MACHINEMODE
17157 #undef TARGET_ASM_FILE_START
17158 #define TARGET_ASM_FILE_START s390_asm_file_start
17159 #endif
17161 #undef TARGET_ASM_FILE_END
17162 #define TARGET_ASM_FILE_END s390_asm_file_end
17164 #undef TARGET_SET_CURRENT_FUNCTION
17165 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
17167 #if S390_USE_TARGET_ATTRIBUTE
17168 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
17169 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
17171 #undef TARGET_CAN_INLINE_P
17172 #define TARGET_CAN_INLINE_P s390_can_inline_p
17173 #endif
17175 #undef TARGET_OPTION_RESTORE
17176 #define TARGET_OPTION_RESTORE s390_function_specific_restore
17178 #undef TARGET_CAN_CHANGE_MODE_CLASS
17179 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
17181 #undef TARGET_CONSTANT_ALIGNMENT
17182 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
17184 #undef TARGET_ASM_CODE_END
17185 #define TARGET_ASM_CODE_END s390_code_end
17187 #undef TARGET_CASE_VALUES_THRESHOLD
17188 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
17190 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
17191 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
17192 s390_sched_dependencies_evaluation
17194 #undef TARGET_SHIFT_TRUNCATION_MASK
17195 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
17197 /* Use only short displacement, since long displacement is not available for
17198 the floating point instructions. */
17199 #undef TARGET_MAX_ANCHOR_OFFSET
17200 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
17202 #undef TARGET_MD_ASM_ADJUST
17203 #define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
17205 struct gcc_target targetm = TARGET_INITIALIZER;
17207 #include "gt-s390.h"