gcc/config/s390/s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2020 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
89 /* This file should be included last. */
90 #include "target-def.h"
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl;
97 /* Define the specific costs for a given cpu. */
99 struct processor_costs
101 /* multiplication */
102 const int m; /* cost of an M instruction. */
103 const int mghi; /* cost of an MGHI instruction. */
104 const int mh; /* cost of an MH instruction. */
105 const int mhi; /* cost of an MHI instruction. */
106 const int ml; /* cost of an ML instruction. */
107 const int mr; /* cost of an MR instruction. */
108 const int ms; /* cost of an MS instruction. */
109 const int msg; /* cost of an MSG instruction. */
110 const int msgf; /* cost of an MSGF instruction. */
111 const int msgfr; /* cost of an MSGFR instruction. */
112 const int msgr; /* cost of an MSGR instruction. */
113 const int msr; /* cost of an MSR instruction. */
114 const int mult_df; /* cost of multiplication in DFmode. */
115 const int mxbr;
116 /* square root */
117 const int sqxbr; /* cost of square root in TFmode. */
118 const int sqdbr; /* cost of square root in DFmode. */
119 const int sqebr; /* cost of square root in SFmode. */
120 /* multiply and add */
121 const int madbr; /* cost of multiply and add in DFmode. */
122 const int maebr; /* cost of multiply and add in SFmode. */
123 /* division */
124 const int dxbr;
125 const int ddbr;
126 const int debr;
127 const int dlgr;
128 const int dlr;
129 const int dr;
130 const int dsgfr;
131 const int dsgr;
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
136 static const
137 struct processor_costs z900_cost =
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
168 static const
169 struct processor_costs z990_cost =
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
200 static const
201 struct processor_costs z9_109_cost =
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
232 static const
233 struct processor_costs z10_cost =
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1) , /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
264 static const
265 struct processor_costs z196_cost =
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1) , /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
296 static const
297 struct processor_costs zEC12_cost =
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1) , /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
328 const struct s390_processor processor_table[] =
330 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
340 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
343 extern int reload_completed;
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
346 static rtx_insn *last_scheduled_insn;
347 #define NUM_SIDES 2
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
352 /* Estimate of the number of cycles a long-running insn occupies an
353 execution unit. */
354 static int fxd_longrunning[NUM_SIDES];
355 static int fpd_longrunning[NUM_SIDES];
357 /* The maximum score added for an instruction whose unit hasn't been
358 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
359 give instruction mix scheduling more priority over instruction
360 grouping. */
361 #define MAX_SCHED_MIX_SCORE 2
363 /* The maximum distance up to which individual scores will be
364 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
365 Increase this with the OOO windows size of the machine. */
366 #define MAX_SCHED_MIX_DISTANCE 70
368 /* Structure used to hold the components of a S/390 memory
369 address. A legitimate address on S/390 is of the general
370 form
371 base + index + displacement
372 where any of the components is optional.
374 base and index are registers of the class ADDR_REGS,
375 displacement is an unsigned 12-bit immediate constant. */
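/* Example added for illustration (not part of the original source): with
   base = %r5, index = %r4 and displacement = 8, such an address is written
   as 8(%r4,%r5) in the assembler syntax and looks roughly like

     (mem:DI (plus:DI (plus:DI (reg:DI %r5) (reg:DI %r4)) (const_int 8)))

   in the RTL handled by this back end.  */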
377 /* The max number of insns of backend generated memset/memcpy/memcmp
378 loops. This value is used in the unroll adjust hook to detect such
379 loops. Current max is 9 coming from the memcmp loop. */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
382 struct s390_address
384 rtx base;
385 rtx indx;
386 rtx disp;
387 bool pointer;
388 bool literal_pool;
391 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
396 ? cfun_frame_layout.fpr_bitmap & 0x0f \
397 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
401 (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
403 (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405 cfun->machine->frame_layout.gpr_save_slots[REGNO]
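/* Example use of the accessors above: s390_return_addr_from_memory below
   checks cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK.  As an
   illustrative (hypothetical) sketch for the FPR save bitmap:

     cfun_set_fpr_save (FPR0_REGNUM + 4);
     gcc_assert (cfun_fpr_save_p (FPR0_REGNUM + 4));
*/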
407 /* Number of GPRs and FPRs used for argument passing. */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
412 /* A couple of shortcuts. */
413 #define CONST_OK_FOR_J(x) \
414 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
424 #define REGNO_PAIR_OK(REGNO, MODE) \
425 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
427 /* That's the read ahead of the dynamic branch prediction unit in
428 bytes on a z10 (or higher) CPU. */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431 /* Masks per jump target register indicating which thunks need to be
432 generated. */
433 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
434 static GTY(()) int indirect_branch_z10thunk_mask = 0;
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
438 enum s390_indirect_branch_option
440 s390_opt_indirect_branch_jump = 0,
441 s390_opt_indirect_branch_call,
442 s390_opt_function_return_reg,
443 s390_opt_function_return_mem
446 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
447 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
448 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
450 { ".s390_indirect_jump", ".s390_indirect_call",
451 ".s390_return_reg", ".s390_return_mem" };
453 bool
454 s390_return_addr_from_memory ()
456 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
459 /* Indicate which ABI has been used for passing vector args.
460 0 - no vector type arguments have been passed where the ABI is relevant
461 1 - the old ABI has been used
462 2 - a vector type argument has been passed either in a vector register
463 or on the stack by value */
464 static int s390_vector_abi = 0;
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467 switch. The vector ABI affects only vector data types. There are
468 two aspects of the vector ABI relevant here:
470 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471 ABI and natural alignment with the old.
473 2. vectors <= 16 bytes are passed in VRs or by value on the stack
474 with the new ABI but by reference on the stack with the old.
476 If ARG_P is true TYPE is used for a function argument or return
477 value. The ABI marker then is set for all vector data types. If
478 ARG_P is false only type 1 vectors are being checked. */
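/* Illustrative examples, added for clarity (not part of the original
   source).  A 32-byte vector used as a struct member falls under case 1,
   because its alignment differs between the two ABIs:

     typedef int v8si __attribute__ ((vector_size (32)));
     struct s { char c; v8si v; };

   Passing a 16-byte vector such as

     typedef int v4si __attribute__ ((vector_size (16)));

   as a function argument or return value falls under case 2.  */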
480 static void
481 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
483 static hash_set<const_tree> visited_types_hash;
485 if (s390_vector_abi)
486 return;
488 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
489 return;
491 if (visited_types_hash.contains (type))
492 return;
494 visited_types_hash.add (type);
496 if (VECTOR_TYPE_P (type))
498 int type_size = int_size_in_bytes (type);
500 /* Outside of arguments only the alignment changes, and this
501 only happens for vector types >= 16 bytes. */
502 if (!arg_p && type_size < 16)
503 return;
505 /* In arguments, vector types > 16 bytes are passed as before (GCC
506 never enforced the bigger alignment for arguments which was
507 required by the old vector ABI). However, it might still be
508 ABI relevant due to the changed alignment if it is a struct
509 member. */
510 if (arg_p && type_size > 16 && !in_struct_p)
511 return;
513 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
515 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
517 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
518 natural alignment there will never be ABI dependent padding
519 in an array type. That's why we do not set in_struct_p to
520 true here. */
521 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
523 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
525 tree arg_chain;
527 /* Check the return type. */
528 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
530 for (arg_chain = TYPE_ARG_TYPES (type);
531 arg_chain;
532 arg_chain = TREE_CHAIN (arg_chain))
533 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
535 else if (RECORD_OR_UNION_TYPE_P (type))
537 tree field;
539 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
541 if (TREE_CODE (field) != FIELD_DECL)
542 continue;
544 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
550 /* System z builtins. */
552 #include "s390-builtins.h"
554 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
556 #undef B_DEF
557 #undef OB_DEF
558 #undef OB_DEF_VAR
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
560 #define OB_DEF(...)
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
566 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
568 #undef B_DEF
569 #undef OB_DEF
570 #undef OB_DEF_VAR
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
572 #define OB_DEF(...)
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
578 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
590 const unsigned int
591 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
593 #undef B_DEF
594 #undef OB_DEF
595 #undef OB_DEF_VAR
596 #define B_DEF(...)
597 #define OB_DEF(...)
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
603 const unsigned int
604 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
606 #undef B_DEF
607 #undef OB_DEF
608 #undef OB_DEF_VAR
609 #define B_DEF(...)
610 #define OB_DEF(...)
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
616 tree s390_builtin_types[BT_MAX];
617 tree s390_builtin_fn_types[BT_FN_MAX];
618 tree s390_builtin_decls[S390_BUILTIN_MAX +
619 S390_OVERLOADED_BUILTIN_MAX +
620 S390_OVERLOADED_BUILTIN_VAR_MAX];
622 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
623 #undef B_DEF
624 #undef OB_DEF
625 #undef OB_DEF_VAR
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
627 #define OB_DEF(...)
628 #define OB_DEF_VAR(...)
630 #include "s390-builtins.def"
631 CODE_FOR_nothing
634 static void
635 s390_init_builtins (void)
637 /* These definitions are being used in s390-builtins.def. */
638 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
639 NULL, NULL);
640 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
641 tree c_uint64_type_node;
643 /* The uint64_type_node from tree.c is not compatible with the C99
644 uint64_t data type. What we want is c_uint64_type_node from
645 c-common.c. But since backend code is not supposed to interface
646 with the frontend we recreate it here. */
647 if (TARGET_64BIT)
648 c_uint64_type_node = long_unsigned_type_node;
649 else
650 c_uint64_type_node = long_long_unsigned_type_node;
652 #undef DEF_TYPE
653 #define DEF_TYPE(INDEX, NODE, CONST_P) \
654 if (s390_builtin_types[INDEX] == NULL) \
655 s390_builtin_types[INDEX] = (!CONST_P) ? \
656 (NODE) : build_type_variant ((NODE), 1, 0);
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
660 if (s390_builtin_types[INDEX] == NULL) \
661 s390_builtin_types[INDEX] = \
662 build_pointer_type (s390_builtin_types[INDEX_BASE]);
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
666 if (s390_builtin_types[INDEX] == NULL) \
667 s390_builtin_types[INDEX] = \
668 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
672 if (s390_builtin_types[INDEX] == NULL) \
673 s390_builtin_types[INDEX] = \
674 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
678 if (s390_builtin_types[INDEX] == NULL) \
679 s390_builtin_types[INDEX] = \
680 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
682 #undef DEF_FN_TYPE
683 #define DEF_FN_TYPE(INDEX, args...) \
684 if (s390_builtin_fn_types[INDEX] == NULL) \
685 s390_builtin_fn_types[INDEX] = \
686 build_function_type_list (args, NULL_TREE);
687 #undef DEF_OV_TYPE
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
691 #undef B_DEF
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
693 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
694 s390_builtin_decls[S390_BUILTIN_##NAME] = \
695 add_builtin_function ("__builtin_" #NAME, \
696 s390_builtin_fn_types[FNTYPE], \
697 S390_BUILTIN_##NAME, \
698 BUILT_IN_MD, \
699 NULL, \
700 ATTRS);
701 #undef OB_DEF
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
703 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
704 == NULL) \
705 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
709 BUILT_IN_MD, \
710 NULL, \
712 #undef OB_DEF_VAR
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719 builtin DECL. The operand flags from s390-builtins.def have to be
720 passed as OP_FLAGS. */
721 bool
722 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
724 if (O_UIMM_P (op_flags))
726 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727 int bitwidth = bitwidths[op_flags - O_U1];
729 if (!tree_fits_uhwi_p (arg)
730 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
732 error ("constant argument %d for builtin %qF is out of range "
733 "(0..%wu)", argnum, decl,
734 (HOST_WIDE_INT_1U << bitwidth) - 1);
735 return false;
739 if (O_SIMM_P (op_flags))
741 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742 int bitwidth = bitwidths[op_flags - O_S2];
744 if (!tree_fits_shwi_p (arg)
745 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
746 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
748 error ("constant argument %d for builtin %qF is out of range "
749 "(%wd..%wd)", argnum, decl,
750 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
751 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
752 return false;
755 return true;
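/* Illustration (added, assuming the usual O_Uxx/O_Sxx flag names from
   s390-builtins.h): for an operand flagged O_U4 the bitwidth looked up
   above is 4, so accepted constants are 0 .. 15; for O_S8 the accepted
   range is -128 .. 127.  An out-of-range constant triggers the error
   above and the caller abandons the expansion of the builtin.  */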
758 /* Expand an expression EXP that calls a built-in function,
759 with result going to TARGET if that's convenient
760 (and in mode MODE if that's convenient).
761 SUBTARGET may be used as the target for computing one of EXP's operands.
762 IGNORE is nonzero if the value is to be ignored. */
764 static rtx
765 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
766 machine_mode mode ATTRIBUTE_UNUSED,
767 int ignore ATTRIBUTE_UNUSED)
769 #define MAX_ARGS 6
771 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
772 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
773 enum insn_code icode;
774 rtx op[MAX_ARGS], pat;
775 int arity;
776 bool nonvoid;
777 tree arg;
778 call_expr_arg_iterator iter;
779 unsigned int all_op_flags = opflags_for_builtin (fcode);
780 machine_mode last_vec_mode = VOIDmode;
782 if (TARGET_DEBUG_ARG)
784 fprintf (stderr,
785 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
787 bflags_for_builtin (fcode));
790 if (S390_USE_TARGET_ATTRIBUTE)
792 unsigned int bflags;
794 bflags = bflags_for_builtin (fcode);
795 if ((bflags & B_HTM) && !TARGET_HTM)
797 error ("builtin %qF is not supported without %<-mhtm%> "
798 "(default with %<-march=zEC12%> and higher).", fndecl);
799 return const0_rtx;
801 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
803 error ("builtin %qF requires %<-mvx%> "
804 "(default with %<-march=z13%> and higher).", fndecl);
805 return const0_rtx;
808 if ((bflags & B_VXE) && !TARGET_VXE)
810 error ("Builtin %qF requires z14 or higher.", fndecl);
811 return const0_rtx;
814 if ((bflags & B_VXE2) && !TARGET_VXE2)
816 error ("Builtin %qF requires z15 or higher.", fndecl);
817 return const0_rtx;
820 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821 && fcode < S390_ALL_BUILTIN_MAX)
823 gcc_unreachable ();
825 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
827 icode = code_for_builtin[fcode];
828 /* Set a flag in the machine specific cfun part in order to support
829 saving/restoring of FPRs. */
830 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
831 cfun->machine->tbegin_p = true;
833 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
835 error ("unresolved overloaded builtin");
836 return const0_rtx;
838 else
839 internal_error ("bad builtin fcode");
841 if (icode == 0)
842 internal_error ("bad builtin icode");
844 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
846 if (nonvoid)
848 machine_mode tmode = insn_data[icode].operand[0].mode;
849 if (!target
850 || GET_MODE (target) != tmode
851 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
852 target = gen_reg_rtx (tmode);
854 /* There are builtins (e.g. vec_promote) with no vector
855 arguments but an element selector. So we have to also look
856 at the vector return type when emitting the modulo
857 operation. */
858 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
859 last_vec_mode = insn_data[icode].operand[0].mode;
862 arity = 0;
863 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
865 rtx tmp_rtx;
866 const struct insn_operand_data *insn_op;
867 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
869 all_op_flags = all_op_flags >> O_SHIFT;
871 if (arg == error_mark_node)
872 return NULL_RTX;
873 if (arity >= MAX_ARGS)
874 return NULL_RTX;
876 if (O_IMM_P (op_flags)
877 && TREE_CODE (arg) != INTEGER_CST)
879 error ("constant value required for builtin %qF argument %d",
880 fndecl, arity + 1);
881 return const0_rtx;
884 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
885 return const0_rtx;
887 insn_op = &insn_data[icode].operand[arity + nonvoid];
888 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
890 /* expand_expr truncates constants to the target mode only if it
891 is "convenient". However, our checks below rely on this
892 being done. */
893 if (CONST_INT_P (op[arity])
894 && SCALAR_INT_MODE_P (insn_op->mode)
895 && GET_MODE (op[arity]) != insn_op->mode)
896 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
897 insn_op->mode));
899 /* Wrap the expanded RTX for pointer types into a MEM expr with
900 the proper mode. This allows us to use e.g. (match_operand
901 "memory_operand"..) in the insn patterns instead of (mem
902 (match_operand "address_operand")). This is helpful for
903 patterns not just accepting MEMs. */
904 if (POINTER_TYPE_P (TREE_TYPE (arg))
905 && insn_op->predicate != address_operand)
906 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
908 /* Expand the modulo operation required on element selectors. */
909 if (op_flags == O_ELEM)
911 gcc_assert (last_vec_mode != VOIDmode);
912 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
913 op[arity],
914 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
915 NULL_RTX, 1, OPTAB_DIRECT);
918 /* Record the vector mode used for an element selector. This assumes:
919 1. There is no builtin with two different vector modes and an element selector
920 2. The element selector comes after the vector type it is referring to.
921 This is currently true for all the builtins but FIXME we
922 should better check for that. */
923 if (VECTOR_MODE_P (insn_op->mode))
924 last_vec_mode = insn_op->mode;
926 if (insn_op->predicate (op[arity], insn_op->mode))
928 arity++;
929 continue;
932 /* A memory operand is rejected by the memory_operand predicate.
933 Try making the address legal by copying it into a register. */
934 if (MEM_P (op[arity])
935 && insn_op->predicate == memory_operand
936 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
937 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
939 op[arity] = replace_equiv_address (op[arity],
940 copy_to_mode_reg (Pmode,
941 XEXP (op[arity], 0)));
943 /* Some of the builtins require different modes/types than the
944 pattern in order to implement a specific API. Instead of
945 adding many expanders which do the mode change we do it here.
946 E.g. s390_vec_add_u128, which is required to have vector unsigned
947 char arguments, is mapped to addti3. */
948 else if (insn_op->mode != VOIDmode
949 && GET_MODE (op[arity]) != VOIDmode
950 && GET_MODE (op[arity]) != insn_op->mode
951 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
952 GET_MODE (op[arity]), 0))
953 != NULL_RTX))
955 op[arity] = tmp_rtx;
958 /* The predicate rejects the operand although the mode is fine.
959 Copy the operand to a register. */
960 if (!insn_op->predicate (op[arity], insn_op->mode)
961 && (GET_MODE (op[arity]) == insn_op->mode
962 || GET_MODE (op[arity]) == VOIDmode
963 || (insn_op->predicate == address_operand
964 && GET_MODE (op[arity]) == Pmode)))
966 /* An address_operand usually has VOIDmode in the expander
967 so we cannot use this. */
968 machine_mode target_mode =
969 (insn_op->predicate == address_operand
970 ? (machine_mode) Pmode : insn_op->mode);
971 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
974 if (!insn_op->predicate (op[arity], insn_op->mode))
976 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
977 return const0_rtx;
979 arity++;
982 switch (arity)
984 case 0:
985 pat = GEN_FCN (icode) (target);
986 break;
987 case 1:
988 if (nonvoid)
989 pat = GEN_FCN (icode) (target, op[0]);
990 else
991 pat = GEN_FCN (icode) (op[0]);
992 break;
993 case 2:
994 if (nonvoid)
995 pat = GEN_FCN (icode) (target, op[0], op[1]);
996 else
997 pat = GEN_FCN (icode) (op[0], op[1]);
998 break;
999 case 3:
1000 if (nonvoid)
1001 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1002 else
1003 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1004 break;
1005 case 4:
1006 if (nonvoid)
1007 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1008 else
1009 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1010 break;
1011 case 5:
1012 if (nonvoid)
1013 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1014 else
1015 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1016 break;
1017 case 6:
1018 if (nonvoid)
1019 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1020 else
1021 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1022 break;
1023 default:
1024 gcc_unreachable ();
1026 if (!pat)
1027 return NULL_RTX;
1028 emit_insn (pat);
1030 if (nonvoid)
1031 return target;
1032 else
1033 return const0_rtx;
1037 static const int s390_hotpatch_hw_max = 1000000;
1038 static int s390_hotpatch_hw_before_label = 0;
1039 static int s390_hotpatch_hw_after_label = 0;
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042 an argument, the argument is valid. */
1044 static tree
1045 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1046 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1048 tree expr;
1049 tree expr2;
1050 int err;
1052 if (TREE_CODE (*node) != FUNCTION_DECL)
1054 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1055 name);
1056 *no_add_attrs = true;
1058 if (args != NULL && TREE_CHAIN (args) != NULL)
1060 expr = TREE_VALUE (args);
1061 expr2 = TREE_VALUE (TREE_CHAIN (args));
1063 if (args == NULL || TREE_CHAIN (args) == NULL)
1064 err = 1;
1065 else if (TREE_CODE (expr) != INTEGER_CST
1066 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1067 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1068 err = 1;
1069 else if (TREE_CODE (expr2) != INTEGER_CST
1070 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1071 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1072 err = 1;
1073 else
1074 err = 0;
1075 if (err)
1077 error ("requested %qE attribute is not a comma separated pair of"
1078 " non-negative integer constants or too large (max. %d)", name,
1079 s390_hotpatch_hw_max);
1080 *no_add_attrs = true;
1083 return NULL_TREE;
1086 /* Expand the s390_vector_bool type attribute. */
1088 static tree
1089 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1090 tree args ATTRIBUTE_UNUSED,
1091 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1093 tree type = *node, result = NULL_TREE;
1094 machine_mode mode;
1096 while (POINTER_TYPE_P (type)
1097 || TREE_CODE (type) == FUNCTION_TYPE
1098 || TREE_CODE (type) == METHOD_TYPE
1099 || TREE_CODE (type) == ARRAY_TYPE)
1100 type = TREE_TYPE (type);
1102 mode = TYPE_MODE (type);
1103 switch (mode)
1105 case E_DImode: case E_V2DImode:
1106 result = s390_builtin_types[BT_BV2DI];
1107 break;
1108 case E_SImode: case E_V4SImode:
1109 result = s390_builtin_types[BT_BV4SI];
1110 break;
1111 case E_HImode: case E_V8HImode:
1112 result = s390_builtin_types[BT_BV8HI];
1113 break;
1114 case E_QImode: case E_V16QImode:
1115 result = s390_builtin_types[BT_BV16QI];
1116 break;
1117 default:
1118 break;
1121 *no_add_attrs = true; /* No need to hang on to the attribute. */
1123 if (result)
1124 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1126 return NULL_TREE;
1129 /* Check syntax of function decl attributes having a string type value. */
1131 static tree
1132 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 tree args ATTRIBUTE_UNUSED,
1134 int flags ATTRIBUTE_UNUSED,
1135 bool *no_add_attrs)
1137 tree cst;
1139 if (TREE_CODE (*node) != FUNCTION_DECL)
1141 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1142 name);
1143 *no_add_attrs = true;
1146 cst = TREE_VALUE (args);
1148 if (TREE_CODE (cst) != STRING_CST)
1150 warning (OPT_Wattributes,
1151 "%qE attribute requires a string constant argument",
1152 name);
1153 *no_add_attrs = true;
1156 if (is_attribute_p ("indirect_branch", name)
1157 || is_attribute_p ("indirect_branch_call", name)
1158 || is_attribute_p ("function_return", name)
1159 || is_attribute_p ("function_return_reg", name)
1160 || is_attribute_p ("function_return_mem", name))
1162 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1163 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1164 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1166 warning (OPT_Wattributes,
1167 "argument to %qE attribute is not "
1168 "(keep|thunk|thunk-extern)", name);
1169 *no_add_attrs = true;
1173 if (is_attribute_p ("indirect_branch_jump", name)
1174 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1175 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1176 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1177 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1179 warning (OPT_Wattributes,
1180 "argument to %qE attribute is not "
1181 "(keep|thunk|thunk-inline|thunk-extern)", name);
1182 *no_add_attrs = true;
1185 return NULL_TREE;
1188 static const struct attribute_spec s390_attribute_table[] = {
1189 { "hotpatch", 2, 2, true, false, false, false,
1190 s390_handle_hotpatch_attribute, NULL },
1191 { "s390_vector_bool", 0, 0, false, true, false, true,
1192 s390_handle_vectorbool_attribute, NULL },
1193 { "indirect_branch", 1, 1, true, false, false, false,
1194 s390_handle_string_attribute, NULL },
1195 { "indirect_branch_jump", 1, 1, true, false, false, false,
1196 s390_handle_string_attribute, NULL },
1197 { "indirect_branch_call", 1, 1, true, false, false, false,
1198 s390_handle_string_attribute, NULL },
1199 { "function_return", 1, 1, true, false, false, false,
1200 s390_handle_string_attribute, NULL },
1201 { "function_return_reg", 1, 1, true, false, false, false,
1202 s390_handle_string_attribute, NULL },
1203 { "function_return_mem", 1, 1, true, false, false, false,
1204 s390_handle_string_attribute, NULL },
1206 /* End element. */
1207 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1210 /* Return the alignment for LABEL. We default to the -falign-labels
1211 value except for the literal pool base label. */
1213 s390_label_align (rtx_insn *label)
1215 rtx_insn *prev_insn = prev_active_insn (label);
1216 rtx set, src;
1218 if (prev_insn == NULL_RTX)
1219 goto old;
1221 set = single_set (prev_insn);
1223 if (set == NULL_RTX)
1224 goto old;
1226 src = SET_SRC (set);
1228 /* Don't align literal pool base labels. */
1229 if (GET_CODE (src) == UNSPEC
1230 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1231 return 0;
1233 old:
1234 return align_labels.levels[0].log;
1237 static GTY(()) rtx got_symbol;
1239 /* Return the GOT table symbol. The symbol will be created when the
1240 function is invoked for the first time. */
1242 static rtx
1243 s390_got_symbol (void)
1245 if (!got_symbol)
1247 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1248 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1251 return got_symbol;
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1257 return TARGET_64BIT ? DImode : SImode;
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1263 return TARGET_64BIT ? DImode : SImode;
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1269 return TARGET_64BIT ? DImode : SImode;
1272 /* Return true if the back end supports mode MODE. */
1273 static bool
1274 s390_scalar_mode_supported_p (scalar_mode mode)
1276 /* In contrast to the default implementation, reject TImode constants on
1277 31-bit TARGET_ZARCH for ABI compliance. */
1278 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1279 return false;
1281 if (DECIMAL_FLOAT_MODE_P (mode))
1282 return default_decimal_float_supported_p ();
1284 return default_scalar_mode_supported_p (mode);
1287 /* Return true if the back end supports vector mode MODE. */
1288 static bool
1289 s390_vector_mode_supported_p (machine_mode mode)
1291 machine_mode inner;
1293 if (!VECTOR_MODE_P (mode)
1294 || !TARGET_VX
1295 || GET_MODE_SIZE (mode) > 16)
1296 return false;
1298 inner = GET_MODE_INNER (mode);
1300 switch (inner)
1302 case E_QImode:
1303 case E_HImode:
1304 case E_SImode:
1305 case E_DImode:
1306 case E_TImode:
1307 case E_SFmode:
1308 case E_DFmode:
1309 case E_TFmode:
1310 return true;
1311 default:
1312 return false;
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1318 void
1319 s390_set_has_landing_pad_p (bool value)
1321 cfun->machine->has_landing_pad_p = value;
1324 /* If two condition code modes are compatible, return a condition code
1325 mode which is compatible with both. Otherwise, return
1326 VOIDmode. */
1328 static machine_mode
1329 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1331 if (m1 == m2)
1332 return m1;
1334 switch (m1)
1336 case E_CCZmode:
1337 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1338 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1339 return m2;
1340 return VOIDmode;
1342 case E_CCSmode:
1343 case E_CCUmode:
1344 case E_CCTmode:
1345 case E_CCSRmode:
1346 case E_CCURmode:
1347 case E_CCZ1mode:
1348 if (m2 == CCZmode)
1349 return m1;
1351 return VOIDmode;
1353 default:
1354 return VOIDmode;
1356 return VOIDmode;
1359 /* Return true if SET either doesn't set the CC register, or else
1360 the source and destination have matching CC modes and that
1361 CC mode is at least as constrained as REQ_MODE. */
1363 static bool
1364 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1366 machine_mode set_mode;
1368 gcc_assert (GET_CODE (set) == SET);
1370 /* These modes are supposed to be used only in CC consumer
1371 patterns. */
1372 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1373 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1375 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1376 return 1;
1378 set_mode = GET_MODE (SET_DEST (set));
1379 switch (set_mode)
1381 case E_CCZ1mode:
1382 case E_CCSmode:
1383 case E_CCSRmode:
1384 case E_CCSFPSmode:
1385 case E_CCUmode:
1386 case E_CCURmode:
1387 case E_CCOmode:
1388 case E_CCLmode:
1389 case E_CCL1mode:
1390 case E_CCL2mode:
1391 case E_CCL3mode:
1392 case E_CCT1mode:
1393 case E_CCT2mode:
1394 case E_CCT3mode:
1395 case E_CCVEQmode:
1396 case E_CCVIHmode:
1397 case E_CCVIHUmode:
1398 case E_CCVFHmode:
1399 case E_CCVFHEmode:
1400 if (req_mode != set_mode)
1401 return 0;
1402 break;
1404 case E_CCZmode:
1405 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1406 && req_mode != CCSRmode && req_mode != CCURmode
1407 && req_mode != CCZ1mode)
1408 return 0;
1409 break;
1411 case E_CCAPmode:
1412 case E_CCANmode:
1413 if (req_mode != CCAmode)
1414 return 0;
1415 break;
1417 default:
1418 gcc_unreachable ();
1421 return (GET_MODE (SET_SRC (set)) == set_mode);
1424 /* Return true if every SET in INSN that sets the CC register
1425 has source and destination with matching CC modes and that
1426 CC mode is at least as constrained as REQ_MODE.
1427 If REQ_MODE is VOIDmode, always return false. */
1429 bool
1430 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1432 int i;
1434 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1435 if (req_mode == VOIDmode)
1436 return false;
1438 if (GET_CODE (PATTERN (insn)) == SET)
1439 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1441 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1442 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1444 rtx set = XVECEXP (PATTERN (insn), 0, i);
1445 if (GET_CODE (set) == SET)
1446 if (!s390_match_ccmode_set (set, req_mode))
1447 return false;
1450 return true;
1453 /* If a test-under-mask instruction can be used to implement
1454 (compare (and ... OP1) OP2), return the CC mode required
1455 to do that. Otherwise, return VOIDmode.
1456 MIXED is true if the instruction can distinguish between
1457 CC1 and CC2 for mixed selected bits (TMxx), it is false
1458 if the instruction cannot (TM). */
1460 machine_mode
1461 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1463 int bit0, bit1;
1465 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1466 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1467 return VOIDmode;
1469 /* Selected bits all zero: CC0.
1470 e.g.: int a; if ((a & (16 + 128)) == 0) */
1471 if (INTVAL (op2) == 0)
1472 return CCTmode;
1474 /* Selected bits all one: CC3.
1475 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476 if (INTVAL (op2) == INTVAL (op1))
1477 return CCT3mode;
1479 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1480 int a;
1481 if ((a & (16 + 128)) == 16) -> CCT1
1482 if ((a & (16 + 128)) == 128) -> CCT2 */
1483 if (mixed)
1485 bit1 = exact_log2 (INTVAL (op2));
1486 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1487 if (bit0 != -1 && bit1 != -1)
1488 return bit0 > bit1 ? CCT1mode : CCT2mode;
1491 return VOIDmode;
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495 OP0 and OP1 of a COMPARE, return the mode to be used for the
1496 comparison. */
1498 machine_mode
1499 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1501 switch (code)
1503 case EQ:
1504 case NE:
1505 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1506 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1507 return CCAPmode;
1508 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1509 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1510 return CCAPmode;
1511 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1512 || GET_CODE (op1) == NEG)
1513 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1514 return CCLmode;
1516 if (GET_CODE (op0) == AND)
1518 /* Check whether we can potentially do it via TM. */
1519 machine_mode ccmode;
1520 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1521 if (ccmode != VOIDmode)
1523 /* Relax CCTmode to CCZmode to allow fall-back to AND
1524 if that turns out to be beneficial. */
1525 return ccmode == CCTmode ? CCZmode : ccmode;
1529 if (register_operand (op0, HImode)
1530 && GET_CODE (op1) == CONST_INT
1531 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1532 return CCT3mode;
1533 if (register_operand (op0, QImode)
1534 && GET_CODE (op1) == CONST_INT
1535 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1536 return CCT3mode;
1538 return CCZmode;
1540 case LE:
1541 case LT:
1542 case GE:
1543 case GT:
1544 /* The only overflow condition of NEG and ABS happens when
1545 INT_MIN is used as parameter, which stays negative. So
1546 we have an overflow from a positive value to a negative.
1547 Using CCAP mode the resulting cc can be used for comparisons. */
1548 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 return CCAPmode;
1552 /* If constants are involved in an add instruction it is possible to use
1553 the resulting cc for comparisons with zero. Knowing the sign of the
1554 constant the overflow behavior gets predictable. e.g.:
1555 int a, b; if ((b = a + c) > 0)
1556 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1557 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560 /* Avoid INT32_MIN on 32 bit. */
1561 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1563 if (INTVAL (XEXP((op0), 1)) < 0)
1564 return CCANmode;
1565 else
1566 return CCAPmode;
1569 /* Fall through. */
1570 case LTGT:
1571 if (HONOR_NANS (op0) || HONOR_NANS (op1))
1572 return CCSFPSmode;
1574 /* Fall through. */
1575 case UNORDERED:
1576 case ORDERED:
1577 case UNEQ:
1578 case UNLE:
1579 case UNLT:
1580 case UNGE:
1581 case UNGT:
1582 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1583 && GET_CODE (op1) != CONST_INT)
1584 return CCSRmode;
1585 return CCSmode;
1587 case LTU:
1588 case GEU:
1589 if (GET_CODE (op0) == PLUS
1590 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1591 return CCL1mode;
1593 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1594 && GET_CODE (op1) != CONST_INT)
1595 return CCURmode;
1596 return CCUmode;
1598 case LEU:
1599 case GTU:
1600 if (GET_CODE (op0) == MINUS
1601 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1602 return CCL2mode;
1604 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1605 && GET_CODE (op1) != CONST_INT)
1606 return CCURmode;
1607 return CCUmode;
1609 default:
1610 gcc_unreachable ();
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615 that we can implement more efficiently. */
1617 static void
1618 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1619 bool op0_preserve_value)
1621 if (op0_preserve_value)
1622 return;
1624 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1625 if ((*code == EQ || *code == NE)
1626 && *op1 == const0_rtx
1627 && GET_CODE (*op0) == ZERO_EXTRACT
1628 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1629 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1630 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1632 rtx inner = XEXP (*op0, 0);
1633 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1634 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1635 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1637 if (len > 0 && len < modesize
1638 && pos >= 0 && pos + len <= modesize
1639 && modesize <= HOST_BITS_PER_WIDE_INT)
1641 unsigned HOST_WIDE_INT block;
1642 block = (HOST_WIDE_INT_1U << len) - 1;
1643 block <<= modesize - pos - len;
1645 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1646 gen_int_mode (block, GET_MODE (inner)));
1650 /* Narrow AND of memory against immediate to enable TM. */
1651 if ((*code == EQ || *code == NE)
1652 && *op1 == const0_rtx
1653 && GET_CODE (*op0) == AND
1654 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1655 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1657 rtx inner = XEXP (*op0, 0);
1658 rtx mask = XEXP (*op0, 1);
1660 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1661 if (GET_CODE (inner) == SUBREG
1662 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1663 && (GET_MODE_SIZE (GET_MODE (inner))
1664 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1665 && ((INTVAL (mask)
1666 & GET_MODE_MASK (GET_MODE (inner))
1667 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1668 == 0))
1669 inner = SUBREG_REG (inner);
1671 /* Do not change volatile MEMs. */
1672 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1674 int part = s390_single_part (XEXP (*op0, 1),
1675 GET_MODE (inner), QImode, 0);
1676 if (part >= 0)
1678 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1679 inner = adjust_address_nv (inner, QImode, part);
1680 *op0 = gen_rtx_AND (QImode, inner, mask);
1685 /* Narrow comparisons against 0xffff to HImode if possible. */
1686 if ((*code == EQ || *code == NE)
1687 && GET_CODE (*op1) == CONST_INT
1688 && INTVAL (*op1) == 0xffff
1689 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1690 && (nonzero_bits (*op0, GET_MODE (*op0))
1691 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693 *op0 = gen_lowpart (HImode, *op0);
1694 *op1 = constm1_rtx;
1697 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1698 if (GET_CODE (*op0) == UNSPEC
1699 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1700 && XVECLEN (*op0, 0) == 1
1701 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1702 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1703 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1704 && *op1 == const0_rtx)
1706 enum rtx_code new_code = UNKNOWN;
1707 switch (*code)
1709 case EQ: new_code = EQ; break;
1710 case NE: new_code = NE; break;
1711 case LT: new_code = GTU; break;
1712 case GT: new_code = LTU; break;
1713 case LE: new_code = GEU; break;
1714 case GE: new_code = LEU; break;
1715 default: break;
1718 if (new_code != UNKNOWN)
1720 *op0 = XVECEXP (*op0, 0, 0);
1721 *code = new_code;
1725 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1726 if (GET_CODE (*op0) == UNSPEC
1727 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1728 && XVECLEN (*op0, 0) == 1
1729 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1730 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1731 && CONST_INT_P (*op1))
1733 enum rtx_code new_code = UNKNOWN;
1734 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1736 case E_CCZmode:
1737 case E_CCRAWmode:
1738 switch (*code)
1740 case EQ: new_code = EQ; break;
1741 case NE: new_code = NE; break;
1742 default: break;
1744 break;
1745 default: break;
1748 if (new_code != UNKNOWN)
1750 /* For CCRAWmode put the required cc mask into the second
1751 operand. */
1752 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1753 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1754 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1755 *op0 = XVECEXP (*op0, 0, 0);
1756 *code = new_code;
1760 /* Simplify cascaded EQ, NE with const0_rtx. */
1761 if ((*code == NE || *code == EQ)
1762 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1763 && GET_MODE (*op0) == SImode
1764 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1765 && REG_P (XEXP (*op0, 0))
1766 && XEXP (*op0, 1) == const0_rtx
1767 && *op1 == const0_rtx)
1769 if ((*code == EQ && GET_CODE (*op0) == NE)
1770 || (*code == NE && GET_CODE (*op0) == EQ))
1771 *code = EQ;
1772 else
1773 *code = NE;
1774 *op0 = XEXP (*op0, 0);
1777 /* Prefer register over memory as first operand. */
1778 if (MEM_P (*op0) && REG_P (*op1))
1780 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1781 *code = (int)swap_condition ((enum rtx_code)*code);
1784 /* A comparison result is compared against zero. Replace it with
1785 the (perhaps inverted) original comparison.
1786 This probably should be done by simplify_relational_operation. */
1787 if ((*code == EQ || *code == NE)
1788 && *op1 == const0_rtx
1789 && COMPARISON_P (*op0)
1790 && CC_REG_P (XEXP (*op0, 0)))
1792 enum rtx_code new_code;
1794 if (*code == EQ)
1795 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1796 XEXP (*op0, 0),
1797 XEXP (*op0, 1), NULL);
1798 else
1799 new_code = GET_CODE (*op0);
1801 if (new_code != UNKNOWN)
1803 *code = new_code;
1804 *op1 = XEXP (*op0, 1);
1805 *op0 = XEXP (*op0, 0);
1809 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1810 if (TARGET_Z15
1811 && (*code == EQ || *code == NE)
1812 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1813 && GET_CODE (*op0) == NOT)
1815 machine_mode mode = GET_MODE (*op0);
1816 *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1817 *op0 = gen_rtx_NOT (mode, *op0);
1818 *op1 = const0_rtx;
1821 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1822 if (TARGET_Z15
1823 && (*code == EQ || *code == NE)
1824 && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1825 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1826 && CONST_INT_P (*op1)
1827 && *op1 == constm1_rtx)
1829 machine_mode mode = GET_MODE (*op0);
1830 rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1831 rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1833 if (GET_CODE (*op0) == AND)
1834 *op0 = gen_rtx_IOR (mode, op00, op01);
1835 else
1836 *op0 = gen_rtx_AND (mode, op00, op01);
1838 *op1 = const0_rtx;
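/* Note added for clarity (assumption, not stated in the original source):
   both rewrites above turn the comparison into "<negated logical op>
   ==/!= 0", which presumably lets combine use the z15 (arch13)
   combined-logic instructions that also set the condition code
   (e.g. NXRK/NXGRK, NNRK/NNGRK, NORK/NOGRK), avoiding a separate
   compare against zero.  */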
1843 /* Emit a compare instruction suitable to implement the comparison
1844 OP0 CODE OP1. Return the correct condition RTL to be placed in
1845 the IF_THEN_ELSE of the conditional branch testing the result. */
1848 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1850 machine_mode mode = s390_select_ccmode (code, op0, op1);
1851 rtx cc;
1853 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1855 /* Do not output a redundant compare instruction if a
1856 compare_and_swap pattern already computed the result and the
1857 machine modes are compatible. */
1858 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859 == GET_MODE (op0));
1860 cc = op0;
1862 else
1864 cc = gen_rtx_REG (mode, CC_REGNUM);
1865 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1868 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
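/* Usage sketch (illustrative, not part of the original source): the
   returned condition is typically placed into a conditional branch via
   s390_emit_jump below:

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (target_label, cond);
*/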
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872 MEM, whose address is a pseudo containing the original MEM's address. */
1874 static rtx
1875 s390_legitimize_cs_operand (rtx mem)
1877 rtx tmp;
1879 if (!contains_symbol_ref_p (mem))
1880 return mem;
1881 tmp = gen_reg_rtx (Pmode);
1882 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883 return change_address (mem, VOIDmode, tmp);
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1887 matches CMP.
1888 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889 conditional branch testing the result. */
1891 static rtx
1892 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893 rtx cmp, rtx new_rtx, machine_mode ccmode)
1895 rtx cc;
1897 mem = s390_legitimize_cs_operand (mem);
1898 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899 switch (GET_MODE (mem))
1901 case E_SImode:
1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903 new_rtx, cc));
1904 break;
1905 case E_DImode:
1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907 new_rtx, cc));
1908 break;
1909 case E_TImode:
1910 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911 new_rtx, cc));
1912 break;
1913 case E_QImode:
1914 case E_HImode:
1915 default:
1916 gcc_unreachable ();
1918 return s390_emit_compare (code, cc, const0_rtx);
1921 /* Emit a jump instruction to TARGET and return it. If COND is
1922 NULL_RTX, emit an unconditional jump, else a conditional jump under
1923 condition COND. */
1925 rtx_insn *
1926 s390_emit_jump (rtx target, rtx cond)
1928 rtx insn;
1930 target = gen_rtx_LABEL_REF (VOIDmode, target);
1931 if (cond)
1932 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1934 insn = gen_rtx_SET (pc_rtx, target);
1935 return emit_jump_insn (insn);
1938 /* Return branch condition mask to implement a branch
1939 specified by CODE. Return -1 for invalid comparisons. */
1942 s390_branch_condition_mask (rtx code)
1944 const int CC0 = 1 << 3;
1945 const int CC1 = 1 << 2;
1946 const int CC2 = 1 << 1;
1947 const int CC3 = 1 << 0;
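/* The mask is the 4-bit condition-code mask of the branch instruction:
   CC0 is the most significant bit (value 8), CC3 the least significant
   (value 1).  For example, in CCZmode an EQ test below yields CC0 == 8
   ("branch if the condition code is 0") and NE yields CC1 | CC2 | CC3 == 7.  */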
1949 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951 gcc_assert (XEXP (code, 1) == const0_rtx
1952 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953 && CONST_INT_P (XEXP (code, 1))));
1956 switch (GET_MODE (XEXP (code, 0)))
1958 case E_CCZmode:
1959 case E_CCZ1mode:
1960 switch (GET_CODE (code))
1962 case EQ: return CC0;
1963 case NE: return CC1 | CC2 | CC3;
1964 default: return -1;
1966 break;
1968 case E_CCT1mode:
1969 switch (GET_CODE (code))
1971 case EQ: return CC1;
1972 case NE: return CC0 | CC2 | CC3;
1973 default: return -1;
1975 break;
1977 case E_CCT2mode:
1978 switch (GET_CODE (code))
1980 case EQ: return CC2;
1981 case NE: return CC0 | CC1 | CC3;
1982 default: return -1;
1984 break;
1986 case E_CCT3mode:
1987 switch (GET_CODE (code))
1989 case EQ: return CC3;
1990 case NE: return CC0 | CC1 | CC2;
1991 default: return -1;
1993 break;
1995 case E_CCLmode:
1996 switch (GET_CODE (code))
1998 case EQ: return CC0 | CC2;
1999 case NE: return CC1 | CC3;
2000 default: return -1;
2002 break;
2004 case E_CCL1mode:
2005 switch (GET_CODE (code))
2007 case LTU: return CC2 | CC3; /* carry */
2008 case GEU: return CC0 | CC1; /* no carry */
2009 default: return -1;
2011 break;
2013 case E_CCL2mode:
2014 switch (GET_CODE (code))
2016 case GTU: return CC0 | CC1; /* borrow */
2017 case LEU: return CC2 | CC3; /* no borrow */
2018 default: return -1;
2020 break;
2022 case E_CCL3mode:
2023 switch (GET_CODE (code))
2025 case EQ: return CC0 | CC2;
2026 case NE: return CC1 | CC3;
2027 case LTU: return CC1;
2028 case GTU: return CC3;
2029 case LEU: return CC1 | CC2;
2030 case GEU: return CC2 | CC3;
2031 default: return -1;
2034 case E_CCUmode:
2035 switch (GET_CODE (code))
2037 case EQ: return CC0;
2038 case NE: return CC1 | CC2 | CC3;
2039 case LTU: return CC1;
2040 case GTU: return CC2;
2041 case LEU: return CC0 | CC1;
2042 case GEU: return CC0 | CC2;
2043 default: return -1;
2045 break;
2047 case E_CCURmode:
2048 switch (GET_CODE (code))
2050 case EQ: return CC0;
2051 case NE: return CC2 | CC1 | CC3;
2052 case LTU: return CC2;
2053 case GTU: return CC1;
2054 case LEU: return CC0 | CC2;
2055 case GEU: return CC0 | CC1;
2056 default: return -1;
2058 break;
2060 case E_CCAPmode:
2061 switch (GET_CODE (code))
2063 case EQ: return CC0;
2064 case NE: return CC1 | CC2 | CC3;
2065 case LT: return CC1 | CC3;
2066 case GT: return CC2;
2067 case LE: return CC0 | CC1 | CC3;
2068 case GE: return CC0 | CC2;
2069 default: return -1;
2071 break;
2073 case E_CCANmode:
2074 switch (GET_CODE (code))
2076 case EQ: return CC0;
2077 case NE: return CC1 | CC2 | CC3;
2078 case LT: return CC1;
2079 case GT: return CC2 | CC3;
2080 case LE: return CC0 | CC1;
2081 case GE: return CC0 | CC2 | CC3;
2082 default: return -1;
2084 break;
2086 case E_CCOmode:
2087 switch (GET_CODE (code))
2089 case EQ: return CC0 | CC1 | CC2;
2090 case NE: return CC3;
2091 default: return -1;
2093 break;
2095 case E_CCSmode:
2096 case E_CCSFPSmode:
2097 switch (GET_CODE (code))
2099 case EQ: return CC0;
2100 case NE: return CC1 | CC2 | CC3;
2101 case LT: return CC1;
2102 case GT: return CC2;
2103 case LE: return CC0 | CC1;
2104 case GE: return CC0 | CC2;
2105 case UNORDERED: return CC3;
2106 case ORDERED: return CC0 | CC1 | CC2;
2107 case UNEQ: return CC0 | CC3;
2108 case UNLT: return CC1 | CC3;
2109 case UNGT: return CC2 | CC3;
2110 case UNLE: return CC0 | CC1 | CC3;
2111 case UNGE: return CC0 | CC2 | CC3;
2112 case LTGT: return CC1 | CC2;
2113 default: return -1;
2115 break;
2117 case E_CCSRmode:
2118 switch (GET_CODE (code))
2120 case EQ: return CC0;
2121 case NE: return CC2 | CC1 | CC3;
2122 case LT: return CC2;
2123 case GT: return CC1;
2124 case LE: return CC0 | CC2;
2125 case GE: return CC0 | CC1;
2126 case UNORDERED: return CC3;
2127 case ORDERED: return CC0 | CC2 | CC1;
2128 case UNEQ: return CC0 | CC3;
2129 case UNLT: return CC2 | CC3;
2130 case UNGT: return CC1 | CC3;
2131 case UNLE: return CC0 | CC2 | CC3;
2132 case UNGE: return CC0 | CC1 | CC3;
2133 case LTGT: return CC2 | CC1;
2134 default: return -1;
2136 break;
2138 /* Vector comparison modes. */
2139 /* CC2 will never be set. It however is part of the negated
2140 masks. */
2141 case E_CCVIALLmode:
2142 switch (GET_CODE (code))
2144 case EQ:
2145 case GTU:
2146 case GT:
2147 case GE: return CC0;
2148 /* The inverted modes are in fact *any* modes. */
2149 case NE:
2150 case LEU:
2151 case LE:
2152 case LT: return CC3 | CC1 | CC2;
2153 default: return -1;
2156 case E_CCVIANYmode:
2157 switch (GET_CODE (code))
2159 case EQ:
2160 case GTU:
2161 case GT:
2162 case GE: return CC0 | CC1;
2163 /* The inverted modes are in fact *all* modes. */
2164 case NE:
2165 case LEU:
2166 case LE:
2167 case LT: return CC3 | CC2;
2168 default: return -1;
2170 case E_CCVFALLmode:
2171 switch (GET_CODE (code))
2173 case EQ:
2174 case GT:
2175 case GE: return CC0;
2176 /* The inverted modes are in fact *any* modes. */
2177 case NE:
2178 case UNLE:
2179 case UNLT: return CC3 | CC1 | CC2;
2180 default: return -1;
2183 case E_CCVFANYmode:
2184 switch (GET_CODE (code))
2186 case EQ:
2187 case GT:
2188 case GE: return CC0 | CC1;
2189 /* The inverted modes are in fact *all* modes. */
2190 case NE:
2191 case UNLE:
2192 case UNLT: return CC3 | CC2;
2193 default: return -1;
2196 case E_CCRAWmode:
2197 switch (GET_CODE (code))
2199 case EQ:
2200 return INTVAL (XEXP (code, 1));
2201 case NE:
2202 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203 default:
2204 gcc_unreachable ();
2207 default:
2208 return -1;
2213 /* Return branch condition mask to implement a compare and branch
2214 specified by CODE. Return -1 for invalid comparisons. */
2217 s390_compare_and_branch_condition_mask (rtx code)
2219 const int CC0 = 1 << 3;
2220 const int CC1 = 1 << 2;
2221 const int CC2 = 1 << 1;
2223 switch (GET_CODE (code))
2225 case EQ:
2226 return CC0;
2227 case NE:
2228 return CC1 | CC2;
2229 case LT:
2230 case LTU:
2231 return CC1;
2232 case GT:
2233 case GTU:
2234 return CC2;
2235 case LE:
2236 case LEU:
2237 return CC0 | CC1;
2238 case GE:
2239 case GEU:
2240 return CC0 | CC2;
2241 default:
2242 gcc_unreachable ();
2244 return -1;
2247 /* If INV is false, return assembler mnemonic string to implement
2248 a branch specified by CODE. If INV is true, return mnemonic
2249 for the corresponding inverted branch. */
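/* For example, an LTU comparison in CCUmode has mask CC1 == 4 (see
   s390_branch_condition_mask), so the mnemonic is "l" (branch on low);
   with INV set the mask becomes 4 ^ 15 == 11, giving "nl".  */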
2251 static const char *
2252 s390_branch_condition_mnemonic (rtx code, int inv)
2254 int mask;
2256 static const char *const mnemonic[16] =
2258 NULL, "o", "h", "nle",
2259 "l", "nhe", "lh", "ne",
2260 "e", "nlh", "he", "nl",
2261 "le", "nh", "no", NULL
2264 if (GET_CODE (XEXP (code, 0)) == REG
2265 && REGNO (XEXP (code, 0)) == CC_REGNUM
2266 && (XEXP (code, 1) == const0_rtx
2267 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268 && CONST_INT_P (XEXP (code, 1)))))
2269 mask = s390_branch_condition_mask (code);
2270 else
2271 mask = s390_compare_and_branch_condition_mask (code);
2273 gcc_assert (mask >= 0);
2275 if (inv)
2276 mask ^= 15;
2278 gcc_assert (mask >= 1 && mask <= 14);
2280 return mnemonic[mask];
2283 /* Return the part of op which has a value different from def.
2284 The size of the part is determined by mode.
2285 Use this function only if you already know that op really
2286 contains such a part. */
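/* For example, with a 64-bit HOST_WIDE_INT, OP == (const_int 0x120000),
   MODE == HImode and DEF == 0, the HImode parts from least to most
   significant are 0x0000, 0x0012, 0x0000, 0x0000, so the function
   returns 0x12.  */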
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op, machine_mode mode, int def)
2291 unsigned HOST_WIDE_INT value = 0;
2292 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293 int part_bits = GET_MODE_BITSIZE (mode);
2294 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295 int i;
2297 for (i = 0; i < max_parts; i++)
2299 if (i == 0)
2300 value = UINTVAL (op);
2301 else
2302 value >>= part_bits;
2304 if ((value & part_mask) != (def & part_mask))
2305 return value & part_mask;
2308 gcc_unreachable ();
2311 /* If OP is an integer constant of mode MODE with exactly one
2312 part of mode PART_MODE unequal to DEF, return the number of that
2313 part. Otherwise, return -1. */
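/* Parts are numbered starting from the most significant one.  For example,
   with OP == (const_int 0x12345678), MODE == DImode, PART_MODE == SImode
   and DEF == 0, only the low SImode part differs from 0, so the function
   returns 1 (the less significant of the two parts).  */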
2316 s390_single_part (rtx op,
2317 machine_mode mode,
2318 machine_mode part_mode,
2319 int def)
2321 unsigned HOST_WIDE_INT value = 0;
2322 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323 unsigned HOST_WIDE_INT part_mask
2324 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325 int i, part = -1;
2327 if (GET_CODE (op) != CONST_INT)
2328 return -1;
2330 for (i = 0; i < n_parts; i++)
2332 if (i == 0)
2333 value = UINTVAL (op);
2334 else
2335 value >>= GET_MODE_BITSIZE (part_mode);
2337 if ((value & part_mask) != (def & part_mask))
2339 if (part != -1)
2340 return -1;
2341 else
2342 part = i;
2345 return part == -1 ? -1 : n_parts - 1 - part;
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349 bits and no other bits are set in (the lower SIZE bits of) IN.
2351 PSTART and PEND can be used to obtain the start and end
2352 position (inclusive) of the bitfield relative to 64
2353 bits. *PSTART / *PEND gives the position of the first/last bit
2354 of the bitfield counting from the highest order bit starting
2355 with zero. */
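/* For example, IN == 0x0ff0 with SIZE == 16 is a contiguous range of ones;
   counting from the most significant of the 64 bits, the range occupies
   bits 52 to 59, so *PSTART == 52 and *PEND == 59.  */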
2357 bool
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359 int *pstart, int *pend)
2361 int start;
2362 int end = -1;
2363 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364 int highbit = HOST_BITS_PER_WIDE_INT - size;
2365 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2367 gcc_assert (!!pstart == !!pend);
2368 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369 if (end == -1)
2371 /* Look for the rightmost bit of a contiguous range of ones. */
2372 if (bitmask & in)
2373 /* Found it. */
2374 end = start;
2376 else
2378 /* Look for the first zero bit after the range of ones. */
2379 if (! (bitmask & in))
2380 /* Found it. */
2381 break;
2383 /* We're one past the last one-bit. */
2384 start++;
2386 if (end == -1)
2387 /* No one bits found. */
2388 return false;
2390 if (start > highbit)
2392 unsigned HOST_WIDE_INT mask;
2394 /* Calculate a mask for all bits beyond the contiguous bits. */
2395 mask = ((~HOST_WIDE_INT_0U >> highbit)
2396 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397 if (mask & in)
2398 /* There are more bits set beyond the first range of one bits. */
2399 return false;
2402 if (pstart)
2404 *pstart = start;
2405 *pend = end;
2408 return true;
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412 if ~IN contains a contiguous bitfield. In that case, *END is <
2413 *START.
2415 If WRAP_P is true, a bitmask that wraps around is also tested.
2416 When a wraparound occurs *START is greater than *END (in
2417 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418 part of the range. If WRAP_P is false, no wraparound is
2419 tested. */
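/* For example, IN == 0x8000000000000001 with SIZE == 64 and WRAP_P true
   is accepted as a wrapping bitmask: the complement 0x7ffffffffffffffe is
   contiguous, and the reported range is *START == 63, *END == 0.  */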
2421 bool
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423 int size, int *start, int *end)
2425 int bs = HOST_BITS_PER_WIDE_INT;
2426 bool b;
2428 gcc_assert (!!start == !!end);
2429 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430 /* This cannot be expressed as a contiguous bitmask. Exit early because
2431 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432 a valid bitmask. */
2433 return false;
2434 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435 if (b)
2436 return true;
2437 if (! wrap_p)
2438 return false;
2439 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440 if (b && start)
2442 int s = *start;
2443 int e = *end;
2445 gcc_assert (s >= 1);
2446 *start = ((e + 1) & (bs - 1));
2447 *end = ((s - 1 + bs) & (bs - 1));
2450 return b;
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454 its elements. START and END can be used to obtain the start and
2455 end position of the bitfield.
2457 START/END give the position of the first/last bit of the bitfield
2458 counting from the lowest order bit starting with zero. In order to
2459 use these values for S/390 instructions this has to be converted to
2460 "bits big endian" style. */
2462 bool
2463 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2465 unsigned HOST_WIDE_INT mask;
2466 int size;
2467 rtx elt;
2468 bool b;
2470 /* Handle floats by bitcasting them to ints. */
2471 op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
2473 gcc_assert (!!start == !!end);
2474 if (!const_vec_duplicate_p (op, &elt)
2475 || !CONST_INT_P (elt))
2476 return false;
2478 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2480 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2481 if (size > 64)
2482 return false;
2484 mask = UINTVAL (elt);
2486 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2487 if (b)
2489 if (start)
2491 *start -= (HOST_BITS_PER_WIDE_INT - size);
2492 *end -= (HOST_BITS_PER_WIDE_INT - size);
2494 return true;
2496 else
2497 return false;
2500 /* Return true if C consists only of byte chunks being either 0 or
2501 0xff. If MASK is !=NULL a byte mask is generated which is
2502 appropriate for the vector generate byte mask instruction. */
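/* For example, a V2DImode constant { 0xffffffffffffffff, 0 } consists only
   of 0xff and 0x00 bytes; the generated mask sets one bit per 0xff byte,
   with element 0 occupying the most significant bits, giving
   *MASK == 0xff00.  */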
2504 bool
2505 s390_bytemask_vector_p (rtx op, unsigned *mask)
2507 int i;
2508 unsigned tmp_mask = 0;
2509 int nunit, unit_size;
2511 if (!VECTOR_MODE_P (GET_MODE (op))
2512 || GET_CODE (op) != CONST_VECTOR
2513 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2514 return false;
2516 nunit = GET_MODE_NUNITS (GET_MODE (op));
2517 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2519 for (i = 0; i < nunit; i++)
2521 unsigned HOST_WIDE_INT c;
2522 int j;
2524 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2525 return false;
2527 c = UINTVAL (XVECEXP (op, 0, i));
2528 for (j = 0; j < unit_size; j++)
2530 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2531 return false;
2532 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2533 c = c >> BITS_PER_UNIT;
2537 if (mask != NULL)
2538 *mask = tmp_mask;
2540 return true;
2543 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2544 equivalent to a shift followed by the AND. In particular, CONTIG
2545 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2546 for ROTL indicate a rotate to the right. */
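/* For example, BITSIZE == 32 and CONTIG == 0xff00 give a bitfield at
   positions 48..55 (big-endian numbering); a left rotate by 8 keeps it
   clear of the bit 0/bit 63 gap (64 - 55 >= 8), whereas ROTL == 10 would
   not, so the function returns true for 8 and false for 10.  */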
2548 bool
2549 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2551 int start, end;
2552 bool ok;
2554 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2555 gcc_assert (ok);
2557 if (rotl >= 0)
2558 return (64 - end >= rotl);
2559 else
2561 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2562 DImode. */
2563 rotl = -rotl + (64 - bitsize);
2564 return (start >= rotl);
2568 /* Check whether we can (and want to) split a double-word
2569 move in mode MODE from SRC to DST into two single-word
2570 moves, moving the subword FIRST_SUBWORD first. */
2572 bool
2573 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2575 /* Floating point and vector registers cannot be split. */
2576 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2577 return false;
2579 /* Non-offsettable memory references cannot be split. */
2580 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2581 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2582 return false;
2584 /* Moving the first subword must not clobber a register
2585 needed to move the second subword. */
2586 if (register_operand (dst, mode))
2588 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2589 if (reg_overlap_mentioned_p (subreg, src))
2590 return false;
2593 return true;
2596 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2597 and [MEM2, MEM2 + SIZE] do overlap and false
2598 otherwise. */
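/* For example, if MEM2's address simplifies to MEM1's address plus the
   constant 16, then with SIZE == 256 the blocks provably overlap
   (0 < 16 < 256) and the function returns true, while with SIZE == 16 it
   returns false.  */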
2600 bool
2601 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2603 rtx addr1, addr2, addr_delta;
2604 HOST_WIDE_INT delta;
2606 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2607 return true;
2609 if (size == 0)
2610 return false;
2612 addr1 = XEXP (mem1, 0);
2613 addr2 = XEXP (mem2, 0);
2615 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2617 /* This overlapping check is used by peepholes merging memory block operations.
2618 Overlapping operations would otherwise be recognized by the S/390 hardware
2619 and would fall back to a slower implementation. Allowing overlapping
2620 operations would lead to slow code but not to wrong code. Therefore we are
2621 somewhat optimistic if we cannot prove that the memory blocks are
2622 overlapping.
2623 That's why we return false here although this may accept operations on
2624 overlapping memory areas. */
2625 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2626 return false;
2628 delta = INTVAL (addr_delta);
2630 if (delta == 0
2631 || (delta > 0 && delta < size)
2632 || (delta < 0 && -delta < size))
2633 return true;
2635 return false;
2638 /* Check whether the address of memory reference MEM2 equals exactly
2639 the address of memory reference MEM1 plus DELTA. Return true if
2640 we can prove this to be the case, false otherwise. */
2642 bool
2643 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2645 rtx addr1, addr2, addr_delta;
2647 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2648 return false;
2650 addr1 = XEXP (mem1, 0);
2651 addr2 = XEXP (mem2, 0);
2653 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2654 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2655 return false;
2657 return true;
2660 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2662 void
2663 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2664 rtx *operands)
2666 machine_mode wmode = mode;
2667 rtx dst = operands[0];
2668 rtx src1 = operands[1];
2669 rtx src2 = operands[2];
2670 rtx op, clob, tem;
2672 /* If we cannot handle the operation directly, use a temp register. */
2673 if (!s390_logical_operator_ok_p (operands))
2674 dst = gen_reg_rtx (mode);
2676 /* QImode and HImode patterns make sense only if we have a destination
2677 in memory. Otherwise perform the operation in SImode. */
2678 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2679 wmode = SImode;
2681 /* Widen operands if required. */
2682 if (mode != wmode)
2684 if (GET_CODE (dst) == SUBREG
2685 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2686 dst = tem;
2687 else if (REG_P (dst))
2688 dst = gen_rtx_SUBREG (wmode, dst, 0);
2689 else
2690 dst = gen_reg_rtx (wmode);
2692 if (GET_CODE (src1) == SUBREG
2693 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2694 src1 = tem;
2695 else if (GET_MODE (src1) != VOIDmode)
2696 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2698 if (GET_CODE (src2) == SUBREG
2699 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2700 src2 = tem;
2701 else if (GET_MODE (src2) != VOIDmode)
2702 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2705 /* Emit the instruction. */
2706 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2707 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2708 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2710 /* Fix up the destination if needed. */
2711 if (dst != operands[0])
2712 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2715 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2717 bool
2718 s390_logical_operator_ok_p (rtx *operands)
2720 /* If the destination operand is in memory, it needs to coincide
2721 with one of the source operands. After reload, it has to be
2722 the first source operand. */
2723 if (GET_CODE (operands[0]) == MEM)
2724 return rtx_equal_p (operands[0], operands[1])
2725 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2727 return true;
2730 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2731 operand IMMOP to switch from SS to SI type instructions. */
2733 void
2734 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2736 int def = code == AND ? -1 : 0;
2737 HOST_WIDE_INT mask;
2738 int part;
2740 gcc_assert (GET_CODE (*memop) == MEM);
2741 gcc_assert (!MEM_VOLATILE_P (*memop));
2743 mask = s390_extract_part (*immop, QImode, def);
2744 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2745 gcc_assert (part >= 0);
2747 *memop = adjust_address (*memop, QImode, part);
2748 *immop = gen_int_mode (mask, QImode);
2752 /* How to allocate a 'struct machine_function'. */
2754 static struct machine_function *
2755 s390_init_machine_status (void)
2757 return ggc_cleared_alloc<machine_function> ();
2760 /* Map for smallest class containing reg regno. */
2762 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2763 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2764 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2765 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2766 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2767 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2768 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2769 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2770 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2771 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2772 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2773 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2774 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2775 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2776 VEC_REGS, VEC_REGS /* 52 */
2779 /* Return attribute type of insn. */
2781 static enum attr_type
2782 s390_safe_attr_type (rtx_insn *insn)
2784 if (recog_memoized (insn) >= 0)
2785 return get_attr_type (insn);
2786 else
2787 return TYPE_NONE;
2790 /* Return attribute relative_long of insn. */
2792 static bool
2793 s390_safe_relative_long_p (rtx_insn *insn)
2795 if (recog_memoized (insn) >= 0)
2796 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2797 else
2798 return false;
2801 /* Return true if DISP is a valid short displacement. */
2803 static bool
2804 s390_short_displacement (rtx disp)
2806 /* No displacement is OK. */
2807 if (!disp)
2808 return true;
2810 /* Without the long displacement facility we don't need to
2811 distinguish between long and short displacements. */
2812 if (!TARGET_LONG_DISPLACEMENT)
2813 return true;
2815 /* Integer displacement in range. */
2816 if (GET_CODE (disp) == CONST_INT)
2817 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2819 /* GOT offset is not OK, the GOT can be large. */
2820 if (GET_CODE (disp) == CONST
2821 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2822 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2823 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2824 return false;
2826 /* All other symbolic constants are literal pool references,
2827 which are OK as the literal pool must be small. */
2828 if (GET_CODE (disp) == CONST)
2829 return true;
2831 return false;
2834 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2835 If successful, also determines the
2836 following characteristics of `ref': `is_ptr' - whether it can be an
2837 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2838 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2839 considered a literal pool pointer for purposes of avoiding two different
2840 literal pool pointers per insn during or after reload (`B' constraint). */
2841 static bool
2842 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2843 bool *is_base_ptr, bool *is_pool_ptr)
2845 if (!*ref)
2846 return true;
2848 if (GET_CODE (*ref) == UNSPEC)
2849 switch (XINT (*ref, 1))
2851 case UNSPEC_LTREF:
2852 if (!*disp)
2853 *disp = gen_rtx_UNSPEC (Pmode,
2854 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2855 UNSPEC_LTREL_OFFSET);
2856 else
2857 return false;
2859 *ref = XVECEXP (*ref, 0, 1);
2860 break;
2862 default:
2863 return false;
2866 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2867 return false;
2869 if (REGNO (*ref) == STACK_POINTER_REGNUM
2870 || REGNO (*ref) == FRAME_POINTER_REGNUM
2871 || ((reload_completed || reload_in_progress)
2872 && frame_pointer_needed
2873 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2874 || REGNO (*ref) == ARG_POINTER_REGNUM
2875 || (flag_pic
2876 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2877 *is_ptr = *is_base_ptr = true;
2879 if ((reload_completed || reload_in_progress)
2880 && *ref == cfun->machine->base_reg)
2881 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2883 return true;
2886 /* Decompose a RTL expression ADDR for a memory address into
2887 its components, returned in OUT.
2889 Returns false if ADDR is not a valid memory address, true
2890 otherwise. If OUT is NULL, don't return the components,
2891 but check for validity only.
2893 Note: Only addresses in canonical form are recognized.
2894 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2895 canonical form so that they will be recognized. */
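/* For example, ADDR == (plus (reg %r2) (const_int 100)) decomposes into
   base %r2, no index and displacement offset 100, while
   (plus (plus (reg %r1) (reg %r2)) (const_int 8)) yields index %r1,
   base %r2 and offset 8.  */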
2897 static int
2898 s390_decompose_address (rtx addr, struct s390_address *out)
2900 HOST_WIDE_INT offset = 0;
2901 rtx base = NULL_RTX;
2902 rtx indx = NULL_RTX;
2903 rtx disp = NULL_RTX;
2904 rtx orig_disp;
2905 bool pointer = false;
2906 bool base_ptr = false;
2907 bool indx_ptr = false;
2908 bool literal_pool = false;
2910 /* We may need to substitute the literal pool base register into the address
2911 below. However, at this point we do not know which register is going to
2912 be used as base, so we substitute the arg pointer register. This is going
2913 to be treated as holding a pointer below -- it shouldn't be used for any
2914 other purpose. */
2915 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2917 /* Decompose address into base + index + displacement. */
2919 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2920 base = addr;
2922 else if (GET_CODE (addr) == PLUS)
2924 rtx op0 = XEXP (addr, 0);
2925 rtx op1 = XEXP (addr, 1);
2926 enum rtx_code code0 = GET_CODE (op0);
2927 enum rtx_code code1 = GET_CODE (op1);
2929 if (code0 == REG || code0 == UNSPEC)
2931 if (code1 == REG || code1 == UNSPEC)
2933 indx = op0; /* index + base */
2934 base = op1;
2937 else
2939 base = op0; /* base + displacement */
2940 disp = op1;
2944 else if (code0 == PLUS)
2946 indx = XEXP (op0, 0); /* index + base + disp */
2947 base = XEXP (op0, 1);
2948 disp = op1;
2951 else
2953 return false;
2957 else
2958 disp = addr; /* displacement */
2960 /* Extract integer part of displacement. */
2961 orig_disp = disp;
2962 if (disp)
2964 if (GET_CODE (disp) == CONST_INT)
2966 offset = INTVAL (disp);
2967 disp = NULL_RTX;
2969 else if (GET_CODE (disp) == CONST
2970 && GET_CODE (XEXP (disp, 0)) == PLUS
2971 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2973 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2974 disp = XEXP (XEXP (disp, 0), 0);
2978 /* Strip off CONST here to avoid special case tests later. */
2979 if (disp && GET_CODE (disp) == CONST)
2980 disp = XEXP (disp, 0);
2982 /* We can convert literal pool addresses to
2983 displacements by basing them off the base register. */
2984 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2986 if (base || indx)
2987 return false;
2989 base = fake_pool_base, literal_pool = true;
2991 /* Mark up the displacement. */
2992 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2993 UNSPEC_LTREL_OFFSET);
2996 /* Validate base register. */
2997 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2998 &literal_pool))
2999 return false;
3001 /* Validate index register. */
3002 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3003 &literal_pool))
3004 return false;
3006 /* Prefer to use pointer as base, not index. */
3007 if (base && indx && !base_ptr
3008 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3010 rtx tmp = base;
3011 base = indx;
3012 indx = tmp;
3015 /* Validate displacement. */
3016 if (!disp)
3018 /* If virtual registers are involved, the displacement will change later
3019 anyway as the virtual registers get eliminated. This could make a
3020 valid displacement invalid, but it is more likely to make an invalid
3021 displacement valid, because we sometimes access the register save area
3022 via negative offsets to one of those registers.
3023 Thus we don't check the displacement for validity here. If after
3024 elimination the displacement turns out to be invalid after all,
3025 this is fixed up by reload in any case. */
3026 /* LRA maintains always displacements up to date and we need to
3027 know the displacement is right during all LRA not only at the
3028 final elimination. */
3029 if (lra_in_progress
3030 || (base != arg_pointer_rtx
3031 && indx != arg_pointer_rtx
3032 && base != return_address_pointer_rtx
3033 && indx != return_address_pointer_rtx
3034 && base != frame_pointer_rtx
3035 && indx != frame_pointer_rtx
3036 && base != virtual_stack_vars_rtx
3037 && indx != virtual_stack_vars_rtx))
3038 if (!DISP_IN_RANGE (offset))
3039 return false;
3041 else
3043 /* All the special cases are pointers. */
3044 pointer = true;
3046 /* In the small-PIC case, the linker converts @GOT
3047 and @GOTNTPOFF offsets to possible displacements. */
3048 if (GET_CODE (disp) == UNSPEC
3049 && (XINT (disp, 1) == UNSPEC_GOT
3050 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3051 && flag_pic == 1)
3056 /* Accept pool label offsets. */
3057 else if (GET_CODE (disp) == UNSPEC
3058 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3061 /* Accept literal pool references. */
3062 else if (GET_CODE (disp) == UNSPEC
3063 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3065 /* In case CSE pulled a non literal pool reference out of
3066 the pool we have to reject the address. This is
3067 especially important when loading the GOT pointer on non
3068 zarch CPUs. In this case the literal pool contains an lt
3069 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3070 will most likely exceed the displacement. */
3071 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3072 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3073 return false;
3075 orig_disp = gen_rtx_CONST (Pmode, disp);
3076 if (offset)
3078 /* If we have an offset, make sure it does not
3079 exceed the size of the constant pool entry.
3080 Otherwise we might generate an out-of-range
3081 displacement for the base register form. */
3082 rtx sym = XVECEXP (disp, 0, 0);
3083 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3084 return false;
3086 orig_disp = plus_constant (Pmode, orig_disp, offset);
3090 else
3091 return false;
3094 if (!base && !indx)
3095 pointer = true;
3097 if (out)
3099 out->base = base;
3100 out->indx = indx;
3101 out->disp = orig_disp;
3102 out->pointer = pointer;
3103 out->literal_pool = literal_pool;
3106 return true;
3109 /* Decompose a RTL expression OP for an address style operand into its
3110 components, and return the base register in BASE and the offset in
3111 OFFSET. While OP looks like an address it is never supposed to be
3112 used as such.
3114 Return true if OP is a valid address operand, false if not. */
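/* For example, OP == (plus (reg %r3) (const_int 7)) yields *BASE == %r3
   and *OFFSET == 7, whereas OP == (const_int 7) yields *BASE == NULL_RTX
   and *OFFSET == 7.  */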
3116 bool
3117 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3118 HOST_WIDE_INT *offset)
3120 rtx off = NULL_RTX;
3122 /* We can have an integer constant, an address register,
3123 or a sum of the two. */
3124 if (CONST_SCALAR_INT_P (op))
3126 off = op;
3127 op = NULL_RTX;
3129 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3131 off = XEXP (op, 1);
3132 op = XEXP (op, 0);
3134 while (op && GET_CODE (op) == SUBREG)
3135 op = SUBREG_REG (op);
3137 if (op && GET_CODE (op) != REG)
3138 return false;
3140 if (offset)
3142 if (off == NULL_RTX)
3143 *offset = 0;
3144 else if (CONST_INT_P (off))
3145 *offset = INTVAL (off);
3146 else if (CONST_WIDE_INT_P (off))
3147 /* The offset will be cut down to 12 bits anyway, so just take
3148 the lowest order chunk of the wide int. */
3149 *offset = CONST_WIDE_INT_ELT (off, 0);
3150 else
3151 gcc_unreachable ();
3153 if (base)
3154 *base = op;
3156 return true;
3159 /* Check that OP is a valid shift count operand.
3160 It should be of the following structure:
3161 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3162 where the subreg, the and, and the plus are each optional.
3164 If IMPLICIT_MASK is > 0 and OP contains and
3165 (AND ... immediate)
3166 it is checked whether IMPLICIT_MASK and the immediate match.
3167 Otherwise, no checking is performed. */
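/* For example, with IMPLICIT_MASK == 63 a shift count of the form
   (and (reg) (const_int 63)) is accepted because the AND keeps all bits
   covered by the implicit mask, while (and (reg) (const_int 31)) is
   rejected since 31 & 63 != 63.  */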
3169 bool
3170 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3172 /* Strip subreg. */
3173 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3174 op = XEXP (op, 0);
3176 /* Check for an and with proper constant. */
3177 if (GET_CODE (op) == AND)
3179 rtx op1 = XEXP (op, 0);
3180 rtx imm = XEXP (op, 1);
3182 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3183 op1 = XEXP (op1, 0);
3185 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3186 return false;
3188 if (!immediate_operand (imm, GET_MODE (imm)))
3189 return false;
3191 HOST_WIDE_INT val = INTVAL (imm);
3192 if (implicit_mask > 0
3193 && (val & implicit_mask) != implicit_mask)
3194 return false;
3196 op = op1;
3199 /* Check the rest. */
3200 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3203 /* Return true if CODE is a valid address without index. */
3205 bool
3206 s390_legitimate_address_without_index_p (rtx op)
3208 struct s390_address addr;
3210 if (!s390_decompose_address (XEXP (op, 0), &addr))
3211 return false;
3212 if (addr.indx)
3213 return false;
3215 return true;
3219 /* Return TRUE if ADDR is an operand valid for a load/store relative
3220 instruction. Be aware that the alignment of the operand needs to
3221 be checked separately.
3222 Valid addresses are single references or a sum of a reference and a
3223 constant integer. Return these parts in SYMREF and ADDEND. You can
3224 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3225 values. */
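/* For example, ADDR == (const (plus (symbol_ref "x") (const_int 8)))
   yields *SYMREF == (symbol_ref "x") and *ADDEND == 8.  */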
3227 static bool
3228 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3230 HOST_WIDE_INT tmpaddend = 0;
3232 if (GET_CODE (addr) == CONST)
3233 addr = XEXP (addr, 0);
3235 if (GET_CODE (addr) == PLUS)
3237 if (!CONST_INT_P (XEXP (addr, 1)))
3238 return false;
3240 tmpaddend = INTVAL (XEXP (addr, 1));
3241 addr = XEXP (addr, 0);
3244 if (GET_CODE (addr) == SYMBOL_REF
3245 || (GET_CODE (addr) == UNSPEC
3246 && (XINT (addr, 1) == UNSPEC_GOTENT
3247 || XINT (addr, 1) == UNSPEC_PLT)))
3249 if (symref)
3250 *symref = addr;
3251 if (addend)
3252 *addend = tmpaddend;
3254 return true;
3256 return false;
3259 /* Return true if the address in OP is valid for constraint letter C
3260 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3261 pool MEMs should be accepted. Only the Q, R, S, T constraint
3262 letters are allowed for C. */
3264 static int
3265 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3267 rtx symref;
3268 struct s390_address addr;
3269 bool decomposed = false;
3271 if (!address_operand (op, GET_MODE (op)))
3272 return 0;
3274 /* This check makes sure that no symbolic addresses (except literal
3275 pool references) are accepted by the R or T constraints. */
3276 if (s390_loadrelative_operand_p (op, &symref, NULL)
3277 && (!lit_pool_ok
3278 || !SYMBOL_REF_P (symref)
3279 || !CONSTANT_POOL_ADDRESS_P (symref)))
3280 return 0;
3282 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3283 if (!lit_pool_ok)
3285 if (!s390_decompose_address (op, &addr))
3286 return 0;
3287 if (addr.literal_pool)
3288 return 0;
3289 decomposed = true;
3292 /* With reload, we sometimes get intermediate address forms that are
3293 actually invalid as-is, but we need to accept them in the most
3294 generic cases below ('R' or 'T'), since reload will in fact fix
3295 them up. LRA behaves differently here; we never see such forms,
3296 but on the other hand, we need to strictly reject every invalid
3297 address form. After both reload and LRA invalid address forms
3298 must be rejected, because nothing will fix them up later. Perform
3299 this check right up front. */
3300 if (lra_in_progress || reload_completed)
3302 if (!decomposed && !s390_decompose_address (op, &addr))
3303 return 0;
3304 decomposed = true;
3307 switch (c)
3309 case 'Q': /* no index short displacement */
3310 if (!decomposed && !s390_decompose_address (op, &addr))
3311 return 0;
3312 if (addr.indx)
3313 return 0;
3314 if (!s390_short_displacement (addr.disp))
3315 return 0;
3316 break;
3318 case 'R': /* with index short displacement */
3319 if (TARGET_LONG_DISPLACEMENT)
3321 if (!decomposed && !s390_decompose_address (op, &addr))
3322 return 0;
3323 if (!s390_short_displacement (addr.disp))
3324 return 0;
3326 /* Any invalid address here will be fixed up by reload,
3327 so accept it for the most generic constraint. */
3328 break;
3330 case 'S': /* no index long displacement */
3331 if (!decomposed && !s390_decompose_address (op, &addr))
3332 return 0;
3333 if (addr.indx)
3334 return 0;
3335 break;
3337 case 'T': /* with index long displacement */
3338 /* Any invalid address here will be fixed up by reload,
3339 so accept it for the most generic constraint. */
3340 break;
3342 default:
3343 return 0;
3345 return 1;
3349 /* Evaluates constraint strings described by the regular expression
3350 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3351 the constraint given in STR, or 0 else. */
3354 s390_mem_constraint (const char *str, rtx op)
3356 char c = str[0];
3358 switch (c)
3360 case 'A':
3361 /* Check for offsettable variants of memory constraints. */
3362 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3363 return 0;
3364 if ((reload_completed || reload_in_progress)
3365 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3366 return 0;
3367 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3368 case 'B':
3369 /* Check for non-literal-pool variants of memory constraints. */
3370 if (!MEM_P (op))
3371 return 0;
3372 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3373 case 'Q':
3374 case 'R':
3375 case 'S':
3376 case 'T':
3377 if (GET_CODE (op) != MEM)
3378 return 0;
3379 return s390_check_qrst_address (c, XEXP (op, 0), true);
3380 case 'Y':
3381 /* Simply check for the basic form of a shift count. Reload will
3382 take care of making sure we have a proper base register. */
3383 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3384 return 0;
3385 break;
3386 case 'Z':
3387 return s390_check_qrst_address (str[1], op, true);
3388 default:
3389 return 0;
3391 return 1;
3395 /* Evaluates constraint strings starting with letter O. Input
3396 parameter C is the letter following the "O" in the constraint
3397 string. Returns 1 if VALUE meets the respective constraint and 0
3398 otherwise. */
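/* For example, the 's' variant accepts 0x7fffffff (it survives truncation
   to SImode) but rejects 0x100000000, which truncates to 0.  */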
3401 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3403 if (!TARGET_EXTIMM)
3404 return 0;
3406 switch (c)
3408 case 's':
3409 return trunc_int_for_mode (value, SImode) == value;
3411 case 'p':
3412 return value == 0
3413 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3415 case 'n':
3416 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3418 default:
3419 gcc_unreachable ();
3424 /* Evaluates constraint strings starting with letter N. Parameter STR
3425 contains the letters following letter "N" in the constraint string.
3426 Returns true if VALUE matches the constraint. */
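/* For example, STR == "xHD0" requests any single HImode part of a DImode
   value differing from 0: it matches 0x1234 as well as
   0x0000123400000000, whereas STR == "3HD0" only matches values whose
   nonzero HImode part is the least significant one, such as 0x1234.  */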
3429 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3431 machine_mode mode, part_mode;
3432 int def;
3433 int part, part_goal;
3436 if (str[0] == 'x')
3437 part_goal = -1;
3438 else
3439 part_goal = str[0] - '0';
3441 switch (str[1])
3443 case 'Q':
3444 part_mode = QImode;
3445 break;
3446 case 'H':
3447 part_mode = HImode;
3448 break;
3449 case 'S':
3450 part_mode = SImode;
3451 break;
3452 default:
3453 return 0;
3456 switch (str[2])
3458 case 'H':
3459 mode = HImode;
3460 break;
3461 case 'S':
3462 mode = SImode;
3463 break;
3464 case 'D':
3465 mode = DImode;
3466 break;
3467 default:
3468 return 0;
3471 switch (str[3])
3473 case '0':
3474 def = 0;
3475 break;
3476 case 'F':
3477 def = -1;
3478 break;
3479 default:
3480 return 0;
3483 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3484 return 0;
3486 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3487 if (part < 0)
3488 return 0;
3489 if (part_goal != -1 && part_goal != part)
3490 return 0;
3492 return 1;
3496 /* Returns true if the input parameter VALUE is a float zero. */
3499 s390_float_const_zero_p (rtx value)
3501 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3502 && value == CONST0_RTX (GET_MODE (value)));
3505 /* Implement TARGET_REGISTER_MOVE_COST. */
3507 static int
3508 s390_register_move_cost (machine_mode mode,
3509 reg_class_t from, reg_class_t to)
3511 /* On s390, copy between fprs and gprs is expensive. */
3513 /* It becomes somewhat faster having ldgr/lgdr. */
3514 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3516 /* ldgr is single cycle. */
3517 if (reg_classes_intersect_p (from, GENERAL_REGS)
3518 && reg_classes_intersect_p (to, FP_REGS))
3519 return 1;
3520 /* lgdr needs 3 cycles. */
3521 if (reg_classes_intersect_p (to, GENERAL_REGS)
3522 && reg_classes_intersect_p (from, FP_REGS))
3523 return 3;
3526 /* Otherwise copying is done via memory. */
3527 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3528 && reg_classes_intersect_p (to, FP_REGS))
3529 || (reg_classes_intersect_p (from, FP_REGS)
3530 && reg_classes_intersect_p (to, GENERAL_REGS)))
3531 return 10;
3533 /* We usually do not want to copy via CC. */
3534 if (reg_classes_intersect_p (from, CC_REGS)
3535 || reg_classes_intersect_p (to, CC_REGS))
3536 return 5;
3538 return 1;
3541 /* Implement TARGET_MEMORY_MOVE_COST. */
3543 static int
3544 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3545 reg_class_t rclass ATTRIBUTE_UNUSED,
3546 bool in ATTRIBUTE_UNUSED)
3548 return 2;
3551 /* Compute a (partial) cost for rtx X. Return true if the complete
3552 cost has been computed, and false if subexpressions should be
3553 scanned. In either case, *TOTAL contains the cost result. The
3554 initial value of *TOTAL is the default value computed by
3555 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3556 code of the superexpression of x. */
3558 static bool
3559 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3560 int opno ATTRIBUTE_UNUSED,
3561 int *total, bool speed ATTRIBUTE_UNUSED)
3563 int code = GET_CODE (x);
3564 switch (code)
3566 case CONST:
3567 case CONST_INT:
3568 case LABEL_REF:
3569 case SYMBOL_REF:
3570 case CONST_DOUBLE:
3571 case CONST_WIDE_INT:
3572 case MEM:
3573 *total = 0;
3574 return true;
3576 case SET:
3578 /* Without this a conditional move instruction would be
3579 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3580 comparison operator). That's a bit pessimistic. */
3582 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3583 return false;
3585 rtx cond = XEXP (SET_SRC (x), 0);
3587 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3588 return false;
3590 /* It is going to be a load/store on condition. Make it
3591 slightly more expensive than a normal load. */
3592 *total = COSTS_N_INSNS (1) + 1;
3594 rtx dst = SET_DEST (x);
3595 rtx then = XEXP (SET_SRC (x), 1);
3596 rtx els = XEXP (SET_SRC (x), 2);
3598 /* It is a real IF-THEN-ELSE. An additional move will be
3599 needed to implement that. */
3600 if (!TARGET_Z15
3601 && reload_completed
3602 && !rtx_equal_p (dst, then)
3603 && !rtx_equal_p (dst, els))
3604 *total += COSTS_N_INSNS (1) / 2;
3606 /* A minor penalty for constants we cannot directly handle. */
3607 if ((CONST_INT_P (then) || CONST_INT_P (els))
3608 && (!TARGET_Z13 || MEM_P (dst)
3609 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3610 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3611 *total += COSTS_N_INSNS (1) / 2;
3613 /* A store on condition can only handle register src operands. */
3614 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3615 *total += COSTS_N_INSNS (1) / 2;
3617 return true;
3619 case IOR:
3621 /* nnrk, nngrk */
3622 if (TARGET_Z15
3623 && (mode == SImode || mode == DImode)
3624 && GET_CODE (XEXP (x, 0)) == NOT
3625 && GET_CODE (XEXP (x, 1)) == NOT)
3627 *total = COSTS_N_INSNS (1);
3628 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3629 *total += 1;
3630 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3631 *total += 1;
3632 return true;
3635 /* risbg */
3636 if (GET_CODE (XEXP (x, 0)) == AND
3637 && GET_CODE (XEXP (x, 1)) == ASHIFT
3638 && REG_P (XEXP (XEXP (x, 0), 0))
3639 && REG_P (XEXP (XEXP (x, 1), 0))
3640 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3641 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3642 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3643 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3645 *total = COSTS_N_INSNS (2);
3646 return true;
3649 /* ~AND on a 128 bit mode. This can be done using a vector
3650 instruction. */
3651 if (TARGET_VXE
3652 && GET_CODE (XEXP (x, 0)) == NOT
3653 && GET_CODE (XEXP (x, 1)) == NOT
3654 && REG_P (XEXP (XEXP (x, 0), 0))
3655 && REG_P (XEXP (XEXP (x, 1), 0))
3656 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3657 && s390_hard_regno_mode_ok (VR0_REGNUM,
3658 GET_MODE (XEXP (XEXP (x, 0), 0))))
3660 *total = COSTS_N_INSNS (1);
3661 return true;
3664 *total = COSTS_N_INSNS (1);
3665 return false;
3667 case AND:
3668 /* nork, nogrk */
3669 if (TARGET_Z15
3670 && (mode == SImode || mode == DImode)
3671 && GET_CODE (XEXP (x, 0)) == NOT
3672 && GET_CODE (XEXP (x, 1)) == NOT)
3674 *total = COSTS_N_INSNS (1);
3675 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3676 *total += 1;
3677 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3678 *total += 1;
3679 return true;
3681 /* fallthrough */
3682 case ASHIFT:
3683 case ASHIFTRT:
3684 case LSHIFTRT:
3685 case ROTATE:
3686 case ROTATERT:
3687 case XOR:
3688 case NEG:
3689 case NOT:
3690 case PLUS:
3691 case MINUS:
3692 *total = COSTS_N_INSNS (1);
3693 return false;
3695 case MULT:
3696 switch (mode)
3698 case E_SImode:
3700 rtx left = XEXP (x, 0);
3701 rtx right = XEXP (x, 1);
3702 if (GET_CODE (right) == CONST_INT
3703 && CONST_OK_FOR_K (INTVAL (right)))
3704 *total = s390_cost->mhi;
3705 else if (GET_CODE (left) == SIGN_EXTEND)
3706 *total = s390_cost->mh;
3707 else
3708 *total = s390_cost->ms; /* msr, ms, msy */
3709 break;
3711 case E_DImode:
3713 rtx left = XEXP (x, 0);
3714 rtx right = XEXP (x, 1);
3715 if (TARGET_ZARCH)
3717 if (GET_CODE (right) == CONST_INT
3718 && CONST_OK_FOR_K (INTVAL (right)))
3719 *total = s390_cost->mghi;
3720 else if (GET_CODE (left) == SIGN_EXTEND)
3721 *total = s390_cost->msgf;
3722 else
3723 *total = s390_cost->msg; /* msgr, msg */
3725 else /* TARGET_31BIT */
3727 if (GET_CODE (left) == SIGN_EXTEND
3728 && GET_CODE (right) == SIGN_EXTEND)
3729 /* mulsidi case: mr, m */
3730 *total = s390_cost->m;
3731 else if (GET_CODE (left) == ZERO_EXTEND
3732 && GET_CODE (right) == ZERO_EXTEND)
3733 /* umulsidi case: ml, mlr */
3734 *total = s390_cost->ml;
3735 else
3736 /* Complex calculation is required. */
3737 *total = COSTS_N_INSNS (40);
3739 break;
3741 case E_SFmode:
3742 case E_DFmode:
3743 *total = s390_cost->mult_df;
3744 break;
3745 case E_TFmode:
3746 *total = s390_cost->mxbr;
3747 break;
3748 default:
3749 return false;
3751 return false;
3753 case FMA:
3754 switch (mode)
3756 case E_DFmode:
3757 *total = s390_cost->madbr;
3758 break;
3759 case E_SFmode:
3760 *total = s390_cost->maebr;
3761 break;
3762 default:
3763 return false;
3765 /* A negate in the third argument is free: FMSUB. */
3766 if (GET_CODE (XEXP (x, 2)) == NEG)
3768 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3769 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3770 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3771 return true;
3773 return false;
3775 case UDIV:
3776 case UMOD:
3777 if (mode == TImode) /* 128 bit division */
3778 *total = s390_cost->dlgr;
3779 else if (mode == DImode)
3781 rtx right = XEXP (x, 1);
3782 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3783 *total = s390_cost->dlr;
3784 else /* 64 by 64 bit division */
3785 *total = s390_cost->dlgr;
3787 else if (mode == SImode) /* 32 bit division */
3788 *total = s390_cost->dlr;
3789 return false;
3791 case DIV:
3792 case MOD:
3793 if (mode == DImode)
3795 rtx right = XEXP (x, 1);
3796 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3797 if (TARGET_ZARCH)
3798 *total = s390_cost->dsgfr;
3799 else
3800 *total = s390_cost->dr;
3801 else /* 64 by 64 bit division */
3802 *total = s390_cost->dsgr;
3804 else if (mode == SImode) /* 32 bit division */
3805 *total = s390_cost->dlr;
3806 else if (mode == SFmode)
3808 *total = s390_cost->debr;
3810 else if (mode == DFmode)
3812 *total = s390_cost->ddbr;
3814 else if (mode == TFmode)
3816 *total = s390_cost->dxbr;
3818 return false;
3820 case SQRT:
3821 if (mode == SFmode)
3822 *total = s390_cost->sqebr;
3823 else if (mode == DFmode)
3824 *total = s390_cost->sqdbr;
3825 else /* TFmode */
3826 *total = s390_cost->sqxbr;
3827 return false;
3829 case SIGN_EXTEND:
3830 case ZERO_EXTEND:
3831 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3832 || outer_code == PLUS || outer_code == MINUS
3833 || outer_code == COMPARE)
3834 *total = 0;
3835 return false;
3837 case COMPARE:
3838 *total = COSTS_N_INSNS (1);
3840 /* nxrk, nxgrk ~(a^b)==0 */
3841 if (TARGET_Z15
3842 && GET_CODE (XEXP (x, 0)) == NOT
3843 && XEXP (x, 1) == const0_rtx
3844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3845 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3846 && mode == CCZmode)
3848 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3849 *total += 1;
3850 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3851 *total += 1;
3852 return true;
3855 /* nnrk, nngrk, nork, nogrk */
3856 if (TARGET_Z15
3857 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3858 && XEXP (x, 1) == const0_rtx
3859 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3860 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3861 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3862 && mode == CCZmode)
3864 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3865 *total += 1;
3866 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3867 *total += 1;
3868 return true;
3871 if (GET_CODE (XEXP (x, 0)) == AND
3872 && GET_CODE (XEXP (x, 1)) == CONST_INT
3873 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3875 rtx op0 = XEXP (XEXP (x, 0), 0);
3876 rtx op1 = XEXP (XEXP (x, 0), 1);
3877 rtx op2 = XEXP (x, 1);
3879 if (memory_operand (op0, GET_MODE (op0))
3880 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3881 return true;
3882 if (register_operand (op0, GET_MODE (op0))
3883 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3884 return true;
3886 return false;
3888 default:
3889 return false;
3893 /* Return the cost of an address rtx ADDR. */
3895 static int
3896 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3897 addr_space_t as ATTRIBUTE_UNUSED,
3898 bool speed ATTRIBUTE_UNUSED)
3900 struct s390_address ad;
3901 if (!s390_decompose_address (addr, &ad))
3902 return 1000;
3904 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3907 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3908 static int
3909 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3910 tree vectype,
3911 int misalign ATTRIBUTE_UNUSED)
3913 switch (type_of_cost)
3915 case scalar_stmt:
3916 case scalar_load:
3917 case scalar_store:
3918 case vector_stmt:
3919 case vector_load:
3920 case vector_store:
3921 case vector_gather_load:
3922 case vector_scatter_store:
3923 case vec_to_scalar:
3924 case scalar_to_vec:
3925 case cond_branch_not_taken:
3926 case vec_perm:
3927 case vec_promote_demote:
3928 case unaligned_load:
3929 case unaligned_store:
3930 return 1;
3932 case cond_branch_taken:
3933 return 3;
3935 case vec_construct:
3936 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3938 default:
3939 gcc_unreachable ();
3943 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3944 otherwise return 0. */
3947 tls_symbolic_operand (rtx op)
3949 if (GET_CODE (op) != SYMBOL_REF)
3950 return 0;
3951 return SYMBOL_REF_TLS_MODEL (op);
3954 /* Split DImode access register reference REG (on 64-bit) into its constituent
3955 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3956 gen_highpart cannot be used as they assume all registers are word-sized,
3957 while our access registers have only half that size. */
3959 void
3960 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3962 gcc_assert (TARGET_64BIT);
3963 gcc_assert (ACCESS_REG_P (reg));
3964 gcc_assert (GET_MODE (reg) == DImode);
3965 gcc_assert (!(REGNO (reg) & 1));
3967 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3968 *hi = gen_rtx_REG (SImode, REGNO (reg));
3971 /* Return true if OP contains a symbol reference. */
3973 bool
3974 symbolic_reference_mentioned_p (rtx op)
3976 const char *fmt;
3977 int i;
3979 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3980 return 1;
3982 fmt = GET_RTX_FORMAT (GET_CODE (op));
3983 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3985 if (fmt[i] == 'E')
3987 int j;
3989 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3990 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3991 return 1;
3994 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3995 return 1;
3998 return 0;
4001 /* Return true if OP contains a reference to a thread-local symbol. */
4003 bool
4004 tls_symbolic_reference_mentioned_p (rtx op)
4006 const char *fmt;
4007 int i;
4009 if (GET_CODE (op) == SYMBOL_REF)
4010 return tls_symbolic_operand (op);
4012 fmt = GET_RTX_FORMAT (GET_CODE (op));
4013 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4015 if (fmt[i] == 'E')
4017 int j;
4019 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4020 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4021 return true;
4024 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4025 return true;
4028 return false;
4032 /* Return true if OP is a legitimate general operand when
4033 generating PIC code. It is given that flag_pic is on
4034 and that OP satisfies CONSTANT_P. */
4037 legitimate_pic_operand_p (rtx op)
4039 /* Accept all non-symbolic constants. */
4040 if (!SYMBOLIC_CONST (op))
4041 return 1;
4043 /* Accept addresses that can be expressed relative to (pc). */
4044 if (larl_operand (op, VOIDmode))
4045 return 1;
4047 /* Reject everything else; must be handled
4048 via emit_symbolic_move. */
4049 return 0;
4052 /* Returns true if the constant value OP is a legitimate general operand.
4053 It is given that OP satisfies CONSTANT_P. */
4055 static bool
4056 s390_legitimate_constant_p (machine_mode mode, rtx op)
4058 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4060 if (GET_MODE_SIZE (mode) != 16)
4061 return 0;
4063 if (!satisfies_constraint_j00 (op)
4064 && !satisfies_constraint_jm1 (op)
4065 && !satisfies_constraint_jKK (op)
4066 && !satisfies_constraint_jxx (op)
4067 && !satisfies_constraint_jyy (op))
4068 return 0;
4071 /* Accept all non-symbolic constants. */
4072 if (!SYMBOLIC_CONST (op))
4073 return 1;
4075 /* Accept immediate LARL operands. */
4076 if (larl_operand (op, mode))
4077 return 1;
4079 /* Thread-local symbols are never legal constants. This is
4080 so that emit_call knows that computing such addresses
4081 might require a function call. */
4082 if (TLS_SYMBOLIC_CONST (op))
4083 return 0;
4085 /* In the PIC case, symbolic constants must *not* be
4086 forced into the literal pool. We accept them here,
4087 so that they will be handled by emit_symbolic_move. */
4088 if (flag_pic)
4089 return 1;
4091 /* All remaining non-PIC symbolic constants are
4092 forced into the literal pool. */
4093 return 0;
4096 /* Determine if it's legal to put X into the constant pool. This
4097 is not possible if X contains the address of a symbol that is
4098 not constant (TLS) or not known at final link time (PIC). */
4100 static bool
4101 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4103 switch (GET_CODE (x))
4105 case CONST_INT:
4106 case CONST_DOUBLE:
4107 case CONST_WIDE_INT:
4108 case CONST_VECTOR:
4109 /* Accept all non-symbolic constants. */
4110 return false;
4112 case NEG:
4113 /* Accept a unary '-' only on scalar numeric constants. */
4114 switch (GET_CODE (XEXP (x, 0)))
4116 case CONST_INT:
4117 case CONST_DOUBLE:
4118 case CONST_WIDE_INT:
4119 return false;
4120 default:
4121 return true;
4124 case LABEL_REF:
4125 /* Labels are OK iff we are non-PIC. */
4126 return flag_pic != 0;
4128 case SYMBOL_REF:
4129 /* 'Naked' TLS symbol references are never OK,
4130 non-TLS symbols are OK iff we are non-PIC. */
4131 if (tls_symbolic_operand (x))
4132 return true;
4133 else
4134 return flag_pic != 0;
4136 case CONST:
4137 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4138 case PLUS:
4139 case MINUS:
4140 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4141 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4143 case UNSPEC:
4144 switch (XINT (x, 1))
4146 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4147 case UNSPEC_LTREL_OFFSET:
4148 case UNSPEC_GOT:
4149 case UNSPEC_GOTOFF:
4150 case UNSPEC_PLTOFF:
4151 case UNSPEC_TLSGD:
4152 case UNSPEC_TLSLDM:
4153 case UNSPEC_NTPOFF:
4154 case UNSPEC_DTPOFF:
4155 case UNSPEC_GOTNTPOFF:
4156 case UNSPEC_INDNTPOFF:
4157 return false;
4159 /* If the literal pool shares the code section, execute template
4160 placeholders may be put into the pool as well. */
4161 case UNSPEC_INSN:
4162 default:
4163 return true;
4165 break;
4167 default:
4168 gcc_unreachable ();
4172 /* Returns true if the constant value OP is a legitimate general
4173 operand during and after reload. The difference from
4174 legitimate_constant_p is that this function will not accept
4175 a constant that would need to be forced to the literal pool
4176 before it can be used as operand.
4177 This function accepts all constants which can be loaded directly
4178 into a GPR. */
4180 bool
4181 legitimate_reload_constant_p (rtx op)
4183 /* Accept la(y) operands. */
4184 if (GET_CODE (op) == CONST_INT
4185 && DISP_IN_RANGE (INTVAL (op)))
4186 return true;
4188 /* Accept l(g)hi/l(g)fi operands. */
4189 if (GET_CODE (op) == CONST_INT
4190 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4191 return true;
4193 /* Accept lliXX operands. */
4194 if (TARGET_ZARCH
4195 && GET_CODE (op) == CONST_INT
4196 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4197 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4198 return true;
4200 if (TARGET_EXTIMM
4201 && GET_CODE (op) == CONST_INT
4202 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4203 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4204 return true;
4206 /* Accept larl operands. */
4207 if (larl_operand (op, VOIDmode))
4208 return true;
4210 /* Accept floating-point zero operands that fit into a single GPR. */
4211 if (GET_CODE (op) == CONST_DOUBLE
4212 && s390_float_const_zero_p (op)
4213 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4214 return true;
4216 /* Accept double-word operands that can be split. */
4217 if (GET_CODE (op) == CONST_WIDE_INT
4218 || (GET_CODE (op) == CONST_INT
4219 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4221 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4222 rtx hi = operand_subword (op, 0, 0, dword_mode);
4223 rtx lo = operand_subword (op, 1, 0, dword_mode);
4224 return legitimate_reload_constant_p (hi)
4225 && legitimate_reload_constant_p (lo);
4228 /* Everything else cannot be handled without reload. */
4229 return false;
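/* Worked example for the double-word case above (added; not in the original
   source, assuming word_mode == SImode, i.e. 31-bit): the constant
   0x500000002 does not survive truncation to SImode, so it is split via
   DImode into the big-endian subwords hi = 5 and lo = 2.  Both halves are
   small immediates accepted by the earlier checks, so the constant as a
   whole is accepted.  */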
4232 /* Returns true if the constant value OP is a legitimate fp operand
4233 during and after reload.
4234 This function accepts all constants which can be loaded directly
4235 into an FPR. */
4237 static bool
4238 legitimate_reload_fp_constant_p (rtx op)
4240 /* Accept floating-point zero operands if the load zero instruction
4241 can be used. Prior to z196 the load fp zero instruction caused a
4242 performance penalty if the result is used as a BFP number. */
4243 if (TARGET_Z196
4244 && GET_CODE (op) == CONST_DOUBLE
4245 && s390_float_const_zero_p (op))
4246 return true;
4248 return false;
4251 /* Returns true if the constant value OP is a legitimate vector operand
4252 during and after reload.
4253 This function accepts all constants which can be loaded directly
4254 into a VR. */
4256 static bool
4257 legitimate_reload_vector_constant_p (rtx op)
4259 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4260 && (satisfies_constraint_j00 (op)
4261 || satisfies_constraint_jm1 (op)
4262 || satisfies_constraint_jKK (op)
4263 || satisfies_constraint_jxx (op)
4264 || satisfies_constraint_jyy (op)))
4265 return true;
4267 return false;
4270 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4271 return the class of reg to actually use. */
4273 static reg_class_t
4274 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4276 switch (GET_CODE (op))
4278 /* Constants we cannot reload into general registers
4279 must be forced into the literal pool. */
4280 case CONST_VECTOR:
4281 case CONST_DOUBLE:
4282 case CONST_INT:
4283 case CONST_WIDE_INT:
4284 if (reg_class_subset_p (GENERAL_REGS, rclass)
4285 && legitimate_reload_constant_p (op))
4286 return GENERAL_REGS;
4287 else if (reg_class_subset_p (ADDR_REGS, rclass)
4288 && legitimate_reload_constant_p (op))
4289 return ADDR_REGS;
4290 else if (reg_class_subset_p (FP_REGS, rclass)
4291 && legitimate_reload_fp_constant_p (op))
4292 return FP_REGS;
4293 else if (reg_class_subset_p (VEC_REGS, rclass)
4294 && legitimate_reload_vector_constant_p (op))
4295 return VEC_REGS;
4297 return NO_REGS;
4299 /* If a symbolic constant or a PLUS is reloaded,
4300 it is most likely being used as an address, so
4301 prefer ADDR_REGS. If 'class' is not a superset
4302 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4303 case CONST:
4304 /* Symrefs cannot be pushed into the literal pool with -fPIC
4305 so we *MUST NOT* return NO_REGS for these cases
4306 (s390_cannot_force_const_mem will return true).
4308 On the other hand we MUST return NO_REGS for symrefs with
4309 invalid addend which might have been pushed to the literal
4310 pool (no -fPIC). Usually we would expect them to be
4311 handled via secondary reload but this does not happen if
4312 they are used as literal pool slot replacement in reload
4313 inheritance (see emit_input_reload_insns). */
4314 if (GET_CODE (XEXP (op, 0)) == PLUS
4315 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4316 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4318 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4319 return ADDR_REGS;
4320 else
4321 return NO_REGS;
4323 /* fallthrough */
4324 case LABEL_REF:
4325 case SYMBOL_REF:
4326 if (!legitimate_reload_constant_p (op))
4327 return NO_REGS;
4328 /* fallthrough */
4329 case PLUS:
4330 /* load address will be used. */
4331 if (reg_class_subset_p (ADDR_REGS, rclass))
4332 return ADDR_REGS;
4333 else
4334 return NO_REGS;
4336 default:
4337 break;
4340 return rclass;
4343 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4344 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4345 aligned. */
4347 bool
4348 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4350 HOST_WIDE_INT addend;
4351 rtx symref;
4353 /* The "required alignment" might be 0 (e.g. for certain structs
4354 accessed via BLKmode). Early abort in this case, as well as when
4355 an alignment > 8 is required. */
4356 if (alignment < 2 || alignment > 8)
4357 return false;
4359 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4360 return false;
4362 if (addend & (alignment - 1))
4363 return false;
4365 if (GET_CODE (symref) == SYMBOL_REF)
4367 /* s390_encode_section_info is not called for anchors, since they don't
4368 have corresponding VAR_DECLs. Therefore, we cannot rely on
4369 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4370 if (SYMBOL_REF_ANCHOR_P (symref))
4372 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4373 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4374 / BITS_PER_UNIT);
4376 gcc_assert (block_offset >= 0);
4377 return ((block_offset & (alignment - 1)) == 0
4378 && block_alignment >= alignment);
4381 /* We have load-relative instructions for 2-byte, 4-byte, and
4382 8-byte alignment so allow only these. */
4383 switch (alignment)
4385 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4386 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4387 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4388 default: return false;
4392 if (GET_CODE (symref) == UNSPEC
4393 && alignment <= UNITS_PER_LONG)
4394 return true;
4396 return false;
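/* Worked example (added; not in the original source): for
   ADDR = (const (plus (symbol_ref "x") (const_int 6))) and ALIGNMENT = 4 the
   check "addend & (alignment - 1)" yields 6 & 3 = 2, so the function returns
   false; with an addend of 8 the addend test passes and the result depends
   on SYMBOL_FLAG_NOTALIGN4_P of the symbol.  */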
4399 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4400 operand, SCRATCH is used to load the even part of the address, and
4401 one is then added. */
4403 void
4404 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4406 HOST_WIDE_INT addend;
4407 rtx symref;
4409 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4410 gcc_unreachable ();
4412 if (!(addend & 1))
4413 /* Easy case. The addend is even so larl will do fine. */
4414 emit_move_insn (reg, addr);
4415 else
4417 /* We can leave the scratch register untouched if the target
4418 register is a valid base register. */
4419 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4420 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4421 scratch = reg;
4423 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4424 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4426 if (addend != 1)
4427 emit_move_insn (scratch,
4428 gen_rtx_CONST (Pmode,
4429 gen_rtx_PLUS (Pmode, symref,
4430 GEN_INT (addend - 1))));
4431 else
4432 emit_move_insn (scratch, symref);
4434 /* Increment the address using la in order to avoid clobbering cc. */
4435 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
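/* Illustrative sequence (added; not in the original source): for
   ADDR = sym + 5 the code above loads sym + 4 into SCRATCH (a valid larl
   operand, since the addend is now even) and then emits the equivalent of
   "la REG, 1(SCRATCH)" to add the remaining 1 without clobbering cc.  */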
4439 /* Generate what is necessary to move between REG and MEM using
4440 SCRATCH. The direction is given by TOMEM. */
4442 void
4443 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4445 /* Reload might have pulled a constant out of the literal pool.
4446 Force it back in. */
4447 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4448 || GET_CODE (mem) == CONST_WIDE_INT
4449 || GET_CODE (mem) == CONST_VECTOR
4450 || GET_CODE (mem) == CONST)
4451 mem = force_const_mem (GET_MODE (reg), mem);
4453 gcc_assert (MEM_P (mem));
4455 /* For a load from memory we can leave the scratch register
4456 untouched if the target register is a valid base register. */
4457 if (!tomem
4458 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4459 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4460 && GET_MODE (reg) == GET_MODE (scratch))
4461 scratch = reg;
4463 /* Load address into scratch register. Since we can't have a
4464 secondary reload for a secondary reload we have to cover the case
4465 where larl would need a secondary reload here as well. */
4466 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4468 /* Now we can use a standard load/store to do the move. */
4469 if (tomem)
4470 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4471 else
4472 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4475 /* Inform reload about cases where moving X with a mode MODE to a register in
4476 RCLASS requires an extra scratch or immediate register. Return the class
4477 needed for the immediate register. */
4479 static reg_class_t
4480 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4481 machine_mode mode, secondary_reload_info *sri)
4483 enum reg_class rclass = (enum reg_class) rclass_i;
4485 /* Intermediate register needed. */
4486 if (reg_classes_intersect_p (CC_REGS, rclass))
4487 return GENERAL_REGS;
4489 if (TARGET_VX)
4491 /* The vst/vl vector move instructions allow only short
4492 displacements. */
4493 if (MEM_P (x)
4494 && GET_CODE (XEXP (x, 0)) == PLUS
4495 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4496 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4497 && reg_class_subset_p (rclass, VEC_REGS)
4498 && (!reg_class_subset_p (rclass, FP_REGS)
4499 || (GET_MODE_SIZE (mode) > 8
4500 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4502 if (in_p)
4503 sri->icode = (TARGET_64BIT ?
4504 CODE_FOR_reloaddi_la_in :
4505 CODE_FOR_reloadsi_la_in);
4506 else
4507 sri->icode = (TARGET_64BIT ?
4508 CODE_FOR_reloaddi_la_out :
4509 CODE_FOR_reloadsi_la_out);
4513 if (TARGET_Z10)
4515 HOST_WIDE_INT offset;
4516 rtx symref;
4518 /* On z10 several optimizer steps may generate larl operands with
4519 an odd addend. */
4520 if (in_p
4521 && s390_loadrelative_operand_p (x, &symref, &offset)
4522 && mode == Pmode
4523 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4524 && (offset & 1) == 1)
4525 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4526 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4528 /* Handle all the (mem (symref)) accesses we cannot use the z10
4529 instructions for. */
4530 if (MEM_P (x)
4531 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4532 && (mode == QImode
4533 || !reg_class_subset_p (rclass, GENERAL_REGS)
4534 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4535 || !s390_check_symref_alignment (XEXP (x, 0),
4536 GET_MODE_SIZE (mode))))
4538 #define __SECONDARY_RELOAD_CASE(M,m) \
4539 case E_##M##mode: \
4540 if (TARGET_64BIT) \
4541 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4542 CODE_FOR_reload##m##di_tomem_z10; \
4543 else \
4544 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4545 CODE_FOR_reload##m##si_tomem_z10; \
4546 break;
4548 switch (GET_MODE (x))
4550 __SECONDARY_RELOAD_CASE (QI, qi);
4551 __SECONDARY_RELOAD_CASE (HI, hi);
4552 __SECONDARY_RELOAD_CASE (SI, si);
4553 __SECONDARY_RELOAD_CASE (DI, di);
4554 __SECONDARY_RELOAD_CASE (TI, ti);
4555 __SECONDARY_RELOAD_CASE (SF, sf);
4556 __SECONDARY_RELOAD_CASE (DF, df);
4557 __SECONDARY_RELOAD_CASE (TF, tf);
4558 __SECONDARY_RELOAD_CASE (SD, sd);
4559 __SECONDARY_RELOAD_CASE (DD, dd);
4560 __SECONDARY_RELOAD_CASE (TD, td);
4561 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4562 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4563 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4564 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4565 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4566 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4567 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4568 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4569 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4570 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4571 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4572 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4573 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4574 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4575 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4576 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4577 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4578 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4579 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4580 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4581 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4582 default:
4583 gcc_unreachable ();
4585 #undef __SECONDARY_RELOAD_CASE
4589 /* We need a scratch register when loading a PLUS expression which
4590 is not a legitimate operand of the LOAD ADDRESS instruction. */
4591 /* LRA can deal with the transformation of a plus op very well -- so we
4592 don't need to prompt LRA in this case. */
4593 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4594 sri->icode = (TARGET_64BIT ?
4595 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4597 /* When performing a multiword move from or to memory, we have to make sure the
4598 second chunk in memory is addressable without causing a displacement
4599 overflow. If that would be the case we calculate the address in
4600 a scratch register. */
4601 if (MEM_P (x)
4602 && GET_CODE (XEXP (x, 0)) == PLUS
4603 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4604 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4605 + GET_MODE_SIZE (mode) - 1))
4607 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4608 in an s_operand address since we may fall back to lm/stm. So we only
4609 have to care about overflows in the b+i+d case. */
4610 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4611 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4612 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4613 /* For FP_REGS no lm/stm is available so this check is triggered
4614 for displacement overflows in b+i+d and b+d like addresses. */
4615 || (reg_classes_intersect_p (FP_REGS, rclass)
4616 && s390_class_max_nregs (FP_REGS, mode) > 1))
4618 if (in_p)
4619 sri->icode = (TARGET_64BIT ?
4620 CODE_FOR_reloaddi_la_in :
4621 CODE_FOR_reloadsi_la_in);
4622 else
4623 sri->icode = (TARGET_64BIT ?
4624 CODE_FOR_reloaddi_la_out :
4625 CODE_FOR_reloadsi_la_out);
4629 /* A scratch address register is needed when a symbolic constant is
4630 copied to r0 when compiling with -fPIC. In other cases the target
4631 register might be used as temporary (see legitimize_pic_address). */
4632 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4633 sri->icode = (TARGET_64BIT ?
4634 CODE_FOR_reloaddi_PIC_addr :
4635 CODE_FOR_reloadsi_PIC_addr);
4637 /* Either scratch or no register needed. */
4638 return NO_REGS;
4641 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4643 We need secondary memory to move data between GPRs and FPRs.
4645 - With DFP the ldgr/lgdr instructions are available. Due to the
4646 different alignment we cannot use them for SFmode. For 31 bit a
4647 64 bit value in GPR would be a register pair so here we still
4648 need to go via memory.
4650 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4651 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4652 in full VRs, so these moves still go via memory on z13 as
4653 well.
4655 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4657 static bool
4658 s390_secondary_memory_needed (machine_mode mode,
4659 reg_class_t class1, reg_class_t class2)
4661 return (((reg_classes_intersect_p (class1, VEC_REGS)
4662 && reg_classes_intersect_p (class2, GENERAL_REGS))
4663 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4664 && reg_classes_intersect_p (class2, VEC_REGS)))
4665 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4666 || GET_MODE_SIZE (mode) != 8)
4667 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4668 && GET_MODE_SIZE (mode) > 8)));
4671 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4673 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4674 because the movsi and movsf patterns don't handle r/f moves. */
4676 static machine_mode
4677 s390_secondary_memory_needed_mode (machine_mode mode)
4679 if (GET_MODE_BITSIZE (mode) < 32)
4680 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4681 return mode;
4684 /* Generate code to load SRC, which is a PLUS that is not a
4685 legitimate operand for the LA instruction, into TARGET.
4686 SCRATCH may be used as scratch register. */
4688 void
4689 s390_expand_plus_operand (rtx target, rtx src,
4690 rtx scratch)
4692 rtx sum1, sum2;
4693 struct s390_address ad;
4695 /* src must be a PLUS; get its two operands. */
4696 gcc_assert (GET_CODE (src) == PLUS);
4697 gcc_assert (GET_MODE (src) == Pmode);
4699 /* Check if any of the two operands is already scheduled
4700 for replacement by reload. This can happen e.g. when
4701 float registers occur in an address. */
4702 sum1 = find_replacement (&XEXP (src, 0));
4703 sum2 = find_replacement (&XEXP (src, 1));
4704 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4706 /* If the address is already strictly valid, there's nothing to do. */
4707 if (!s390_decompose_address (src, &ad)
4708 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4709 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4711 /* Otherwise, one of the operands cannot be an address register;
4712 we reload its value into the scratch register. */
4713 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4715 emit_move_insn (scratch, sum1);
4716 sum1 = scratch;
4718 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4720 emit_move_insn (scratch, sum2);
4721 sum2 = scratch;
4724 /* According to the way these invalid addresses are generated
4725 in reload.c, it should never happen (at least on s390) that
4726 *neither* of the PLUS components, after find_replacements
4727 was applied, is an address register. */
4728 if (sum1 == scratch && sum2 == scratch)
4730 debug_rtx (src);
4731 gcc_unreachable ();
4734 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4737 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4738 is only ever performed on addresses, so we can mark the
4739 sum as legitimate for LA in any case. */
4740 s390_load_address (target, src);
4744 /* Return true if ADDR is a valid memory address.
4745 STRICT specifies whether strict register checking applies. */
4747 static bool
4748 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4750 struct s390_address ad;
4752 if (TARGET_Z10
4753 && larl_operand (addr, VOIDmode)
4754 && (mode == VOIDmode
4755 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4756 return true;
4758 if (!s390_decompose_address (addr, &ad))
4759 return false;
4761 /* The vector memory instructions only support short displacements.
4762 Reject invalid displacements early to prevent plenty of lay
4763 instructions from being generated later which then cannot be
4764 merged properly. */
4765 if (TARGET_VX
4766 && VECTOR_MODE_P (mode)
4767 && ad.disp != NULL_RTX
4768 && CONST_INT_P (ad.disp)
4769 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4770 return false;
4772 if (strict)
4774 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4775 return false;
4777 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4778 return false;
4780 else
4782 if (ad.base
4783 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4784 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4785 return false;
4787 if (ad.indx
4788 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4789 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4790 return false;
4792 return true;
4795 /* Return true if OP is a valid operand for the LA instruction.
4796 In 31-bit, we need to prove that the result is used as an
4797 address, as LA performs only a 31-bit addition. */
4799 bool
4800 legitimate_la_operand_p (rtx op)
4802 struct s390_address addr;
4803 if (!s390_decompose_address (op, &addr))
4804 return false;
4806 return (TARGET_64BIT || addr.pointer);
4809 /* Return true if it is valid *and* preferable to use LA to
4810 compute the sum of OP1 and OP2. */
4812 bool
4813 preferred_la_operand_p (rtx op1, rtx op2)
4815 struct s390_address addr;
4817 if (op2 != const0_rtx)
4818 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4820 if (!s390_decompose_address (op1, &addr))
4821 return false;
4822 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4823 return false;
4824 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4825 return false;
4827 /* Avoid LA instructions with index (and base) register on z196 or
4828 later; it is preferable to use regular add instructions when
4829 possible. Starting with zEC12 the la with index register is
4830 "uncracked" again but still slower than a regular add. */
4831 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4832 return false;
4834 if (!TARGET_64BIT && !addr.pointer)
4835 return false;
4837 if (addr.pointer)
4838 return true;
4840 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4841 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4842 return true;
4844 return false;
4847 /* Emit a forced load-address operation to load SRC into DST.
4848 This will use the LOAD ADDRESS instruction even in situations
4849 where legitimate_la_operand_p (SRC) returns false. */
4851 void
4852 s390_load_address (rtx dst, rtx src)
4854 if (TARGET_64BIT)
4855 emit_move_insn (dst, src);
4856 else
4857 emit_insn (gen_force_la_31 (dst, src));
4860 /* Return true if it is OK to use SYMBOL_REF in a relative address. */
4862 bool
4863 s390_rel_address_ok_p (rtx symbol_ref)
4865 tree decl;
4867 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4868 return true;
4870 decl = SYMBOL_REF_DECL (symbol_ref);
4872 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4873 return (s390_pic_data_is_text_relative
4874 || (decl
4875 && TREE_CODE (decl) == FUNCTION_DECL));
4877 return false;
4880 /* Return a legitimate reference for ORIG (an address) using the
4881 register REG. If REG is 0, a new pseudo is generated.
4883 There are two types of references that must be handled:
4885 1. Global data references must load the address from the GOT, via
4886 the PIC reg. An insn is emitted to do this load, and the reg is
4887 returned.
4889 2. Static data references, constant pool addresses, and code labels
4890 compute the address as an offset from the GOT, whose base is in
4891 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4892 differentiate them from global data objects. The returned
4893 address is the PIC reg + an unspec constant.
4895 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4896 reg also appears in the address. */
4898 rtx
4899 legitimize_pic_address (rtx orig, rtx reg)
4901 rtx addr = orig;
4902 rtx addend = const0_rtx;
4903 rtx new_rtx = orig;
4905 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4907 if (GET_CODE (addr) == CONST)
4908 addr = XEXP (addr, 0);
4910 if (GET_CODE (addr) == PLUS)
4912 addend = XEXP (addr, 1);
4913 addr = XEXP (addr, 0);
4916 if ((GET_CODE (addr) == LABEL_REF
4917 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4918 || (GET_CODE (addr) == UNSPEC &&
4919 (XINT (addr, 1) == UNSPEC_GOTENT
4920 || XINT (addr, 1) == UNSPEC_PLT)))
4921 && GET_CODE (addend) == CONST_INT)
4923 /* This can be locally addressed. */
4925 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4926 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4927 gen_rtx_CONST (Pmode, addr) : addr);
4929 if (larl_operand (const_addr, VOIDmode)
4930 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4931 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4933 if (INTVAL (addend) & 1)
4935 /* LARL can't handle odd offsets, so emit a pair of LARL
4936 and LA. */
4937 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4939 if (!DISP_IN_RANGE (INTVAL (addend)))
4941 HOST_WIDE_INT even = INTVAL (addend) - 1;
4942 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4943 addr = gen_rtx_CONST (Pmode, addr);
4944 addend = const1_rtx;
4947 emit_move_insn (temp, addr);
4948 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4950 if (reg != 0)
4952 s390_load_address (reg, new_rtx);
4953 new_rtx = reg;
4956 else
4958 /* If the offset is even, we can just use LARL. This
4959 will happen automatically. */
4962 else
4964 /* No larl - Access local symbols relative to the GOT. */
4966 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4968 if (reload_in_progress || reload_completed)
4969 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4971 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4972 if (addend != const0_rtx)
4973 addr = gen_rtx_PLUS (Pmode, addr, addend);
4974 addr = gen_rtx_CONST (Pmode, addr);
4975 addr = force_const_mem (Pmode, addr);
4976 emit_move_insn (temp, addr);
4978 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4979 if (reg != 0)
4981 s390_load_address (reg, new_rtx);
4982 new_rtx = reg;
4986 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4988 /* A non-local symbol reference without addend.
4990 The symbol ref is wrapped into an UNSPEC to make sure the
4991 proper operand modifier (@GOT or @GOTENT) will be emitted.
4992 This will tell the linker to put the symbol into the GOT.
4994 Additionally the code dereferencing the GOT slot is emitted here.
4996 An addend to the symref needs to be added afterwards.
4997 legitimize_pic_address calls itself recursively to handle
4998 that case. So no need to do it here. */
5000 if (reg == 0)
5001 reg = gen_reg_rtx (Pmode);
5003 if (TARGET_Z10)
5005 /* Use load relative if possible.
5006 lgrl <target>, sym@GOTENT */
5007 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5008 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5009 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
5011 emit_move_insn (reg, new_rtx);
5012 new_rtx = reg;
5014 else if (flag_pic == 1)
5016 /* Assume GOT offset is a valid displacement operand (< 4k
5017 or < 512k with z990). This is handled the same way in
5018 both 31- and 64-bit code (@GOT).
5019 lg <target>, sym@GOT(r12) */
5021 if (reload_in_progress || reload_completed)
5022 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5024 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5025 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5026 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5027 new_rtx = gen_const_mem (Pmode, new_rtx);
5028 emit_move_insn (reg, new_rtx);
5029 new_rtx = reg;
5031 else
5033 /* If the GOT offset might be >= 4k, we determine the position
5034 of the GOT entry via a PC-relative LARL (@GOTENT).
5035 larl temp, sym@GOTENT
5036 lg <target>, 0(temp) */
5038 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5040 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5041 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5043 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5044 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5045 emit_move_insn (temp, new_rtx);
5046 new_rtx = gen_const_mem (Pmode, temp);
5047 emit_move_insn (reg, new_rtx);
5049 new_rtx = reg;
5052 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5054 gcc_assert (XVECLEN (addr, 0) == 1);
5055 switch (XINT (addr, 1))
5057 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
5058 (not GOT slots!). In general this will exceed the
5059 displacement range, so these values belong in the literal
5060 pool. */
5061 case UNSPEC_GOTOFF:
5062 case UNSPEC_PLTOFF:
5063 new_rtx = force_const_mem (Pmode, orig);
5064 break;
5066 /* For -fPIC the GOT size might exceed the displacement
5067 range so make sure the value is in the literal pool. */
5068 case UNSPEC_GOT:
5069 if (flag_pic == 2)
5070 new_rtx = force_const_mem (Pmode, orig);
5071 break;
5073 /* For @GOTENT larl is used. This is handled like local
5074 symbol refs. */
5075 case UNSPEC_GOTENT:
5076 gcc_unreachable ();
5077 break;
5079 /* For @PLT larl is used. This is handled like local
5080 symbol refs. */
5081 case UNSPEC_PLT:
5082 gcc_unreachable ();
5083 break;
5085 /* Everything else cannot happen. */
5086 default:
5087 gcc_unreachable ();
5090 else if (addend != const0_rtx)
5092 /* Otherwise, compute the sum. */
5094 rtx base = legitimize_pic_address (addr, reg);
5095 new_rtx = legitimize_pic_address (addend,
5096 base == reg ? NULL_RTX : reg);
5097 if (GET_CODE (new_rtx) == CONST_INT)
5098 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5099 else
5101 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5103 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5104 new_rtx = XEXP (new_rtx, 1);
5106 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5109 if (GET_CODE (new_rtx) == CONST)
5110 new_rtx = XEXP (new_rtx, 0);
5111 new_rtx = force_operand (new_rtx, 0);
5114 return new_rtx;
5117 /* Load the thread pointer into a register. */
5119 static rtx
5120 s390_get_thread_pointer (void)
5122 rtx tp = gen_reg_rtx (Pmode);
5124 emit_insn (gen_get_thread_pointer (Pmode, tp));
5126 mark_reg_pointer (tp, BITS_PER_WORD);
5128 return tp;
5131 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5132 in s390_tls_symbol which always refers to __tls_get_offset.
5133 The returned offset is written to RESULT_REG and a USE rtx is
5134 generated for TLS_CALL. */
5136 static GTY(()) rtx s390_tls_symbol;
5138 static void
5139 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5141 rtx insn;
5143 if (!flag_pic)
5144 emit_insn (s390_load_got ());
5146 if (!s390_tls_symbol)
5147 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5149 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5150 gen_rtx_REG (Pmode, RETURN_REGNUM));
5152 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5153 RTL_CONST_CALL_P (insn) = 1;
5156 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5157 this (thread-local) address. REG may be used as temporary. */
5159 static rtx
5160 legitimize_tls_address (rtx addr, rtx reg)
5162 rtx new_rtx, tls_call, temp, base, r2;
5163 rtx_insn *insn;
5165 if (GET_CODE (addr) == SYMBOL_REF)
5166 switch (tls_symbolic_operand (addr))
5168 case TLS_MODEL_GLOBAL_DYNAMIC:
5169 start_sequence ();
5170 r2 = gen_rtx_REG (Pmode, 2);
5171 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5172 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5173 new_rtx = force_const_mem (Pmode, new_rtx);
5174 emit_move_insn (r2, new_rtx);
5175 s390_emit_tls_call_insn (r2, tls_call);
5176 insn = get_insns ();
5177 end_sequence ();
5179 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5180 temp = gen_reg_rtx (Pmode);
5181 emit_libcall_block (insn, temp, r2, new_rtx);
5183 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5184 if (reg != 0)
5186 s390_load_address (reg, new_rtx);
5187 new_rtx = reg;
5189 break;
5191 case TLS_MODEL_LOCAL_DYNAMIC:
5192 start_sequence ();
5193 r2 = gen_rtx_REG (Pmode, 2);
5194 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5195 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5196 new_rtx = force_const_mem (Pmode, new_rtx);
5197 emit_move_insn (r2, new_rtx);
5198 s390_emit_tls_call_insn (r2, tls_call);
5199 insn = get_insns ();
5200 end_sequence ();
5202 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5203 temp = gen_reg_rtx (Pmode);
5204 emit_libcall_block (insn, temp, r2, new_rtx);
5206 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5207 base = gen_reg_rtx (Pmode);
5208 s390_load_address (base, new_rtx);
5210 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5211 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5212 new_rtx = force_const_mem (Pmode, new_rtx);
5213 temp = gen_reg_rtx (Pmode);
5214 emit_move_insn (temp, new_rtx);
5216 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5217 if (reg != 0)
5219 s390_load_address (reg, new_rtx);
5220 new_rtx = reg;
5222 break;
5224 case TLS_MODEL_INITIAL_EXEC:
5225 if (flag_pic == 1)
5227 /* Assume GOT offset < 4k. This is handled the same way
5228 in both 31- and 64-bit code. */
5230 if (reload_in_progress || reload_completed)
5231 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5233 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5234 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5235 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5236 new_rtx = gen_const_mem (Pmode, new_rtx);
5237 temp = gen_reg_rtx (Pmode);
5238 emit_move_insn (temp, new_rtx);
5240 else
5242 /* If the GOT offset might be >= 4k, we determine the position
5243 of the GOT entry via a PC-relative LARL. */
5245 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5246 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5247 temp = gen_reg_rtx (Pmode);
5248 emit_move_insn (temp, new_rtx);
5250 new_rtx = gen_const_mem (Pmode, temp);
5251 temp = gen_reg_rtx (Pmode);
5252 emit_move_insn (temp, new_rtx);
5255 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256 if (reg != 0)
5258 s390_load_address (reg, new_rtx);
5259 new_rtx = reg;
5261 break;
5263 case TLS_MODEL_LOCAL_EXEC:
5264 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5265 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5266 new_rtx = force_const_mem (Pmode, new_rtx);
5267 temp = gen_reg_rtx (Pmode);
5268 emit_move_insn (temp, new_rtx);
5270 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5271 if (reg != 0)
5273 s390_load_address (reg, new_rtx);
5274 new_rtx = reg;
5276 break;
5278 default:
5279 gcc_unreachable ();
5282 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5284 switch (XINT (XEXP (addr, 0), 1))
5286 case UNSPEC_NTPOFF:
5287 case UNSPEC_INDNTPOFF:
5288 new_rtx = addr;
5289 break;
5291 default:
5292 gcc_unreachable ();
5296 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5297 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5299 new_rtx = XEXP (XEXP (addr, 0), 0);
5300 if (GET_CODE (new_rtx) != SYMBOL_REF)
5301 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5303 new_rtx = legitimize_tls_address (new_rtx, reg);
5304 new_rtx = plus_constant (Pmode, new_rtx,
5305 INTVAL (XEXP (XEXP (addr, 0), 1)));
5306 new_rtx = force_operand (new_rtx, 0);
5309 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5310 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
5312 new_rtx = XEXP (XEXP (addr, 0), 0);
5313 if (GET_CODE (new_rtx) != SYMBOL_REF)
5314 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5316 new_rtx = legitimize_tls_address (new_rtx, reg);
5317 new_rtx = gen_rtx_NEG (Pmode, new_rtx);
5318 new_rtx = force_operand (new_rtx, 0);
5321 else
5322 gcc_unreachable (); /* for now ... */
5324 return new_rtx;
5327 /* Emit insns making the address in operands[1] valid for a standard
5328 move to operands[0]. operands[1] is replaced by an address which
5329 should be used instead of the former RTX to emit the move
5330 pattern. */
5332 void
5333 emit_symbolic_move (rtx *operands)
5335 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5337 if (GET_CODE (operands[0]) == MEM)
5338 operands[1] = force_reg (Pmode, operands[1]);
5339 else if (TLS_SYMBOLIC_CONST (operands[1]))
5340 operands[1] = legitimize_tls_address (operands[1], temp);
5341 else if (flag_pic)
5342 operands[1] = legitimize_pic_address (operands[1], temp);
5345 /* Try machine-dependent ways of modifying an illegitimate address X
5346 to be legitimate. If we find one, return the new, valid address.
5348 OLDX is the address as it was before break_out_memory_refs was called.
5349 In some cases it is useful to look at this to decide what needs to be done.
5351 MODE is the mode of the operand pointed to by X.
5353 When -fpic is used, special handling is needed for symbolic references.
5354 See comments by legitimize_pic_address for details. */
5356 static rtx
5357 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5358 machine_mode mode ATTRIBUTE_UNUSED)
5360 rtx constant_term = const0_rtx;
5362 if (TLS_SYMBOLIC_CONST (x))
5364 x = legitimize_tls_address (x, 0);
5366 if (s390_legitimate_address_p (mode, x, FALSE))
5367 return x;
5369 else if (GET_CODE (x) == PLUS
5370 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5371 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5373 return x;
5375 else if (flag_pic)
5377 if (SYMBOLIC_CONST (x)
5378 || (GET_CODE (x) == PLUS
5379 && (SYMBOLIC_CONST (XEXP (x, 0))
5380 || SYMBOLIC_CONST (XEXP (x, 1)))))
5381 x = legitimize_pic_address (x, 0);
5383 if (s390_legitimate_address_p (mode, x, FALSE))
5384 return x;
5387 x = eliminate_constant_term (x, &constant_term);
5389 /* Optimize loading of large displacements by splitting them
5390 into the multiple of 4K and the rest; this allows the
5391 former to be CSE'd if possible.
5393 Don't do this if the displacement is added to a register
5394 pointing into the stack frame, as the offsets will
5395 change later anyway. */
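/* Worked example (added; not in the original source): for a displacement of
   0x12345 the code below computes lower = 0x12345 & 0xfff = 0x345 and
   upper = 0x12345 ^ 0x345 = 0x12000.  The 4K-aligned part 0x12000 is loaded
   into a register (and can be CSE'd), while 0x345 remains as an in-range
   displacement.  */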
5397 if (GET_CODE (constant_term) == CONST_INT
5398 && !TARGET_LONG_DISPLACEMENT
5399 && !DISP_IN_RANGE (INTVAL (constant_term))
5400 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5402 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5403 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5405 rtx temp = gen_reg_rtx (Pmode);
5406 rtx val = force_operand (GEN_INT (upper), temp);
5407 if (val != temp)
5408 emit_move_insn (temp, val);
5410 x = gen_rtx_PLUS (Pmode, x, temp);
5411 constant_term = GEN_INT (lower);
5414 if (GET_CODE (x) == PLUS)
5416 if (GET_CODE (XEXP (x, 0)) == REG)
5418 rtx temp = gen_reg_rtx (Pmode);
5419 rtx val = force_operand (XEXP (x, 1), temp);
5420 if (val != temp)
5421 emit_move_insn (temp, val);
5423 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5426 else if (GET_CODE (XEXP (x, 1)) == REG)
5428 rtx temp = gen_reg_rtx (Pmode);
5429 rtx val = force_operand (XEXP (x, 0), temp);
5430 if (val != temp)
5431 emit_move_insn (temp, val);
5433 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5437 if (constant_term != const0_rtx)
5438 x = gen_rtx_PLUS (Pmode, x, constant_term);
5440 return x;
5443 /* Try a machine-dependent way of reloading an illegitimate address AD
5444 operand. If we find one, push the reload and return the new address.
5446 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5447 and TYPE is the reload type of the current reload. */
5449 rtx
5450 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5451 int opnum, int type)
5453 if (!optimize || TARGET_LONG_DISPLACEMENT)
5454 return NULL_RTX;
5456 if (GET_CODE (ad) == PLUS)
5458 rtx tem = simplify_binary_operation (PLUS, Pmode,
5459 XEXP (ad, 0), XEXP (ad, 1));
5460 if (tem)
5461 ad = tem;
5464 if (GET_CODE (ad) == PLUS
5465 && GET_CODE (XEXP (ad, 0)) == REG
5466 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5467 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5469 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5470 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5471 rtx cst, tem, new_rtx;
5473 cst = GEN_INT (upper);
5474 if (!legitimate_reload_constant_p (cst))
5475 cst = force_const_mem (Pmode, cst);
5477 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5478 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5480 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5481 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5482 opnum, (enum reload_type) type);
5483 return new_rtx;
5486 return NULL_RTX;
5489 /* Emit code to copy LEN bytes from SRC to DST. */
5491 bool
5492 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5494 /* When tuning for z10 or higher we rely on the Glibc functions to
5495 do the right thing. Only for constant lengths below 64k will we
5496 generate inline code. */
5497 if (s390_tune >= PROCESSOR_2097_Z10
5498 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5499 return false;
5501 /* Expand memcpy for constant length operands without a loop if it
5502 is shorter that way.
5504 With a constant length argument a
5505 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
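/* Worked example (added; not in the original source): for a constant LEN of
   700 the loop below emits three mvc blocks covering 256, 256 and 188 bytes
   (length operands 255, 255 and 187), i.e. 700 bytes in total without a
   runtime loop.  */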
5506 if (GET_CODE (len) == CONST_INT
5507 && INTVAL (len) >= 0
5508 && INTVAL (len) <= 256 * 6
5509 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5511 HOST_WIDE_INT o, l;
5513 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5515 rtx newdst = adjust_address (dst, BLKmode, o);
5516 rtx newsrc = adjust_address (src, BLKmode, o);
5517 emit_insn (gen_cpymem_short (newdst, newsrc,
5518 GEN_INT (l > 256 ? 255 : l - 1)));
5522 else if (TARGET_MVCLE)
5524 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5527 else
5529 rtx dst_addr, src_addr, count, blocks, temp;
5530 rtx_code_label *loop_start_label = gen_label_rtx ();
5531 rtx_code_label *loop_end_label = gen_label_rtx ();
5532 rtx_code_label *end_label = gen_label_rtx ();
5533 machine_mode mode;
5535 mode = GET_MODE (len);
5536 if (mode == VOIDmode)
5537 mode = Pmode;
5539 dst_addr = gen_reg_rtx (Pmode);
5540 src_addr = gen_reg_rtx (Pmode);
5541 count = gen_reg_rtx (mode);
5542 blocks = gen_reg_rtx (mode);
5544 convert_move (count, len, 1);
5545 emit_cmp_and_jump_insns (count, const0_rtx,
5546 EQ, NULL_RTX, mode, 1, end_label);
5548 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5549 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5550 dst = change_address (dst, VOIDmode, dst_addr);
5551 src = change_address (src, VOIDmode, src_addr);
5553 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5554 OPTAB_DIRECT);
5555 if (temp != count)
5556 emit_move_insn (count, temp);
5558 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5559 OPTAB_DIRECT);
5560 if (temp != blocks)
5561 emit_move_insn (blocks, temp);
5563 emit_cmp_and_jump_insns (blocks, const0_rtx,
5564 EQ, NULL_RTX, mode, 1, loop_end_label);
5566 emit_label (loop_start_label);
5568 if (TARGET_Z10
5569 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5571 rtx prefetch;
5573 /* Issue a read prefetch for the +3 cache line. */
5574 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5575 const0_rtx, const0_rtx);
5576 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5577 emit_insn (prefetch);
5579 /* Issue a write prefetch for the +3 cache line. */
5580 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5581 const1_rtx, const0_rtx);
5582 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5583 emit_insn (prefetch);
5586 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5587 s390_load_address (dst_addr,
5588 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5589 s390_load_address (src_addr,
5590 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5592 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5593 OPTAB_DIRECT);
5594 if (temp != blocks)
5595 emit_move_insn (blocks, temp);
5597 emit_cmp_and_jump_insns (blocks, const0_rtx,
5598 EQ, NULL_RTX, mode, 1, loop_end_label);
5600 emit_jump (loop_start_label);
5601 emit_label (loop_end_label);
5603 emit_insn (gen_cpymem_short (dst, src,
5604 convert_to_mode (Pmode, count, 1)));
5605 emit_label (end_label);
5607 return true;
5610 /* Emit code to set LEN bytes at DST to VAL.
5611 Make use of clrmem if VAL is zero. */
5613 void
5614 s390_expand_setmem (rtx dst, rtx len, rtx val)
5616 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5617 return;
5619 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5621 /* Expand setmem/clrmem for a constant length operand without a
5622 loop if it will be shorter that way.
5623 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5624 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5625 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5626 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
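/* Worked example (added; not in the original source, assuming the
   constant-length path below is taken): for VAL != 0 and LEN = 600 the code
   emits three (mvi + overlapping mvc) groups covering 257, 257 and 86 bytes
   respectively.  */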
5627 if (GET_CODE (len) == CONST_INT
5628 && ((val == const0_rtx
5629 && (INTVAL (len) <= 256 * 4
5630 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5631 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5632 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5634 HOST_WIDE_INT o, l;
5636 if (val == const0_rtx)
5637 /* clrmem: emit 256 byte blockwise XCs. */
5638 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5640 rtx newdst = adjust_address (dst, BLKmode, o);
5641 emit_insn (gen_clrmem_short (newdst,
5642 GEN_INT (l > 256 ? 255 : l - 1)));
5644 else
5645 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5646 setting first byte to val and using a 256 byte mvc with one
5647 byte overlap to propagate the byte. */
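/* Illustrative sketch (added; not in the original source): because mvc
   processes bytes strictly left to right, an overlapping copy whose source
   lags the destination by one byte behaves like

     for (i = 1; i < len; i++)
       dst[i] = dst[i - 1];

   which replicates the byte stored at dst[0] across the whole block.  */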
5648 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5650 rtx newdst = adjust_address (dst, BLKmode, o);
5651 emit_move_insn (adjust_address (dst, QImode, o), val);
5652 if (l > 1)
5654 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5655 emit_insn (gen_cpymem_short (newdstp1, newdst,
5656 GEN_INT (l > 257 ? 255 : l - 2)));
5661 else if (TARGET_MVCLE)
5663 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5664 if (TARGET_64BIT)
5665 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5666 val));
5667 else
5668 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5669 val));
5672 else
5674 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5675 rtx_code_label *loop_start_label = gen_label_rtx ();
5676 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5677 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5678 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5679 machine_mode mode;
5681 mode = GET_MODE (len);
5682 if (mode == VOIDmode)
5683 mode = Pmode;
5685 dst_addr = gen_reg_rtx (Pmode);
5686 count = gen_reg_rtx (mode);
5687 blocks = gen_reg_rtx (mode);
5689 convert_move (count, len, 1);
5690 emit_cmp_and_jump_insns (count, const0_rtx,
5691 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5692 profile_probability::very_unlikely ());
5694 /* We need to make a copy of the target address since memset is
5695 supposed to return it unmodified. We have to make it here
5696 already since the new reg is used at onebyte_end_label. */
5697 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5698 dst = change_address (dst, VOIDmode, dst_addr);
5700 if (val != const0_rtx)
5702 /* When using the overlapping mvc the original target
5703 address is only accessed as single byte entity (even by
5704 the mvc reading this value). */
5705 set_mem_size (dst, 1);
5706 dstp1 = adjust_address (dst, VOIDmode, 1);
5707 emit_cmp_and_jump_insns (count,
5708 const1_rtx, EQ, NULL_RTX, mode, 1,
5709 onebyte_end_label,
5710 profile_probability::very_unlikely ());
5713 /* There is one unconditional (mvi+mvc)/xc after the loop
5714 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5715 or one (xc) here leaves this number of bytes to be handled by
5716 it. */
5717 temp = expand_binop (mode, add_optab, count,
5718 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5719 count, 1, OPTAB_DIRECT);
5720 if (temp != count)
5721 emit_move_insn (count, temp);
5723 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5724 OPTAB_DIRECT);
5725 if (temp != blocks)
5726 emit_move_insn (blocks, temp);
5728 emit_cmp_and_jump_insns (blocks, const0_rtx,
5729 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5731 emit_jump (loop_start_label);
5733 if (val != const0_rtx)
5735 /* The 1 byte != 0 special case. Not handled efficiently
5736 since we require two jumps for that. However, this
5737 should be very rare. */
5738 emit_label (onebyte_end_label);
5739 emit_move_insn (adjust_address (dst, QImode, 0), val);
5740 emit_jump (zerobyte_end_label);
5743 emit_label (loop_start_label);
5745 if (TARGET_SETMEM_PFD (val, len))
5747 /* Issue a write prefetch. */
5748 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5749 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5750 const1_rtx, const0_rtx);
5751 emit_insn (prefetch);
5752 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5755 if (val == const0_rtx)
5756 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5757 else
5759 /* Set the first byte in the block to the value and use an
5760 overlapping mvc for the block. */
5761 emit_move_insn (adjust_address (dst, QImode, 0), val);
5762 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5764 s390_load_address (dst_addr,
5765 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5767 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5768 OPTAB_DIRECT);
5769 if (temp != blocks)
5770 emit_move_insn (blocks, temp);
5772 emit_cmp_and_jump_insns (blocks, const0_rtx,
5773 NE, NULL_RTX, mode, 1, loop_start_label);
5775 emit_label (restbyte_end_label);
5777 if (val == const0_rtx)
5778 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5779 else
5781 /* Set the first byte in the block to the value and use an
5782 overlapping mvc for the block. */
5783 emit_move_insn (adjust_address (dst, QImode, 0), val);
5784 /* execute only uses the lowest 8 bits of count, which is
5785 exactly what we need here. */
5786 emit_insn (gen_cpymem_short (dstp1, dst,
5787 convert_to_mode (Pmode, count, 1)));
5790 emit_label (zerobyte_end_label);
5794 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5795 and return the result in TARGET. */
5797 bool
5798 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5800 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5801 rtx tmp;
5803 /* When tuning for z10 or higher we rely on the Glibc functions to
5804 do the right thing. Only for constant lengths below 64k will we
5805 generate inline code. */
5806 if (s390_tune >= PROCESSOR_2097_Z10
5807 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5808 return false;
5810 /* As the result of CMPINT is inverted compared to what we need,
5811 we have to swap the operands. */
5812 tmp = op0; op0 = op1; op1 = tmp;
5814 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5816 if (INTVAL (len) > 0)
5818 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5819 emit_insn (gen_cmpint (target, ccreg));
5821 else
5822 emit_move_insn (target, const0_rtx);
5824 else if (TARGET_MVCLE)
5826 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5827 emit_insn (gen_cmpint (target, ccreg));
5829 else
5831 rtx addr0, addr1, count, blocks, temp;
5832 rtx_code_label *loop_start_label = gen_label_rtx ();
5833 rtx_code_label *loop_end_label = gen_label_rtx ();
5834 rtx_code_label *end_label = gen_label_rtx ();
5835 machine_mode mode;
5837 mode = GET_MODE (len);
5838 if (mode == VOIDmode)
5839 mode = Pmode;
5841 addr0 = gen_reg_rtx (Pmode);
5842 addr1 = gen_reg_rtx (Pmode);
5843 count = gen_reg_rtx (mode);
5844 blocks = gen_reg_rtx (mode);
5846 convert_move (count, len, 1);
5847 emit_cmp_and_jump_insns (count, const0_rtx,
5848 EQ, NULL_RTX, mode, 1, end_label);
5850 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5851 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5852 op0 = change_address (op0, VOIDmode, addr0);
5853 op1 = change_address (op1, VOIDmode, addr1);
5855 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5856 OPTAB_DIRECT);
5857 if (temp != count)
5858 emit_move_insn (count, temp);
5860 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5861 OPTAB_DIRECT);
5862 if (temp != blocks)
5863 emit_move_insn (blocks, temp);
5865 emit_cmp_and_jump_insns (blocks, const0_rtx,
5866 EQ, NULL_RTX, mode, 1, loop_end_label);
5868 emit_label (loop_start_label);
5870 if (TARGET_Z10
5871 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5873 rtx prefetch;
5875 /* Issue a read prefetch for the +2 cache line of operand 1. */
5876 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5877 const0_rtx, const0_rtx);
5878 emit_insn (prefetch);
5879 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5881 /* Issue a read prefetch for the +2 cache line of operand 2. */
5882 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5883 const0_rtx, const0_rtx);
5884 emit_insn (prefetch);
5885 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5888 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5889 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5890 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5891 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5892 temp = gen_rtx_SET (pc_rtx, temp);
5893 emit_jump_insn (temp);
5895 s390_load_address (addr0,
5896 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5897 s390_load_address (addr1,
5898 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5900 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5901 OPTAB_DIRECT);
5902 if (temp != blocks)
5903 emit_move_insn (blocks, temp);
5905 emit_cmp_and_jump_insns (blocks, const0_rtx,
5906 EQ, NULL_RTX, mode, 1, loop_end_label);
5908 emit_jump (loop_start_label);
5909 emit_label (loop_end_label);
5911 emit_insn (gen_cmpmem_short (op0, op1,
5912 convert_to_mode (Pmode, count, 1)));
5913 emit_label (end_label);
5915 emit_insn (gen_cmpint (target, ccreg));
5917 return true;
5920 /* Emit a conditional jump to LABEL for condition code mask MASK using
5921 comparison operator COMPARISON. Return the emitted jump insn. */
5923 static rtx_insn *
5924 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5926 rtx temp;
5928 gcc_assert (comparison == EQ || comparison == NE);
5929 gcc_assert (mask > 0 && mask < 15);
5931 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5932 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5933 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5934 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5935 temp = gen_rtx_SET (pc_rtx, temp);
5936 return emit_jump_insn (temp);
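/* Usage note (added; not in the original source): MASK is assumed to follow
   the usual z/Architecture branch-mask encoding, where the bit values 8, 4,
   2 and 1 select condition codes 0, 1, 2 and 3 respectively; COMPARISON then
   decides whether to branch on the selected codes (EQ) or on their
   complement (NE).  */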
5939 /* Emit the instructions to implement strlen of STRING and store the
5940 result in TARGET. The string has the known ALIGNMENT. This
5941 version uses vector instructions and is therefore not appropriate
5942 for targets prior to z13. */
5944 void
5945 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5947 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5948 rtx str_reg = gen_reg_rtx (V16QImode);
5949 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5950 rtx str_idx_reg = gen_reg_rtx (Pmode);
5951 rtx result_reg = gen_reg_rtx (V16QImode);
5952 rtx is_aligned_label = gen_label_rtx ();
5953 rtx into_loop_label = NULL_RTX;
5954 rtx loop_start_label = gen_label_rtx ();
5955 rtx temp;
5956 rtx len = gen_reg_rtx (QImode);
5957 rtx cond;
5958 rtx mem;
5960 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5961 emit_move_insn (str_idx_reg, const0_rtx);
5963 if (INTVAL (alignment) < 16)
5965 /* Check whether the address happens to be aligned properly and, if
5966 so, jump directly to the aligned loop. */
5967 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5968 str_addr_base_reg, GEN_INT (15)),
5969 const0_rtx, EQ, NULL_RTX,
5970 Pmode, 1, is_aligned_label);
5972 temp = gen_reg_rtx (Pmode);
5973 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5974 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5975 gcc_assert (REG_P (temp));
5976 highest_index_to_load_reg =
5977 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5978 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5979 gcc_assert (REG_P (highest_index_to_load_reg));
5980 emit_insn (gen_vllv16qi (str_reg,
5981 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5982 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5984 into_loop_label = gen_label_rtx ();
5985 s390_emit_jump (into_loop_label, NULL_RTX);
5986 emit_barrier ();
5989 emit_label (is_aligned_label);
5990 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5992 /* When we reach this point we only perform 16-byte aligned
5993 loads. */
5994 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5996 emit_label (loop_start_label);
5997 LABEL_NUSES (loop_start_label) = 1;
5999 /* Load 16 bytes of the string into VR. */
6000 mem = gen_rtx_MEM (V16QImode,
6001 gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
6002 set_mem_align (mem, 128);
6003 emit_move_insn (str_reg, mem);
6004 if (into_loop_label != NULL_RTX)
6006 emit_label (into_loop_label);
6007 LABEL_NUSES (into_loop_label) = 1;
6010 /* Increment string index by 16 bytes. */
6011 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
6012 str_idx_reg, 1, OPTAB_DIRECT);
6014 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
6015 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6017 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
6018 REG_BR_PROB,
6019 profile_probability::very_likely ().to_reg_br_prob_note ());
6020 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
6022 /* If the string pointer wasn't aligned we have loaded less than 16
6023 bytes and the remaining bytes got filled with zeros (by vll).
6024 Now we have to check whether the resulting index lies within the
6025 bytes actually part of the string. */
6027 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
6028 highest_index_to_load_reg);
6029 s390_load_address (highest_index_to_load_reg,
6030 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6031 const1_rtx));
6032 if (TARGET_64BIT)
6033 emit_insn (gen_movdicc (str_idx_reg, cond,
6034 highest_index_to_load_reg, str_idx_reg));
6035 else
6036 emit_insn (gen_movsicc (str_idx_reg, cond,
6037 highest_index_to_load_reg, str_idx_reg));
6039 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6040 profile_probability::very_unlikely ());
6042 expand_binop (Pmode, add_optab, str_idx_reg,
6043 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6044 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6045 here. */
6046 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6047 convert_to_mode (Pmode, len, 1),
6048 target, 1, OPTAB_DIRECT);
6049 if (temp != target)
6050 emit_move_insn (target, temp);
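/* Illustrative sketch, not part of the original file: the scalar result the
   vector strlen expansion above computes.  The real code first loads the
   bytes up to the next 16-byte boundary (vll) and then scans aligned
   16-byte blocks with vl/vfene; this sketch walks the same chunks
   bytewise.  */
static unsigned long
vec_strlen_sketch (const char *s)
{
  /* Partial first chunk, up to the next 16-byte boundary (the vll load).  */
  unsigned long first = 16 - ((unsigned long) s & 15);
  for (unsigned long i = 0; i < first; i++)
    if (s[i] == '\0')
      return i;

  /* Aligned 16-byte chunks (the vl + vfene loop).  */
  for (unsigned long idx = first;; idx += 16)
    for (unsigned long i = 0; i < 16; i++)
      if (s[idx + i] == '\0')
	return idx + i;
}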
6053 void
6054 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6056 rtx temp = gen_reg_rtx (Pmode);
6057 rtx src_addr = XEXP (src, 0);
6058 rtx dst_addr = XEXP (dst, 0);
6059 rtx src_addr_reg = gen_reg_rtx (Pmode);
6060 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6061 rtx offset = gen_reg_rtx (Pmode);
6062 rtx vsrc = gen_reg_rtx (V16QImode);
6063 rtx vpos = gen_reg_rtx (V16QImode);
6064 rtx loadlen = gen_reg_rtx (SImode);
6065 rtx gpos_qi = gen_reg_rtx(QImode);
6066 rtx gpos = gen_reg_rtx (SImode);
6067 rtx done_label = gen_label_rtx ();
6068 rtx loop_label = gen_label_rtx ();
6069 rtx exit_label = gen_label_rtx ();
6070 rtx full_label = gen_label_rtx ();
6072 /* Perform a quick check for a string-terminating zero within the first
6073 (up to) 16 bytes and exit early if successful. */
6075 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6076 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6077 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6078 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6079 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6080 /* gpos is the byte index if a zero was found and 16 otherwise.
6081 So if it is lower than the number of loaded bytes we have a hit. */
6082 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6083 full_label);
6084 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6086 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6087 1, OPTAB_DIRECT);
6088 emit_jump (exit_label);
6089 emit_barrier ();
6091 emit_label (full_label);
6092 LABEL_NUSES (full_label) = 1;
6094 /* Calculate `offset' so that src + offset points to the last byte
6095 before 16 byte alignment. */
6097 /* temp = src_addr & 0xf */
6098 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6099 1, OPTAB_DIRECT);
6101 /* offset = 0xf - temp */
6102 emit_move_insn (offset, GEN_INT (15));
6103 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6104 1, OPTAB_DIRECT);
6106 /* Store `offset' bytes in the destination string. The quick check
6107 has loaded at least `offset' bytes into vsrc. */
6109 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6111 /* Advance to the next byte to be loaded. */
6112 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6113 1, OPTAB_DIRECT);
6115 /* Make sure the addresses are single regs which can be used as a
6116 base. */
6117 emit_move_insn (src_addr_reg, src_addr);
6118 emit_move_insn (dst_addr_reg, dst_addr);
6120 /* MAIN LOOP */
6122 emit_label (loop_label);
6123 LABEL_NUSES (loop_label) = 1;
6125 emit_move_insn (vsrc,
6126 gen_rtx_MEM (V16QImode,
6127 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6129 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6130 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6131 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6132 REG_BR_PROB, profile_probability::very_unlikely ()
6133 .to_reg_br_prob_note ());
6135 emit_move_insn (gen_rtx_MEM (V16QImode,
6136 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6137 vsrc);
6138 /* offset += 16 */
6139 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6140 offset, 1, OPTAB_DIRECT);
6142 emit_jump (loop_label);
6143 emit_barrier ();
6145 /* REGULAR EXIT */
6147 /* We are done. Add the offset of the zero character to the dst_addr
6148 pointer to get the result. */
6150 emit_label (done_label);
6151 LABEL_NUSES (done_label) = 1;
6153 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6154 1, OPTAB_DIRECT);
6156 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6157 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6159 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6161 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6162 1, OPTAB_DIRECT);
6164 /* EARLY EXIT */
6166 emit_label (exit_label);
6167 LABEL_NUSES (exit_label) = 1;
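/* Illustrative sketch, not part of the original file: the observable
   behaviour of the vector movstr expansion above, which is essentially
   stpcpy.  SRC including its terminating zero is copied to DST and the
   address of that zero byte within DST is the result.  The real code moves
   16-byte blocks with vl/vst, locates the zero with vfene and uses vstl for
   the partial first and last blocks.  */
static char *
vec_movstr_sketch (char *dst, const char *src)
{
  unsigned long i = 0;
  while ((dst[i] = src[i]) != '\0')	/* bytewise here, blockwise above */
    i++;
  return dst + i;			/* points at the copied zero byte */
}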
6171 /* Expand conditional increment or decrement using alc/slb instructions.
6172 Should generate code setting DST to either SRC or SRC + INCREMENT,
6173 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6174 Returns true if successful, false otherwise.
6176 That makes it possible to implement some if-constructs without jumps e.g.:
6177 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6178 unsigned int a, b, c;
6179 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6180 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6181 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6182 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6184 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6185 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6186 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6187 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6188 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6190 bool
6191 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6192 rtx dst, rtx src, rtx increment)
6194 machine_mode cmp_mode;
6195 machine_mode cc_mode;
6196 rtx op_res;
6197 rtx insn;
6198 rtvec p;
6199 int ret;
6201 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6202 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6203 cmp_mode = SImode;
6204 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6205 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6206 cmp_mode = DImode;
6207 else
6208 return false;
6210 /* Try ADD LOGICAL WITH CARRY. */
6211 if (increment == const1_rtx)
6213 /* Determine CC mode to use. */
6214 if (cmp_code == EQ || cmp_code == NE)
6216 if (cmp_op1 != const0_rtx)
6218 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6219 NULL_RTX, 0, OPTAB_WIDEN);
6220 cmp_op1 = const0_rtx;
6223 cmp_code = cmp_code == EQ ? LEU : GTU;
6226 if (cmp_code == LTU || cmp_code == LEU)
6228 rtx tem = cmp_op0;
6229 cmp_op0 = cmp_op1;
6230 cmp_op1 = tem;
6231 cmp_code = swap_condition (cmp_code);
6234 switch (cmp_code)
6236 case GTU:
6237 cc_mode = CCUmode;
6238 break;
6240 case GEU:
6241 cc_mode = CCL3mode;
6242 break;
6244 default:
6245 return false;
6248 /* Emit comparison instruction pattern. */
6249 if (!register_operand (cmp_op0, cmp_mode))
6250 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6252 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6253 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6254 /* We use insn_invalid_p here to add clobbers if required. */
6255 ret = insn_invalid_p (emit_insn (insn), false);
6256 gcc_assert (!ret);
6258 /* Emit ALC instruction pattern. */
6259 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6260 gen_rtx_REG (cc_mode, CC_REGNUM),
6261 const0_rtx);
6263 if (src != const0_rtx)
6265 if (!register_operand (src, GET_MODE (dst)))
6266 src = force_reg (GET_MODE (dst), src);
6268 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6269 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6272 p = rtvec_alloc (2);
6273 RTVEC_ELT (p, 0) =
6274 gen_rtx_SET (dst, op_res);
6275 RTVEC_ELT (p, 1) =
6276 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6277 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6279 return true;
6282 /* Try SUBTRACT LOGICAL WITH BORROW. */
6283 if (increment == constm1_rtx)
6285 /* Determine CC mode to use. */
6286 if (cmp_code == EQ || cmp_code == NE)
6288 if (cmp_op1 != const0_rtx)
6290 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6291 NULL_RTX, 0, OPTAB_WIDEN);
6292 cmp_op1 = const0_rtx;
6295 cmp_code = cmp_code == EQ ? LEU : GTU;
6298 if (cmp_code == GTU || cmp_code == GEU)
6300 rtx tem = cmp_op0;
6301 cmp_op0 = cmp_op1;
6302 cmp_op1 = tem;
6303 cmp_code = swap_condition (cmp_code);
6306 switch (cmp_code)
6308 case LEU:
6309 cc_mode = CCUmode;
6310 break;
6312 case LTU:
6313 cc_mode = CCL3mode;
6314 break;
6316 default:
6317 return false;
6320 /* Emit comparison instruction pattern. */
6321 if (!register_operand (cmp_op0, cmp_mode))
6322 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6324 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6325 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6326 /* We use insn_invalid_p here to add clobbers if required. */
6327 ret = insn_invalid_p (emit_insn (insn), false);
6328 gcc_assert (!ret);
6330 /* Emit SLB instruction pattern. */
6331 if (!register_operand (src, GET_MODE (dst)))
6332 src = force_reg (GET_MODE (dst), src);
6334 op_res = gen_rtx_MINUS (GET_MODE (dst),
6335 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6336 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6337 gen_rtx_REG (cc_mode, CC_REGNUM),
6338 const0_rtx));
6339 p = rtvec_alloc (2);
6340 RTVEC_ELT (p, 0) =
6341 gen_rtx_SET (dst, op_res);
6342 RTVEC_ELT (p, 1) =
6343 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6344 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6346 return true;
6349 return false;
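/* Illustrative sketch, not part of the original file: the branch-free idea
   behind the ALC/SLB expansion above, in plain C.  The unsigned compare
   yields 0 or 1, which plays the role of the carry consumed by ADD LOGICAL
   WITH CARRY (or the borrow consumed by SUBTRACT LOGICAL WITH BORROW).  */
static unsigned int
conditional_increment_sketch (unsigned int a, unsigned int b, unsigned int c)
{
  return c + (a < b);		/* equivalent of "if (a < b) c++;" */
}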
6352 /* Expand code for the insv template. Return true if successful. */
6354 bool
6355 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6357 int bitsize = INTVAL (op1);
6358 int bitpos = INTVAL (op2);
6359 machine_mode mode = GET_MODE (dest);
6360 machine_mode smode;
6361 int smode_bsize, mode_bsize;
6362 rtx op, clobber;
6364 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6365 return false;
6367 /* Generate INSERT IMMEDIATE (IILL et al). */
6368 /* (set (ze (reg)) (const_int)). */
6369 if (TARGET_ZARCH
6370 && register_operand (dest, word_mode)
6371 && (bitpos % 16) == 0
6372 && (bitsize % 16) == 0
6373 && const_int_operand (src, VOIDmode))
6375 HOST_WIDE_INT val = INTVAL (src);
6376 int regpos = bitpos + bitsize;
6378 while (regpos > bitpos)
6380 machine_mode putmode;
6381 int putsize;
6383 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6384 putmode = SImode;
6385 else
6386 putmode = HImode;
6388 putsize = GET_MODE_BITSIZE (putmode);
6389 regpos -= putsize;
6390 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6391 GEN_INT (putsize),
6392 GEN_INT (regpos)),
6393 gen_int_mode (val, putmode));
6394 val >>= putsize;
6396 gcc_assert (regpos == bitpos);
6397 return true;
6400 smode = smallest_int_mode_for_size (bitsize);
6401 smode_bsize = GET_MODE_BITSIZE (smode);
6402 mode_bsize = GET_MODE_BITSIZE (mode);
6404 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6405 if (bitpos == 0
6406 && (bitsize % BITS_PER_UNIT) == 0
6407 && MEM_P (dest)
6408 && (register_operand (src, word_mode)
6409 || const_int_operand (src, VOIDmode)))
6411 /* Emit standard pattern if possible. */
6412 if (smode_bsize == bitsize)
6414 emit_move_insn (adjust_address (dest, smode, 0),
6415 gen_lowpart (smode, src));
6416 return true;
6419 /* (set (ze (mem)) (const_int)). */
6420 else if (const_int_operand (src, VOIDmode))
6422 int size = bitsize / BITS_PER_UNIT;
6423 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6424 BLKmode,
6425 UNITS_PER_WORD - size);
6427 dest = adjust_address (dest, BLKmode, 0);
6428 set_mem_size (dest, size);
6429 s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6430 return true;
6433 /* (set (ze (mem)) (reg)). */
6434 else if (register_operand (src, word_mode))
6436 if (bitsize <= 32)
6437 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6438 const0_rtx), src);
6439 else
6441 /* Emit st,stcmh sequence. */
6442 int stcmh_width = bitsize - 32;
6443 int size = stcmh_width / BITS_PER_UNIT;
6445 emit_move_insn (adjust_address (dest, SImode, size),
6446 gen_lowpart (SImode, src));
6447 set_mem_size (dest, size);
6448 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6449 GEN_INT (stcmh_width),
6450 const0_rtx),
6451 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6453 return true;
6457 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6458 if ((bitpos % BITS_PER_UNIT) == 0
6459 && (bitsize % BITS_PER_UNIT) == 0
6460 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6461 && MEM_P (src)
6462 && (mode == DImode || mode == SImode)
6463 && register_operand (dest, mode))
6465 /* Emit a strict_low_part pattern if possible. */
6466 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6468 rtx low_dest = gen_lowpart (smode, dest);
6469 rtx low_src = gen_lowpart (smode, src);
6471 switch (smode)
6473 case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
6474 case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
6475 case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
6476 default: break;
6480 /* ??? There are more powerful versions of ICM that are not
6481 completely represented in the md file. */
6484 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6485 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6487 machine_mode mode_s = GET_MODE (src);
6489 if (CONSTANT_P (src))
6491 /* For constant zero values the representation with AND
6492 appears to be folded in more situations than the (set
6493 (zero_extract) ...).
6494 We only do this when the start and end of the bitfield
6495 remain in the same SImode chunk. That way nihf or nilf
6496 can be used.
6497 The AND patterns might still generate a risbg for this. */
6498 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6499 return false;
6500 else
6501 src = force_reg (mode, src);
6503 else if (mode_s != mode)
6505 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6506 src = force_reg (mode_s, src);
6507 src = gen_lowpart (mode, src);
6510 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6511 op = gen_rtx_SET (op, src);
6513 if (!TARGET_ZEC12)
6515 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6516 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6518 emit_insn (op);
6520 return true;
6523 return false;
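/* Illustrative sketch, not part of the original file: a source construct
   that can reach the insv expander above.  A store into a bitfield is
   represented as a (set (zero_extract ...) ...) pattern and, depending on
   position, width and target level, may end up as an insert-immediate,
   STCM/ICM or RISBG instruction.  The struct and function names here are
   made up for the example.  */
struct insv_sketch
{
  unsigned int hi : 16;
  unsigned int lo : 16;
};

static void
insv_sketch_set_lo (struct insv_sketch *p, unsigned int val)
{
  p->lo = val;	/* a 16-bit wide, 16-bit aligned insertion */
}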
6526 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6527 register that holds VAL of mode MODE shifted by COUNT bits. */
6529 static inline rtx
6530 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6532 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6533 NULL_RTX, 1, OPTAB_DIRECT);
6534 return expand_simple_binop (SImode, ASHIFT, val, count,
6535 NULL_RTX, 1, OPTAB_DIRECT);
6538 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6539 the result in TARGET. */
6541 void
6542 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6543 rtx cmp_op1, rtx cmp_op2)
6545 machine_mode mode = GET_MODE (target);
6546 bool neg_p = false, swap_p = false;
6547 rtx tmp;
6549 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6551 switch (cond)
6553 /* NE a != b -> !(a == b) */
6554 case NE: cond = EQ; neg_p = true; break;
6555 case UNGT:
6556 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6557 return;
6558 case UNGE:
6559 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6560 return;
6561 case LE: cond = GE; swap_p = true; break;
6562 /* UNLE: (a u<= b) -> (b u>= a). */
6563 case UNLE:
6564 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6565 return;
6566 /* LT: a < b -> b > a */
6567 case LT: cond = GT; swap_p = true; break;
6568 /* UNLT: (a u< b) -> (b u> a). */
6569 case UNLT:
6570 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6571 return;
6572 case UNEQ:
6573 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6574 return;
6575 case LTGT:
6576 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6577 return;
6578 case ORDERED:
6579 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6580 return;
6581 case UNORDERED:
6582 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6583 return;
6584 default: break;
6587 else
6589 switch (cond)
6591 /* NE: a != b -> !(a == b) */
6592 case NE: cond = EQ; neg_p = true; break;
6593 /* GE: a >= b -> !(b > a) */
6594 case GE: cond = GT; neg_p = true; swap_p = true; break;
6595 /* GEU: a >= b -> !(b > a) */
6596 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6597 /* LE: a <= b -> !(a > b) */
6598 case LE: cond = GT; neg_p = true; break;
6599 /* LEU: a <= b -> !(a > b) */
6600 case LEU: cond = GTU; neg_p = true; break;
6601 /* LT: a < b -> b > a */
6602 case LT: cond = GT; swap_p = true; break;
6603 /* LTU: a < b -> b > a */
6604 case LTU: cond = GTU; swap_p = true; break;
6605 default: break;
6609 if (swap_p)
6611 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6614 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6615 mode,
6616 cmp_op1, cmp_op2)));
6617 if (neg_p)
6618 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6621 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6622 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6623 elements in CMP1 and CMP2 fulfill the comparison.
6624 This function is only used to emit patterns for the vx builtins and
6625 therefore only handles comparison codes required by the
6626 builtins. */
6627 void
6628 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6629 rtx cmp1, rtx cmp2, bool all_p)
6631 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6632 rtx tmp_reg = gen_reg_rtx (SImode);
6633 bool swap_p = false;
6635 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6637 switch (code)
6639 case EQ:
6640 case NE:
6641 cc_producer_mode = CCVEQmode;
6642 break;
6643 case GE:
6644 case LT:
6645 code = swap_condition (code);
6646 swap_p = true;
6647 /* fallthrough */
6648 case GT:
6649 case LE:
6650 cc_producer_mode = CCVIHmode;
6651 break;
6652 case GEU:
6653 case LTU:
6654 code = swap_condition (code);
6655 swap_p = true;
6656 /* fallthrough */
6657 case GTU:
6658 case LEU:
6659 cc_producer_mode = CCVIHUmode;
6660 break;
6661 default:
6662 gcc_unreachable ();
6665 scratch_mode = GET_MODE (cmp1);
6666 /* These codes represent inverted CC interpretations. Inverting
6667 an ALL CC mode results in an ANY CC mode and the other way
6668 around. Invert the all_p flag here to compensate for
6669 that. */
6670 if (code == NE || code == LE || code == LEU)
6671 all_p = !all_p;
6673 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6675 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6677 bool inv_p = false;
6679 switch (code)
6681 case EQ: cc_producer_mode = CCVEQmode; break;
6682 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6683 case GT: cc_producer_mode = CCVFHmode; break;
6684 case GE: cc_producer_mode = CCVFHEmode; break;
6685 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6686 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6687 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6688 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6689 default: gcc_unreachable ();
6691 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6693 if (inv_p)
6694 all_p = !all_p;
6696 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6698 else
6699 gcc_unreachable ();
6701 if (swap_p)
6703 rtx tmp = cmp2;
6704 cmp2 = cmp1;
6705 cmp1 = tmp;
6708 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6709 gen_rtvec (2, gen_rtx_SET (
6710 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6711 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6712 gen_rtx_CLOBBER (VOIDmode,
6713 gen_rtx_SCRATCH (scratch_mode)))));
6714 emit_move_insn (target, const0_rtx);
6715 emit_move_insn (tmp_reg, const1_rtx);
6717 emit_move_insn (target,
6718 gen_rtx_IF_THEN_ELSE (SImode,
6719 gen_rtx_fmt_ee (code, VOIDmode,
6720 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6721 const0_rtx),
6722 tmp_reg, target));
6725 /* Invert the comparison CODE applied to a CC mode. This is only safe
6726 if we know whether the result was created by a floating point
6727 compare or not. For the CCV modes this is encoded as part of the
6728 mode. */
6729 enum rtx_code
6730 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6732 /* Reversal of FP compares takes care that an ordered compare
6733 becomes an unordered compare and vice versa. */
6734 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6735 return reverse_condition_maybe_unordered (code);
6736 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6737 return reverse_condition (code);
6738 else
6739 gcc_unreachable ();
6742 /* Generate a vector comparison expression loading either elements of
6743 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6744 and CMP_OP2. */
6746 void
6747 s390_expand_vcond (rtx target, rtx then, rtx els,
6748 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6750 rtx tmp;
6751 machine_mode result_mode;
6752 rtx result_target;
6754 machine_mode target_mode = GET_MODE (target);
6755 machine_mode cmp_mode = GET_MODE (cmp_op1);
6756 rtx op = (cond == LT) ? els : then;
6758 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6759 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6760 for short and byte (x >> 15 and x >> 7 respectively). */
6761 if ((cond == LT || cond == GE)
6762 && target_mode == cmp_mode
6763 && cmp_op2 == CONST0_RTX (cmp_mode)
6764 && op == CONST0_RTX (target_mode)
6765 && s390_vector_mode_supported_p (target_mode)
6766 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6768 rtx negop = (cond == LT) ? then : els;
6770 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6772 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6773 if (negop == CONST1_RTX (target_mode))
6775 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6776 GEN_INT (shift), target,
6777 1, OPTAB_DIRECT);
6778 if (res != target)
6779 emit_move_insn (target, res);
6780 return;
6783 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6784 else if (all_ones_operand (negop, target_mode))
6786 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6787 GEN_INT (shift), target,
6788 0, OPTAB_DIRECT);
6789 if (res != target)
6790 emit_move_insn (target, res);
6791 return;
6795 /* We always use an integral type vector to hold the comparison
6796 result. */
6797 result_mode = related_int_vector_mode (cmp_mode).require ();
6798 result_target = gen_reg_rtx (result_mode);
6800 /* We allow vector immediates as comparison operands that
6801 can be handled by the optimization above but not by the
6802 following code. Hence, force them into registers here. */
6803 if (!REG_P (cmp_op1))
6804 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6806 if (!REG_P (cmp_op2))
6807 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6809 s390_expand_vec_compare (result_target, cond,
6810 cmp_op1, cmp_op2);
6812 /* If the results are supposed to be either -1 or 0 we are done
6813 since this is what our compare instructions generate anyway. */
6814 if (all_ones_operand (then, GET_MODE (then))
6815 && const0_operand (els, GET_MODE (els)))
6817 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6818 result_target, 0));
6819 return;
6822 /* Otherwise we will do a vsel afterwards. */
6823 /* This gets triggered e.g.
6824 with gcc.c-torture/compile/pr53410-1.c */
6825 if (!REG_P (then))
6826 then = force_reg (target_mode, then);
6828 if (!REG_P (els))
6829 els = force_reg (target_mode, els);
6831 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6832 result_target,
6833 CONST0_RTX (result_mode));
6835 /* We compared the result against zero above so we have to swap then
6836 and els here. */
6837 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6839 gcc_assert (target_mode == GET_MODE (then));
6840 emit_insn (gen_rtx_SET (target, tmp));
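/* Illustrative sketch, not part of the original file: the scalar shift
   identities the vcond optimization above relies on, shown for 32-bit
   elements (assuming arithmetic right shift of signed values, which GCC
   provides on this target).  */
static int
lt_zero_all_ones_sketch (int x)
{
  return x >> 31;			/* x < 0 ? -1 : 0  (arithmetic shift) */
}

static unsigned int
lt_zero_one_sketch (int x)
{
  return (unsigned int) x >> 31;	/* x < 0 ? 1 : 0  (logical shift) */
}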
6843 /* Emit the RTX necessary to initialize the vector TARGET with values
6844 in VALS. */
6845 void
6846 s390_expand_vec_init (rtx target, rtx vals)
6848 machine_mode mode = GET_MODE (target);
6849 machine_mode inner_mode = GET_MODE_INNER (mode);
6850 int n_elts = GET_MODE_NUNITS (mode);
6851 bool all_same = true, all_regs = true, all_const_int = true;
6852 rtx x;
6853 int i;
6855 for (i = 0; i < n_elts; ++i)
6857 x = XVECEXP (vals, 0, i);
6859 if (!CONST_INT_P (x))
6860 all_const_int = false;
6862 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6863 all_same = false;
6865 if (!REG_P (x))
6866 all_regs = false;
6869 /* Use vector gen mask or vector gen byte mask if possible. */
6870 if (all_same && all_const_int)
6872 rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6873 if (XVECEXP (vals, 0, 0) == const0_rtx
6874 || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
6875 || s390_bytemask_vector_p (vec, NULL))
6877 emit_insn (gen_rtx_SET (target, vec));
6878 return;
6882 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6883 if (all_same)
6885 rtx elem = XVECEXP (vals, 0, 0);
6887 /* vec_splats accepts general_operand as source. */
6888 if (!general_operand (elem, GET_MODE (elem)))
6889 elem = force_reg (inner_mode, elem);
6891 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6892 return;
6895 if (all_regs
6896 && REG_P (target)
6897 && n_elts == 2
6898 && GET_MODE_SIZE (inner_mode) == 8)
6900 /* Use vector load pair. */
6901 emit_insn (gen_rtx_SET (target,
6902 gen_rtx_VEC_CONCAT (mode,
6903 XVECEXP (vals, 0, 0),
6904 XVECEXP (vals, 0, 1))));
6905 return;
6908 /* Use vector load logical element and zero. */
6909 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6911 bool found = true;
6913 x = XVECEXP (vals, 0, 0);
6914 if (memory_operand (x, inner_mode))
6916 for (i = 1; i < n_elts; ++i)
6917 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6919 if (found)
6921 machine_mode half_mode = (inner_mode == SFmode
6922 ? V2SFmode : V2SImode);
6923 emit_insn (gen_rtx_SET (target,
6924 gen_rtx_VEC_CONCAT (mode,
6925 gen_rtx_VEC_CONCAT (half_mode,
6927 const0_rtx),
6928 gen_rtx_VEC_CONCAT (half_mode,
6929 const0_rtx,
6930 const0_rtx))));
6931 return;
6936 /* We are about to set the vector elements one by one. Zero out the
6937 full register first in order to help the data flow framework to
6938 detect it as full VR set. */
6939 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6941 /* Unfortunately the vec_init expander is not allowed to fail. So
6942 we have to implement the fallback ourselves. */
6943 for (i = 0; i < n_elts; i++)
6945 rtx elem = XVECEXP (vals, 0, i);
6946 if (!general_operand (elem, GET_MODE (elem)))
6947 elem = force_reg (inner_mode, elem);
6949 emit_insn (gen_rtx_SET (target,
6950 gen_rtx_UNSPEC (mode,
6951 gen_rtvec (3, elem,
6952 GEN_INT (i), target),
6953 UNSPEC_VEC_SET)));
6957 /* Emit a vector constant that contains 1s in each element's sign bit position
6958 and 0s in other positions. MODE is the desired constant's mode. */
6959 extern rtx
6960 s390_build_signbit_mask (machine_mode mode)
6962 /* Generate the integral element mask value. */
6963 machine_mode inner_mode = GET_MODE_INNER (mode);
6964 int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
6965 wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
6967 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
6968 value to the desired mode. */
6969 machine_mode int_mode = related_int_vector_mode (mode).require ();
6970 rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
6971 mask = gen_lowpart (inner_mode, mask);
6973 /* Emit the vector mask rtx by replicating the element mask rtx. */
6974 int nunits = GET_MODE_NUNITS (mode);
6975 rtvec v = rtvec_alloc (nunits);
6976 for (int i = 0; i < nunits; i++)
6977 RTVEC_ELT (v, i) = mask;
6978 return gen_rtx_CONST_VECTOR (mode, v);
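/* Illustrative sketch, not part of the original file: the per-element value
   built above, e.g. 0x80000000 for 32-bit elements and 0x8000000000000000
   for 64-bit elements -- only the sign bit is set.  */
static unsigned long long
signbit_mask_sketch (int element_bits)
{
  return 1ULL << (element_bits - 1);
}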
6981 /* Structure to hold the initial parameters for a compare_and_swap operation
6982 in HImode and QImode. */
6984 struct alignment_context
6986 rtx memsi; /* SI aligned memory location. */
6987 rtx shift; /* Bit offset with regard to lsb. */
6988 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6989 rtx modemaski; /* ~modemask */
6990 bool aligned; /* True if memory is aligned, false otherwise. */
6993 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6994 structure AC for transparent simplification if the memory alignment is known
6995 to be at least 32 bit. MEM is the memory location for the actual operation
6996 and MODE its mode. */
6998 static void
6999 init_alignment_context (struct alignment_context *ac, rtx mem,
7000 machine_mode mode)
7002 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
7003 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
7005 if (ac->aligned)
7006 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
7007 else
7009 /* Alignment is unknown. */
7010 rtx byteoffset, addr, align;
7012 /* Force the address into a register. */
7013 addr = force_reg (Pmode, XEXP (mem, 0));
7015 /* Align it to SImode. */
7016 align = expand_simple_binop (Pmode, AND, addr,
7017 GEN_INT (-GET_MODE_SIZE (SImode)),
7018 NULL_RTX, 1, OPTAB_DIRECT);
7019 /* Generate MEM. */
7020 ac->memsi = gen_rtx_MEM (SImode, align);
7021 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
7022 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
7023 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
7025 /* Calculate shiftcount. */
7026 byteoffset = expand_simple_binop (Pmode, AND, addr,
7027 GEN_INT (GET_MODE_SIZE (SImode) - 1),
7028 NULL_RTX, 1, OPTAB_DIRECT);
7029 /* As we already have some offset, evaluate the remaining distance. */
7030 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
7031 NULL_RTX, 1, OPTAB_DIRECT);
7034 /* Shift is the byte count, but we need the bitcount. */
7035 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
7036 NULL_RTX, 1, OPTAB_DIRECT);
7038 /* Calculate masks. */
7039 ac->modemask = expand_simple_binop (SImode, ASHIFT,
7040 GEN_INT (GET_MODE_MASK (mode)),
7041 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
7042 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
7043 NULL_RTX, 1);
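/* Illustrative sketch, not part of the original file: the shift computed by
   init_alignment_context for a sub-word access on this big-endian target.
   ADDR is the byte address of the HImode/QImode location and SIZE its size
   in bytes; the result is the bit position of the field relative to the lsb
   of the containing aligned SImode word.  */
static unsigned int
hqi_field_shift_sketch (unsigned long addr, unsigned int size)
{
  unsigned int byteoffset = addr & 3;		/* offset inside the word */
  return (4 - size - byteoffset) * 8;		/* shift in bits */
}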
7046 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7047 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7048 perform the merge in SEQ2. */
7050 static rtx
7051 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
7052 machine_mode mode, rtx val, rtx ins)
7054 rtx tmp;
7056 if (ac->aligned)
7058 start_sequence ();
7059 tmp = copy_to_mode_reg (SImode, val);
7060 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7061 const0_rtx, ins))
7063 *seq1 = NULL;
7064 *seq2 = get_insns ();
7065 end_sequence ();
7066 return tmp;
7068 end_sequence ();
7071 /* Failed to use insv. Generate a two part shift and mask. */
7072 start_sequence ();
7073 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7074 *seq1 = get_insns ();
7075 end_sequence ();
7077 start_sequence ();
7078 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7079 *seq2 = get_insns ();
7080 end_sequence ();
7082 return tmp;
7085 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7086 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7087 value to set if CMP == MEM. */
7089 static void
7090 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7091 rtx cmp, rtx new_rtx, bool is_weak)
7093 struct alignment_context ac;
7094 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7095 rtx res = gen_reg_rtx (SImode);
7096 rtx_code_label *csloop = NULL, *csend = NULL;
7098 gcc_assert (MEM_P (mem));
7100 init_alignment_context (&ac, mem, mode);
7102 /* Load full word. Subsequent loads are performed by CS. */
7103 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7104 NULL_RTX, 1, OPTAB_DIRECT);
7106 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7107 possible, we try to use insv to make this happen efficiently. If
7108 that fails we'll generate code both inside and outside the loop. */
7109 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7110 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7112 if (seq0)
7113 emit_insn (seq0);
7114 if (seq1)
7115 emit_insn (seq1);
7117 /* Start CS loop. */
7118 if (!is_weak)
7120 /* Begin assuming success. */
7121 emit_move_insn (btarget, const1_rtx);
7123 csloop = gen_label_rtx ();
7124 csend = gen_label_rtx ();
7125 emit_label (csloop);
7128 /* val = "<mem>00..0<mem>"
7129 * cmp = "00..0<cmp>00..0"
7130 * new = "00..0<new>00..0"
7133 emit_insn (seq2);
7134 emit_insn (seq3);
7136 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7137 if (is_weak)
7138 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7139 else
7141 rtx tmp;
7143 /* Jump to end if we're done (likely?). */
7144 s390_emit_jump (csend, cc);
7146 /* Check for changes outside mode, and loop internal if so.
7147 Arrange the moves so that the compare is adjacent to the
7148 branch so that we can generate CRJ. */
7149 tmp = copy_to_reg (val);
7150 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7151 1, OPTAB_DIRECT);
7152 cc = s390_emit_compare (NE, val, tmp);
7153 s390_emit_jump (csloop, cc);
7155 /* Failed. */
7156 emit_move_insn (btarget, const0_rtx);
7157 emit_label (csend);
7160 /* Return the correct part of the bitfield. */
7161 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7162 NULL_RTX, 1, OPTAB_DIRECT), 1);
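/* Illustrative sketch, not part of the original file: the strong
   compare-and-swap loop shape emitted above for HImode/QImode, expressed
   with a hypothetical helper word_cas that models the SImode CS instruction
   (it returns the value observed in memory).  MASK selects the sub-word
   field inside the aligned word; CMP_FIELD and NEW_FIELD are already
   shifted into place.  */
static int
subword_cas_sketch (unsigned int *wordp, unsigned int mask,
		    unsigned int cmp_field, unsigned int new_field,
		    unsigned int (*word_cas) (unsigned int *, unsigned int,
					      unsigned int))
{
  unsigned int outside = *wordp & ~mask;	/* bytes around the field */
  for (;;)
    {
      unsigned int expected = outside | cmp_field;
      unsigned int desired = outside | new_field;
      unsigned int observed = word_cas (wordp, expected, desired);
      if (observed == expected)
	return 1;				/* success */
      if ((observed & mask) != cmp_field)
	return 0;				/* the field itself differs */
      outside = observed & ~mask;		/* only outside bytes changed */
    }
}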
7165 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7166 static void
7167 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7168 rtx cmp, rtx new_rtx, bool is_weak)
7170 rtx output = vtarget;
7171 rtx_code_label *skip_cs_label = NULL;
7172 bool do_const_opt = false;
7174 if (!register_operand (output, mode))
7175 output = gen_reg_rtx (mode);
7177 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7178 with the constant first and skip the compare_and_swap because it's very
7179 expensive and likely to fail anyway.
7180 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7181 cause spurious failures in that case.
7182 Note 2: It may be useful to do this also for non-constant INPUT.
7183 Note 3: Currently only targets with "load on condition" are supported
7184 (z196 and newer). */
7186 if (TARGET_Z196
7187 && (mode == SImode || mode == DImode))
7188 do_const_opt = (is_weak && CONST_INT_P (cmp));
7190 if (do_const_opt)
7192 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7194 skip_cs_label = gen_label_rtx ();
7195 emit_move_insn (btarget, const0_rtx);
7196 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7198 rtvec lt = rtvec_alloc (2);
7200 /* Load-and-test + conditional jump. */
7201 RTVEC_ELT (lt, 0)
7202 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7203 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7204 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7206 else
7208 emit_move_insn (output, mem);
7209 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7211 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7212 add_reg_br_prob_note (get_last_insn (),
7213 profile_probability::very_unlikely ());
7214 /* If the jump is not taken, OUTPUT is the expected value. */
7215 cmp = output;
7216 /* Reload newval to a register manually, *after* the compare and jump
7217 above. Otherwise Reload might place it before the jump. */
7219 else
7220 cmp = force_reg (mode, cmp);
7221 new_rtx = force_reg (mode, new_rtx);
7222 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7223 (do_const_opt) ? CCZmode : CCZ1mode);
7224 if (skip_cs_label != NULL)
7225 emit_label (skip_cs_label);
7227 /* We deliberately accept non-register operands in the predicate
7228 to ensure the write back to the output operand happens *before*
7229 the store-flags code below. This makes it easier for combine
7230 to merge the store-flags code with a potential test-and-branch
7231 pattern following (immediately!) afterwards. */
7232 if (output != vtarget)
7233 emit_move_insn (vtarget, output);
7235 if (do_const_opt)
7237 rtx cc, cond, ite;
7239 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7240 btarget has already been initialized with 0 above. */
7241 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7242 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7243 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7244 emit_insn (gen_rtx_SET (btarget, ite));
7246 else
7248 rtx cc, cond;
7250 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7251 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7252 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7256 /* Expand an atomic compare and swap operation. MEM is the memory location,
7257 CMP the old value to compare MEM with and NEW_RTX the value to set if
7258 CMP == MEM. */
7260 void
7261 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7262 rtx cmp, rtx new_rtx, bool is_weak)
7264 switch (mode)
7266 case E_TImode:
7267 case E_DImode:
7268 case E_SImode:
7269 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7270 break;
7271 case E_HImode:
7272 case E_QImode:
7273 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7274 break;
7275 default:
7276 gcc_unreachable ();
7280 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7281 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7282 of MEM. */
7284 void
7285 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7287 machine_mode mode = GET_MODE (mem);
7288 rtx_code_label *csloop;
7290 if (TARGET_Z196
7291 && (mode == DImode || mode == SImode)
7292 && CONST_INT_P (input) && INTVAL (input) == 0)
7294 emit_move_insn (output, const0_rtx);
7295 if (mode == DImode)
7296 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7297 else
7298 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7299 return;
7302 input = force_reg (mode, input);
7303 emit_move_insn (output, mem);
7304 csloop = gen_label_rtx ();
7305 emit_label (csloop);
7306 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7307 input, CCZ1mode));
7310 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7311 and VAL the value to play with. If AFTER is true then store the value
7312 MEM holds after the operation, if AFTER is false then store the value MEM
7313 holds before the operation. If TARGET is zero then discard that value, else
7314 store it to TARGET. */
7316 void
7317 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7318 rtx target, rtx mem, rtx val, bool after)
7320 struct alignment_context ac;
7321 rtx cmp;
7322 rtx new_rtx = gen_reg_rtx (SImode);
7323 rtx orig = gen_reg_rtx (SImode);
7324 rtx_code_label *csloop = gen_label_rtx ();
7326 gcc_assert (!target || register_operand (target, VOIDmode));
7327 gcc_assert (MEM_P (mem));
7329 init_alignment_context (&ac, mem, mode);
7331 /* Shift val to the correct bit positions.
7332 Preserve "icm", but prevent "ex icm". */
7333 if (!(ac.aligned && code == SET && MEM_P (val)))
7334 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7336 /* Further preparation insns. */
7337 if (code == PLUS || code == MINUS)
7338 emit_move_insn (orig, val);
7339 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7340 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7341 NULL_RTX, 1, OPTAB_DIRECT);
7343 /* Load full word. Subsequent loads are performed by CS. */
7344 cmp = force_reg (SImode, ac.memsi);
7346 /* Start CS loop. */
7347 emit_label (csloop);
7348 emit_move_insn (new_rtx, cmp);
7350 /* Patch new with val at correct position. */
7351 switch (code)
7353 case PLUS:
7354 case MINUS:
7355 val = expand_simple_binop (SImode, code, new_rtx, orig,
7356 NULL_RTX, 1, OPTAB_DIRECT);
7357 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7358 NULL_RTX, 1, OPTAB_DIRECT);
7359 /* FALLTHRU */
7360 case SET:
7361 if (ac.aligned && MEM_P (val))
7362 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7363 0, 0, SImode, val, false);
7364 else
7366 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7367 NULL_RTX, 1, OPTAB_DIRECT);
7368 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7369 NULL_RTX, 1, OPTAB_DIRECT);
7371 break;
7372 case AND:
7373 case IOR:
7374 case XOR:
7375 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7376 NULL_RTX, 1, OPTAB_DIRECT);
7377 break;
7378 case MULT: /* NAND */
7379 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7380 NULL_RTX, 1, OPTAB_DIRECT);
7381 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7382 NULL_RTX, 1, OPTAB_DIRECT);
7383 break;
7384 default:
7385 gcc_unreachable ();
7388 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7389 ac.memsi, cmp, new_rtx,
7390 CCZ1mode));
7392 /* Return the correct part of the bitfield. */
7393 if (target)
7394 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7395 after ? new_rtx : cmp, ac.shift,
7396 NULL_RTX, 1, OPTAB_DIRECT), 1);
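/* Illustrative sketch, not part of the original file: the CS loop shape
   emitted above, shown for a full-word atomic OR.  The hypothetical helper
   word_cas again models the compare-and-swap instruction and returns the
   value observed in memory.  */
static unsigned int
atomic_or_sketch (unsigned int *mem, unsigned int val,
		  unsigned int (*word_cas) (unsigned int *, unsigned int,
					    unsigned int))
{
  unsigned int old = *mem;			/* initial plain load */
  for (;;)
    {
      unsigned int desired = old | val;
      unsigned int observed = word_cas (mem, old, desired);
      if (observed == old)
	return desired;				/* value after the operation */
      old = observed;				/* retry with the new value */
    }
}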
7399 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7400 We need to emit DTP-relative relocations. */
7402 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7404 static void
7405 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7407 switch (size)
7409 case 4:
7410 fputs ("\t.long\t", file);
7411 break;
7412 case 8:
7413 fputs ("\t.quad\t", file);
7414 break;
7415 default:
7416 gcc_unreachable ();
7418 output_addr_const (file, x);
7419 fputs ("@DTPOFF", file);
7422 /* Return the proper mode for REGNO being represented in the dwarf
7423 unwind table. */
7424 machine_mode
7425 s390_dwarf_frame_reg_mode (int regno)
7427 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7429 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7430 if (GENERAL_REGNO_P (regno))
7431 save_mode = Pmode;
7433 /* The rightmost 64 bits of vector registers are call-clobbered. */
7434 if (GET_MODE_SIZE (save_mode) > 8)
7435 save_mode = DImode;
7437 return save_mode;
7440 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7441 /* Implement TARGET_MANGLE_TYPE. */
7443 static const char *
7444 s390_mangle_type (const_tree type)
7446 type = TYPE_MAIN_VARIANT (type);
7448 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7449 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7450 return NULL;
7452 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7453 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7454 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7455 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7457 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7458 && TARGET_LONG_DOUBLE_128)
7459 return "g";
7461 /* For all other types, use normal C++ mangling. */
7462 return NULL;
7464 #endif
7466 /* In the name of slightly smaller debug output, and to cater to
7467 general assembler lossage, recognize various UNSPEC sequences
7468 and turn them back into a direct symbol reference. */
7470 static rtx
7471 s390_delegitimize_address (rtx orig_x)
7473 rtx x, y;
7475 orig_x = delegitimize_mem_from_attrs (orig_x);
7476 x = orig_x;
7478 /* Extract the symbol ref from:
7479 (plus:SI (reg:SI 12 %r12)
7480 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7481 UNSPEC_GOTOFF/PLTOFF)))
7483 (plus:SI (reg:SI 12 %r12)
7484 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7485 UNSPEC_GOTOFF/PLTOFF)
7486 (const_int 4 [0x4])))) */
7487 if (GET_CODE (x) == PLUS
7488 && REG_P (XEXP (x, 0))
7489 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7490 && GET_CODE (XEXP (x, 1)) == CONST)
7492 HOST_WIDE_INT offset = 0;
7494 /* The const operand. */
7495 y = XEXP (XEXP (x, 1), 0);
7497 if (GET_CODE (y) == PLUS
7498 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7500 offset = INTVAL (XEXP (y, 1));
7501 y = XEXP (y, 0);
7504 if (GET_CODE (y) == UNSPEC
7505 && (XINT (y, 1) == UNSPEC_GOTOFF
7506 || XINT (y, 1) == UNSPEC_PLTOFF))
7507 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7510 if (GET_CODE (x) != MEM)
7511 return orig_x;
7513 x = XEXP (x, 0);
7514 if (GET_CODE (x) == PLUS
7515 && GET_CODE (XEXP (x, 1)) == CONST
7516 && GET_CODE (XEXP (x, 0)) == REG
7517 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7519 y = XEXP (XEXP (x, 1), 0);
7520 if (GET_CODE (y) == UNSPEC
7521 && XINT (y, 1) == UNSPEC_GOT)
7522 y = XVECEXP (y, 0, 0);
7523 else
7524 return orig_x;
7526 else if (GET_CODE (x) == CONST)
7528 /* Extract the symbol ref from:
7529 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7530 UNSPEC_PLT/GOTENT))) */
7532 y = XEXP (x, 0);
7533 if (GET_CODE (y) == UNSPEC
7534 && (XINT (y, 1) == UNSPEC_GOTENT
7535 || XINT (y, 1) == UNSPEC_PLT))
7536 y = XVECEXP (y, 0, 0);
7537 else
7538 return orig_x;
7540 else
7541 return orig_x;
7543 if (GET_MODE (orig_x) != Pmode)
7545 if (GET_MODE (orig_x) == BLKmode)
7546 return orig_x;
7547 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7548 if (y == NULL_RTX)
7549 return orig_x;
7551 return y;
7554 /* Output operand OP to stdio stream FILE.
7555 OP is an address (register + offset) which is not used to address data;
7556 instead the rightmost bits are interpreted as the value. */
7558 static void
7559 print_addrstyle_operand (FILE *file, rtx op)
7561 HOST_WIDE_INT offset;
7562 rtx base;
7564 /* Extract base register and offset. */
7565 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7566 gcc_unreachable ();
7568 /* Sanity check. */
7569 if (base)
7571 gcc_assert (GET_CODE (base) == REG);
7572 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7573 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7576 /* Offsets are restricted to twelve bits. */
7577 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7578 if (base)
7579 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7582 /* Print the shift count operand OP to FILE.
7583 OP is an address-style operand in a form which
7584 s390_valid_shift_count permits. Subregs and no-op
7585 and-masking of the operand are stripped. */
7587 static void
7588 print_shift_count_operand (FILE *file, rtx op)
7590 /* No checking of the and mask required here. */
7591 if (!s390_valid_shift_count (op, 0))
7592 gcc_unreachable ();
7594 while (op && GET_CODE (op) == SUBREG)
7595 op = SUBREG_REG (op);
7597 if (GET_CODE (op) == AND)
7598 op = XEXP (op, 0);
7600 print_addrstyle_operand (file, op);
7603 /* Assigns the number of NOP halfwords to be emitted before and after the
7604 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7605 If hotpatching is disabled for the function, the values are set to zero.
7608 static void
7609 s390_function_num_hotpatch_hw (tree decl,
7610 int *hw_before,
7611 int *hw_after)
7613 tree attr;
7615 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7617 /* Handle the arguments of the hotpatch attribute. The values
7618 specified via attribute might override the cmdline argument
7619 values. */
7620 if (attr)
7622 tree args = TREE_VALUE (attr);
7624 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7625 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7627 else
7629 /* Use the values specified by the cmdline arguments. */
7630 *hw_before = s390_hotpatch_hw_before_label;
7631 *hw_after = s390_hotpatch_hw_after_label;
7635 /* Write the current .machine and .machinemode specification to the assembler
7636 file. */
7638 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7639 static void
7640 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7642 fprintf (asm_out_file, "\t.machinemode %s\n",
7643 (TARGET_ZARCH) ? "zarch" : "esa");
7644 fprintf (asm_out_file, "\t.machine \"%s",
7645 processor_table[s390_arch].binutils_name);
7646 if (S390_USE_ARCHITECTURE_MODIFIERS)
7648 int cpu_flags;
7650 cpu_flags = processor_flags_table[(int) s390_arch];
7651 if (TARGET_HTM && !(cpu_flags & PF_TX))
7652 fprintf (asm_out_file, "+htm");
7653 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7654 fprintf (asm_out_file, "+nohtm");
7655 if (TARGET_VX && !(cpu_flags & PF_VX))
7656 fprintf (asm_out_file, "+vx");
7657 else if (!TARGET_VX && (cpu_flags & PF_VX))
7658 fprintf (asm_out_file, "+novx");
7660 fprintf (asm_out_file, "\"\n");
7663 /* Write an extra function header before the very start of the function. */
7665 void
7666 s390_asm_output_function_prefix (FILE *asm_out_file,
7667 const char *fnname ATTRIBUTE_UNUSED)
7669 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7670 return;
7671 /* Since only the function specific options are saved but not the indications
7672 which options are set, it's too much work here to figure out which options
7673 have actually changed. Thus, generate .machine and .machinemode whenever a
7674 function has the target attribute or pragma. */
7675 fprintf (asm_out_file, "\t.machinemode push\n");
7676 fprintf (asm_out_file, "\t.machine push\n");
7677 s390_asm_output_machine_for_arch (asm_out_file);
7680 /* Write an extra function footer after the very end of the function. */
7682 void
7683 s390_asm_declare_function_size (FILE *asm_out_file,
7684 const char *fnname, tree decl)
7686 if (!flag_inhibit_size_directive)
7687 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7688 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7689 return;
7690 fprintf (asm_out_file, "\t.machine pop\n");
7691 fprintf (asm_out_file, "\t.machinemode pop\n");
7693 #endif
7695 /* Write the extra assembler code needed to declare a function properly. */
7697 void
7698 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7699 tree decl)
7701 int hw_before, hw_after;
7703 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7704 if (hw_before > 0)
7706 unsigned int function_alignment;
7707 int i;
7709 /* Add a trampoline code area before the function label and initialize it
7710 with two-byte nop instructions. This area can be overwritten with code
7711 that jumps to a patched version of the function. */
7712 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7713 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7714 hw_before);
7715 for (i = 1; i < hw_before; i++)
7716 fputs ("\tnopr\t%r0\n", asm_out_file);
7718 /* Note: The function label must be aligned so that (a) the bytes of the
7719 following nop do not cross a cacheline boundary, and (b) a jump address
7720 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7721 stored directly before the label without crossing a cacheline
7722 boundary. All this is necessary to make sure the trampoline code can
7723 be changed atomically.
7724 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7725 if there are NOPs before the function label, the alignment is placed
7726 before them. So it is necessary to duplicate the alignment after the
7727 NOPs. */
7728 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7729 if (! DECL_USER_ALIGN (decl))
7730 function_alignment
7731 = MAX (function_alignment,
7732 (unsigned int) align_functions.levels[0].get_value ());
7733 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7734 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7737 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7739 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7740 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7741 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7742 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7743 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7744 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7745 s390_warn_framesize);
7746 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7747 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7748 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7749 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7750 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7751 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7752 TARGET_PACKED_STACK);
7753 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7754 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7755 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7756 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7757 s390_warn_dynamicstack_p);
7759 ASM_OUTPUT_LABEL (asm_out_file, fname);
7760 if (hw_after > 0)
7761 asm_fprintf (asm_out_file,
7762 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7763 hw_after);
7766 /* Output machine-dependent UNSPECs occurring in address constant X
7767 in assembler syntax to stdio stream FILE. Returns true if the
7768 constant X could be recognized, false otherwise. */
7770 static bool
7771 s390_output_addr_const_extra (FILE *file, rtx x)
7773 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7774 switch (XINT (x, 1))
7776 case UNSPEC_GOTENT:
7777 output_addr_const (file, XVECEXP (x, 0, 0));
7778 fprintf (file, "@GOTENT");
7779 return true;
7780 case UNSPEC_GOT:
7781 output_addr_const (file, XVECEXP (x, 0, 0));
7782 fprintf (file, "@GOT");
7783 return true;
7784 case UNSPEC_GOTOFF:
7785 output_addr_const (file, XVECEXP (x, 0, 0));
7786 fprintf (file, "@GOTOFF");
7787 return true;
7788 case UNSPEC_PLT:
7789 output_addr_const (file, XVECEXP (x, 0, 0));
7790 fprintf (file, "@PLT");
7791 return true;
7792 case UNSPEC_PLTOFF:
7793 output_addr_const (file, XVECEXP (x, 0, 0));
7794 fprintf (file, "@PLTOFF");
7795 return true;
7796 case UNSPEC_TLSGD:
7797 output_addr_const (file, XVECEXP (x, 0, 0));
7798 fprintf (file, "@TLSGD");
7799 return true;
7800 case UNSPEC_TLSLDM:
7801 assemble_name (file, get_some_local_dynamic_name ());
7802 fprintf (file, "@TLSLDM");
7803 return true;
7804 case UNSPEC_DTPOFF:
7805 output_addr_const (file, XVECEXP (x, 0, 0));
7806 fprintf (file, "@DTPOFF");
7807 return true;
7808 case UNSPEC_NTPOFF:
7809 output_addr_const (file, XVECEXP (x, 0, 0));
7810 fprintf (file, "@NTPOFF");
7811 return true;
7812 case UNSPEC_GOTNTPOFF:
7813 output_addr_const (file, XVECEXP (x, 0, 0));
7814 fprintf (file, "@GOTNTPOFF");
7815 return true;
7816 case UNSPEC_INDNTPOFF:
7817 output_addr_const (file, XVECEXP (x, 0, 0));
7818 fprintf (file, "@INDNTPOFF");
7819 return true;
7822 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7823 switch (XINT (x, 1))
7825 case UNSPEC_POOL_OFFSET:
7826 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7827 output_addr_const (file, x);
7828 return true;
7830 return false;
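/* For illustration: an operand of the form
   (unspec [(symbol_ref "foo")] UNSPEC_GOTENT), with "foo" standing in for
   any symbol, is printed by the code above as "foo@GOTENT"; the other
   single-element cases append their suffix analogously.  */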
7833 /* Output address operand ADDR in assembler syntax to
7834 stdio stream FILE. */
7836 void
7837 print_operand_address (FILE *file, rtx addr)
7839 struct s390_address ad;
7840 memset (&ad, 0, sizeof (s390_address));
7842 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7844 if (!TARGET_Z10)
7846 output_operand_lossage ("symbolic memory references are "
7847 "only supported on z10 or later");
7848 return;
7850 output_addr_const (file, addr);
7851 return;
7854 if (!s390_decompose_address (addr, &ad)
7855 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7856 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7857 output_operand_lossage ("cannot decompose address");
7859 if (ad.disp)
7860 output_addr_const (file, ad.disp);
7861 else
7862 fprintf (file, "0");
7864 if (ad.base && ad.indx)
7865 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7866 reg_names[REGNO (ad.base)]);
7867 else if (ad.base)
7868 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
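/* For illustration: with displacement 160, index register %r3 and base
   register %r15 the routine above prints something like "160(%r3,%r15)";
   with only a base register it prints "160(%r15)", and a missing
   displacement is printed as "0".  */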
7871 /* Output operand X in assembler syntax to stdio stream FILE.
7872 CODE specifies the format flag. The following format flags
7873 are recognized:
7875 'A': On z14 or higher: if the operand is a MEM, print the alignment
7876 hint usable with vl/vst, prefixed by a comma.
7877 'C': print opcode suffix for branch condition.
7878 'D': print opcode suffix for inverse branch condition.
7879 'E': print opcode suffix for branch on index instruction.
7880 'G': print the size of the operand in bytes.
7881 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7882 'M': print the second word of a TImode operand.
7883 'N': print the second word of a DImode operand.
7884 'O': print only the displacement of a memory reference or address.
7885 'R': print only the base register of a memory reference or address.
7886 'S': print S-type memory reference (base+displacement).
7887 'Y': print address style operand without index (e.g. shift count or setmem
7888 operand).
7890 'b': print integer X as if it's an unsigned byte.
7891 'c': print integer X as if it's a signed byte.
7892 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7893 'f': "end" contiguous bitmask X in SImode.
7894 'h': print integer X as if it's a signed halfword.
7895 'i': print the first nonzero HImode part of X.
7896 'j': print the first HImode part unequal to -1 of X.
7897 'k': print the first nonzero SImode part of X.
7898 'm': print the first SImode part unequal to -1 of X.
7899 'o': print integer X as if it's an unsigned 32-bit word.
7900 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7901 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7902 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7903 'x': print integer X as if it's an unsigned halfword.
7904 'v': print register number as vector register (v1 instead of f1).
7907 void
7908 print_operand (FILE *file, rtx x, int code)
7910 HOST_WIDE_INT ival;
7912 switch (code)
7914 case 'A':
7915 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7917 if (MEM_ALIGN (x) >= 128)
7918 fprintf (file, ",4");
7919 else if (MEM_ALIGN (x) == 64)
7920 fprintf (file, ",3");
7922 return;
7923 case 'C':
7924 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7925 return;
7927 case 'D':
7928 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7929 return;
7931 case 'E':
7932 if (GET_CODE (x) == LE)
7933 fprintf (file, "l");
7934 else if (GET_CODE (x) == GT)
7935 fprintf (file, "h");
7936 else
7937 output_operand_lossage ("invalid comparison operator "
7938 "for 'E' output modifier");
7939 return;
7941 case 'J':
7942 if (GET_CODE (x) == SYMBOL_REF)
7944 fprintf (file, "%s", ":tls_load:");
7945 output_addr_const (file, x);
7947 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7949 fprintf (file, "%s", ":tls_gdcall:");
7950 output_addr_const (file, XVECEXP (x, 0, 0));
7952 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7954 fprintf (file, "%s", ":tls_ldcall:");
7955 const char *name = get_some_local_dynamic_name ();
7956 gcc_assert (name);
7957 assemble_name (file, name);
7959 else
7960 output_operand_lossage ("invalid reference for 'J' output modifier");
7961 return;
7963 case 'G':
7964 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7965 return;
7967 case 'O':
7969 struct s390_address ad;
7970 int ret;
7972 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7974 if (!ret
7975 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7976 || ad.indx)
7978 output_operand_lossage ("invalid address for 'O' output modifier");
7979 return;
7982 if (ad.disp)
7983 output_addr_const (file, ad.disp);
7984 else
7985 fprintf (file, "0");
7987 return;
7989 case 'R':
7991 struct s390_address ad;
7992 int ret;
7994 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7996 if (!ret
7997 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7998 || ad.indx)
8000 output_operand_lossage ("invalid address for 'R' output modifier");
8001 return;
8004 if (ad.base)
8005 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
8006 else
8007 fprintf (file, "0");
8009 return;
8011 case 'S':
8013 struct s390_address ad;
8014 int ret;
8016 if (!MEM_P (x))
8018 output_operand_lossage ("memory reference expected for "
8019 "'S' output modifier");
8020 return;
8022 ret = s390_decompose_address (XEXP (x, 0), &ad);
8024 if (!ret
8025 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8026 || ad.indx)
8028 output_operand_lossage ("invalid address for 'S' output modifier");
8029 return;
8032 if (ad.disp)
8033 output_addr_const (file, ad.disp);
8034 else
8035 fprintf (file, "0");
8037 if (ad.base)
8038 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
8040 return;
8042 case 'N':
8043 if (GET_CODE (x) == REG)
8044 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8045 else if (GET_CODE (x) == MEM)
8046 x = change_address (x, VOIDmode,
8047 plus_constant (Pmode, XEXP (x, 0), 4));
8048 else
8049 output_operand_lossage ("register or memory expression expected "
8050 "for 'N' output modifier");
8051 break;
8053 case 'M':
8054 if (GET_CODE (x) == REG)
8055 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8056 else if (GET_CODE (x) == MEM)
8057 x = change_address (x, VOIDmode,
8058 plus_constant (Pmode, XEXP (x, 0), 8));
8059 else
8060 output_operand_lossage ("register or memory expression expected "
8061 "for 'M' output modifier");
8062 break;
8064 case 'Y':
8065 print_shift_count_operand (file, x);
8066 return;
8069 switch (GET_CODE (x))
8071 case REG:
8072 /* Print FP regs as fx instead of vx when they are accessed
8073 through non-vector mode. */
8074 if (code == 'v'
8075 || VECTOR_NOFP_REG_P (x)
8076 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8077 || (VECTOR_REG_P (x)
8078 && (GET_MODE_SIZE (GET_MODE (x)) /
8079 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8080 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8081 else
8082 fprintf (file, "%s", reg_names[REGNO (x)]);
8083 break;
8085 case MEM:
8086 output_address (GET_MODE (x), XEXP (x, 0));
8087 break;
8089 case CONST:
8090 case CODE_LABEL:
8091 case LABEL_REF:
8092 case SYMBOL_REF:
8093 output_addr_const (file, x);
8094 break;
8096 case CONST_INT:
8097 ival = INTVAL (x);
8098 switch (code)
8100 case 0:
8101 break;
8102 case 'b':
8103 ival &= 0xff;
8104 break;
8105 case 'c':
8106 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8107 break;
8108 case 'x':
8109 ival &= 0xffff;
8110 break;
8111 case 'h':
8112 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8113 break;
8114 case 'i':
8115 ival = s390_extract_part (x, HImode, 0);
8116 break;
8117 case 'j':
8118 ival = s390_extract_part (x, HImode, -1);
8119 break;
8120 case 'k':
8121 ival = s390_extract_part (x, SImode, 0);
8122 break;
8123 case 'm':
8124 ival = s390_extract_part (x, SImode, -1);
8125 break;
8126 case 'o':
8127 ival &= 0xffffffff;
8128 break;
8129 case 'e': case 'f':
8130 case 's': case 't':
8132 int start, end;
8133 int len;
8134 bool ok;
8136 len = (code == 's' || code == 'e' ? 64 : 32);
8137 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8138 gcc_assert (ok);
8139 if (code == 's' || code == 't')
8140 ival = start;
8141 else
8142 ival = end;
8144 break;
8145 default:
8146 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8148 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8149 break;
8151 case CONST_WIDE_INT:
8152 if (code == 'b')
8153 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8154 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8155 else if (code == 'x')
8156 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8157 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8158 else if (code == 'h')
8159 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8160 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8161 else
8163 if (code == 0)
8164 output_operand_lossage ("invalid constant - try using "
8165 "an output modifier");
8166 else
8167 output_operand_lossage ("invalid constant for output modifier '%c'",
8168 code);
8170 break;
8171 case CONST_VECTOR:
8172 switch (code)
8174 case 'h':
8175 gcc_assert (const_vec_duplicate_p (x));
8176 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8177 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8178 break;
8179 case 'e':
8180 case 's':
8182 int start, end;
8183 bool ok;
8185 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8186 gcc_assert (ok);
8187 ival = (code == 's') ? start : end;
8188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8190 break;
8191 case 't':
8193 unsigned mask;
8194 bool ok = s390_bytemask_vector_p (x, &mask);
8195 gcc_assert (ok);
8196 fprintf (file, "%u", mask);
8198 break;
8200 default:
8201 output_operand_lossage ("invalid constant vector for output "
8202 "modifier '%c'", code);
8204 break;
8206 default:
8207 if (code == 0)
8208 output_operand_lossage ("invalid expression - try using "
8209 "an output modifier");
8210 else
8211 output_operand_lossage ("invalid expression for output "
8212 "modifier '%c'", code);
8213 break;
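/* A worked example for the CONST_INT handling above: printing the value -1
   with modifier 'b' yields 255 (the value masked to an unsigned byte),
   while modifier 'c' yields -1 again, because ((ival & 0xff) ^ 0x80) - 0x80
   sign-extends the low byte.  */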
8217 /* Target hook for assembling integer objects. We need to define it
8218 here to work around a bug in some versions of GAS, which couldn't
8219 handle values smaller than INT_MIN when printed in decimal. */
8221 static bool
8222 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8224 if (size == 8 && aligned_p
8225 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8227 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8228 INTVAL (x));
8229 return true;
8231 return default_assemble_integer (x, size, aligned_p);
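/* For illustration: an aligned 8-byte constant with the value -2147483649
   (one below INT_MIN) is emitted by the code above as
   "\t.quad\t0xffffffff7fffffff" rather than as the decimal literal that
   the affected GAS versions could not parse.  */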
8234 /* Returns true if register REGNO is used for forming
8235 a memory address in expression X. */
8237 static bool
8238 reg_used_in_mem_p (int regno, rtx x)
8240 enum rtx_code code = GET_CODE (x);
8241 int i, j;
8242 const char *fmt;
8244 if (code == MEM)
8246 if (refers_to_regno_p (regno, XEXP (x, 0)))
8247 return true;
8249 else if (code == SET
8250 && GET_CODE (SET_DEST (x)) == PC)
8252 if (refers_to_regno_p (regno, SET_SRC (x)))
8253 return true;
8256 fmt = GET_RTX_FORMAT (code);
8257 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8259 if (fmt[i] == 'e'
8260 && reg_used_in_mem_p (regno, XEXP (x, i)))
8261 return true;
8263 else if (fmt[i] == 'E')
8264 for (j = 0; j < XVECLEN (x, i); j++)
8265 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8266 return true;
8268 return false;
8271 /* Returns true if expression DEP_RTX sets an address register
8272 used by instruction INSN to address memory. */
8274 static bool
8275 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8277 rtx target, pat;
8279 if (NONJUMP_INSN_P (dep_rtx))
8280 dep_rtx = PATTERN (dep_rtx);
8282 if (GET_CODE (dep_rtx) == SET)
8284 target = SET_DEST (dep_rtx);
8285 if (GET_CODE (target) == STRICT_LOW_PART)
8286 target = XEXP (target, 0);
8287 while (GET_CODE (target) == SUBREG)
8288 target = SUBREG_REG (target);
8290 if (GET_CODE (target) == REG)
8292 int regno = REGNO (target);
8294 if (s390_safe_attr_type (insn) == TYPE_LA)
8296 pat = PATTERN (insn);
8297 if (GET_CODE (pat) == PARALLEL)
8299 gcc_assert (XVECLEN (pat, 0) == 2);
8300 pat = XVECEXP (pat, 0, 0);
8302 gcc_assert (GET_CODE (pat) == SET);
8303 return refers_to_regno_p (regno, SET_SRC (pat));
8305 else if (get_attr_atype (insn) == ATYPE_AGEN)
8306 return reg_used_in_mem_p (regno, PATTERN (insn));
8309 return false;
8312 /* Return 1 if dep_insn sets a register that insn uses for address generation (agen unit). */
8315 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8317 rtx dep_rtx = PATTERN (dep_insn);
8318 int i;
8320 if (GET_CODE (dep_rtx) == SET
8321 && addr_generation_dependency_p (dep_rtx, insn))
8322 return 1;
8323 else if (GET_CODE (dep_rtx) == PARALLEL)
8325 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8327 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8328 return 1;
8331 return 0;
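/* In other words, the function above reports a dependency whenever DEP_INSN
   (or one element of its PARALLEL) sets a register which INSN subsequently
   needs for address generation, e.g. as the base or index register of a
   memory operand.  */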
8335 /* A C statement (sans semicolon) to update the integer scheduling priority
8336 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8337 reduce the priority to execute INSN later. Do not define this macro if
8338 you do not need to adjust the scheduling priorities of insns.
8340 A STD instruction should be scheduled earlier,
8341 in order to use the bypass. */
8342 static int
8343 s390_adjust_priority (rtx_insn *insn, int priority)
8345 if (! INSN_P (insn))
8346 return priority;
8348 if (s390_tune <= PROCESSOR_2064_Z900)
8349 return priority;
8351 switch (s390_safe_attr_type (insn))
8353 case TYPE_FSTOREDF:
8354 case TYPE_FSTORESF:
8355 priority = priority << 3;
8356 break;
8357 case TYPE_STORE:
8358 case TYPE_STM:
8359 priority = priority << 1;
8360 break;
8361 default:
8362 break;
8364 return priority;
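/* For example, a floating-point store entering with priority 4 leaves with
   priority 32 (4 << 3), which makes the scheduler prefer issuing it early
   enough for the bypass mentioned above to be usable.  */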
8368 /* The number of instructions that can be issued per cycle. */
8370 static int
8371 s390_issue_rate (void)
8373 switch (s390_tune)
8375 case PROCESSOR_2084_Z990:
8376 case PROCESSOR_2094_Z9_109:
8377 case PROCESSOR_2094_Z9_EC:
8378 case PROCESSOR_2817_Z196:
8379 return 3;
8380 case PROCESSOR_2097_Z10:
8381 return 2;
8382 case PROCESSOR_2064_Z900:
8383 /* Starting with EC12 we use the sched_reorder hook to take care
8384 of instruction dispatch constraints. The algorithm only
8385 picks the best instruction and assumes only a single
8386 instruction gets issued per cycle. */
8387 case PROCESSOR_2827_ZEC12:
8388 case PROCESSOR_2964_Z13:
8389 case PROCESSOR_3906_Z14:
8390 default:
8391 return 1;
8395 static int
8396 s390_first_cycle_multipass_dfa_lookahead (void)
8398 return 4;
8401 static void
8402 annotate_constant_pool_refs_1 (rtx *x)
8404 int i, j;
8405 const char *fmt;
8407 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8408 || !CONSTANT_POOL_ADDRESS_P (*x));
8410 /* Literal pool references can only occur inside a MEM ... */
8411 if (GET_CODE (*x) == MEM)
8413 rtx memref = XEXP (*x, 0);
8415 if (GET_CODE (memref) == SYMBOL_REF
8416 && CONSTANT_POOL_ADDRESS_P (memref))
8418 rtx base = cfun->machine->base_reg;
8419 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8420 UNSPEC_LTREF);
8422 *x = replace_equiv_address (*x, addr);
8423 return;
8426 if (GET_CODE (memref) == CONST
8427 && GET_CODE (XEXP (memref, 0)) == PLUS
8428 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8429 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8430 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8432 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8433 rtx sym = XEXP (XEXP (memref, 0), 0);
8434 rtx base = cfun->machine->base_reg;
8435 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8436 UNSPEC_LTREF);
8438 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8439 return;
8443 /* ... or a load-address type pattern. */
8444 if (GET_CODE (*x) == SET)
8446 rtx addrref = SET_SRC (*x);
8448 if (GET_CODE (addrref) == SYMBOL_REF
8449 && CONSTANT_POOL_ADDRESS_P (addrref))
8451 rtx base = cfun->machine->base_reg;
8452 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8453 UNSPEC_LTREF);
8455 SET_SRC (*x) = addr;
8456 return;
8459 if (GET_CODE (addrref) == CONST
8460 && GET_CODE (XEXP (addrref, 0)) == PLUS
8461 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8462 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8463 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8465 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8466 rtx sym = XEXP (XEXP (addrref, 0), 0);
8467 rtx base = cfun->machine->base_reg;
8468 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8469 UNSPEC_LTREF);
8471 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8472 return;
8476 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8477 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8479 if (fmt[i] == 'e')
8481 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8483 else if (fmt[i] == 'E')
8485 for (j = 0; j < XVECLEN (*x, i); j++)
8486 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8491 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8492 Fix up MEMs as required.
8493 Skip insns which support relative addressing, because they do not use a base
8494 register. */
8496 static void
8497 annotate_constant_pool_refs (rtx_insn *insn)
8499 if (s390_safe_relative_long_p (insn))
8500 return;
8501 annotate_constant_pool_refs_1 (&PATTERN (insn));
8504 static void
8505 find_constant_pool_ref_1 (rtx x, rtx *ref)
8507 int i, j;
8508 const char *fmt;
8510 /* Ignore POOL_ENTRY insns while looking for pool references. */
8511 if (GET_CODE (x) == UNSPEC_VOLATILE
8512 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8513 return;
8515 gcc_assert (GET_CODE (x) != SYMBOL_REF
8516 || !CONSTANT_POOL_ADDRESS_P (x));
8518 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8520 rtx sym = XVECEXP (x, 0, 0);
8521 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8522 && CONSTANT_POOL_ADDRESS_P (sym));
8524 if (*ref == NULL_RTX)
8525 *ref = sym;
8526 else
8527 gcc_assert (*ref == sym);
8529 return;
8532 fmt = GET_RTX_FORMAT (GET_CODE (x));
8533 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8535 if (fmt[i] == 'e')
8537 find_constant_pool_ref_1 (XEXP (x, i), ref);
8539 else if (fmt[i] == 'E')
8541 for (j = 0; j < XVECLEN (x, i); j++)
8542 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8547 /* Find an annotated literal pool symbol referenced in INSN,
8548 and store it at REF. Will abort if INSN contains references to
8549 more than one such pool symbol; multiple references to the same
8550 symbol are allowed, however.
8552 The rtx pointed to by REF must be initialized to NULL_RTX
8553 by the caller before calling this routine.
8555 Skip insns which support relative addressing, because they do not use a base
8556 register. */
8558 static void
8559 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8561 if (s390_safe_relative_long_p (insn))
8562 return;
8563 find_constant_pool_ref_1 (PATTERN (insn), ref);
8566 static void
8567 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8569 int i, j;
8570 const char *fmt;
8572 gcc_assert (*x != ref);
8574 if (GET_CODE (*x) == UNSPEC
8575 && XINT (*x, 1) == UNSPEC_LTREF
8576 && XVECEXP (*x, 0, 0) == ref)
8578 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8579 return;
8582 if (GET_CODE (*x) == PLUS
8583 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8584 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8585 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8586 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8588 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8589 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8590 return;
8593 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8594 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8596 if (fmt[i] == 'e')
8598 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8600 else if (fmt[i] == 'E')
8602 for (j = 0; j < XVECLEN (*x, i); j++)
8603 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8608 /* Replace every reference to the annotated literal pool
8609 symbol REF in INSN by its base plus OFFSET.
8610 Skip insns which support relative addressing, because they do not use a base
8611 register. */
8613 static void
8614 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8616 if (s390_safe_relative_long_p (insn))
8617 return;
8618 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8621 /* We keep a list of constants which we have to add to internal
8622 constant tables in the middle of large functions. */
8624 #define NR_C_MODES 32
8625 machine_mode constant_modes[NR_C_MODES] =
8627 TFmode, TImode, TDmode,
8628 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8629 V4SFmode, V2DFmode, V1TFmode,
8630 DFmode, DImode, DDmode,
8631 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8632 SFmode, SImode, SDmode,
8633 V4QImode, V2HImode, V1SImode, V1SFmode,
8634 HImode,
8635 V2QImode, V1HImode,
8636 QImode,
8637 V1QImode
8640 struct constant
8642 struct constant *next;
8643 rtx value;
8644 rtx_code_label *label;
8647 struct constant_pool
8649 struct constant_pool *next;
8650 rtx_insn *first_insn;
8651 rtx_insn *pool_insn;
8652 bitmap insns;
8653 rtx_insn *emit_pool_after;
8655 struct constant *constants[NR_C_MODES];
8656 struct constant *execute;
8657 rtx_code_label *label;
8658 int size;
8661 /* Allocate new constant_pool structure. */
8663 static struct constant_pool *
8664 s390_alloc_pool (void)
8666 struct constant_pool *pool;
8667 int i;
8669 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8670 pool->next = NULL;
8671 for (i = 0; i < NR_C_MODES; i++)
8672 pool->constants[i] = NULL;
8674 pool->execute = NULL;
8675 pool->label = gen_label_rtx ();
8676 pool->first_insn = NULL;
8677 pool->pool_insn = NULL;
8678 pool->insns = BITMAP_ALLOC (NULL);
8679 pool->size = 0;
8680 pool->emit_pool_after = NULL;
8682 return pool;
8685 /* Create new constant pool covering instructions starting at INSN
8686 and chain it to the end of POOL_LIST. */
8688 static struct constant_pool *
8689 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8691 struct constant_pool *pool, **prev;
8693 pool = s390_alloc_pool ();
8694 pool->first_insn = insn;
8696 for (prev = pool_list; *prev; prev = &(*prev)->next)
8698 *prev = pool;
8700 return pool;
8703 /* End range of instructions covered by POOL at INSN and emit
8704 placeholder insn representing the pool. */
8706 static void
8707 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8709 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8711 if (!insn)
8712 insn = get_last_insn ();
8714 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8715 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8718 /* Add INSN to the list of insns covered by POOL. */
8720 static void
8721 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8723 bitmap_set_bit (pool->insns, INSN_UID (insn));
8726 /* Return pool out of POOL_LIST that covers INSN. */
8728 static struct constant_pool *
8729 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8731 struct constant_pool *pool;
8733 for (pool = pool_list; pool; pool = pool->next)
8734 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8735 break;
8737 return pool;
8740 /* Add constant VAL of mode MODE to the constant pool POOL. */
8742 static void
8743 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8745 struct constant *c;
8746 int i;
8748 for (i = 0; i < NR_C_MODES; i++)
8749 if (constant_modes[i] == mode)
8750 break;
8751 gcc_assert (i != NR_C_MODES);
8753 for (c = pool->constants[i]; c != NULL; c = c->next)
8754 if (rtx_equal_p (val, c->value))
8755 break;
8757 if (c == NULL)
8759 c = (struct constant *) xmalloc (sizeof *c);
8760 c->value = val;
8761 c->label = gen_label_rtx ();
8762 c->next = pool->constants[i];
8763 pool->constants[i] = c;
8764 pool->size += GET_MODE_SIZE (mode);
8768 /* Return an rtx that represents the offset of X from the start of
8769 pool POOL. */
8771 static rtx
8772 s390_pool_offset (struct constant_pool *pool, rtx x)
8774 rtx label;
8776 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8777 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8778 UNSPEC_POOL_OFFSET);
8779 return gen_rtx_CONST (GET_MODE (x), x);
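/* The rtx returned above has the shape
   (const (unspec [X (label_ref POOL->label)] UNSPEC_POOL_OFFSET)),
   which s390_output_addr_const_extra later prints as the difference
   between X and the pool base label.  */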
8782 /* Find constant VAL of mode MODE in the constant pool POOL.
8783 Return an RTX describing the distance from the start of
8784 the pool to the location of the new constant. */
8786 static rtx
8787 s390_find_constant (struct constant_pool *pool, rtx val,
8788 machine_mode mode)
8790 struct constant *c;
8791 int i;
8793 for (i = 0; i < NR_C_MODES; i++)
8794 if (constant_modes[i] == mode)
8795 break;
8796 gcc_assert (i != NR_C_MODES);
8798 for (c = pool->constants[i]; c != NULL; c = c->next)
8799 if (rtx_equal_p (val, c->value))
8800 break;
8802 gcc_assert (c);
8804 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8807 /* Check whether INSN is an execute. Return the label_ref to its
8808 execute target template if so, NULL_RTX otherwise. */
8810 static rtx
8811 s390_execute_label (rtx insn)
8813 if (INSN_P (insn)
8814 && GET_CODE (PATTERN (insn)) == PARALLEL
8815 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8816 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8817 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8819 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8820 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8821 else
8823 gcc_assert (JUMP_P (insn));
8824 /* For jump insns as execute target:
8825 - There is one operand less in the parallel (the
8826 modification register of the execute is always 0).
8827 - The execute target label is wrapped into an
8828 if_then_else in order to hide it from jump analysis. */
8829 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8833 return NULL_RTX;
8836 /* Find execute target for INSN in the constant pool POOL.
8837 Return an RTX describing the distance from the start of
8838 the pool to the location of the execute target. */
8840 static rtx
8841 s390_find_execute (struct constant_pool *pool, rtx insn)
8843 struct constant *c;
8845 for (c = pool->execute; c != NULL; c = c->next)
8846 if (INSN_UID (insn) == INSN_UID (c->value))
8847 break;
8849 gcc_assert (c);
8851 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8854 /* For an execute INSN, extract the execute target template. */
8856 static rtx
8857 s390_execute_target (rtx insn)
8859 rtx pattern = PATTERN (insn);
8860 gcc_assert (s390_execute_label (insn));
8862 if (XVECLEN (pattern, 0) == 2)
8864 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8866 else
8868 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8869 int i;
8871 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8872 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8874 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8877 return pattern;
8880 /* Indicate that INSN cannot be duplicated. This is the case for
8881 execute insns that carry a unique label. */
8883 static bool
8884 s390_cannot_copy_insn_p (rtx_insn *insn)
8886 rtx label = s390_execute_label (insn);
8887 return label && label != const0_rtx;
8890 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8891 do not emit the pool base label. */
8893 static void
8894 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8896 struct constant *c;
8897 rtx_insn *insn = pool->pool_insn;
8898 int i;
8900 /* Switch to rodata section. */
8901 insn = emit_insn_after (gen_pool_section_start (), insn);
8902 INSN_ADDRESSES_NEW (insn, -1);
8904 /* Ensure minimum pool alignment. */
8905 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8906 INSN_ADDRESSES_NEW (insn, -1);
8908 /* Emit pool base label. */
8909 if (!remote_label)
8911 insn = emit_label_after (pool->label, insn);
8912 INSN_ADDRESSES_NEW (insn, -1);
8915 /* Dump constants in descending alignment requirement order,
8916 ensuring proper alignment for every constant. */
8917 for (i = 0; i < NR_C_MODES; i++)
8918 for (c = pool->constants[i]; c; c = c->next)
8920 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8921 rtx value = copy_rtx (c->value);
8922 if (GET_CODE (value) == CONST
8923 && GET_CODE (XEXP (value, 0)) == UNSPEC
8924 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8925 && XVECLEN (XEXP (value, 0), 0) == 1)
8926 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8928 insn = emit_label_after (c->label, insn);
8929 INSN_ADDRESSES_NEW (insn, -1);
8931 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8932 gen_rtvec (1, value),
8933 UNSPECV_POOL_ENTRY);
8934 insn = emit_insn_after (value, insn);
8935 INSN_ADDRESSES_NEW (insn, -1);
8938 /* Ensure minimum alignment for instructions. */
8939 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8940 INSN_ADDRESSES_NEW (insn, -1);
8942 /* Output in-pool execute template insns. */
8943 for (c = pool->execute; c; c = c->next)
8945 insn = emit_label_after (c->label, insn);
8946 INSN_ADDRESSES_NEW (insn, -1);
8948 insn = emit_insn_after (s390_execute_target (c->value), insn);
8949 INSN_ADDRESSES_NEW (insn, -1);
8952 /* Switch back to previous section. */
8953 insn = emit_insn_after (gen_pool_section_end (), insn);
8954 INSN_ADDRESSES_NEW (insn, -1);
8956 insn = emit_barrier_after (insn);
8957 INSN_ADDRESSES_NEW (insn, -1);
8959 /* Remove placeholder insn. */
8960 remove_insn (pool->pool_insn);
8963 /* Free all memory used by POOL. */
8965 static void
8966 s390_free_pool (struct constant_pool *pool)
8968 struct constant *c, *next;
8969 int i;
8971 for (i = 0; i < NR_C_MODES; i++)
8972 for (c = pool->constants[i]; c; c = next)
8974 next = c->next;
8975 free (c);
8978 for (c = pool->execute; c; c = next)
8980 next = c->next;
8981 free (c);
8984 BITMAP_FREE (pool->insns);
8985 free (pool);
8989 /* Collect main literal pool. Return NULL on overflow. */
8991 static struct constant_pool *
8992 s390_mainpool_start (void)
8994 struct constant_pool *pool;
8995 rtx_insn *insn;
8997 pool = s390_alloc_pool ();
8999 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9001 if (NONJUMP_INSN_P (insn)
9002 && GET_CODE (PATTERN (insn)) == SET
9003 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
9004 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
9006 /* There might be two main_pool instructions if base_reg
9007 is call-clobbered; one for shrink-wrapped code and one
9008 for the rest. We want to keep the first. */
9009 if (pool->pool_insn)
9011 insn = PREV_INSN (insn);
9012 delete_insn (NEXT_INSN (insn));
9013 continue;
9015 pool->pool_insn = insn;
9018 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9020 rtx pool_ref = NULL_RTX;
9021 find_constant_pool_ref (insn, &pool_ref);
9022 if (pool_ref)
9024 rtx constant = get_pool_constant (pool_ref);
9025 machine_mode mode = get_pool_mode (pool_ref);
9026 s390_add_constant (pool, constant, mode);
9030 /* If hot/cold partitioning is enabled we have to make sure that
9031 the literal pool is emitted in the same section where the
9032 initialization of the literal pool base pointer takes place.
9033 emit_pool_after is only used in the non-overflow case on
9034 non-Z CPUs where we can emit the literal pool at the end of the
9035 function body within the text section. */
9036 if (NOTE_P (insn)
9037 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9038 && !pool->emit_pool_after)
9039 pool->emit_pool_after = PREV_INSN (insn);
9042 gcc_assert (pool->pool_insn || pool->size == 0);
9044 if (pool->size >= 4096)
9046 /* We're going to chunkify the pool, so remove the main
9047 pool placeholder insn. */
9048 remove_insn (pool->pool_insn);
9050 s390_free_pool (pool);
9051 pool = NULL;
9054 /* If the function ends with the section where the literal pool
9055 should be emitted, set the marker to its end. */
9056 if (pool && !pool->emit_pool_after)
9057 pool->emit_pool_after = get_last_insn ();
9059 return pool;
9062 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9063 Modify the current function to output the pool constants as well as
9064 the pool register setup instruction. */
9066 static void
9067 s390_mainpool_finish (struct constant_pool *pool)
9069 rtx base_reg = cfun->machine->base_reg;
9070 rtx set;
9071 rtx_insn *insn;
9073 /* If the pool is empty, we're done. */
9074 if (pool->size == 0)
9076 /* We don't actually need a base register after all. */
9077 cfun->machine->base_reg = NULL_RTX;
9079 if (pool->pool_insn)
9080 remove_insn (pool->pool_insn);
9081 s390_free_pool (pool);
9082 return;
9085 /* We need correct insn addresses. */
9086 shorten_branches (get_insns ());
9088 /* Use a LARL to load the pool register. The pool is
9089 located in the .rodata section, so we emit it after the function. */
9090 set = gen_main_base_64 (base_reg, pool->label);
9091 insn = emit_insn_after (set, pool->pool_insn);
9092 INSN_ADDRESSES_NEW (insn, -1);
9093 remove_insn (pool->pool_insn);
9095 insn = get_last_insn ();
9096 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9097 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9099 s390_dump_pool (pool, 0);
9101 /* Replace all literal pool references. */
9103 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9105 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9107 rtx addr, pool_ref = NULL_RTX;
9108 find_constant_pool_ref (insn, &pool_ref);
9109 if (pool_ref)
9111 if (s390_execute_label (insn))
9112 addr = s390_find_execute (pool, insn);
9113 else
9114 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9115 get_pool_mode (pool_ref));
9117 replace_constant_pool_ref (insn, pool_ref, addr);
9118 INSN_CODE (insn) = -1;
9124 /* Free the pool. */
9125 s390_free_pool (pool);
9128 /* Chunkify the literal pool. */
9130 #define S390_POOL_CHUNK_MIN 0xc00
9131 #define S390_POOL_CHUNK_MAX 0xe00
9133 static struct constant_pool *
9134 s390_chunkify_start (void)
9136 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9137 bitmap far_labels;
9138 rtx_insn *insn;
9140 /* We need correct insn addresses. */
9142 shorten_branches (get_insns ());
9144 /* Scan all insns and move literals to pool chunks. */
9146 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9148 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9150 rtx pool_ref = NULL_RTX;
9151 find_constant_pool_ref (insn, &pool_ref);
9152 if (pool_ref)
9154 rtx constant = get_pool_constant (pool_ref);
9155 machine_mode mode = get_pool_mode (pool_ref);
9157 if (!curr_pool)
9158 curr_pool = s390_start_pool (&pool_list, insn);
9160 s390_add_constant (curr_pool, constant, mode);
9161 s390_add_pool_insn (curr_pool, insn);
9165 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9167 if (curr_pool)
9168 s390_add_pool_insn (curr_pool, insn);
9171 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9172 continue;
9174 if (!curr_pool
9175 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9176 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9177 continue;
9179 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9180 continue;
9182 s390_end_pool (curr_pool, NULL);
9183 curr_pool = NULL;
9186 if (curr_pool)
9187 s390_end_pool (curr_pool, NULL);
9189 /* Find all labels that are branched into
9190 from an insn belonging to a different chunk. */
9192 far_labels = BITMAP_ALLOC (NULL);
9194 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9196 rtx_jump_table_data *table;
9198 /* Labels marked with LABEL_PRESERVE_P can be the target
9199 of non-local jumps, so we have to mark them.
9200 The same holds for named labels.
9202 Don't do that, however, if it is the label before
9203 a jump table. */
9205 if (LABEL_P (insn)
9206 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9208 rtx_insn *vec_insn = NEXT_INSN (insn);
9209 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9210 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9212 /* Check potential targets in a table jump (casesi_jump). */
9213 else if (tablejump_p (insn, NULL, &table))
9215 rtx vec_pat = PATTERN (table);
9216 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9218 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9220 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9222 if (s390_find_pool (pool_list, label)
9223 != s390_find_pool (pool_list, insn))
9224 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9227 /* If we have a direct jump (conditional or unconditional),
9228 check all potential targets. */
9229 else if (JUMP_P (insn))
9231 rtx pat = PATTERN (insn);
9233 if (GET_CODE (pat) == PARALLEL)
9234 pat = XVECEXP (pat, 0, 0);
9236 if (GET_CODE (pat) == SET)
9238 rtx label = JUMP_LABEL (insn);
9239 if (label && !ANY_RETURN_P (label))
9241 if (s390_find_pool (pool_list, label)
9242 != s390_find_pool (pool_list, insn))
9243 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9249 /* Insert base register reload insns before every pool. */
9251 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9253 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9254 curr_pool->label);
9255 rtx_insn *insn = curr_pool->first_insn;
9256 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9259 /* Insert base register reload insns at every far label. */
9261 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9262 if (LABEL_P (insn)
9263 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9265 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9266 if (pool)
9268 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9269 pool->label);
9270 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9275 BITMAP_FREE (far_labels);
9278 /* Recompute insn addresses. */
9280 init_insn_lengths ();
9281 shorten_branches (get_insns ());
9283 return pool_list;
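/* Note on sizing: a chunk is closed once it reaches S390_POOL_CHUNK_MAX
   (0xe00) bytes; presumably, like the 4096-byte limit used for the main
   pool, this keeps every entry reachable via a short displacement from the
   chunk's base label.  */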
9286 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9287 After we have decided to use this list, finish implementing
9288 all changes to the current function as required. */
9290 static void
9291 s390_chunkify_finish (struct constant_pool *pool_list)
9293 struct constant_pool *curr_pool = NULL;
9294 rtx_insn *insn;
9297 /* Replace all literal pool references. */
9299 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9301 curr_pool = s390_find_pool (pool_list, insn);
9302 if (!curr_pool)
9303 continue;
9305 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9307 rtx addr, pool_ref = NULL_RTX;
9308 find_constant_pool_ref (insn, &pool_ref);
9309 if (pool_ref)
9311 if (s390_execute_label (insn))
9312 addr = s390_find_execute (curr_pool, insn);
9313 else
9314 addr = s390_find_constant (curr_pool,
9315 get_pool_constant (pool_ref),
9316 get_pool_mode (pool_ref));
9318 replace_constant_pool_ref (insn, pool_ref, addr);
9319 INSN_CODE (insn) = -1;
9324 /* Dump out all literal pools. */
9326 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9327 s390_dump_pool (curr_pool, 0);
9329 /* Free pool list. */
9331 while (pool_list)
9333 struct constant_pool *next = pool_list->next;
9334 s390_free_pool (pool_list);
9335 pool_list = next;
9339 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9341 void
9342 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9344 switch (GET_MODE_CLASS (mode))
9346 case MODE_FLOAT:
9347 case MODE_DECIMAL_FLOAT:
9348 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9350 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9351 as_a <scalar_float_mode> (mode), align);
9352 break;
9354 case MODE_INT:
9355 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9356 mark_symbol_refs_as_used (exp);
9357 break;
9359 case MODE_VECTOR_INT:
9360 case MODE_VECTOR_FLOAT:
9362 int i;
9363 machine_mode inner_mode;
9364 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9366 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9367 for (i = 0; i < XVECLEN (exp, 0); i++)
9368 s390_output_pool_entry (XVECEXP (exp, 0, i),
9369 inner_mode,
9370 i == 0
9371 ? align
9372 : GET_MODE_BITSIZE (inner_mode));
9374 break;
9376 default:
9377 gcc_unreachable ();
9382 /* Return an RTL expression representing the value of the return address
9383 for the frame COUNT steps up from the current frame. FRAME is the
9384 frame pointer of that frame. */
9387 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9389 int offset;
9390 rtx addr;
9392 /* Without backchain, we fail for all but the current frame. */
9394 if (!TARGET_BACKCHAIN && count > 0)
9395 return NULL_RTX;
9397 /* For the current frame, we need to make sure the initial
9398 value of RETURN_REGNUM is actually saved. */
9400 if (count == 0)
9401 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9403 if (TARGET_PACKED_STACK)
9404 offset = -2 * UNITS_PER_LONG;
9405 else
9406 offset = RETURN_REGNUM * UNITS_PER_LONG;
9408 addr = plus_constant (Pmode, frame, offset);
9409 addr = memory_address (Pmode, addr);
9410 return gen_rtx_MEM (Pmode, addr);
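/* For illustration, assuming the usual values on a 64-bit target
   (UNITS_PER_LONG == 8, RETURN_REGNUM == 14) and the default non-packed
   stack layout, the return address of a parent frame is loaded from
   FRAME + 112.  */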
9413 /* Return an RTL expression representing the back chain stored in
9414 the current stack frame. */
9417 s390_back_chain_rtx (void)
9419 rtx chain;
9421 gcc_assert (TARGET_BACKCHAIN);
9423 if (TARGET_PACKED_STACK)
9424 chain = plus_constant (Pmode, stack_pointer_rtx,
9425 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9426 else
9427 chain = stack_pointer_rtx;
9429 chain = gen_rtx_MEM (Pmode, chain);
9430 return chain;
9433 /* Find the first call-clobbered register unused in a function.
9434 This could be used as a base register in a leaf function
9435 or for holding the return address before the epilogue. */
9437 static int
9438 find_unused_clobbered_reg (void)
9440 int i;
9441 for (i = 0; i < 6; i++)
9442 if (!df_regs_ever_live_p (i))
9443 return i;
9444 return 0;
9448 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9449 clobbered hard regs in SETREG. */
9451 static void
9452 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9454 char *regs_ever_clobbered = (char *)data;
9455 unsigned int i, regno;
9456 machine_mode mode = GET_MODE (setreg);
9458 if (GET_CODE (setreg) == SUBREG)
9460 rtx inner = SUBREG_REG (setreg);
9461 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9462 return;
9463 regno = subreg_regno (setreg);
9465 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9466 regno = REGNO (setreg);
9467 else
9468 return;
9470 for (i = regno;
9471 i < end_hard_regno (mode, regno);
9472 i++)
9473 regs_ever_clobbered[i] = 1;
9476 /* Walks through all basic blocks of the current function looking
9477 for clobbered hard regs using s390_reg_clobbered_rtx. The entries
9478 of the passed char array REGS_EVER_CLOBBERED are set to one for
9479 each of those regs. */
9481 static void
9482 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9484 basic_block cur_bb;
9485 rtx_insn *cur_insn;
9486 unsigned int i;
9488 memset (regs_ever_clobbered, 0, 32);
9490 /* For non-leaf functions we have to consider all call clobbered regs to be
9491 clobbered. */
9492 if (!crtl->is_leaf)
9494 for (i = 0; i < 32; i++)
9495 regs_ever_clobbered[i] = call_used_regs[i];
9498 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9499 this work is done by liveness analysis (mark_regs_live_at_end).
9500 Special care is needed for functions containing landing pads. Landing pads
9501 may use the eh registers, but the code which sets these registers is not
9502 contained in that function. Hence s390_regs_ever_clobbered is not able to
9503 deal with this automatically. */
9504 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9505 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9506 if (crtl->calls_eh_return
9507 || (cfun->machine->has_landing_pad_p
9508 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9509 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9511 /* For nonlocal gotos all call-saved registers have to be saved.
9512 This flag is also set for the unwinding code in libgcc.
9513 See expand_builtin_unwind_init. For regs_ever_live this is done by
9514 reload. */
9515 if (crtl->saves_all_registers)
9516 for (i = 0; i < 32; i++)
9517 if (!call_used_regs[i])
9518 regs_ever_clobbered[i] = 1;
9520 FOR_EACH_BB_FN (cur_bb, cfun)
9522 FOR_BB_INSNS (cur_bb, cur_insn)
9524 rtx pat;
9526 if (!INSN_P (cur_insn))
9527 continue;
9529 pat = PATTERN (cur_insn);
9531 /* Ignore GPR restore insns. */
9532 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9534 if (GET_CODE (pat) == SET
9535 && GENERAL_REG_P (SET_DEST (pat)))
9537 /* lgdr */
9538 if (GET_MODE (SET_SRC (pat)) == DImode
9539 && FP_REG_P (SET_SRC (pat)))
9540 continue;
9542 /* l / lg */
9543 if (GET_CODE (SET_SRC (pat)) == MEM)
9544 continue;
9547 /* lm / lmg */
9548 if (GET_CODE (pat) == PARALLEL
9549 && load_multiple_operation (pat, VOIDmode))
9550 continue;
9553 note_stores (cur_insn,
9554 s390_reg_clobbered_rtx,
9555 regs_ever_clobbered);
9560 /* Determine the frame area which actually has to be accessed
9561 in the function epilogue. The values are stored at the
9562 given pointers AREA_BOTTOM (address of the lowest used stack
9563 address) and AREA_TOP (address of the first item which does
9564 not belong to the stack frame). */
9566 static void
9567 s390_frame_area (int *area_bottom, int *area_top)
9569 int b, t;
9571 b = INT_MAX;
9572 t = INT_MIN;
9574 if (cfun_frame_layout.first_restore_gpr != -1)
9576 b = (cfun_frame_layout.gprs_offset
9577 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9578 t = b + (cfun_frame_layout.last_restore_gpr
9579 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9582 if (TARGET_64BIT && cfun_save_high_fprs_p)
9584 b = MIN (b, cfun_frame_layout.f8_offset);
9585 t = MAX (t, (cfun_frame_layout.f8_offset
9586 + cfun_frame_layout.high_fprs * 8));
9589 if (!TARGET_64BIT)
9591 if (cfun_fpr_save_p (FPR4_REGNUM))
9593 b = MIN (b, cfun_frame_layout.f4_offset);
9594 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9596 if (cfun_fpr_save_p (FPR6_REGNUM))
9598 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9599 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9602 *area_bottom = b;
9603 *area_top = t;
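/* As a worked example (64-bit, UNITS_PER_LONG == 8): if GPRs 6 through 15
   are restored, the routine above yields area_bottom = gprs_offset + 48 and
   area_top = gprs_offset + 128, i.e. the ten 8-byte GPR save slots.  */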
9605 /* Update gpr_save_slots in the frame layout trying to make use of
9606 FPRs as GPR save slots.
9607 This is a helper routine of s390_register_info. */
9609 static void
9610 s390_register_info_gprtofpr ()
9612 int save_reg_slot = FPR0_REGNUM;
9613 int i, j;
9615 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9616 return;
9618 /* builtin_eh_return needs to be able to modify the return address
9619 on the stack. It could also adjust the FPR save slot instead but
9620 is it worth the trouble?! */
9621 if (crtl->calls_eh_return)
9622 return;
9624 for (i = 15; i >= 6; i--)
9626 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9627 continue;
9629 /* Advance to the next FP register which can be used as a
9630 GPR save slot. */
9631 while ((!call_used_regs[save_reg_slot]
9632 || df_regs_ever_live_p (save_reg_slot)
9633 || cfun_fpr_save_p (save_reg_slot))
9634 && FP_REGNO_P (save_reg_slot))
9635 save_reg_slot++;
9636 if (!FP_REGNO_P (save_reg_slot))
9638 /* We only want to use ldgr/lgdr if we can get rid of
9639 stm/lm entirely. So undo the gpr slot allocation in
9640 case we ran out of FPR save slots. */
9641 for (j = 6; j <= 15; j++)
9642 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9643 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9644 break;
9646 cfun_gpr_save_slot (i) = save_reg_slot++;
9650 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9651 stdarg.
9652 This is a helper routine for s390_register_info. */
9654 static void
9655 s390_register_info_stdarg_fpr ()
9657 int i;
9658 int min_fpr;
9659 int max_fpr;
9661 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9662 f0-f4 for 64 bit. */
9663 if (!cfun->stdarg
9664 || !TARGET_HARD_FLOAT
9665 || !cfun->va_list_fpr_size
9666 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9667 return;
9669 min_fpr = crtl->args.info.fprs;
9670 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9671 if (max_fpr >= FP_ARG_NUM_REG)
9672 max_fpr = FP_ARG_NUM_REG - 1;
9674 /* FPR argument regs start at f0. */
9675 min_fpr += FPR0_REGNUM;
9676 max_fpr += FPR0_REGNUM;
9678 for (i = min_fpr; i <= max_fpr; i++)
9679 cfun_set_fpr_save (i);
9682 /* Reserve the GPR save slots for GPRs which need to be saved due to
9683 stdarg.
9684 This is a helper routine for s390_register_info. */
9686 static void
9687 s390_register_info_stdarg_gpr ()
9689 int i;
9690 int min_gpr;
9691 int max_gpr;
9693 if (!cfun->stdarg
9694 || !cfun->va_list_gpr_size
9695 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9696 return;
9698 min_gpr = crtl->args.info.gprs;
9699 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9700 if (max_gpr >= GP_ARG_NUM_REG)
9701 max_gpr = GP_ARG_NUM_REG - 1;
9703 /* GPR argument regs start at r2. */
9704 min_gpr += GPR2_REGNUM;
9705 max_gpr += GPR2_REGNUM;
9707 /* If r6 was supposed to be saved into an FPR and now needs to go to
9708 the stack for vararg we have to adjust the restore range to make
9709 sure that the restore is done from stack as well. */
9710 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9711 && min_gpr <= GPR6_REGNUM
9712 && max_gpr >= GPR6_REGNUM)
9714 if (cfun_frame_layout.first_restore_gpr == -1
9715 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9716 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9717 if (cfun_frame_layout.last_restore_gpr == -1
9718 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9719 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9722 if (cfun_frame_layout.first_save_gpr == -1
9723 || cfun_frame_layout.first_save_gpr > min_gpr)
9724 cfun_frame_layout.first_save_gpr = min_gpr;
9726 if (cfun_frame_layout.last_save_gpr == -1
9727 || cfun_frame_layout.last_save_gpr < max_gpr)
9728 cfun_frame_layout.last_save_gpr = max_gpr;
9730 for (i = min_gpr; i <= max_gpr; i++)
9731 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9734 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9735 prologue and epilogue. */
9737 static void
9738 s390_register_info_set_ranges ()
9740 int i, j;
9742 /* Find the first and the last save slot supposed to use the stack
9743 to set the restore range.
9744 Vararg regs might be marked as save to stack but only the
9745 call-saved regs really need restoring (i.e. r6). This code
9746 assumes that the vararg regs have not yet been recorded in
9747 cfun_gpr_save_slot. */
9748 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9749 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9750 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9751 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9752 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9753 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
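/* For instance, if only r11 and r14 end up with SAVE_SLOT_STACK, the loops
   above produce first/last_save_gpr == 11/14 (and the same restore range),
   so a single stm(g)/lm(g) covering r11-r14 can be used even though r12 and
   r13 would not strictly need saving.  */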
9756 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9757 for registers which need to be saved in function prologue.
9758 This function can be used until the insns emitted for save/restore
9759 of the regs are visible in the RTL stream. */
9761 static void
9762 s390_register_info ()
9764 int i;
9765 char clobbered_regs[32];
9767 gcc_assert (!epilogue_completed);
9769 if (reload_completed)
9770 /* After reload we rely on our own routine to determine which
9771 registers need saving. */
9772 s390_regs_ever_clobbered (clobbered_regs);
9773 else
9774 /* During reload we use regs_ever_live as a base since reload
9775 does changes in there which we otherwise would not be aware
9776 of. */
9777 for (i = 0; i < 32; i++)
9778 clobbered_regs[i] = df_regs_ever_live_p (i);
9780 for (i = 0; i < 32; i++)
9781 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9783 /* Mark the call-saved FPRs which need to be saved.
9784 This needs to be done before checking the special GPRs since the
9785 stack pointer usage depends on whether high FPRs have to be saved
9786 or not. */
9787 cfun_frame_layout.fpr_bitmap = 0;
9788 cfun_frame_layout.high_fprs = 0;
9789 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9790 if (clobbered_regs[i] && !call_used_regs[i])
9792 cfun_set_fpr_save (i);
9793 if (i >= FPR8_REGNUM)
9794 cfun_frame_layout.high_fprs++;
9797 /* Register 12 is used for GOT address, but also as temp in prologue
9798 for split-stack stdarg functions (unless r14 is available). */
9799 clobbered_regs[12]
9800 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9801 || (flag_split_stack && cfun->stdarg
9802 && (crtl->is_leaf || TARGET_TPF_PROFILING
9803 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9805 clobbered_regs[BASE_REGNUM]
9806 |= (cfun->machine->base_reg
9807 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9809 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9810 |= !!frame_pointer_needed;
9812 /* On pre z900 machines this might take until machine dependent
9813 reorg to decide.
9814 save_return_addr_p will only be set on non-zarch machines so
9815 there is no risk that r14 goes into an FPR instead of a stack
9816 slot. */
9817 clobbered_regs[RETURN_REGNUM]
9818 |= (!crtl->is_leaf
9819 || TARGET_TPF_PROFILING
9820 || cfun_frame_layout.save_return_addr_p
9821 || crtl->calls_eh_return);
9823 clobbered_regs[STACK_POINTER_REGNUM]
9824 |= (!crtl->is_leaf
9825 || TARGET_TPF_PROFILING
9826 || cfun_save_high_fprs_p
9827 || get_frame_size () > 0
9828 || (reload_completed && cfun_frame_layout.frame_size > 0)
9829 || cfun->calls_alloca);
9831 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9833 for (i = 6; i < 16; i++)
9834 if (clobbered_regs[i])
9835 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9837 s390_register_info_stdarg_fpr ();
9838 s390_register_info_gprtofpr ();
9839 s390_register_info_set_ranges ();
9840 /* stdarg functions might need to save GPRs 2 to 6. This might
9841 override the GPR->FPR save decision made by
9842 s390_register_info_gprtofpr for r6 since vararg regs must go to
9843 the stack. */
9844 s390_register_info_stdarg_gpr ();
9847 /* Return true if REGNO is a global register, but not one
9848 of the special ones that need to be saved/restored anyway. */
9850 static inline bool
9851 global_not_special_regno_p (int regno)
9853 return (global_regs[regno]
9854 /* These registers are special and need to be
9855 restored in any case. */
9856 && !(regno == STACK_POINTER_REGNUM
9857 || regno == RETURN_REGNUM
9858 || regno == BASE_REGNUM
9859 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9862 /* This function is called by s390_optimize_prologue in order to get
9863 rid of unnecessary GPR save/restore instructions. The register info
9864 for the GPRs is re-computed and the ranges are re-calculated. */
9866 static void
9867 s390_optimize_register_info ()
9869 char clobbered_regs[32];
9870 int i;
9872 gcc_assert (epilogue_completed);
9874 s390_regs_ever_clobbered (clobbered_regs);
9876 /* Global registers do not need to be saved and restored unless it
9877 is one of our special regs. (r12, r13, r14, or r15). */
9878 for (i = 0; i < 32; i++)
9879 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9881 /* There is still special treatment needed for cases invisible to
9882 s390_regs_ever_clobbered. */
9883 clobbered_regs[RETURN_REGNUM]
9884 |= (TARGET_TPF_PROFILING
9885 /* When expanding builtin_return_addr in ESA mode we do not
9886 know whether r14 will later be needed as scratch reg when
9887 doing branch splitting. So the builtin always accesses the
9888 r14 save slot and we need to stick to the save/restore
9889 decision for r14 even if it turns out that it didn't get
9890 clobbered. */
9891 || cfun_frame_layout.save_return_addr_p
9892 || crtl->calls_eh_return);
9894 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9896 for (i = 6; i < 16; i++)
9897 if (!clobbered_regs[i])
9898 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9900 s390_register_info_set_ranges ();
9901 s390_register_info_stdarg_gpr ();
9904 /* Fill cfun->machine with info about frame of current function. */
9906 static void
9907 s390_frame_info (void)
9909 HOST_WIDE_INT lowest_offset;
9911 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9912 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9914 /* The va_arg builtin uses a constant distance of 16 *
9915 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9916 pointer. So even if we are going to save the stack pointer in an
9917 FPR we need the stack space in order to keep the offsets
9918 correct. */
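/* With 64-bit longs (UNITS_PER_LONG == 8) that fixed distance works out
   to 16 * 8 == 128 bytes; with 31-bit longs it is 64 bytes. */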
9919 if (cfun->stdarg && cfun_save_arg_fprs_p)
9921 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9923 if (cfun_frame_layout.first_save_gpr_slot == -1)
9924 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9927 cfun_frame_layout.frame_size = get_frame_size ();
9928 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9929 fatal_error (input_location,
9930 "total size of local variables exceeds architecture limit");
9932 if (!TARGET_PACKED_STACK)
9934 /* Fixed stack layout. */
9935 cfun_frame_layout.backchain_offset = 0;
9936 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9937 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9938 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9939 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9940 * UNITS_PER_LONG);
9942 else if (TARGET_BACKCHAIN)
9944 /* Kernel stack layout - packed stack, backchain, no float */
9945 gcc_assert (TARGET_SOFT_FLOAT);
9946 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9947 - UNITS_PER_LONG);
9949 /* The distance between the backchain and the return address
9950 save slot must not change. So we always need a slot for the
9951 stack pointer which resides in between. */
9952 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9954 cfun_frame_layout.gprs_offset
9955 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9957 /* FPRs will not be saved. Nevertheless pick sane values to
9958 keep area calculations valid. */
9959 cfun_frame_layout.f0_offset =
9960 cfun_frame_layout.f4_offset =
9961 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9963 else
9965 int num_fprs;
9967 /* Packed stack layout without backchain. */
9969 /* With stdarg FPRs need their dedicated slots. */
9970 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9971 : (cfun_fpr_save_p (FPR4_REGNUM) +
9972 cfun_fpr_save_p (FPR6_REGNUM)));
9973 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9975 num_fprs = (cfun->stdarg ? 2
9976 : (cfun_fpr_save_p (FPR0_REGNUM)
9977 + cfun_fpr_save_p (FPR2_REGNUM)));
9978 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9980 cfun_frame_layout.gprs_offset
9981 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9983 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9984 - cfun_frame_layout.high_fprs * 8);
9987 if (cfun_save_high_fprs_p)
9988 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9990 if (!crtl->is_leaf)
9991 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9993 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9994 sized area at the bottom of the stack. This is required also for
9995 leaf functions. When GCC generates a local stack reference it
9996 will always add STACK_POINTER_OFFSET to all these references. */
9997 if (crtl->is_leaf
9998 && !TARGET_TPF_PROFILING
9999 && cfun_frame_layout.frame_size == 0
10000 && !cfun->calls_alloca)
10001 return;
10003 /* Calculate the number of bytes we have used in our own register
10004 save area. With the packed stack layout we can re-use the
10005 remaining bytes for normal stack elements. */
10007 if (TARGET_PACKED_STACK)
10008 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10009 cfun_frame_layout.f4_offset),
10010 cfun_frame_layout.gprs_offset);
10011 else
10012 lowest_offset = 0;
10014 if (TARGET_BACKCHAIN)
10015 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10017 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10019 /* If, under 31-bit, an odd number of GPRs has to be saved, we have
10020 to adjust the frame size to maintain 8-byte alignment of stack
10021 frames. */
10022 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10023 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10024 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
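/* For example, with the 8-byte stack alignment used here a raw frame
   size of 92 bytes is rounded up to 96. */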
10027 /* Generate frame layout. Fills in register and frame data for the current
10028 function in cfun->machine. This routine can be called multiple times;
10029 it will re-do the complete frame layout every time. */
10031 static void
10032 s390_init_frame_layout (void)
10034 HOST_WIDE_INT frame_size;
10035 int base_used;
10037 /* After LRA the frame layout is supposed to be read-only and should
10038 not be re-computed. */
10039 if (reload_completed)
10040 return;
10044 frame_size = cfun_frame_layout.frame_size;
10046 /* Try to predict whether we'll need the base register. */
10047 base_used = crtl->uses_const_pool
10048 || (!DISP_IN_RANGE (frame_size)
10049 && !CONST_OK_FOR_K (frame_size));
10051 /* Decide which register to use as literal pool base. In small
10052 leaf functions, try to use an unused call-clobbered register
10053 as base register to avoid save/restore overhead. */
10054 if (!base_used)
10055 cfun->machine->base_reg = NULL_RTX;
10056 else
10058 int br = 0;
10060 if (crtl->is_leaf)
10061 /* Prefer r5 (most likely to be free). */
10062 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10064 cfun->machine->base_reg =
10065 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10068 s390_register_info ();
10069 s390_frame_info ();
10071 while (frame_size != cfun_frame_layout.frame_size);
10074 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10075 the TX is nonescaping. A transaction is considered escaping if
10076 there is at least one path from tbegin returning CC0 to the
10077 function exit block without a tend.
10079 The check so far has some limitations:
10080 - only single tbegin/tend BBs are supported
10081 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10082 - when CC is copied to a GPR and the CC0 check is done with the GPR
10083 this is not supported. */
10086 static void
10087 s390_optimize_nonescaping_tx (void)
10089 const unsigned int CC0 = 1 << 3;
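/* 1 << 3 is the bit selecting condition code 0 in the 4-bit condition
   mask used with the CCRAWmode compares below. */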
10090 basic_block tbegin_bb = NULL;
10091 basic_block tend_bb = NULL;
10092 basic_block bb;
10093 rtx_insn *insn;
10094 bool result = true;
10095 int bb_index;
10096 rtx_insn *tbegin_insn = NULL;
10098 if (!cfun->machine->tbegin_p)
10099 return;
10101 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10103 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10105 if (!bb)
10106 continue;
10108 FOR_BB_INSNS (bb, insn)
10110 rtx ite, cc, pat, target;
10111 unsigned HOST_WIDE_INT mask;
10113 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10114 continue;
10116 pat = PATTERN (insn);
10118 if (GET_CODE (pat) == PARALLEL)
10119 pat = XVECEXP (pat, 0, 0);
10121 if (GET_CODE (pat) != SET
10122 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10123 continue;
10125 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10127 rtx_insn *tmp;
10129 tbegin_insn = insn;
10131 /* Just return if the tbegin doesn't have clobbers. */
10132 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10133 return;
10135 if (tbegin_bb != NULL)
10136 return;
10138 /* Find the next conditional jump. */
10139 for (tmp = NEXT_INSN (insn);
10140 tmp != NULL_RTX;
10141 tmp = NEXT_INSN (tmp))
10143 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10144 return;
10145 if (!JUMP_P (tmp))
10146 continue;
10148 ite = SET_SRC (PATTERN (tmp));
10149 if (GET_CODE (ite) != IF_THEN_ELSE)
10150 continue;
10152 cc = XEXP (XEXP (ite, 0), 0);
10153 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10154 || GET_MODE (cc) != CCRAWmode
10155 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10156 return;
10158 if (bb->succs->length () != 2)
10159 return;
10161 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10162 if (GET_CODE (XEXP (ite, 0)) == NE)
10163 mask ^= 0xf;
10165 if (mask == CC0)
10166 target = XEXP (ite, 1);
10167 else if (mask == (CC0 ^ 0xf))
10168 target = XEXP (ite, 2);
10169 else
10170 return;
10173 edge_iterator ei;
10174 edge e1, e2;
10176 ei = ei_start (bb->succs);
10177 e1 = ei_safe_edge (ei);
10178 ei_next (&ei);
10179 e2 = ei_safe_edge (ei);
10181 if (e2->flags & EDGE_FALLTHRU)
10183 e2 = e1;
10184 e1 = ei_safe_edge (ei);
10187 if (!(e1->flags & EDGE_FALLTHRU))
10188 return;
10190 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10192 if (tmp == BB_END (bb))
10193 break;
10197 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10199 if (tend_bb != NULL)
10200 return;
10201 tend_bb = bb;
10206 /* Either we successfully remove the FPR clobbers here or we are not
10207 able to do anything for this TX. Both cases don't qualify for
10208 another look. */
10209 cfun->machine->tbegin_p = false;
10211 if (tbegin_bb == NULL || tend_bb == NULL)
10212 return;
10214 calculate_dominance_info (CDI_POST_DOMINATORS);
10215 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10216 free_dominance_info (CDI_POST_DOMINATORS);
10218 if (!result)
10219 return;
10221 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10222 gen_rtvec (2,
10223 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10224 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10225 INSN_CODE (tbegin_insn) = -1;
10226 df_insn_rescan (tbegin_insn);
10228 return;
10231 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10232 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10234 static unsigned int
10235 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10237 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10240 /* Implement TARGET_HARD_REGNO_MODE_OK.
10242 Integer modes <= word size fit into any GPR.
10243 Integer modes > word size fit into successive GPRs, starting with
10244 an even-numbered register.
10245 SImode and DImode fit into FPRs as well.
10247 Floating point modes <= word size fit into any FPR or GPR.
10248 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10249 into any FPR, or an even-odd GPR pair.
10250 TFmode fits only into an even-odd FPR pair.
10252 Complex floating point modes fit either into two FPRs, or into
10253 successive GPRs (again starting with an even number).
10254 TCmode fits only into two successive even-odd FPR pairs.
10256 Condition code modes fit only into the CC register. */
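/* For example, under these rules a DImode value in 31-bit mode is wider
   than a word and therefore occupies an even/odd GPR pair such as
   r2/r3, while SImode fits into any single GPR (or an FPR). */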
10258 static bool
10259 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10261 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10262 return false;
10264 switch (REGNO_REG_CLASS (regno))
10266 case VEC_REGS:
10267 return ((GET_MODE_CLASS (mode) == MODE_INT
10268 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10269 || mode == DFmode
10270 || (TARGET_VXE && mode == SFmode)
10271 || s390_vector_mode_supported_p (mode));
10272 break;
10273 case FP_REGS:
10274 if (TARGET_VX
10275 && ((GET_MODE_CLASS (mode) == MODE_INT
10276 && s390_class_max_nregs (FP_REGS, mode) == 1)
10277 || mode == DFmode
10278 || s390_vector_mode_supported_p (mode)))
10279 return true;
10281 if (REGNO_PAIR_OK (regno, mode))
10283 if (mode == SImode || mode == DImode)
10284 return true;
10286 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10287 return true;
10289 break;
10290 case ADDR_REGS:
10291 if (FRAME_REGNO_P (regno) && mode == Pmode)
10292 return true;
10294 /* fallthrough */
10295 case GENERAL_REGS:
10296 if (REGNO_PAIR_OK (regno, mode))
10298 if (TARGET_ZARCH
10299 || (mode != TFmode && mode != TCmode && mode != TDmode))
10300 return true;
10302 break;
10303 case CC_REGS:
10304 if (GET_MODE_CLASS (mode) == MODE_CC)
10305 return true;
10306 break;
10307 case ACCESS_REGS:
10308 if (REGNO_PAIR_OK (regno, mode))
10310 if (mode == SImode || mode == Pmode)
10311 return true;
10313 break;
10314 default:
10315 return false;
10318 return false;
10321 /* Implement TARGET_MODES_TIEABLE_P. */
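/* As implemented below, SFmode and DFmode tie only with each other;
   any two modes outside that set are freely tieable. */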
10323 static bool
10324 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10326 return ((mode1 == SFmode || mode1 == DFmode)
10327 == (mode2 == SFmode || mode2 == DFmode));
10330 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10332 bool
10333 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10335 /* Once we've decided upon a register to use as base register, it must
10336 no longer be used for any other purpose. */
10337 if (cfun->machine->base_reg)
10338 if (REGNO (cfun->machine->base_reg) == old_reg
10339 || REGNO (cfun->machine->base_reg) == new_reg)
10340 return false;
10342 /* Prevent regrename from using call-saved regs which haven't
10343 actually been saved. This is necessary since regrename assumes
10344 the backend save/restore decisions are based on
10345 df_regs_ever_live. Since we have our own routine we have to tell
10346 regrename manually about it. */
10347 if (GENERAL_REGNO_P (new_reg)
10348 && !call_used_regs[new_reg]
10349 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10350 return false;
10352 return true;
10355 /* Return nonzero if register REGNO can be used as a scratch register
10356 in peephole2. */
10358 static bool
10359 s390_hard_regno_scratch_ok (unsigned int regno)
10361 /* See s390_hard_regno_rename_ok. */
10362 if (GENERAL_REGNO_P (regno)
10363 && !call_used_regs[regno]
10364 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10365 return false;
10367 return true;
10370 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10371 code that runs in z/Architecture mode, but conforms to the 31-bit
10372 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10373 bytes are saved across calls, however. */
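/* As a consequence, a value wider than 4 bytes living in one of the
   call-saved GPRs r6-r15 cannot be assumed to survive a call in that
   configuration. */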
10375 static bool
10376 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10377 machine_mode mode)
10379 if (!TARGET_64BIT
10380 && TARGET_ZARCH
10381 && GET_MODE_SIZE (mode) > 4
10382 && ((regno >= 6 && regno <= 15) || regno == 32))
10383 return true;
10385 if (TARGET_VX
10386 && GET_MODE_SIZE (mode) > 8
10387 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10388 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10389 return true;
10391 return false;
10394 /* Maximum number of registers to represent a value of mode MODE
10395 in a register of class RCLASS. */
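/* Two worked examples of the calculation below: TFmode (16 bytes) in
   FP_REGS needs 2 registers whether or not the vector facility is
   available (it is forced into a register pair), while V16QImode in
   VEC_REGS with the vector facility needs just 1. */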
int
10398 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10400 int reg_size;
10401 bool reg_pair_required_p = false;
10403 switch (rclass)
10405 case FP_REGS:
10406 case VEC_REGS:
10407 reg_size = TARGET_VX ? 16 : 8;
10409 /* TF and TD modes would fit into a VR but we put them into a
10410 register pair since we do not have 128bit FP instructions on
10411 full VRs. */
10412 if (TARGET_VX
10413 && SCALAR_FLOAT_MODE_P (mode)
10414 && GET_MODE_SIZE (mode) >= 16)
10415 reg_pair_required_p = true;
10417 /* Even if complex types would fit into a single FPR/VR we force
10418 them into a register pair to deal with the parts more easily.
10419 (FIXME: What about complex ints?) */
10420 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10421 reg_pair_required_p = true;
10422 break;
10423 case ACCESS_REGS:
10424 reg_size = 4;
10425 break;
10426 default:
10427 reg_size = UNITS_PER_WORD;
10428 break;
10431 if (reg_pair_required_p)
10432 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10434 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10437 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10439 static bool
10440 s390_can_change_mode_class (machine_mode from_mode,
10441 machine_mode to_mode,
10442 reg_class_t rclass)
10444 machine_mode small_mode;
10445 machine_mode big_mode;
10447 /* V1TF and TF have different representations in vector
10448 registers. */
10449 if (reg_classes_intersect_p (VEC_REGS, rclass)
10450 && ((from_mode == V1TFmode && to_mode == TFmode)
10451 || (from_mode == TFmode && to_mode == V1TFmode)))
10452 return false;
10454 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10455 return true;
10457 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10459 small_mode = from_mode;
10460 big_mode = to_mode;
10462 else
10464 small_mode = to_mode;
10465 big_mode = from_mode;
10468 /* Values residing in VRs are little-endian style. All modes are
10469 placed left-aligned in a VR. This means that we cannot allow
10470 switching between modes with differing sizes. Also, if the vector
10471 facility is available we still place TFmode values in VR register
10472 pairs, since the only instructions we have operating on TFmode
10473 deal with register pairs. Therefore we have to allow DFmode
10474 subregs of TFmode to enable the TFmode splitters. */
10475 if (reg_classes_intersect_p (VEC_REGS, rclass)
10476 && (GET_MODE_SIZE (small_mode) < 8
10477 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10478 return false;
10480 /* Likewise for access registers, since they have only half the
10481 word size on 64-bit. */
10482 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10483 return false;
10485 return true;
10488 /* Return true if we use LRA instead of reload pass. */
10489 static bool
10490 s390_lra_p (void)
10492 return s390_lra_flag;
10495 /* Return true if register FROM can be eliminated via register TO. */
10497 static bool
10498 s390_can_eliminate (const int from, const int to)
10500 /* We have not marked the base register as fixed.
10501 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10502 If a function requires the base register, we say here that this
10503 elimination cannot be performed. This will cause reload to free
10504 up the base register (as if it were fixed). On the other hand,
10505 if the current function does *not* require the base register, we
10506 say here the elimination succeeds, which in turn allows reload
10507 to allocate the base register for any other purpose. */
10508 if (from == BASE_REGNUM && to == BASE_REGNUM)
10510 s390_init_frame_layout ();
10511 return cfun->machine->base_reg == NULL_RTX;
10514 /* Everything else must point into the stack frame. */
10515 gcc_assert (to == STACK_POINTER_REGNUM
10516 || to == HARD_FRAME_POINTER_REGNUM);
10518 gcc_assert (from == FRAME_POINTER_REGNUM
10519 || from == ARG_POINTER_REGNUM
10520 || from == RETURN_ADDRESS_POINTER_REGNUM);
10522 /* Make sure we actually saved the return address. */
10523 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10524 if (!crtl->calls_eh_return
10525 && !cfun->stdarg
10526 && !cfun_frame_layout.save_return_addr_p)
10527 return false;
10529 return true;
10532 /* Return offset between register FROM and TO initially after prolog. */
10534 HOST_WIDE_INT
10535 s390_initial_elimination_offset (int from, int to)
10537 HOST_WIDE_INT offset;
10539 /* ??? Why are we called for non-eliminable pairs? */
10540 if (!s390_can_eliminate (from, to))
10541 return 0;
10543 switch (from)
10545 case FRAME_POINTER_REGNUM:
10546 offset = (get_frame_size()
10547 + STACK_POINTER_OFFSET
10548 + crtl->outgoing_args_size);
10549 break;
10551 case ARG_POINTER_REGNUM:
10552 s390_init_frame_layout ();
10553 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10554 break;
10556 case RETURN_ADDRESS_POINTER_REGNUM:
10557 s390_init_frame_layout ();
10559 if (cfun_frame_layout.first_save_gpr_slot == -1)
10561 /* If it turns out that for stdarg nothing went into the reg
10562 save area we also do not need the return address
10563 pointer. */
10564 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10565 return 0;
10567 gcc_unreachable ();
10570 /* In order to make the following work it is not necessary for
10571 r14 to have a save slot. It is sufficient if one other GPR
10572 got one. Since the GPRs are always stored without gaps we
10573 are able to calculate where the r14 save slot would
10574 reside. */
10575 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10576 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10577 UNITS_PER_LONG);
10578 break;
10580 case BASE_REGNUM:
10581 offset = 0;
10582 break;
10584 default:
10585 gcc_unreachable ();
10588 return offset;
10591 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10592 to register BASE. Return generated insn. */
10594 static rtx
10595 save_fpr (rtx base, int offset, int regnum)
10597 rtx addr;
10598 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10600 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10601 set_mem_alias_set (addr, get_varargs_alias_set ());
10602 else
10603 set_mem_alias_set (addr, get_frame_alias_set ());
10605 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10608 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10609 to register BASE. Return generated insn. */
10611 static rtx
10612 restore_fpr (rtx base, int offset, int regnum)
10614 rtx addr;
10615 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10616 set_mem_alias_set (addr, get_frame_alias_set ());
10618 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10621 /* Generate insn to save registers FIRST to LAST into
10622 the register save area located at offset OFFSET
10623 relative to register BASE. */
10625 static rtx
10626 save_gprs (rtx base, int offset, int first, int last)
10628 rtx addr, insn, note;
10629 int i;
10631 addr = plus_constant (Pmode, base, offset);
10632 addr = gen_rtx_MEM (Pmode, addr);
10634 set_mem_alias_set (addr, get_frame_alias_set ());
10636 /* Special-case single register. */
10637 if (first == last)
10639 if (TARGET_64BIT)
10640 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10641 else
10642 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10644 if (!global_not_special_regno_p (first))
10645 RTX_FRAME_RELATED_P (insn) = 1;
10646 return insn;
10650 insn = gen_store_multiple (addr,
10651 gen_rtx_REG (Pmode, first),
10652 GEN_INT (last - first + 1));
10654 if (first <= 6 && cfun->stdarg)
10655 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10657 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10659 if (first + i <= 6)
10660 set_mem_alias_set (mem, get_varargs_alias_set ());
10663 /* We need to set the FRAME_RELATED flag on all SETs
10664 inside the store-multiple pattern.
10666 However, we must not emit DWARF records for registers 2..5
10667 if they are stored for use by variable arguments ...
10669 ??? Unfortunately, it is not enough to simply not set the
10670 FRAME_RELATED flags for those SETs, because the first SET
10671 of the PARALLEL is always treated as if it had the flag
10672 set, even if it does not. Therefore we emit a new pattern
10673 without those registers as REG_FRAME_RELATED_EXPR note. */
10675 if (first >= 6 && !global_not_special_regno_p (first))
10677 rtx pat = PATTERN (insn);
10679 for (i = 0; i < XVECLEN (pat, 0); i++)
10680 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10681 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10682 0, i)))))
10683 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10685 RTX_FRAME_RELATED_P (insn) = 1;
10687 else if (last >= 6)
10689 int start;
10691 for (start = first >= 6 ? first : 6; start <= last; start++)
10692 if (!global_not_special_regno_p (start))
10693 break;
10695 if (start > last)
10696 return insn;
10698 addr = plus_constant (Pmode, base,
10699 offset + (start - first) * UNITS_PER_LONG);
10701 if (start == last)
10703 if (TARGET_64BIT)
10704 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10705 gen_rtx_REG (Pmode, start));
10706 else
10707 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10708 gen_rtx_REG (Pmode, start));
10709 note = PATTERN (note);
10711 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10712 RTX_FRAME_RELATED_P (insn) = 1;
10714 return insn;
10717 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10718 gen_rtx_REG (Pmode, start),
10719 GEN_INT (last - start + 1));
10720 note = PATTERN (note);
10722 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10724 for (i = 0; i < XVECLEN (note, 0); i++)
10725 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10726 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10727 0, i)))))
10728 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10730 RTX_FRAME_RELATED_P (insn) = 1;
10733 return insn;
10736 /* Generate insn to restore registers FIRST to LAST from
10737 the register save area located at offset OFFSET
10738 relative to register BASE. */
10740 static rtx
10741 restore_gprs (rtx base, int offset, int first, int last)
10743 rtx addr, insn;
10745 addr = plus_constant (Pmode, base, offset);
10746 addr = gen_rtx_MEM (Pmode, addr);
10747 set_mem_alias_set (addr, get_frame_alias_set ());
10749 /* Special-case single register. */
10750 if (first == last)
10752 if (TARGET_64BIT)
10753 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10754 else
10755 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10757 RTX_FRAME_RELATED_P (insn) = 1;
10758 return insn;
10761 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10762 addr,
10763 GEN_INT (last - first + 1));
10764 RTX_FRAME_RELATED_P (insn) = 1;
10765 return insn;
10768 /* Return insn sequence to load the GOT register. */
10770 rtx_insn *
10771 s390_load_got (void)
10773 rtx_insn *insns;
10775 /* We cannot use pic_offset_table_rtx here since we use this
10776 function also for non-pic if __tls_get_offset is called and in
10777 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10778 aren't usable. */
10779 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10781 start_sequence ();
10783 emit_move_insn (got_rtx, s390_got_symbol ());
10785 insns = get_insns ();
10786 end_sequence ();
10787 return insns;
10790 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10791 and the change to the stack pointer. */
10793 static void
10794 s390_emit_stack_tie (void)
10796 rtx mem = gen_frame_mem (BLKmode,
10797 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10799 emit_insn (gen_stack_tie (mem));
10802 /* Copy GPRS into FPR save slots. */
10804 static void
10805 s390_save_gprs_to_fprs (void)
10807 int i;
10809 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10810 return;
10812 for (i = 6; i < 16; i++)
10814 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10816 rtx_insn *insn =
10817 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10818 gen_rtx_REG (DImode, i));
10819 RTX_FRAME_RELATED_P (insn) = 1;
10820 /* This prevents dwarf2cfi from interpreting the set. Doing
10821 so it might emit def_cfa_register infos setting an FPR as
10822 new CFA. */
10823 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10828 /* Restore GPRs from FPR save slots. */
10830 static void
10831 s390_restore_gprs_from_fprs (void)
10833 int i;
10835 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10836 return;
10838 /* Restore the GPRs starting with the stack pointer. That way the
10839 stack pointer already has its original value when it comes to
10840 restoring the hard frame pointer. So we can set the cfa reg back
10841 to the stack pointer. */
10842 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10844 rtx_insn *insn;
10846 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10847 continue;
10849 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10851 if (i == STACK_POINTER_REGNUM)
10852 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10853 else
10854 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10856 df_set_regs_ever_live (i, true);
10857 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10859 /* If either the stack pointer or the frame pointer gets restored,
10860 set the CFA value to its value at function start. Doing this
10861 for the frame pointer results in .cfi_def_cfa_register 15,
10862 which is ok since if the stack pointer got modified it has
10863 been restored already. */
10864 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10865 add_reg_note (insn, REG_CFA_DEF_CFA,
10866 plus_constant (Pmode, stack_pointer_rtx,
10867 STACK_POINTER_OFFSET));
10868 RTX_FRAME_RELATED_P (insn) = 1;
10873 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10874 generation. */
10876 namespace {
10878 const pass_data pass_data_s390_early_mach =
10880 RTL_PASS, /* type */
10881 "early_mach", /* name */
10882 OPTGROUP_NONE, /* optinfo_flags */
10883 TV_MACH_DEP, /* tv_id */
10884 0, /* properties_required */
10885 0, /* properties_provided */
10886 0, /* properties_destroyed */
10887 0, /* todo_flags_start */
10888 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10891 class pass_s390_early_mach : public rtl_opt_pass
10893 public:
10894 pass_s390_early_mach (gcc::context *ctxt)
10895 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10898 /* opt_pass methods: */
10899 virtual unsigned int execute (function *);
10901 }; // class pass_s390_early_mach
10903 unsigned int
10904 pass_s390_early_mach::execute (function *fun)
10906 rtx_insn *insn;
10908 /* Try to get rid of the FPR clobbers. */
10909 s390_optimize_nonescaping_tx ();
10911 /* Re-compute register info. */
10912 s390_register_info ();
10914 /* If we're using a base register, ensure that it is always valid for
10915 the first non-prologue instruction. */
10916 if (fun->machine->base_reg)
10917 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10919 /* Annotate all constant pool references to let the scheduler know
10920 they implicitly use the base register. */
10921 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10922 if (INSN_P (insn))
10924 annotate_constant_pool_refs (insn);
10925 df_insn_rescan (insn);
10927 return 0;
10930 } // anon namespace
10932 rtl_opt_pass *
10933 make_pass_s390_early_mach (gcc::context *ctxt)
10935 return new pass_s390_early_mach (ctxt);
10938 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10939 - push immediates that are too big into the literal pool and annotate the refs
10940 - emit frame related notes for stack pointer changes. */
10942 static rtx
10943 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10945 rtx_insn *insn;
10946 rtx orig_offset = offset;
10948 gcc_assert (REG_P (target));
10949 gcc_assert (REG_P (reg));
10950 gcc_assert (CONST_INT_P (offset));
10952 if (offset == const0_rtx) /* lr/lgr */
10954 insn = emit_move_insn (target, reg);
10956 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
10958 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10959 offset));
10961 else
10963 if (!satisfies_constraint_K (offset) /* ahi/aghi */
10964 && (!TARGET_EXTIMM
10965 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
10966 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10967 offset = force_const_mem (Pmode, offset);
10969 if (target != reg)
10971 insn = emit_move_insn (target, reg);
10972 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10975 insn = emit_insn (gen_add2_insn (target, offset));
10977 if (!CONST_INT_P (offset))
10979 annotate_constant_pool_refs (insn);
10981 if (frame_related_p)
10982 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10983 gen_rtx_SET (target,
10984 gen_rtx_PLUS (Pmode, target,
10985 orig_offset)));
10989 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10991 /* If this is a stack adjustment and we are generating a stack clash
10992 prologue, then add a REG_STACK_CHECK note to signal that this insn
10993 should be left alone. */
10994 if (flag_stack_clash_protection && target == stack_pointer_rtx)
10995 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10997 return insn;
11000 /* Emit a compare instruction with a volatile memory access as stack
11001 probe. It does not waste store tags and does not clobber any
11002 registers apart from the condition code. */
11003 static void
11004 s390_emit_stack_probe (rtx addr)
11006 rtx mem = gen_rtx_MEM (Pmode, addr);
11007 MEM_VOLATILE_P (mem) = 1;
11008 emit_insn (gen_probe_stack (mem));
11011 /* Use a runtime loop if we have to emit more probes than this. */
11012 #define MIN_UNROLL_PROBES 3
11014 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11015 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11016 probe relative to the stack pointer.
11018 Note that SIZE is negative.
11020 The return value is true if TEMP_REG has been clobbered. */
11021 static bool
11022 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11023 rtx temp_reg)
11025 bool temp_reg_clobbered_p = false;
11026 HOST_WIDE_INT probe_interval
11027 = 1 << param_stack_clash_protection_probe_interval;
11028 HOST_WIDE_INT guard_size
11029 = 1 << param_stack_clash_protection_guard_size;
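/* The --param values hold the log2 of the probe interval and guard
   size in bytes, hence the shifts above. */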
11031 if (flag_stack_clash_protection)
11033 if (last_probe_offset + -INTVAL (size) < guard_size)
11034 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11035 else
11037 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11038 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11039 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11040 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11042 if (num_probes < MIN_UNROLL_PROBES)
11044 /* Emit unrolled probe statements. */
11046 for (unsigned int i = 0; i < num_probes; i++)
11048 s390_prologue_plus_offset (stack_pointer_rtx,
11049 stack_pointer_rtx,
11050 GEN_INT (-probe_interval), true);
11051 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11052 stack_pointer_rtx,
11053 offset));
11055 if (num_probes > 0)
11056 last_probe_offset = INTVAL (offset);
11057 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11059 else
11061 /* Emit a loop probing the pages. */
11063 rtx_code_label *loop_start_label = gen_label_rtx ();
11065 /* From now on temp_reg will be the CFA register. */
11066 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11067 GEN_INT (-rounded_size), true);
11068 emit_label (loop_start_label);
11070 s390_prologue_plus_offset (stack_pointer_rtx,
11071 stack_pointer_rtx,
11072 GEN_INT (-probe_interval), false);
11073 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11074 stack_pointer_rtx,
11075 offset));
11076 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11077 GT, NULL_RTX,
11078 Pmode, 1, loop_start_label);
11080 /* Without this make_edges ICEes. */
11081 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11082 LABEL_NUSES (loop_start_label) = 1;
11084 /* That's going to be a NOP since stack pointer and
11085 temp_reg are supposed to be the same here. We just
11086 emit it to set the CFA reg back to r15. */
11087 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11088 const0_rtx, true);
11089 temp_reg_clobbered_p = true;
11090 last_probe_offset = INTVAL (offset);
11091 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11094 /* Handle any residual allocation request. */
11095 s390_prologue_plus_offset (stack_pointer_rtx,
11096 stack_pointer_rtx,
11097 GEN_INT (-residual), true);
11098 last_probe_offset += residual;
11099 if (last_probe_offset >= probe_interval)
11100 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11101 stack_pointer_rtx,
11102 GEN_INT (residual
11103 - UNITS_PER_LONG)));
11105 return temp_reg_clobbered_p;
11109 /* Subtract frame size from stack pointer. */
11110 s390_prologue_plus_offset (stack_pointer_rtx,
11111 stack_pointer_rtx,
11112 size, true);
11114 return temp_reg_clobbered_p;
11117 /* Expand the prologue into a bunch of separate insns. */
11119 void
11120 s390_emit_prologue (void)
11122 rtx insn, addr;
11123 rtx temp_reg;
11124 int i;
11125 int offset;
11126 int next_fpr = 0;
11128 /* Choose best register to use for temp use within prologue.
11129 TPF with profiling must avoid the register 14 - the tracing function
11130 needs the original contents of r14 to be preserved. */
11132 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11133 && !crtl->is_leaf
11134 && !TARGET_TPF_PROFILING)
11135 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11136 else if (flag_split_stack && cfun->stdarg)
11137 temp_reg = gen_rtx_REG (Pmode, 12);
11138 else
11139 temp_reg = gen_rtx_REG (Pmode, 1);
11141 /* When probing for stack-clash mitigation, we have to track the distance
11142 between the stack pointer and the closest known reference.
11144 Most of the time we have to make a worst case assumption. The
11145 only exception is when TARGET_BACKCHAIN is active, in which case
11146 we know *sp (offset 0) was written. */
11147 HOST_WIDE_INT probe_interval
11148 = 1 << param_stack_clash_protection_probe_interval;
11149 HOST_WIDE_INT last_probe_offset
11150 = (TARGET_BACKCHAIN
11151 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11152 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11154 s390_save_gprs_to_fprs ();
11156 /* Save call saved gprs. */
11157 if (cfun_frame_layout.first_save_gpr != -1)
11159 insn = save_gprs (stack_pointer_rtx,
11160 cfun_frame_layout.gprs_offset +
11161 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11162 - cfun_frame_layout.first_save_gpr_slot),
11163 cfun_frame_layout.first_save_gpr,
11164 cfun_frame_layout.last_save_gpr);
11166 /* This is not 100% correct. If we have more than one register saved,
11167 then LAST_PROBE_OFFSET can move even closer to sp. */
11168 last_probe_offset
11169 = (cfun_frame_layout.gprs_offset +
11170 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11171 - cfun_frame_layout.first_save_gpr_slot));
11173 emit_insn (insn);
11176 /* Dummy insn to mark literal pool slot. */
11178 if (cfun->machine->base_reg)
11179 emit_insn (gen_main_pool (cfun->machine->base_reg));
11181 offset = cfun_frame_layout.f0_offset;
11183 /* Save f0 and f2. */
11184 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11186 if (cfun_fpr_save_p (i))
11188 save_fpr (stack_pointer_rtx, offset, i);
11189 if (offset < last_probe_offset)
11190 last_probe_offset = offset;
11191 offset += 8;
11193 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11194 offset += 8;
11197 /* Save f4 and f6. */
11198 offset = cfun_frame_layout.f4_offset;
11199 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11201 if (cfun_fpr_save_p (i))
11203 insn = save_fpr (stack_pointer_rtx, offset, i);
11204 if (offset < last_probe_offset)
11205 last_probe_offset = offset;
11206 offset += 8;
11208 /* If f4 and f6 are call clobbered they are saved due to
11209 stdargs and therefore are not frame related. */
11210 if (!call_used_regs[i])
11211 RTX_FRAME_RELATED_P (insn) = 1;
11213 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11214 offset += 8;
11217 if (TARGET_PACKED_STACK
11218 && cfun_save_high_fprs_p
11219 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11221 offset = (cfun_frame_layout.f8_offset
11222 + (cfun_frame_layout.high_fprs - 1) * 8);
11224 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11225 if (cfun_fpr_save_p (i))
11227 insn = save_fpr (stack_pointer_rtx, offset, i);
11228 if (offset < last_probe_offset)
11229 last_probe_offset = offset;
11231 RTX_FRAME_RELATED_P (insn) = 1;
11232 offset -= 8;
11234 if (offset >= cfun_frame_layout.f8_offset)
11235 next_fpr = i;
11238 if (!TARGET_PACKED_STACK)
11239 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11241 if (flag_stack_usage_info)
11242 current_function_static_stack_size = cfun_frame_layout.frame_size;
11244 /* Decrement stack pointer. */
11246 if (cfun_frame_layout.frame_size > 0)
11248 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11249 rtx_insn *stack_pointer_backup_loc;
11250 bool temp_reg_clobbered_p;
11252 if (s390_stack_size)
11254 HOST_WIDE_INT stack_guard;
11256 if (s390_stack_guard)
11257 stack_guard = s390_stack_guard;
11258 else
11260 /* If no value for stack guard is provided the smallest power of 2
11261 larger than the current frame size is chosen. */
11262 stack_guard = 1;
11263 while (stack_guard < cfun_frame_layout.frame_size)
11264 stack_guard <<= 1;
11267 if (cfun_frame_layout.frame_size >= s390_stack_size)
11269 warning (0, "frame size of function %qs is %wd"
11270 " bytes exceeding user provided stack limit of "
11271 "%d bytes. "
11272 "An unconditional trap is added.",
11273 current_function_name(), cfun_frame_layout.frame_size,
11274 s390_stack_size);
11275 emit_insn (gen_trap ());
11276 emit_barrier ();
11278 else
11280 /* stack_guard has to be smaller than s390_stack_size.
11281 Otherwise we would emit an AND with zero which would
11282 not match the test under mask pattern. */
11283 if (stack_guard >= s390_stack_size)
11285 warning (0, "frame size of function %qs is %wd"
11286 " bytes which is more than half the stack size. "
11287 "The dynamic check would not be reliable. "
11288 "No check emitted for this function.",
11289 current_function_name(),
11290 cfun_frame_layout.frame_size);
11292 else
11294 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11295 & ~(stack_guard - 1));
11297 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11298 GEN_INT (stack_check_mask));
11299 if (TARGET_64BIT)
11300 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11301 t, const0_rtx),
11302 t, const0_rtx, const0_rtx));
11303 else
11304 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11305 t, const0_rtx),
11306 t, const0_rtx, const0_rtx));
11311 if (s390_warn_framesize > 0
11312 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11313 warning (0, "frame size of %qs is %wd bytes",
11314 current_function_name (), cfun_frame_layout.frame_size);
11316 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11317 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11319 /* Save the location where we could backup the incoming stack
11320 pointer. */
11321 stack_pointer_backup_loc = get_last_insn ();
11323 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11324 temp_reg);
11326 if (TARGET_BACKCHAIN || next_fpr)
11328 if (temp_reg_clobbered_p)
11330 /* allocate_stack_space had to make use of temp_reg and
11331 we need it to hold a backup of the incoming stack
11332 pointer. Calculate back that value from the current
11333 stack pointer. */
11334 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11335 GEN_INT (cfun_frame_layout.frame_size),
11336 false);
11338 else
11340 /* allocate_stack_space didn't actually require
11341 temp_reg. Insert the stack pointer backup insn
11342 before the stack pointer decrement code - knowing now
11343 that the value will survive. */
11344 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11345 stack_pointer_backup_loc);
11349 /* Set backchain. */
11351 if (TARGET_BACKCHAIN)
11353 if (cfun_frame_layout.backchain_offset)
11354 addr = gen_rtx_MEM (Pmode,
11355 plus_constant (Pmode, stack_pointer_rtx,
11356 cfun_frame_layout.backchain_offset));
11357 else
11358 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11359 set_mem_alias_set (addr, get_frame_alias_set ());
11360 insn = emit_insn (gen_move_insn (addr, temp_reg));
11363 /* If we support non-call exceptions (e.g. for Java),
11364 we need to make sure the backchain pointer is set up
11365 before any possibly trapping memory access. */
11366 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11368 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11369 emit_clobber (addr);
11372 else if (flag_stack_clash_protection)
11373 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11375 /* Save fprs 8 - 15 (64 bit ABI). */
11377 if (cfun_save_high_fprs_p && next_fpr)
11379 /* If the stack might be accessed through a different register
11380 we have to make sure that the stack pointer decrement is not
11381 moved below the use of the stack slots. */
11382 s390_emit_stack_tie ();
11384 insn = emit_insn (gen_add2_insn (temp_reg,
11385 GEN_INT (cfun_frame_layout.f8_offset)));
11387 offset = 0;
11389 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11390 if (cfun_fpr_save_p (i))
11392 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11393 cfun_frame_layout.frame_size
11394 + cfun_frame_layout.f8_offset
11395 + offset);
11397 insn = save_fpr (temp_reg, offset, i);
11398 offset += 8;
11399 RTX_FRAME_RELATED_P (insn) = 1;
11400 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11401 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11402 gen_rtx_REG (DFmode, i)));
11406 /* Set frame pointer, if needed. */
11408 if (frame_pointer_needed)
11410 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11411 RTX_FRAME_RELATED_P (insn) = 1;
11414 /* Set up got pointer, if needed. */
11416 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11418 rtx_insn *insns = s390_load_got ();
11420 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11421 annotate_constant_pool_refs (insn);
11423 emit_insn (insns);
11426 #if TARGET_TPF != 0
11427 if (TARGET_TPF_PROFILING)
11429 /* Generate a BAS instruction to serve as a function entry
11430 intercept to facilitate the use of tracing algorithms located
11431 at the branch target. */
11432 emit_insn (gen_prologue_tpf (
11433 GEN_INT (s390_tpf_trace_hook_prologue_check),
11434 GEN_INT (s390_tpf_trace_hook_prologue_target)));
11436 /* Emit a blockage here so that all code lies between the
11437 profiling mechanisms. */
11438 emit_insn (gen_blockage ());
11440 #endif
11443 /* Expand the epilogue into a bunch of separate insns. */
11445 void
11446 s390_emit_epilogue (bool sibcall)
11448 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11449 int area_bottom, area_top, offset = 0;
11450 int next_offset;
11451 int i;
11453 #if TARGET_TPF != 0
11454 if (TARGET_TPF_PROFILING)
11456 /* Generate a BAS instruction to serve as a function entry
11457 intercept to facilitate the use of tracing algorithms located
11458 at the branch target. */
11460 /* Emit a blockage here so that all code lies between the
11461 profiling mechanisms. */
11462 emit_insn (gen_blockage ());
11464 emit_insn (gen_epilogue_tpf (
11465 GEN_INT (s390_tpf_trace_hook_epilogue_check),
11466 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11468 #endif
11470 /* Check whether to use frame or stack pointer for restore. */
11472 frame_pointer = (frame_pointer_needed
11473 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11475 s390_frame_area (&area_bottom, &area_top);
11477 /* Check whether we can access the register save area.
11478 If not, increment the frame pointer as required. */
11480 if (area_top <= area_bottom)
11482 /* Nothing to restore. */
11484 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11485 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11487 /* Area is in range. */
11488 offset = cfun_frame_layout.frame_size;
11490 else
11492 rtx_insn *insn;
11493 rtx frame_off, cfa;
11495 offset = area_bottom < 0 ? -area_bottom : 0;
11496 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11498 cfa = gen_rtx_SET (frame_pointer,
11499 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11500 if (DISP_IN_RANGE (INTVAL (frame_off)))
11502 rtx set;
11504 set = gen_rtx_SET (frame_pointer,
11505 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11506 insn = emit_insn (set);
11508 else
11510 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11511 frame_off = force_const_mem (Pmode, frame_off);
11513 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11514 annotate_constant_pool_refs (insn);
11516 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11517 RTX_FRAME_RELATED_P (insn) = 1;
11520 /* Restore call saved fprs. */
11522 if (TARGET_64BIT)
11524 if (cfun_save_high_fprs_p)
11526 next_offset = cfun_frame_layout.f8_offset;
11527 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11529 if (cfun_fpr_save_p (i))
11531 restore_fpr (frame_pointer,
11532 offset + next_offset, i);
11533 cfa_restores
11534 = alloc_reg_note (REG_CFA_RESTORE,
11535 gen_rtx_REG (DFmode, i), cfa_restores);
11536 next_offset += 8;
11542 else
11544 next_offset = cfun_frame_layout.f4_offset;
11545 /* f4, f6 */
11546 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11548 if (cfun_fpr_save_p (i))
11550 restore_fpr (frame_pointer,
11551 offset + next_offset, i);
11552 cfa_restores
11553 = alloc_reg_note (REG_CFA_RESTORE,
11554 gen_rtx_REG (DFmode, i), cfa_restores);
11555 next_offset += 8;
11557 else if (!TARGET_PACKED_STACK)
11558 next_offset += 8;
11563 /* Restore call saved gprs. */
11565 if (cfun_frame_layout.first_restore_gpr != -1)
11567 rtx insn, addr;
11568 int i;
11570 /* Check for global register and save them
11571 to stack location from where they get restored. */
11573 for (i = cfun_frame_layout.first_restore_gpr;
11574 i <= cfun_frame_layout.last_restore_gpr;
11575 i++)
11577 if (global_not_special_regno_p (i))
11579 addr = plus_constant (Pmode, frame_pointer,
11580 offset + cfun_frame_layout.gprs_offset
11581 + (i - cfun_frame_layout.first_save_gpr_slot)
11582 * UNITS_PER_LONG);
11583 addr = gen_rtx_MEM (Pmode, addr);
11584 set_mem_alias_set (addr, get_frame_alias_set ());
11585 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11587 else
11588 cfa_restores
11589 = alloc_reg_note (REG_CFA_RESTORE,
11590 gen_rtx_REG (Pmode, i), cfa_restores);
11593 /* Fetch the return address from the stack before the load multiple;
11594 this is good for scheduling.
11596 Only do this if we already decided that r14 needs to be
11597 saved to a stack slot. (And not just because r14 happens to
11598 be in between two GPRs which need saving.) Otherwise it
11599 would be difficult to take that decision back in
11600 s390_optimize_prologue.
11602 This optimization is only helpful on in-order machines. */
11603 if (! sibcall
11604 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11605 && s390_tune <= PROCESSOR_2097_Z10)
11607 int return_regnum = find_unused_clobbered_reg();
11608 if (!return_regnum
11609 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11610 && !TARGET_CPU_Z10
11611 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11613 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11614 return_regnum = 4;
11616 return_reg = gen_rtx_REG (Pmode, return_regnum);
11618 addr = plus_constant (Pmode, frame_pointer,
11619 offset + cfun_frame_layout.gprs_offset
11620 + (RETURN_REGNUM
11621 - cfun_frame_layout.first_save_gpr_slot)
11622 * UNITS_PER_LONG);
11623 addr = gen_rtx_MEM (Pmode, addr);
11624 set_mem_alias_set (addr, get_frame_alias_set ());
11625 emit_move_insn (return_reg, addr);
11627 /* Once we have done that optimization we have to make sure
11628 s390_optimize_prologue does not try to remove the store
11629 of r14 since we will not be able to find the load issued
11630 here. */
11631 cfun_frame_layout.save_return_addr_p = true;
11634 insn = restore_gprs (frame_pointer,
11635 offset + cfun_frame_layout.gprs_offset
11636 + (cfun_frame_layout.first_restore_gpr
11637 - cfun_frame_layout.first_save_gpr_slot)
11638 * UNITS_PER_LONG,
11639 cfun_frame_layout.first_restore_gpr,
11640 cfun_frame_layout.last_restore_gpr);
11641 insn = emit_insn (insn);
11642 REG_NOTES (insn) = cfa_restores;
11643 add_reg_note (insn, REG_CFA_DEF_CFA,
11644 plus_constant (Pmode, stack_pointer_rtx,
11645 STACK_POINTER_OFFSET));
11646 RTX_FRAME_RELATED_P (insn) = 1;
11649 s390_restore_gprs_from_fprs ();
11651 if (! sibcall)
11653 if (!return_reg && !s390_can_use_return_insn ())
11654 /* We planned to emit (return), but we are not allowed to. */
11655 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11657 if (return_reg)
11658 /* Emit (return) and (use). */
11659 emit_jump_insn (gen_return_use (return_reg));
11660 else
11661 /* The fact that RETURN_REGNUM is used is already reflected by
11662 EPILOGUE_USES. Emit plain (return). */
11663 emit_jump_insn (gen_return ());
11667 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11669 static void
11670 s300_set_up_by_prologue (hard_reg_set_container *regs)
11672 if (cfun->machine->base_reg
11673 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11674 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11677 /* -fsplit-stack support. */
11679 /* A SYMBOL_REF for __morestack. */
11680 static GTY(()) rtx morestack_ref;
11682 /* When using -fsplit-stack, the allocation routines set a field in
11683 the TCB to the bottom of the stack plus this much space, measured
11684 in bytes. */
11686 #define SPLIT_STACK_AVAILABLE 1024
11688 /* Emit the parmblock for __morestack into .rodata section. It
11689 consists of 3 pointer size entries:
11690 - frame size
11691 - size of stack arguments
11692 - offset between parm block and __morestack return label */
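/* On 64-bit targets the emitted data looks roughly like

	.align	8
   .L<parm_block>:
	.quad	<frame size>
	.quad	<args size>
	.quad	.L<call_done>-.L<parm_block>

   with .long and .align 4 used instead for 31-bit. */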
11694 void
11695 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11696 rtx frame_size, rtx args_size)
11698 rtx ops[] = { parm_block, call_done };
11700 switch_to_section (targetm.asm_out.function_rodata_section
11701 (current_function_decl));
11703 if (TARGET_64BIT)
11704 output_asm_insn (".align\t8", NULL);
11705 else
11706 output_asm_insn (".align\t4", NULL);
11708 (*targetm.asm_out.internal_label) (asm_out_file, "L",
11709 CODE_LABEL_NUMBER (parm_block));
11710 if (TARGET_64BIT)
11712 output_asm_insn (".quad\t%0", &frame_size);
11713 output_asm_insn (".quad\t%0", &args_size);
11714 output_asm_insn (".quad\t%1-%0", ops);
11716 else
11718 output_asm_insn (".long\t%0", &frame_size);
11719 output_asm_insn (".long\t%0", &args_size);
11720 output_asm_insn (".long\t%1-%0", ops);
11723 switch_to_section (current_function_section ());
11726 /* Emit -fsplit-stack prologue, which goes before the regular function
11727 prologue. */
11729 void
11730 s390_expand_split_stack_prologue (void)
11732 rtx r1, guard, cc = NULL;
11733 rtx_insn *insn;
11734 /* Offset from thread pointer to __private_ss. */
11735 int psso = TARGET_64BIT ? 0x38 : 0x20;
11736 /* Pointer size in bytes. */
11737 /* Frame size and argument size - the two parameters to __morestack. */
11738 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11739 /* Align argument size to 8 bytes - simplifies __morestack code. */
11740 HOST_WIDE_INT args_size = crtl->args.size >= 0
11741 ? ((crtl->args.size + 7) & ~7)
11742 : 0;
11743 /* Label to be called by __morestack. */
11744 rtx_code_label *call_done = NULL;
11745 rtx_code_label *parm_base = NULL;
11746 rtx tmp;
11748 gcc_assert (flag_split_stack && reload_completed);
11750 r1 = gen_rtx_REG (Pmode, 1);
11752 /* If no stack frame will be allocated, don't do anything. */
11753 if (!frame_size)
11755 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11757 /* If va_start is used, just use r15. */
11758 emit_move_insn (r1,
11759 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11760 GEN_INT (STACK_POINTER_OFFSET)));
11763 return;
11766 if (morestack_ref == NULL_RTX)
11768 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11769 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11770 | SYMBOL_FLAG_FUNCTION);
11773 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11775 /* If frame_size will fit in an add instruction, do a stack space
11776 check, and only call __morestack if there's not enough space. */
11778 /* Get thread pointer. r1 is the only register we can always destroy - r0
11779 could contain a static chain (and cannot be used to address memory
11780 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11781 emit_insn (gen_get_thread_pointer (Pmode, r1));
11782 /* Aim at __private_ss. */
11783 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11785 /* If less than 1 KiB is used, skip the addition and compare directly with
11786 __private_ss. */
11787 if (frame_size > SPLIT_STACK_AVAILABLE)
11789 emit_move_insn (r1, guard);
11790 if (TARGET_64BIT)
11791 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11792 else
11793 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11794 guard = r1;
11797 /* Compare the (maybe adjusted) guard with the stack pointer. */
11798 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
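/* I.e. the conditional call emitted below invokes __morestack only when
   the stack pointer has dropped below the (possibly frame-size-adjusted)
   __private_ss guard value. */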
11801 call_done = gen_label_rtx ();
11802 parm_base = gen_label_rtx ();
11803 LABEL_NUSES (parm_base)++;
11804 LABEL_NUSES (call_done)++;
11806 /* %r1 = litbase. */
11807 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11808 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11809 LABEL_NUSES (parm_base)++;
11811 /* Now, we need to call __morestack. It has very special calling
11812 conventions: it preserves param/return/static chain registers for
11813 calling main function body, and looks for its own parameters at %r1. */
11814 if (cc != NULL)
11815 tmp = gen_split_stack_cond_call (Pmode,
11816 morestack_ref,
11817 parm_base,
11818 call_done,
11819 GEN_INT (frame_size),
11820 GEN_INT (args_size),
11821 cc);
11822 else
11823 tmp = gen_split_stack_call (Pmode,
11824 morestack_ref,
11825 parm_base,
11826 call_done,
11827 GEN_INT (frame_size),
11828 GEN_INT (args_size));
11830 insn = emit_jump_insn (tmp);
11831 JUMP_LABEL (insn) = call_done;
11832 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11833 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11835 if (cc != NULL)
11837 /* Mark the jump as very unlikely to be taken. */
11838 add_reg_br_prob_note (insn,
11839 profile_probability::very_unlikely ());
11841 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11843 /* If va_start is used, and __morestack was not called, just use
11844 r15. */
11845 emit_move_insn (r1,
11846 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11847 GEN_INT (STACK_POINTER_OFFSET)));
11850 else
11852 emit_barrier ();
11855 /* __morestack will call us here. */
11857 emit_label (call_done);
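/* Conceptual sketch of the sequence emitted above (illustrative only, not
   the exact machine code):

     if (stack_pointer < tp->__private_ss + frame_size)
       call __morestack, which finds frame_size, args_size and the resume
       label through the parameter block addressed by %r1 (parm_base);
     call_done:
       ...

   where tp is the thread pointer and __private_ss the split-stack guard
   kept at offset psso in the thread control block.  */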
11860 /* We may have to tell the dataflow pass that the split stack prologue
11861 is initializing a register. */
11863 static void
11864 s390_live_on_entry (bitmap regs)
11866 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11868 gcc_assert (flag_split_stack);
11869 bitmap_set_bit (regs, 1);
11873 /* Return true if the function can use simple_return to return outside
11874 of a shrink-wrapped region. At present shrink-wrapping is supported
11875 in all cases. */
11877 bool
11878 s390_can_use_simple_return_insn (void)
11880 return true;
11883 /* Return true if the epilogue is guaranteed to contain only a return
11884 instruction and if a direct return can therefore be used instead.
11885 One of the main advantages of using direct return instructions
11886 is that we can then use conditional returns. */
11888 bool
11889 s390_can_use_return_insn (void)
11891 int i;
11893 if (!reload_completed)
11894 return false;
11896 if (crtl->profile)
11897 return false;
11899 if (TARGET_TPF_PROFILING)
11900 return false;
11902 for (i = 0; i < 16; i++)
11903 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11904 return false;
11906 /* For 31 bit this is not covered by the frame_size check below
11907 since f4, f6 are saved in the register save area without needing
11908 additional stack space. */
11909 if (!TARGET_64BIT
11910 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11911 return false;
11913 if (cfun->machine->base_reg
11914 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11915 return false;
11917 return cfun_frame_layout.frame_size == 0;
11920 /* The VX ABI differs for vararg functions. Therefore we need the
11921 prototype of the callee to be available when passing vector type
11922 values. */
11923 static const char *
11924 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11926 return ((TARGET_VX_ABI
11927 && typelist == 0
11928 && VECTOR_TYPE_P (TREE_TYPE (val))
11929 && (funcdecl == NULL_TREE
11930 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11931 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11932 ? N_("vector argument passed to unprototyped function")
11933 : NULL);
11937 /* Return the size in bytes of a function argument of
11938 type TYPE and/or mode MODE. At least one of TYPE or
11939 MODE must be specified. */
11941 static int
11942 s390_function_arg_size (machine_mode mode, const_tree type)
11944 if (type)
11945 return int_size_in_bytes (type);
11947 /* No type info available for some library calls ... */
11948 if (mode != BLKmode)
11949 return GET_MODE_SIZE (mode);
11951 /* If we have neither type nor mode, abort */
11952 gcc_unreachable ();
11955 /* Return true if a function argument of type TYPE and mode MODE
11956 is to be passed in a vector register, if available. */
11958 bool
11959 s390_function_arg_vector (machine_mode mode, const_tree type)
11961 if (!TARGET_VX_ABI)
11962 return false;
11964 if (s390_function_arg_size (mode, type) > 16)
11965 return false;
11967 /* No type info available for some library calls ... */
11968 if (!type)
11969 return VECTOR_MODE_P (mode);
11971 /* The ABI says that record types with a single member are treated
11972 just like that member would be. */
11973 int empty_base_seen = 0;
11974 const_tree orig_type = type;
11975 while (TREE_CODE (type) == RECORD_TYPE)
11977 tree field, single = NULL_TREE;
11979 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11981 if (TREE_CODE (field) != FIELD_DECL)
11982 continue;
11984 if (DECL_FIELD_ABI_IGNORED (field))
11986 if (lookup_attribute ("no_unique_address",
11987 DECL_ATTRIBUTES (field)))
11988 empty_base_seen |= 2;
11989 else
11990 empty_base_seen |= 1;
11991 continue;
11994 if (single == NULL_TREE)
11995 single = TREE_TYPE (field);
11996 else
11997 return false;
12000 if (single == NULL_TREE)
12001 return false;
12002 else
12004 /* If the field declaration adds extra bytes due to
12005 e.g. padding, it is not accepted as a vector type. */
12006 if (int_size_in_bytes (single) <= 0
12007 || int_size_in_bytes (single) != int_size_in_bytes (type))
12008 return false;
12009 type = single;
12013 if (!VECTOR_TYPE_P (type))
12014 return false;
12016 if (warn_psabi && empty_base_seen)
12018 static unsigned last_reported_type_uid;
12019 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12020 if (uid != last_reported_type_uid)
12022 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12023 last_reported_type_uid = uid;
12024 if (empty_base_seen & 1)
12025 inform (input_location,
12026 "parameter passing for argument of type %qT when C++17 "
12027 "is enabled changed to match C++14 %{in GCC 10.1%}",
12028 orig_type, url);
12029 else
12030 inform (input_location,
12031 "parameter passing for argument of type %qT with "
12032 "%<[[no_unique_address]]%> members changed "
12033 "%{in GCC 10.1%}", orig_type, url);
12036 return true;
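/* Illustrative example (not part of the original sources): with the vector
   ABI both of

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrap { v4si x; };

   are classified as vector arguments by the code above, because the
   single-member record is treated like its member and has the same size.
   A record with a second member, or one whose size differs from the member
   because of padding, is not.  */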
12039 /* Return true if a function argument of type TYPE and mode MODE
12040 is to be passed in a floating-point register, if available. */
12042 static bool
12043 s390_function_arg_float (machine_mode mode, const_tree type)
12045 if (s390_function_arg_size (mode, type) > 8)
12046 return false;
12048 /* Soft-float changes the ABI: no floating-point registers are used. */
12049 if (TARGET_SOFT_FLOAT)
12050 return false;
12052 /* No type info available for some library calls ... */
12053 if (!type)
12054 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12056 /* The ABI says that record types with a single member are treated
12057 just like that member would be. */
12058 int empty_base_seen = 0;
12059 const_tree orig_type = type;
12060 while (TREE_CODE (type) == RECORD_TYPE)
12062 tree field, single = NULL_TREE;
12064 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12066 if (TREE_CODE (field) != FIELD_DECL)
12067 continue;
12068 if (DECL_FIELD_ABI_IGNORED (field))
12070 if (lookup_attribute ("no_unique_address",
12071 DECL_ATTRIBUTES (field)))
12072 empty_base_seen |= 2;
12073 else
12074 empty_base_seen |= 1;
12075 continue;
12078 if (single == NULL_TREE)
12079 single = TREE_TYPE (field);
12080 else
12081 return false;
12084 if (single == NULL_TREE)
12085 return false;
12086 else
12087 type = single;
12090 if (TREE_CODE (type) != REAL_TYPE)
12091 return false;
12093 if (warn_psabi && empty_base_seen)
12095 static unsigned last_reported_type_uid;
12096 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12097 if (uid != last_reported_type_uid)
12099 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12100 last_reported_type_uid = uid;
12101 if (empty_base_seen & 1)
12102 inform (input_location,
12103 "parameter passing for argument of type %qT when C++17 "
12104 "is enabled changed to match C++14 %{in GCC 10.1%}",
12105 orig_type, url);
12106 else
12107 inform (input_location,
12108 "parameter passing for argument of type %qT with "
12109 "%<[[no_unique_address]]%> members changed "
12110 "%{in GCC 10.1%}", orig_type, url);
12114 return true;
12117 /* Return true if a function argument of type TYPE and mode MODE
12118 is to be passed in an integer register, or a pair of integer
12119 registers, if available. */
12121 static bool
12122 s390_function_arg_integer (machine_mode mode, const_tree type)
12124 int size = s390_function_arg_size (mode, type);
12125 if (size > 8)
12126 return false;
12128 /* No type info available for some library calls ... */
12129 if (!type)
12130 return GET_MODE_CLASS (mode) == MODE_INT
12131 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12133 /* We accept small integral (and similar) types. */
12134 if (INTEGRAL_TYPE_P (type)
12135 || POINTER_TYPE_P (type)
12136 || TREE_CODE (type) == NULLPTR_TYPE
12137 || TREE_CODE (type) == OFFSET_TYPE
12138 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12139 return true;
12141 /* We also accept structs of size 1, 2, 4, 8 that are not
12142 passed in floating-point registers. */
12143 if (AGGREGATE_TYPE_P (type)
12144 && exact_log2 (size) >= 0
12145 && !s390_function_arg_float (mode, type))
12146 return true;
12148 return false;
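/* Note/example (a sketch): under -msoft-float a scalar 'double' argument is
   accepted here (TARGET_SOFT_FLOAT && REAL_TYPE) and therefore travels in
   GPRs - a register pair on 31 bit - instead of an FPR, which is why
   soft-float constitutes a different ABI.  */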
12151 /* Return 1 if a function argument ARG is to be passed by reference.
12152 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12153 are passed by value, all other structures (and complex numbers) are
12154 passed by reference. */
12156 static bool
12157 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12159 int size = s390_function_arg_size (arg.mode, arg.type);
12161 if (s390_function_arg_vector (arg.mode, arg.type))
12162 return false;
12164 if (size > 8)
12165 return true;
12167 if (tree type = arg.type)
12169 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12170 return true;
12172 if (TREE_CODE (type) == COMPLEX_TYPE
12173 || TREE_CODE (type) == VECTOR_TYPE)
12174 return true;
12177 return false;
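/* Illustrative examples (assumptions about user code, not from this file):

     struct s8  { char c[8];  };   passed by value (size is a power of 2 <= 8)
     struct s3  { char c[3];  };   passed by reference (size not 1, 2, 4 or 8)
     struct s12 { char c[12]; };   passed by reference (larger than 8 bytes)
     _Complex double z;            passed by reference  */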
12180 /* Update the data in CUM to advance over argument ARG. */
12182 static void
12183 s390_function_arg_advance (cumulative_args_t cum_v,
12184 const function_arg_info &arg)
12186 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12188 if (s390_function_arg_vector (arg.mode, arg.type))
12190 /* We are called for unnamed vector stdarg arguments which are
12191 passed on the stack. In this case this hook does not have to
12192 do anything since stack arguments are tracked by common
12193 code. */
12194 if (!arg.named)
12195 return;
12196 cum->vrs += 1;
12198 else if (s390_function_arg_float (arg.mode, arg.type))
12200 cum->fprs += 1;
12202 else if (s390_function_arg_integer (arg.mode, arg.type))
12204 int size = s390_function_arg_size (arg.mode, arg.type);
12205 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12207 else
12208 gcc_unreachable ();
12211 /* Define where to put the arguments to a function.
12212 Value is zero to push the argument on the stack,
12213 or a hard register in which to store the argument.
12215 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12216 the preceding args and about the function being called.
12217 ARG is a description of the argument.
12219 On S/390, we use general purpose registers 2 through 6 to
12220 pass integer, pointer, and certain structure arguments, and
12221 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12222 to pass floating point arguments. All remaining arguments
12223 are pushed to the stack. */
12225 static rtx
12226 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12228 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12230 if (!arg.named)
12231 s390_check_type_for_vector_abi (arg.type, true, false);
12233 if (s390_function_arg_vector (arg.mode, arg.type))
12235 /* Vector arguments being part of the ellipsis are passed on the
12236 stack. */
12237 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12238 return NULL_RTX;
12240 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12242 else if (s390_function_arg_float (arg.mode, arg.type))
12244 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12245 return NULL_RTX;
12246 else
12247 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12249 else if (s390_function_arg_integer (arg.mode, arg.type))
12251 int size = s390_function_arg_size (arg.mode, arg.type);
12252 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12254 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12255 return NULL_RTX;
12256 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12257 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12258 else if (n_gprs == 2)
12260 rtvec p = rtvec_alloc (2);
12262 RTVEC_ELT (p, 0)
12263 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12264 const0_rtx);
12265 RTVEC_ELT (p, 1)
12266 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12267 GEN_INT (4));
12269 return gen_rtx_PARALLEL (arg.mode, p);
12273 /* After the real arguments, expand_call calls us once again with an
12274 end marker. Whatever we return here is passed as operand 2 to the
12275 call expanders.
12277 We don't need this feature ... */
12278 else if (arg.end_marker_p ())
12279 return const0_rtx;
12281 gcc_unreachable ();
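/* Worked example (a sketch): for a 64-bit call such as

     void f (int a, double b, void *c, struct { char x[8]; } d);

   the code above assigns a to %r2, b to %f0, c to %r3 and the 8-byte
   aggregate d to %r4, since integer/pointer/small-struct arguments and
   floating-point arguments consume their register sets independently.  */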
12284 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12285 left-justified when placed on the stack during parameter passing. */
12287 static pad_direction
12288 s390_function_arg_padding (machine_mode mode, const_tree type)
12290 if (s390_function_arg_vector (mode, type))
12291 return PAD_UPWARD;
12293 return default_function_arg_padding (mode, type);
12296 /* Return true if return values of type TYPE should be returned
12297 in a memory buffer whose address is passed by the caller as
12298 hidden first argument. */
12300 static bool
12301 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12303 /* We accept small integral (and similar) types. */
12304 if (INTEGRAL_TYPE_P (type)
12305 || POINTER_TYPE_P (type)
12306 || TREE_CODE (type) == OFFSET_TYPE
12307 || TREE_CODE (type) == REAL_TYPE)
12308 return int_size_in_bytes (type) > 8;
12310 /* vector types which fit into a VR. */
12311 if (TARGET_VX_ABI
12312 && VECTOR_TYPE_P (type)
12313 && int_size_in_bytes (type) <= 16)
12314 return false;
12316 /* Aggregates and similar constructs are always returned
12317 in memory. */
12318 if (AGGREGATE_TYPE_P (type)
12319 || TREE_CODE (type) == COMPLEX_TYPE
12320 || VECTOR_TYPE_P (type))
12321 return true;
12323 /* ??? We get called on all sorts of random stuff from
12324 aggregate_value_p. We can't abort, but it's not clear
12325 what's safe to return. Pretend it's a struct I guess. */
12326 return true;
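/* Examples (a sketch): a 'long' result is returned in %r2 and a 'double'
   in %f0, while even an 8-byte 'struct { long x; }' is returned through the
   hidden pointer, because aggregates always go to memory here.  With the
   vector ABI a vector of up to 16 bytes is returned in the first vector
   argument register instead.  */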
12329 /* Function arguments and return values are promoted to word size. */
12331 static machine_mode
12332 s390_promote_function_mode (const_tree type, machine_mode mode,
12333 int *punsignedp,
12334 const_tree fntype ATTRIBUTE_UNUSED,
12335 int for_return ATTRIBUTE_UNUSED)
12337 if (INTEGRAL_MODE_P (mode)
12338 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12340 if (type != NULL_TREE && POINTER_TYPE_P (type))
12341 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12342 return Pmode;
12345 return mode;
12348 /* Define where to return a (scalar) value of type RET_TYPE.
12349 If RET_TYPE is null, define where to return a (scalar)
12350 value of mode MODE from a libcall. */
12352 static rtx
12353 s390_function_and_libcall_value (machine_mode mode,
12354 const_tree ret_type,
12355 const_tree fntype_or_decl,
12356 bool outgoing ATTRIBUTE_UNUSED)
12358 /* For vector return types it is important to use the RET_TYPE
12359 argument whenever available since the middle-end might have
12360 changed the mode to a scalar mode. */
12361 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12362 || (!ret_type && VECTOR_MODE_P (mode)));
12364 /* For normal functions perform the promotion as
12365 promote_function_mode would do. */
12366 if (ret_type)
12368 int unsignedp = TYPE_UNSIGNED (ret_type);
12369 mode = promote_function_mode (ret_type, mode, &unsignedp,
12370 fntype_or_decl, 1);
12373 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12374 || SCALAR_FLOAT_MODE_P (mode)
12375 || (TARGET_VX_ABI && vector_ret_type_p));
12376 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12378 if (TARGET_VX_ABI && vector_ret_type_p)
12379 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12380 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12381 return gen_rtx_REG (mode, 16);
12382 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12383 || UNITS_PER_LONG == UNITS_PER_WORD)
12384 return gen_rtx_REG (mode, 2);
12385 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12387 /* This case is triggered when returning a 64 bit value with
12388 -m31 -mzarch. Although the value would fit into a single
12389 register it has to be forced into a 32 bit register pair in
12390 order to match the ABI. */
12391 rtvec p = rtvec_alloc (2);
12393 RTVEC_ELT (p, 0)
12394 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12395 RTVEC_ELT (p, 1)
12396 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12398 return gen_rtx_PARALLEL (mode, p);
12401 gcc_unreachable ();
12404 /* Define where to return a scalar return value of type RET_TYPE. */
12406 static rtx
12407 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12408 bool outgoing)
12410 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12411 fn_decl_or_type, outgoing);
12414 /* Define where to return a scalar libcall return value of mode
12415 MODE. */
12417 static rtx
12418 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12420 return s390_function_and_libcall_value (mode, NULL_TREE,
12421 NULL_TREE, true);
12425 /* Create and return the va_list datatype.
12427 On S/390, va_list is an array type equivalent to
12429 typedef struct __va_list_tag
12431 long __gpr;
12432 long __fpr;
12433 void *__overflow_arg_area;
12434 void *__reg_save_area;
12435 } va_list[1];
12437 where __gpr and __fpr hold the number of general purpose
12438 or floating point arguments used up to now, respectively,
12439 __overflow_arg_area points to the stack location of the
12440 next argument passed on the stack, and __reg_save_area
12441 always points to the start of the register area in the
12442 call frame of the current function. The function prologue
12443 saves all registers used for argument passing into this
12444 area if the function uses variable arguments. */
12446 static tree
12447 s390_build_builtin_va_list (void)
12449 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12451 record = lang_hooks.types.make_type (RECORD_TYPE);
12453 type_decl =
12454 build_decl (BUILTINS_LOCATION,
12455 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12457 f_gpr = build_decl (BUILTINS_LOCATION,
12458 FIELD_DECL, get_identifier ("__gpr"),
12459 long_integer_type_node);
12460 f_fpr = build_decl (BUILTINS_LOCATION,
12461 FIELD_DECL, get_identifier ("__fpr"),
12462 long_integer_type_node);
12463 f_ovf = build_decl (BUILTINS_LOCATION,
12464 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12465 ptr_type_node);
12466 f_sav = build_decl (BUILTINS_LOCATION,
12467 FIELD_DECL, get_identifier ("__reg_save_area"),
12468 ptr_type_node);
12470 va_list_gpr_counter_field = f_gpr;
12471 va_list_fpr_counter_field = f_fpr;
12473 DECL_FIELD_CONTEXT (f_gpr) = record;
12474 DECL_FIELD_CONTEXT (f_fpr) = record;
12475 DECL_FIELD_CONTEXT (f_ovf) = record;
12476 DECL_FIELD_CONTEXT (f_sav) = record;
12478 TYPE_STUB_DECL (record) = type_decl;
12479 TYPE_NAME (record) = type_decl;
12480 TYPE_FIELDS (record) = f_gpr;
12481 DECL_CHAIN (f_gpr) = f_fpr;
12482 DECL_CHAIN (f_fpr) = f_ovf;
12483 DECL_CHAIN (f_ovf) = f_sav;
12485 layout_type (record);
12487 /* The correct type is an array type of one element. */
12488 return build_array_type (record, build_index_type (size_zero_node));
12491 /* Implement va_start by filling the va_list structure VALIST.
12492 STDARG_P is always true, and ignored.
12493 NEXTARG points to the first anonymous stack argument.
12495 The following global variables are used to initialize
12496 the va_list structure:
12498 crtl->args.info:
12499 holds number of gprs and fprs used for named arguments.
12500 crtl->args.arg_offset_rtx:
12501 holds the offset of the first anonymous stack argument
12502 (relative to the virtual arg pointer). */
12504 static void
12505 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12507 HOST_WIDE_INT n_gpr, n_fpr;
12508 int off;
12509 tree f_gpr, f_fpr, f_ovf, f_sav;
12510 tree gpr, fpr, ovf, sav, t;
12512 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12513 f_fpr = DECL_CHAIN (f_gpr);
12514 f_ovf = DECL_CHAIN (f_fpr);
12515 f_sav = DECL_CHAIN (f_ovf);
12517 valist = build_simple_mem_ref (valist);
12518 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12519 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12520 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12521 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12523 /* Count number of gp and fp argument registers used. */
12525 n_gpr = crtl->args.info.gprs;
12526 n_fpr = crtl->args.info.fprs;
12528 if (cfun->va_list_gpr_size)
12530 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12531 build_int_cst (NULL_TREE, n_gpr));
12532 TREE_SIDE_EFFECTS (t) = 1;
12533 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12536 if (cfun->va_list_fpr_size)
12538 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12539 build_int_cst (NULL_TREE, n_fpr));
12540 TREE_SIDE_EFFECTS (t) = 1;
12541 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12544 if (flag_split_stack
12545 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12546 == NULL)
12547 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12549 rtx reg;
12550 rtx_insn *seq;
12552 reg = gen_reg_rtx (Pmode);
12553 cfun->machine->split_stack_varargs_pointer = reg;
12555 start_sequence ();
12556 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12557 seq = get_insns ();
12558 end_sequence ();
12560 push_topmost_sequence ();
12561 emit_insn_after (seq, entry_of_function ());
12562 pop_topmost_sequence ();
12565 /* Find the overflow area.
12566 FIXME: This currently is too pessimistic when the vector ABI is
12567 enabled. In that case we *always* set up the overflow area
12568 pointer. */
12569 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12570 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12571 || TARGET_VX_ABI)
12573 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12574 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12575 else
12576 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12578 off = INTVAL (crtl->args.arg_offset_rtx);
12579 off = off < 0 ? 0 : off;
12580 if (TARGET_DEBUG_ARG)
12581 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12582 (int)n_gpr, (int)n_fpr, off);
12584 t = fold_build_pointer_plus_hwi (t, off);
12586 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12587 TREE_SIDE_EFFECTS (t) = 1;
12588 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12591 /* Find the register save area. */
12592 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12593 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12595 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12596 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12598 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12599 TREE_SIDE_EFFECTS (t) = 1;
12600 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
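/* Example (a sketch): for

     int f (int a, double b, ...);

   the named arguments use one GPR and one FPR, so the code above sets
   __gpr = 1 and __fpr = 1; a subsequent va_arg of an integer type then
   starts at GPR %r3 and one of a floating type at FPR %f2.  */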
12604 /* Implement va_arg by updating the va_list structure
12605 VALIST as required to retrieve an argument of type
12606 TYPE, and returning that argument.
12608 Generates code equivalent to:
12610 if (integral value) {
12611 if (size <= 4 && args.gpr < 5 ||
12612 size > 4 && args.gpr < 4 )
12613 ret = args.reg_save_area[args.gpr+8]
12614 else
12615 ret = *args.overflow_arg_area++;
12616 } else if (vector value) {
12617 ret = *args.overflow_arg_area;
12618 args.overflow_arg_area += size / 8;
12619 } else if (float value) {
12620 if (args.fpr < 2)
12621 ret = args.reg_save_area[args.fpr+64]
12622 else
12623 ret = *args.overflow_arg_area++;
12624 } else if (aggregate value) {
12625 if (args.gpr < 5)
12626 ret = *args.reg_save_area[args.gpr]
12627 else
12628 ret = **args.overflow_arg_area++;
12629 } */
12631 static tree
12632 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12633 gimple_seq *post_p ATTRIBUTE_UNUSED)
12635 tree f_gpr, f_fpr, f_ovf, f_sav;
12636 tree gpr, fpr, ovf, sav, reg, t, u;
12637 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12638 tree lab_false, lab_over = NULL_TREE;
12639 tree addr = create_tmp_var (ptr_type_node, "addr");
12640 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12641 a stack slot. */
12643 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12644 f_fpr = DECL_CHAIN (f_gpr);
12645 f_ovf = DECL_CHAIN (f_fpr);
12646 f_sav = DECL_CHAIN (f_ovf);
12648 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12649 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12650 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12652 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12653 both appear on a lhs. */
12654 valist = unshare_expr (valist);
12655 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12657 size = int_size_in_bytes (type);
12659 s390_check_type_for_vector_abi (type, true, false);
12661 if (pass_va_arg_by_reference (type))
12663 if (TARGET_DEBUG_ARG)
12665 fprintf (stderr, "va_arg: aggregate type");
12666 debug_tree (type);
12669 /* Aggregates are passed by reference. */
12670 indirect_p = 1;
12671 reg = gpr;
12672 n_reg = 1;
12674 /* kernel stack layout on 31 bit: It is assumed here that no padding
12675 will be added by s390_frame_info because for va_args an even number
12676 of gprs always has to be saved (r15-r2 = 14 regs). */
12677 sav_ofs = 2 * UNITS_PER_LONG;
12678 sav_scale = UNITS_PER_LONG;
12679 size = UNITS_PER_LONG;
12680 max_reg = GP_ARG_NUM_REG - n_reg;
12681 left_align_p = false;
12683 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12685 if (TARGET_DEBUG_ARG)
12687 fprintf (stderr, "va_arg: vector type");
12688 debug_tree (type);
12691 indirect_p = 0;
12692 reg = NULL_TREE;
12693 n_reg = 0;
12694 sav_ofs = 0;
12695 sav_scale = 8;
12696 max_reg = 0;
12697 left_align_p = true;
12699 else if (s390_function_arg_float (TYPE_MODE (type), type))
12701 if (TARGET_DEBUG_ARG)
12703 fprintf (stderr, "va_arg: float type");
12704 debug_tree (type);
12707 /* FP args go in FP registers, if present. */
12708 indirect_p = 0;
12709 reg = fpr;
12710 n_reg = 1;
12711 sav_ofs = 16 * UNITS_PER_LONG;
12712 sav_scale = 8;
12713 max_reg = FP_ARG_NUM_REG - n_reg;
12714 left_align_p = false;
12716 else
12718 if (TARGET_DEBUG_ARG)
12720 fprintf (stderr, "va_arg: other type");
12721 debug_tree (type);
12724 /* Otherwise into GP registers. */
12725 indirect_p = 0;
12726 reg = gpr;
12727 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12729 /* kernel stack layout on 31 bit: It is assumed here that no padding
12730 will be added by s390_frame_info because for va_args an even number
12731 of gprs always has to be saved (r15-r2 = 14 regs). */
12732 sav_ofs = 2 * UNITS_PER_LONG;
12734 if (size < UNITS_PER_LONG)
12735 sav_ofs += UNITS_PER_LONG - size;
12737 sav_scale = UNITS_PER_LONG;
12738 max_reg = GP_ARG_NUM_REG - n_reg;
12739 left_align_p = false;
12742 /* Pull the value out of the saved registers ... */
12744 if (reg != NULL_TREE)
12747 /* if (reg > ((typeof (reg))max_reg))
12748 goto lab_false;
12750 addr = sav + sav_ofs + reg * save_scale;
12752 goto lab_over;
12754 lab_false: */
12757 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12758 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12760 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12761 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12762 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12763 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12764 gimplify_and_add (t, pre_p);
12766 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12767 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12768 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12769 t = fold_build_pointer_plus (t, u);
12771 gimplify_assign (addr, t, pre_p);
12773 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12775 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12778 /* ... Otherwise out of the overflow area. */
12780 t = ovf;
12781 if (size < UNITS_PER_LONG && !left_align_p)
12782 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12784 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12786 gimplify_assign (addr, t, pre_p);
12788 if (size < UNITS_PER_LONG && left_align_p)
12789 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12790 else
12791 t = fold_build_pointer_plus_hwi (t, size);
12793 gimplify_assign (ovf, t, pre_p);
12795 if (reg != NULL_TREE)
12796 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12799 /* Increment register save count. */
12801 if (n_reg > 0)
12803 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12804 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12805 gimplify_and_add (u, pre_p);
12808 if (indirect_p)
12810 t = build_pointer_type_for_mode (build_pointer_type (type),
12811 ptr_mode, true);
12812 addr = fold_convert (t, addr);
12813 addr = build_va_arg_indirect_ref (addr);
12815 else
12817 t = build_pointer_type_for_mode (type, ptr_mode, true);
12818 addr = fold_convert (t, addr);
12821 return build_va_arg_indirect_ref (addr);
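/* Worked example (a sketch, 64 bit): for an 'int' the code above computes
   addr = __reg_save_area + 2*8 + (8 - 4) + __gpr * 8, i.e. the
   right-aligned part of the GPR save slot, and for a 'double'
   addr = __reg_save_area + 16*8 + __fpr * 8; only when no argument register
   is left does it fall back to __overflow_arg_area.  */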
12824 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12825 expanders.
12826 DEST - Register location where CC will be stored.
12827 TDB - Pointer to a 256 byte area where to store the transaction
12828 diagnostic block. NULL if TDB is not needed.
12829 RETRY - Retry count value. If non-NULL a retry loop for CC2
12830 is emitted
12831 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12832 of the tbegin instruction pattern. */
12834 void
12835 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12837 rtx retry_plus_two = gen_reg_rtx (SImode);
12838 rtx retry_reg = gen_reg_rtx (SImode);
12839 rtx_code_label *retry_label = NULL;
12841 if (retry != NULL_RTX)
12843 emit_move_insn (retry_reg, retry);
12844 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12845 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12846 retry_label = gen_label_rtx ();
12847 emit_label (retry_label);
12850 if (clobber_fprs_p)
12852 if (TARGET_VX)
12853 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12854 tdb));
12855 else
12856 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12857 tdb));
12859 else
12860 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12861 tdb));
12863 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12864 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12865 CC_REGNUM)),
12866 UNSPEC_CC_TO_INT));
12867 if (retry != NULL_RTX)
12869 const int CC0 = 1 << 3;
12870 const int CC1 = 1 << 2;
12871 const int CC3 = 1 << 0;
12872 rtx jump;
12873 rtx count = gen_reg_rtx (SImode);
12874 rtx_code_label *leave_label = gen_label_rtx ();
12876 /* Exit for success and permanent failures. */
12877 jump = s390_emit_jump (leave_label,
12878 gen_rtx_EQ (VOIDmode,
12879 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12880 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12881 LABEL_NUSES (leave_label) = 1;
12883 /* CC2 - transient failure. Perform retry with ppa. */
12884 emit_move_insn (count, retry_plus_two);
12885 emit_insn (gen_subsi3 (count, count, retry_reg));
12886 emit_insn (gen_tx_assist (count));
12887 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12888 retry_reg,
12889 retry_reg));
12890 JUMP_LABEL (jump) = retry_label;
12891 LABEL_NUSES (retry_label) = 1;
12892 emit_label (leave_label);
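/* Usage sketch (illustrative; assumes the names provided by htmintrin.h):
   this expander backs source code such as

     struct __htm_tdb tdb;
     if (__builtin_tbegin (&tdb) == _HTM_TBEGIN_STARTED)
       {
	 ... transactional code ...
	 __builtin_tend ();
       }

   where the value stored to DEST is the condition code produced by tbegin;
   with __builtin_tbegin_retry a transient CC2 failure loops back to the
   retry_label above after issuing ppa with an increasing count.  */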
12897 /* Return the decl for the target specific builtin with the function
12898 code FCODE. */
12900 static tree
12901 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12903 if (fcode >= S390_BUILTIN_MAX)
12904 return error_mark_node;
12906 return s390_builtin_decls[fcode];
12909 /* We call mcount before the function prologue. So a profiled leaf
12910 function should stay a leaf function. */
12912 static bool
12913 s390_keep_leaf_when_profiled ()
12915 return true;
12918 /* Output assembly code for the trampoline template to
12919 stdio stream FILE.
12921 On S/390, we use gpr 1 internally in the trampoline code;
12922 gpr 0 is used to hold the static chain. */
12924 static void
12925 s390_asm_trampoline_template (FILE *file)
12927 rtx op[2];
12928 op[0] = gen_rtx_REG (Pmode, 0);
12929 op[1] = gen_rtx_REG (Pmode, 1);
12931 if (TARGET_64BIT)
12933 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12934 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12935 output_asm_insn ("br\t%1", op); /* 2 byte */
12936 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12938 else
12940 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12941 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12942 output_asm_insn ("br\t%1", op); /* 2 byte */
12943 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12947 /* Emit RTL insns to initialize the variable parts of a trampoline.
12948 FNADDR is an RTX for the address of the function's pure code.
12949 CXT is an RTX for the static chain value for the function. */
12951 static void
12952 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12954 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12955 rtx mem;
12957 emit_block_move (m_tramp, assemble_trampoline_template (),
12958 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12960 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12961 emit_move_insn (mem, cxt);
12962 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12963 emit_move_insn (mem, fnaddr);
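/* Resulting trampoline layout (a sketch, 64 bit): the 32-byte block starts
   with the template above,

     basr %r1,0
     lmg  %r0,%r1,14(%r1)
     br   %r1

   followed at offsets 16 and 24 by the static chain value and the target
   address written by s390_trampoline_init; the lmg picks these up because
   %r1 points just past the basr when it executes.  */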
12966 static void
12967 output_asm_nops (const char *user, int hw)
12969 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12970 while (hw > 0)
12972 if (hw >= 3)
12974 output_asm_insn ("brcl\t0,0", NULL);
12975 hw -= 3;
12977 else if (hw >= 2)
12979 output_asm_insn ("bc\t0,0", NULL);
12980 hw -= 2;
12982 else
12984 output_asm_insn ("bcr\t0,0", NULL);
12985 hw -= 1;
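/* Example (a sketch): a request for 5 halfwords of padding is emitted as
   brcl 0,0 (3 halfwords) followed by bc 0,0 (2 halfwords); a single
   remaining halfword becomes bcr 0,0.  */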
12990 /* Output assembler code to FILE to increment profiler label # LABELNO
12991 for profiling a function entry. */
12993 void
12994 s390_function_profiler (FILE *file, int labelno)
12996 rtx op[8];
12998 char label[128];
12999 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
13001 fprintf (file, "# function profiler \n");
13003 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13004 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13005 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13006 op[7] = GEN_INT (UNITS_PER_LONG);
13008 op[2] = gen_rtx_REG (Pmode, 1);
13009 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
13010 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
13012 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
13013 if (flag_pic)
13015 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
13016 op[4] = gen_rtx_CONST (Pmode, op[4]);
13019 if (flag_record_mcount)
13020 fprintf (file, "1:\n");
13022 if (flag_fentry)
13024 if (flag_nop_mcount)
13025 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13026 else if (cfun->static_chain_decl)
13027 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
13028 "with %<-mfentry%> on s390");
13029 else
13030 output_asm_insn ("brasl\t0,%4", op);
13032 else if (TARGET_64BIT)
13034 if (flag_nop_mcount)
13035 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
13036 /* brasl */ 3 + /* lg */ 3);
13037 else
13039 output_asm_insn ("stg\t%0,%1", op);
13040 if (flag_dwarf2_cfi_asm)
13041 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
13042 output_asm_insn ("larl\t%2,%3", op);
13043 output_asm_insn ("brasl\t%0,%4", op);
13044 output_asm_insn ("lg\t%0,%1", op);
13045 if (flag_dwarf2_cfi_asm)
13046 output_asm_insn (".cfi_restore\t%0", op);
13049 else
13051 if (flag_nop_mcount)
13052 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
13053 /* brasl */ 3 + /* l */ 2);
13054 else
13056 output_asm_insn ("st\t%0,%1", op);
13057 if (flag_dwarf2_cfi_asm)
13058 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
13059 output_asm_insn ("larl\t%2,%3", op);
13060 output_asm_insn ("brasl\t%0,%4", op);
13061 output_asm_insn ("l\t%0,%1", op);
13062 if (flag_dwarf2_cfi_asm)
13063 output_asm_insn (".cfi_restore\t%0", op);
13067 if (flag_record_mcount)
13069 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13070 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13071 fprintf (file, "\t.previous\n");
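/* Example output (a sketch, 64 bit, non-PIC, without -mfentry): the code
   above emits roughly

     stg   %r14,8(%r15)
     larl  %r1,.LP0
     brasl %r14,_mcount
     lg    %r14,8(%r15)

   plus the matching .cfi_rel_offset/.cfi_restore directives and, with
   -mrecord-mcount, an entry in the __mcount_loc section.  */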
13075 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13076 into its SYMBOL_REF_FLAGS. */
13078 static void
13079 s390_encode_section_info (tree decl, rtx rtl, int first)
13081 default_encode_section_info (decl, rtl, first);
13083 if (TREE_CODE (decl) == VAR_DECL)
13085 /* Store the alignment to be able to check if we can use
13086 a larl/load-relative instruction. We only handle the cases
13087 that can go wrong (i.e. no FUNC_DECLs). */
13088 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13089 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13090 else if (DECL_ALIGN (decl) % 32)
13091 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13092 else if (DECL_ALIGN (decl) % 64)
13093 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13096 /* Literal pool references don't have a decl so they are handled
13097 differently here. We rely on the information in the MEM_ALIGN
13098 entry to decide upon the alignment. */
13099 if (MEM_P (rtl)
13100 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13101 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13103 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13104 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13105 else if (MEM_ALIGN (rtl) % 32)
13106 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13107 else if (MEM_ALIGN (rtl) % 64)
13108 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
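/* Example (a sketch): a global 'char' variable with 1-byte alignment is
   marked via SYMBOL_FLAG_SET_NOTALIGN2 above, so larl, which requires an
   even target address, will not be used to address it directly; a
   2-byte-aligned variable would instead be marked via
   SYMBOL_FLAG_SET_NOTALIGN4, ruling out 4-byte load-relative accesses.  */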
13112 /* Output thunk to FILE that implements a C++ virtual function call (with
13113 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13114 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13115 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13116 relative to the resulting this pointer. */
13118 static void
13119 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13120 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13121 tree function)
13123 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13124 rtx op[10];
13125 int nonlocal = 0;
13127 assemble_start_function (thunk, fnname);
13128 /* Make sure unwind info is emitted for the thunk if needed. */
13129 final_start_function (emit_barrier (), file, 1);
13131 /* Operand 0 is the target function. */
13132 op[0] = XEXP (DECL_RTL (function), 0);
13133 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13135 nonlocal = 1;
13136 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13137 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13138 op[0] = gen_rtx_CONST (Pmode, op[0]);
13141 /* Operand 1 is the 'this' pointer. */
13142 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13143 op[1] = gen_rtx_REG (Pmode, 3);
13144 else
13145 op[1] = gen_rtx_REG (Pmode, 2);
13147 /* Operand 2 is the delta. */
13148 op[2] = GEN_INT (delta);
13150 /* Operand 3 is the vcall_offset. */
13151 op[3] = GEN_INT (vcall_offset);
13153 /* Operand 4 is the temporary register. */
13154 op[4] = gen_rtx_REG (Pmode, 1);
13156 /* Operands 5 to 8 can be used as labels. */
13157 op[5] = NULL_RTX;
13158 op[6] = NULL_RTX;
13159 op[7] = NULL_RTX;
13160 op[8] = NULL_RTX;
13163 /* Operand 9 can be used as a temporary register. */
13163 op[9] = NULL_RTX;
13165 /* Generate code. */
13166 if (TARGET_64BIT)
13168 /* Setup literal pool pointer if required. */
13169 if ((!DISP_IN_RANGE (delta)
13170 && !CONST_OK_FOR_K (delta)
13171 && !CONST_OK_FOR_Os (delta))
13172 || (!DISP_IN_RANGE (vcall_offset)
13173 && !CONST_OK_FOR_K (vcall_offset)
13174 && !CONST_OK_FOR_Os (vcall_offset)))
13176 op[5] = gen_label_rtx ();
13177 output_asm_insn ("larl\t%4,%5", op);
13180 /* Add DELTA to this pointer. */
13181 if (delta)
13183 if (CONST_OK_FOR_J (delta))
13184 output_asm_insn ("la\t%1,%2(%1)", op);
13185 else if (DISP_IN_RANGE (delta))
13186 output_asm_insn ("lay\t%1,%2(%1)", op);
13187 else if (CONST_OK_FOR_K (delta))
13188 output_asm_insn ("aghi\t%1,%2", op);
13189 else if (CONST_OK_FOR_Os (delta))
13190 output_asm_insn ("agfi\t%1,%2", op);
13191 else
13193 op[6] = gen_label_rtx ();
13194 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13198 /* Perform vcall adjustment. */
13199 if (vcall_offset)
13201 if (DISP_IN_RANGE (vcall_offset))
13203 output_asm_insn ("lg\t%4,0(%1)", op);
13204 output_asm_insn ("ag\t%1,%3(%4)", op);
13206 else if (CONST_OK_FOR_K (vcall_offset))
13208 output_asm_insn ("lghi\t%4,%3", op);
13209 output_asm_insn ("ag\t%4,0(%1)", op);
13210 output_asm_insn ("ag\t%1,0(%4)", op);
13212 else if (CONST_OK_FOR_Os (vcall_offset))
13214 output_asm_insn ("lgfi\t%4,%3", op);
13215 output_asm_insn ("ag\t%4,0(%1)", op);
13216 output_asm_insn ("ag\t%1,0(%4)", op);
13218 else
13220 op[7] = gen_label_rtx ();
13221 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13222 output_asm_insn ("ag\t%4,0(%1)", op);
13223 output_asm_insn ("ag\t%1,0(%4)", op);
13227 /* Jump to target. */
13228 output_asm_insn ("jg\t%0", op);
13230 /* Output literal pool if required. */
13231 if (op[5])
13233 output_asm_insn (".align\t4", op);
13234 targetm.asm_out.internal_label (file, "L",
13235 CODE_LABEL_NUMBER (op[5]));
13237 if (op[6])
13239 targetm.asm_out.internal_label (file, "L",
13240 CODE_LABEL_NUMBER (op[6]));
13241 output_asm_insn (".long\t%2", op);
13243 if (op[7])
13245 targetm.asm_out.internal_label (file, "L",
13246 CODE_LABEL_NUMBER (op[7]));
13247 output_asm_insn (".long\t%3", op);
13250 else
13252 /* Setup base pointer if required. */
13253 if (!vcall_offset
13254 || (!DISP_IN_RANGE (delta)
13255 && !CONST_OK_FOR_K (delta)
13256 && !CONST_OK_FOR_Os (delta))
13257 || (!DISP_IN_RANGE (delta)
13258 && !CONST_OK_FOR_K (vcall_offset)
13259 && !CONST_OK_FOR_Os (vcall_offset)))
13261 op[5] = gen_label_rtx ();
13262 output_asm_insn ("basr\t%4,0", op);
13263 targetm.asm_out.internal_label (file, "L",
13264 CODE_LABEL_NUMBER (op[5]));
13267 /* Add DELTA to this pointer. */
13268 if (delta)
13270 if (CONST_OK_FOR_J (delta))
13271 output_asm_insn ("la\t%1,%2(%1)", op);
13272 else if (DISP_IN_RANGE (delta))
13273 output_asm_insn ("lay\t%1,%2(%1)", op);
13274 else if (CONST_OK_FOR_K (delta))
13275 output_asm_insn ("ahi\t%1,%2", op);
13276 else if (CONST_OK_FOR_Os (delta))
13277 output_asm_insn ("afi\t%1,%2", op);
13278 else
13280 op[6] = gen_label_rtx ();
13281 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13285 /* Perform vcall adjustment. */
13286 if (vcall_offset)
13288 if (CONST_OK_FOR_J (vcall_offset))
13290 output_asm_insn ("l\t%4,0(%1)", op);
13291 output_asm_insn ("a\t%1,%3(%4)", op);
13293 else if (DISP_IN_RANGE (vcall_offset))
13295 output_asm_insn ("l\t%4,0(%1)", op);
13296 output_asm_insn ("ay\t%1,%3(%4)", op);
13298 else if (CONST_OK_FOR_K (vcall_offset))
13300 output_asm_insn ("lhi\t%4,%3", op);
13301 output_asm_insn ("a\t%4,0(%1)", op);
13302 output_asm_insn ("a\t%1,0(%4)", op);
13304 else if (CONST_OK_FOR_Os (vcall_offset))
13306 output_asm_insn ("iilf\t%4,%3", op);
13307 output_asm_insn ("a\t%4,0(%1)", op);
13308 output_asm_insn ("a\t%1,0(%4)", op);
13310 else
13312 op[7] = gen_label_rtx ();
13313 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13314 output_asm_insn ("a\t%4,0(%1)", op);
13315 output_asm_insn ("a\t%1,0(%4)", op);
13318 /* We had to clobber the base pointer register.
13319 Re-setup the base pointer (with a different base). */
13320 op[5] = gen_label_rtx ();
13321 output_asm_insn ("basr\t%4,0", op);
13322 targetm.asm_out.internal_label (file, "L",
13323 CODE_LABEL_NUMBER (op[5]));
13326 /* Jump to target. */
13327 op[8] = gen_label_rtx ();
13329 if (!flag_pic)
13330 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13331 else if (!nonlocal)
13332 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13333 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13334 else if (flag_pic == 1)
13336 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13337 output_asm_insn ("l\t%4,%0(%4)", op);
13339 else if (flag_pic == 2)
13341 op[9] = gen_rtx_REG (Pmode, 0);
13342 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13343 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13344 output_asm_insn ("ar\t%4,%9", op);
13345 output_asm_insn ("l\t%4,0(%4)", op);
13348 output_asm_insn ("br\t%4", op);
13350 /* Output literal pool. */
13351 output_asm_insn (".align\t4", op);
13353 if (nonlocal && flag_pic == 2)
13354 output_asm_insn (".long\t%0", op);
13355 if (nonlocal)
13357 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13358 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13361 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13362 if (!flag_pic)
13363 output_asm_insn (".long\t%0", op);
13364 else
13365 output_asm_insn (".long\t%0-%5", op);
13367 if (op[6])
13369 targetm.asm_out.internal_label (file, "L",
13370 CODE_LABEL_NUMBER (op[6]));
13371 output_asm_insn (".long\t%2", op);
13373 if (op[7])
13375 targetm.asm_out.internal_label (file, "L",
13376 CODE_LABEL_NUMBER (op[7]));
13377 output_asm_insn (".long\t%3", op);
13380 final_end_function ();
13381 assemble_end_function (thunk, fnname);
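/* Example output (a sketch): a 64-bit thunk with DELTA = 16,
   VCALL_OFFSET = 0 and a locally bound target reduces to

     la %r2,16(%r2)
     jg <function>

   while larger adjustments or a vcall offset add the literal-pool setup and
   vtable load sequences handled above.  */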
13384 /* Output either an indirect jump or an indirect call
13385 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13386 using a branch trampoline disabling branch target prediction. */
13388 void
13389 s390_indirect_branch_via_thunk (unsigned int regno,
13390 unsigned int return_addr_regno,
13391 rtx comparison_operator,
13392 enum s390_indirect_branch_type type)
13394 enum s390_indirect_branch_option option;
13396 if (type == s390_indirect_branch_type_return)
13398 if (s390_return_addr_from_memory ())
13399 option = s390_opt_function_return_mem;
13400 else
13401 option = s390_opt_function_return_reg;
13403 else if (type == s390_indirect_branch_type_jump)
13404 option = s390_opt_indirect_branch_jump;
13405 else if (type == s390_indirect_branch_type_call)
13406 option = s390_opt_indirect_branch_call;
13407 else
13408 gcc_unreachable ();
13410 if (TARGET_INDIRECT_BRANCH_TABLE)
13412 char label[32];
13414 ASM_GENERATE_INTERNAL_LABEL (label,
13415 indirect_branch_table_label[option],
13416 indirect_branch_table_label_no[option]++);
13417 ASM_OUTPUT_LABEL (asm_out_file, label);
13420 if (return_addr_regno != INVALID_REGNUM)
13422 gcc_assert (comparison_operator == NULL_RTX);
13423 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13425 else
13427 fputs (" \tjg", asm_out_file);
13428 if (comparison_operator != NULL_RTX)
13429 print_operand (asm_out_file, comparison_operator, 'C');
13431 fputs ("\t", asm_out_file);
13434 if (TARGET_CPU_Z10)
13435 fprintf (asm_out_file,
13436 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13437 regno);
13438 else
13439 fprintf (asm_out_file,
13440 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13441 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13443 if ((option == s390_opt_indirect_branch_jump
13444 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13445 || (option == s390_opt_indirect_branch_call
13446 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13447 || (option == s390_opt_function_return_reg
13448 && cfun->machine->function_return_reg == indirect_branch_thunk)
13449 || (option == s390_opt_function_return_mem
13450 && cfun->machine->function_return_mem == indirect_branch_thunk))
13452 if (TARGET_CPU_Z10)
13453 indirect_branch_z10thunk_mask |= (1 << regno);
13454 else
13455 indirect_branch_prez10thunk_mask |= (1 << regno);
13459 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13460 either be an address register or a label pointing to the location
13461 of the jump instruction. */
13463 void
13464 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13466 if (TARGET_INDIRECT_BRANCH_TABLE)
13468 char label[32];
13470 ASM_GENERATE_INTERNAL_LABEL (label,
13471 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13472 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13473 ASM_OUTPUT_LABEL (asm_out_file, label);
13476 if (!TARGET_ZARCH)
13477 fputs ("\t.machinemode zarch\n", asm_out_file);
13479 if (REG_P (execute_target))
13480 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13481 else
13482 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13484 if (!TARGET_ZARCH)
13485 fputs ("\t.machinemode esa\n", asm_out_file);
13487 fputs ("0:\tj\t0b\n", asm_out_file);
13490 static bool
13491 s390_valid_pointer_mode (scalar_int_mode mode)
13493 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13496 /* Checks whether the given CALL_EXPR would use a call-saved
13497 register. This is used to decide whether sibling call
13498 optimization could be performed on the respective function
13499 call. */
13501 static bool
13502 s390_call_saved_register_used (tree call_expr)
13504 CUMULATIVE_ARGS cum_v;
13505 cumulative_args_t cum;
13506 tree parameter;
13507 rtx parm_rtx;
13508 int reg, i;
13510 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13511 cum = pack_cumulative_args (&cum_v);
13513 for (i = 0; i < call_expr_nargs (call_expr); i++)
13515 parameter = CALL_EXPR_ARG (call_expr, i);
13516 gcc_assert (parameter);
13518 /* For an undeclared variable passed as parameter we will get
13519 an ERROR_MARK node here. */
13520 if (TREE_CODE (parameter) == ERROR_MARK)
13521 return true;
13523 /* We assume that in the target function all parameters are
13524 named. This only has an impact on vector argument register
13525 usage none of which is call-saved. */
13526 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13527 apply_pass_by_reference_rules (&cum_v, arg);
13529 parm_rtx = s390_function_arg (cum, arg);
13531 s390_function_arg_advance (cum, arg);
13533 if (!parm_rtx)
13534 continue;
13536 if (REG_P (parm_rtx))
13538 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13539 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13540 return true;
13543 if (GET_CODE (parm_rtx) == PARALLEL)
13545 int i;
13547 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13549 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13551 gcc_assert (REG_P (r));
13553 for (reg = 0; reg < REG_NREGS (r); reg++)
13554 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13555 return true;
13560 return false;
13563 /* Return true if the given call expression can be
13564 turned into a sibling call.
13565 DECL holds the declaration of the function to be called whereas
13566 EXP is the call expression itself. */
13568 static bool
13569 s390_function_ok_for_sibcall (tree decl, tree exp)
13571 /* The TPF epilogue uses register 1. */
13572 if (TARGET_TPF_PROFILING)
13573 return false;
13575 /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
13576 which would have to be restored before the sibcall. */
13577 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13578 return false;
13580 /* The thunks for indirect branches require r1 if no exrl is
13581 available. r1 might not be available when doing a sibling
13582 call. */
13583 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13584 && !TARGET_CPU_Z10
13585 && !decl)
13586 return false;
13588 /* Register 6 on s390 is available as an argument register but is
13589 unfortunately call-saved. This makes functions needing this register
13590 for arguments not suitable for sibcalls. */
13591 return !s390_call_saved_register_used (exp);
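/* Example (a sketch): a call passing five integer/pointer arguments needs
   %r2-%r6; since %r6 is call-saved, s390_call_saved_register_used returns
   true for it and the call is not turned into a sibling call.  */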
13594 /* Return the fixed registers used for condition codes. */
13596 static bool
13597 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13599 *p1 = CC_REGNUM;
13600 *p2 = INVALID_REGNUM;
13602 return true;
13605 /* This function is used by the call expanders of the machine description.
13606 It emits the call insn itself together with the necessary operations
13607 to adjust the target address and returns the emitted insn.
13608 ADDR_LOCATION is the target address rtx
13609 TLS_CALL the location of the thread-local symbol
13610 RESULT_REG the register where the result of the call should be stored
13611 RETADDR_REG the register where the return address should be stored
13612 If this parameter is NULL_RTX the call is considered
13613 to be a sibling call. */
13615 rtx_insn *
13616 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13617 rtx retaddr_reg)
13619 bool plt_call = false;
13620 rtx_insn *insn;
13621 rtx vec[4] = { NULL_RTX };
13622 int elts = 0;
13623 rtx *call = &vec[0];
13624 rtx *clobber_ret_reg = &vec[1];
13625 rtx *use = &vec[2];
13626 rtx *clobber_thunk_reg = &vec[3];
13627 int i;
13629 /* Direct function calls need special treatment. */
13630 if (GET_CODE (addr_location) == SYMBOL_REF)
13632 /* When calling a global routine in PIC mode, we must
13633 replace the symbol itself with the PLT stub. */
13634 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13636 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13638 addr_location = gen_rtx_UNSPEC (Pmode,
13639 gen_rtvec (1, addr_location),
13640 UNSPEC_PLT);
13641 addr_location = gen_rtx_CONST (Pmode, addr_location);
13642 plt_call = true;
13644 else
13645 /* For -fpic code the PLT entries might use r12 which is
13646 call-saved. Therefore we cannot do a sibcall when
13647 calling directly using a symbol ref. When reaching
13648 this point we decided (in s390_function_ok_for_sibcall)
13649 to do a sibcall for a function pointer but one of the
13650 optimizers was able to get rid of the function pointer
13651 by propagating the symbol ref into the call. This
13652 optimization is illegal for S/390 so we turn the direct
13653 call into an indirect call again. */
13654 addr_location = force_reg (Pmode, addr_location);
13658 /* If it is already an indirect call or the code above moved the
13659 SYMBOL_REF to somewhere else make sure the address can be found in
13660 register 1. */
13661 if (retaddr_reg == NULL_RTX
13662 && GET_CODE (addr_location) != SYMBOL_REF
13663 && !plt_call)
13665 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13666 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13669 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13670 && GET_CODE (addr_location) != SYMBOL_REF
13671 && !plt_call)
13673 /* Indirect branch thunks require the target to be a single GPR. */
13674 addr_location = force_reg (Pmode, addr_location);
13676 /* Without exrl the indirect branch thunks need an additional
13677 register for larl;ex. */
13678 if (!TARGET_CPU_Z10)
13680 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13681 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13685 addr_location = gen_rtx_MEM (QImode, addr_location);
13686 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13688 if (result_reg != NULL_RTX)
13689 *call = gen_rtx_SET (result_reg, *call);
13691 if (retaddr_reg != NULL_RTX)
13693 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13695 if (tls_call != NULL_RTX)
13696 *use = gen_rtx_USE (VOIDmode, tls_call);
13700 for (i = 0; i < 4; i++)
13701 if (vec[i] != NULL_RTX)
13702 elts++;
13704 if (elts > 1)
13706 rtvec v;
13707 int e = 0;
13709 v = rtvec_alloc (elts);
13710 for (i = 0; i < 4; i++)
13711 if (vec[i] != NULL_RTX)
13713 RTVEC_ELT (v, e) = vec[i];
13714 e++;
13717 *call = gen_rtx_PARALLEL (VOIDmode, v);
13720 insn = emit_call_insn (*call);
13722 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13723 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13725 /* s390_function_ok_for_sibcall should
13726 have denied sibcalls in this case. */
13727 gcc_assert (retaddr_reg != NULL_RTX);
13728 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13730 return insn;
13733 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13735 static void
13736 s390_conditional_register_usage (void)
13738 int i;
13740 if (flag_pic)
13741 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13742 fixed_regs[BASE_REGNUM] = 0;
13743 fixed_regs[RETURN_REGNUM] = 0;
13744 if (TARGET_64BIT)
13746 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13747 call_used_regs[i] = 0;
13749 else
13751 call_used_regs[FPR4_REGNUM] = 0;
13752 call_used_regs[FPR6_REGNUM] = 0;
13755 if (TARGET_SOFT_FLOAT)
13757 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13758 fixed_regs[i] = 1;
13761 /* Disable v16 - v31 for non-vector target. */
13762 if (!TARGET_VX)
13764 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13765 fixed_regs[i] = call_used_regs[i] = 1;
13769 /* Corresponding function to eh_return expander. */
13771 static GTY(()) rtx s390_tpf_eh_return_symbol;
13772 void
13773 s390_emit_tpf_eh_return (rtx target)
13775 rtx_insn *insn;
13776 rtx reg, orig_ra;
13778 if (!s390_tpf_eh_return_symbol)
13779 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13781 reg = gen_rtx_REG (Pmode, 2);
13782 orig_ra = gen_rtx_REG (Pmode, 3);
13784 emit_move_insn (reg, target);
13785 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13786 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13787 gen_rtx_REG (Pmode, RETURN_REGNUM));
13788 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13789 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13791 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13794 /* Rework the prologue/epilogue to avoid saving/restoring
13795 registers unnecessarily. */
13797 static void
13798 s390_optimize_prologue (void)
13800 rtx_insn *insn, *new_insn, *next_insn;
13802 /* Do a final recompute of the frame-related data. */
13803 s390_optimize_register_info ();
13805 /* If all special registers are in fact used, there's nothing we
13806 can do, so no point in walking the insn list. */
13808 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13809 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13810 return;
13812 /* Search for prologue/epilogue insns and replace them. */
13813 for (insn = get_insns (); insn; insn = next_insn)
13815 int first, last, off;
13816 rtx set, base, offset;
13817 rtx pat;
13819 next_insn = NEXT_INSN (insn);
13821 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13822 continue;
13824 pat = PATTERN (insn);
13826 /* Remove ldgr/lgdr instructions used for saving and restoring
13827 GPRs if possible. */
13828 if (TARGET_Z10)
13830 rtx tmp_pat = pat;
13832 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13833 tmp_pat = XVECEXP (pat, 0, 0);
13835 if (GET_CODE (tmp_pat) == SET
13836 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13837 && REG_P (SET_SRC (tmp_pat))
13838 && REG_P (SET_DEST (tmp_pat)))
13840 int src_regno = REGNO (SET_SRC (tmp_pat));
13841 int dest_regno = REGNO (SET_DEST (tmp_pat));
13842 int gpr_regno;
13843 int fpr_regno;
13845 if (!((GENERAL_REGNO_P (src_regno)
13846 && FP_REGNO_P (dest_regno))
13847 || (FP_REGNO_P (src_regno)
13848 && GENERAL_REGNO_P (dest_regno))))
13849 continue;
13851 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13852 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13854 /* GPR must be call-saved, FPR must be call-clobbered. */
13855 if (!call_used_regs[fpr_regno]
13856 || call_used_regs[gpr_regno])
13857 continue;
13859 /* It must not happen that what we once saved in an FPR now
13860 needs a stack slot. */
13861 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13863 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13865 remove_insn (insn);
13866 continue;
13871 if (GET_CODE (pat) == PARALLEL
13872 && store_multiple_operation (pat, VOIDmode))
13874 set = XVECEXP (pat, 0, 0);
13875 first = REGNO (SET_SRC (set));
13876 last = first + XVECLEN (pat, 0) - 1;
13877 offset = const0_rtx;
13878 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13879 off = INTVAL (offset);
13881 if (GET_CODE (base) != REG || off < 0)
13882 continue;
13883 if (cfun_frame_layout.first_save_gpr != -1
13884 && (cfun_frame_layout.first_save_gpr < first
13885 || cfun_frame_layout.last_save_gpr > last))
13886 continue;
13887 if (REGNO (base) != STACK_POINTER_REGNUM
13888 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13889 continue;
13890 if (first > BASE_REGNUM || last < BASE_REGNUM)
13891 continue;
13893 if (cfun_frame_layout.first_save_gpr != -1)
13895 rtx s_pat = save_gprs (base,
13896 off + (cfun_frame_layout.first_save_gpr
13897 - first) * UNITS_PER_LONG,
13898 cfun_frame_layout.first_save_gpr,
13899 cfun_frame_layout.last_save_gpr);
13900 new_insn = emit_insn_before (s_pat, insn);
13901 INSN_ADDRESSES_NEW (new_insn, -1);
13904 remove_insn (insn);
13905 continue;
13908 if (cfun_frame_layout.first_save_gpr == -1
13909 && GET_CODE (pat) == SET
13910 && GENERAL_REG_P (SET_SRC (pat))
13911 && GET_CODE (SET_DEST (pat)) == MEM)
13913 set = pat;
13914 first = REGNO (SET_SRC (set));
13915 offset = const0_rtx;
13916 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13917 off = INTVAL (offset);
13919 if (GET_CODE (base) != REG || off < 0)
13920 continue;
13921 if (REGNO (base) != STACK_POINTER_REGNUM
13922 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13923 continue;
13925 remove_insn (insn);
13926 continue;
13929 if (GET_CODE (pat) == PARALLEL
13930 && load_multiple_operation (pat, VOIDmode))
13932 set = XVECEXP (pat, 0, 0);
13933 first = REGNO (SET_DEST (set));
13934 last = first + XVECLEN (pat, 0) - 1;
13935 offset = const0_rtx;
13936 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13937 off = INTVAL (offset);
13939 if (GET_CODE (base) != REG || off < 0)
13940 continue;
13942 if (cfun_frame_layout.first_restore_gpr != -1
13943 && (cfun_frame_layout.first_restore_gpr < first
13944 || cfun_frame_layout.last_restore_gpr > last))
13945 continue;
13946 if (REGNO (base) != STACK_POINTER_REGNUM
13947 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13948 continue;
13949 if (first > BASE_REGNUM || last < BASE_REGNUM)
13950 continue;
13952 if (cfun_frame_layout.first_restore_gpr != -1)
13954 rtx rpat = restore_gprs (base,
13955 off + (cfun_frame_layout.first_restore_gpr
13956 - first) * UNITS_PER_LONG,
13957 cfun_frame_layout.first_restore_gpr,
13958 cfun_frame_layout.last_restore_gpr);
13960 /* Remove REG_CFA_RESTOREs for registers that we no
13961 longer need to save. */
13962 REG_NOTES (rpat) = REG_NOTES (insn);
13963 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13964 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13965 && ((int) REGNO (XEXP (*ptr, 0))
13966 < cfun_frame_layout.first_restore_gpr))
13967 *ptr = XEXP (*ptr, 1);
13968 else
13969 ptr = &XEXP (*ptr, 1);
13970 new_insn = emit_insn_before (rpat, insn);
13971 RTX_FRAME_RELATED_P (new_insn) = 1;
13972 INSN_ADDRESSES_NEW (new_insn, -1);
13975 remove_insn (insn);
13976 continue;
13979 if (cfun_frame_layout.first_restore_gpr == -1
13980 && GET_CODE (pat) == SET
13981 && GENERAL_REG_P (SET_DEST (pat))
13982 && GET_CODE (SET_SRC (pat)) == MEM)
13984 set = pat;
13985 first = REGNO (SET_DEST (set));
13986 offset = const0_rtx;
13987 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13988 off = INTVAL (offset);
13990 if (GET_CODE (base) != REG || off < 0)
13991 continue;
13993 if (REGNO (base) != STACK_POINTER_REGNUM
13994 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13995 continue;
13997 remove_insn (insn);
13998 continue;
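/* Example of the rewrites above: if the final frame layout only needs
   %r12..%r15, a prologue store-multiple emitted for %r6..%r15 is replaced
   by one covering just %r12..%r15 with the displacement increased by
   (12 - 6) * UNITS_PER_LONG, the matching epilogue load-multiple is
   narrowed the same way, and single saves/restores (including the
   ldgr/lgdr copies to call-clobbered FPRs) whose GPR no longer needs a
   save slot are removed entirely.  */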
14003 /* On z10 and later the dynamic branch prediction must see the
14004 backward jump within a certain window. If not, it falls back to
14005 the static prediction. This function rearranges the loop backward
14006 branch in a way which makes the static prediction always correct.
14007 The function returns true if it added an instruction. */
14008 static bool
14009 s390_fix_long_loop_prediction (rtx_insn *insn)
14011 rtx set = single_set (insn);
14012 rtx code_label, label_ref;
14013 rtx_insn *uncond_jump;
14014 rtx_insn *cur_insn;
14015 rtx tmp;
14016 int distance;
14018 /* This will exclude branch on count and branch on index patterns
14019 since these are correctly statically predicted.
14021 The additional check for a PARALLEL is required here since
14022 single_set might be != NULL for PARALLELs where the set of the
14023 iteration variable is dead. */
14024 if (GET_CODE (PATTERN (insn)) == PARALLEL
14025 || !set
14026 || SET_DEST (set) != pc_rtx
14027 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14028 return false;
14030 /* Skip conditional returns. */
14031 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14032 && XEXP (SET_SRC (set), 2) == pc_rtx)
14033 return false;
14035 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14036 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14038 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14040 code_label = XEXP (label_ref, 0);
14042 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14043 || INSN_ADDRESSES (INSN_UID (insn)) == -1
14044 || (INSN_ADDRESSES (INSN_UID (insn))
14045 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14046 return false;
14048 for (distance = 0, cur_insn = PREV_INSN (insn);
14049 distance < PREDICT_DISTANCE - 6;
14050 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14051 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14052 return false;
14054 rtx_code_label *new_label = gen_label_rtx ();
14055 uncond_jump = emit_jump_insn_after (
14056 gen_rtx_SET (pc_rtx,
14057 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14058 insn);
14059 emit_label_after (new_label, uncond_jump);
14061 tmp = XEXP (SET_SRC (set), 1);
14062 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14063 XEXP (SET_SRC (set), 2) = tmp;
14064 INSN_CODE (insn) = -1;
14066 XEXP (label_ref, 0) = new_label;
14067 JUMP_LABEL (insn) = new_label;
14068 JUMP_LABEL (uncond_jump) = code_label;
14070 return true;
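/* In other words, a loop whose conditional backward branch is too far
   away from its target for the dynamic predictor,

       loop:  ...
              jCC   loop

   is rewritten into

       loop:  ...
              jNCC  new_label    ; branch arms swapped
              j     loop         ; unconditional backward jump
       new_label:

   so the backward branch becomes unconditional and its static
   prediction is always correct.  */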
14073 /* Returns 1 if INSN reads the value of REG for purposes not related
14074 to addressing of memory, and 0 otherwise. */
14075 static int
14076 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14078 return reg_referenced_p (reg, PATTERN (insn))
14079 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14082 /* Starting from INSN find_cond_jump looks downwards in the insn
14083 stream for a single jump insn which is the last user of the
14084 condition code set in INSN. */
14085 static rtx_insn *
14086 find_cond_jump (rtx_insn *insn)
14088 for (; insn; insn = NEXT_INSN (insn))
14090 rtx ite, cc;
14092 if (LABEL_P (insn))
14093 break;
14095 if (!JUMP_P (insn))
14097 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14098 break;
14099 continue;
14102 /* This will be triggered by a return. */
14103 if (GET_CODE (PATTERN (insn)) != SET)
14104 break;
14106 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14107 ite = SET_SRC (PATTERN (insn));
14109 if (GET_CODE (ite) != IF_THEN_ELSE)
14110 break;
14112 cc = XEXP (XEXP (ite, 0), 0);
14113 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14114 break;
14116 if (find_reg_note (insn, REG_DEAD, cc))
14117 return insn;
14118 break;
14121 return NULL;
14124 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14125 the semantics does not change. If NULL_RTX is passed as COND the
14126 function tries to find the conditional jump starting with INSN. */
14127 static void
14128 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14130 rtx tmp = *op0;
14132 if (cond == NULL_RTX)
14134 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14135 rtx set = jump ? single_set (jump) : NULL_RTX;
14137 if (set == NULL_RTX)
14138 return;
14140 cond = XEXP (SET_SRC (set), 0);
14143 *op0 = *op1;
14144 *op1 = tmp;
14145 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14148 /* On z10, instructions of the compare-and-branch family have the
14149 property of accessing the register occurring as second operand with
14150 its bits complemented. If such a compare is grouped with a second
14151 instruction that accesses the same register non-complemented, and
14152 if that register's value is delivered via a bypass, then the
14153 pipeline recycles, thereby causing significant performance decline.
14154 This function locates such situations and exchanges the two
14155 operands of the compare. The function returns true whenever it
14156 added an insn. */
14157 static bool
14158 s390_z10_optimize_cmp (rtx_insn *insn)
14160 rtx_insn *prev_insn, *next_insn;
14161 bool insn_added_p = false;
14162 rtx cond, *op0, *op1;
14164 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14166 /* Handle compare and branch and branch on count
14167 instructions. */
14168 rtx pattern = single_set (insn);
14170 if (!pattern
14171 || SET_DEST (pattern) != pc_rtx
14172 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14173 return false;
14175 cond = XEXP (SET_SRC (pattern), 0);
14176 op0 = &XEXP (cond, 0);
14177 op1 = &XEXP (cond, 1);
14179 else if (GET_CODE (PATTERN (insn)) == SET)
14181 rtx src, dest;
14183 /* Handle normal compare instructions. */
14184 src = SET_SRC (PATTERN (insn));
14185 dest = SET_DEST (PATTERN (insn));
14187 if (!REG_P (dest)
14188 || !CC_REGNO_P (REGNO (dest))
14189 || GET_CODE (src) != COMPARE)
14190 return false;
14192 /* s390_swap_cmp will try to find the conditional
14193 jump when passing NULL_RTX as condition. */
14194 cond = NULL_RTX;
14195 op0 = &XEXP (src, 0);
14196 op1 = &XEXP (src, 1);
14198 else
14199 return false;
14201 if (!REG_P (*op0) || !REG_P (*op1))
14202 return false;
14204 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14205 return false;
14207 /* Swap the COMPARE arguments and its mask if there is a
14208 conflicting access in the previous insn. */
14209 prev_insn = prev_active_insn (insn);
14210 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14211 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14212 s390_swap_cmp (cond, op0, op1, insn);
14214 /* Check if there is a conflict with the next insn. If there
14215 was no conflict with the previous insn, then swap the
14216 COMPARE arguments and its mask. If we already swapped
14217 the operands, or if swapping them would cause a conflict
14218 with the previous insn, issue a NOP after the COMPARE in
14219 order to separate the two instructions. */
14220 next_insn = next_active_insn (insn);
14221 if (next_insn != NULL_RTX && INSN_P (next_insn)
14222 && s390_non_addr_reg_read_p (*op1, next_insn))
14224 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14225 && s390_non_addr_reg_read_p (*op0, prev_insn))
14227 if (REGNO (*op1) == 0)
14228 emit_insn_after (gen_nop_lr1 (), insn);
14229 else
14230 emit_insn_after (gen_nop_lr0 (), insn);
14231 insn_added_p = true;
14233 else
14234 s390_swap_cmp (cond, op0, op1, insn);
14236 return insn_added_p;
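/* E.g. when the register used as second operand of the compare was
   written by the previous instruction, the compare operands (and the
   condition of the consuming jump) are swapped; if swapping would merely
   move the conflict to the other neighbour, a register-to-register NOP
   is emitted after the compare instead so the two conflicting
   instructions get separated.  */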
14239 /* Number of INSNs to be scanned backward in the last BB of the loop
14240 and forward in the first BB of the loop. This usually should be a
14241 bit more than the number of INSNs which could go into one
14242 group. */
14243 #define S390_OSC_SCAN_INSN_NUM 5
14245 /* Scan LOOP for static OSC collisions and return true if an osc_break
14246 should be issued for this loop. */
14247 static bool
14248 s390_adjust_loop_scan_osc (struct loop* loop)
14251 HARD_REG_SET modregs, newregs;
14252 rtx_insn *insn, *store_insn = NULL;
14253 rtx set;
14254 struct s390_address addr_store, addr_load;
14255 subrtx_iterator::array_type array;
14256 int insn_count;
14258 CLEAR_HARD_REG_SET (modregs);
14260 insn_count = 0;
14261 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14263 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14264 continue;
14266 insn_count++;
14267 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14268 return false;
14270 find_all_hard_reg_sets (insn, &newregs, true);
14271 modregs |= newregs;
14273 set = single_set (insn);
14274 if (!set)
14275 continue;
14277 if (MEM_P (SET_DEST (set))
14278 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14280 store_insn = insn;
14281 break;
14285 if (store_insn == NULL_RTX)
14286 return false;
14288 insn_count = 0;
14289 FOR_BB_INSNS (loop->header, insn)
14291 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14292 continue;
14294 if (insn == store_insn)
14295 return false;
14297 insn_count++;
14298 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14299 return false;
14301 find_all_hard_reg_sets (insn, &newregs, true);
14302 modregs |= newregs;
14304 set = single_set (insn);
14305 if (!set)
14306 continue;
14308 /* An intermediate store disrupts static OSC checking
14309 anyway. */
14310 if (MEM_P (SET_DEST (set))
14311 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14312 return false;
14314 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14315 if (MEM_P (*iter)
14316 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14317 && rtx_equal_p (addr_load.base, addr_store.base)
14318 && rtx_equal_p (addr_load.indx, addr_store.indx)
14319 && rtx_equal_p (addr_load.disp, addr_store.disp))
14321 if ((addr_load.base != NULL_RTX
14322 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14323 || (addr_load.indx != NULL_RTX
14324 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14325 return true;
14328 return false;
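/* The situation being detected looks roughly like

       loop:
         lg    %r1,0(%r2,%r3)   ; header: load from base/index/disp
         ...
         stg   %r1,0(%r2,%r3)   ; latch: store through the same address rtx
         la    %r3,8(%r3)       ; address register changed after the store
         j     loop

   i.e. the store near the loop end and the load near the loop start use
   the identical address expression while one of its registers is
   modified in between, which is the case the osc_break instrumentation
   is meant to help with.  */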
14331 /* Look for adjustments which can be done on simple innermost
14332 loops. */
14333 static void
14334 s390_adjust_loops ()
14336 struct loop *loop = NULL;
14338 df_analyze ();
14339 compute_bb_for_insn ();
14341 /* Find the loops. */
14342 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14344 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14346 if (dump_file)
14348 flow_loop_dump (loop, dump_file, NULL, 0);
14349 fprintf (dump_file, ";; OSC loop scan Loop: ");
14351 if (loop->latch == NULL
14352 || pc_set (BB_END (loop->latch)) == NULL_RTX
14353 || !s390_adjust_loop_scan_osc (loop))
14355 if (dump_file)
14357 if (loop->latch == NULL)
14358 fprintf (dump_file, " multiple backward jumps\n");
14359 else
14361 fprintf (dump_file, " header insn: %d latch insn: %d ",
14362 INSN_UID (BB_HEAD (loop->header)),
14363 INSN_UID (BB_END (loop->latch)));
14364 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14365 fprintf (dump_file, " loop does not end with jump\n");
14366 else
14367 fprintf (dump_file, " not instrumented\n");
14371 else
14373 rtx_insn *new_insn;
14375 if (dump_file)
14376 fprintf (dump_file, " adding OSC break insn: ");
14377 new_insn = emit_insn_before (gen_osc_break (),
14378 BB_END (loop->latch));
14379 INSN_ADDRESSES_NEW (new_insn, -1);
14383 loop_optimizer_finalize ();
14385 df_finish_pass (false);
14388 /* Perform machine-dependent processing. */
14390 static void
14391 s390_reorg (void)
14393 struct constant_pool *pool;
14394 rtx_insn *insn;
14395 int hw_before, hw_after;
14397 if (s390_tune == PROCESSOR_2964_Z13)
14398 s390_adjust_loops ();
14400 /* Make sure all splits have been performed; splits after
14401 machine_dependent_reorg might confuse insn length counts. */
14402 split_all_insns_noflow ();
14404 /* Install the main literal pool and the associated base
14405 register load insns. The literal pool might be > 4096 bytes in
14406 size, so that some of its elements cannot be directly accessed.
14408 To fix this, we split the single literal pool into multiple
14409 pool chunks, reloading the pool base register at various
14410 points throughout the function to ensure it always points to
14411 the pool chunk the following code expects. */
14413 /* Collect the literal pool. */
14414 pool = s390_mainpool_start ();
14415 if (pool)
14417 /* Finish up literal pool related changes. */
14418 s390_mainpool_finish (pool);
14420 else
14422 /* If literal pool overflowed, chunkify it. */
14423 pool = s390_chunkify_start ();
14424 s390_chunkify_finish (pool);
14427 /* Generate out-of-pool execute target insns. */
14428 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14430 rtx label;
14431 rtx_insn *target;
14433 label = s390_execute_label (insn);
14434 if (!label)
14435 continue;
14437 gcc_assert (label != const0_rtx);
14439 target = emit_label (XEXP (label, 0));
14440 INSN_ADDRESSES_NEW (target, -1);
14442 if (JUMP_P (insn))
14444 target = emit_jump_insn (s390_execute_target (insn));
14445 /* This is important in order to keep a table jump
14446 pointing at the jump table label. Only then is it
14447 recognized as a table jump. */
14448 JUMP_LABEL (target) = JUMP_LABEL (insn);
14450 else
14451 target = emit_insn (s390_execute_target (insn));
14452 INSN_ADDRESSES_NEW (target, -1);
14455 /* Try to optimize prologue and epilogue further. */
14456 s390_optimize_prologue ();
14458 /* Walk over the insns and do some >=z10 specific changes. */
14459 if (s390_tune >= PROCESSOR_2097_Z10)
14461 rtx_insn *insn;
14462 bool insn_added_p = false;
14464 /* The insn lengths and addresses have to be up to date for the
14465 following manipulations. */
14466 shorten_branches (get_insns ());
14468 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14470 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14471 continue;
14473 if (JUMP_P (insn))
14474 insn_added_p |= s390_fix_long_loop_prediction (insn);
14476 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14477 || GET_CODE (PATTERN (insn)) == SET)
14478 && s390_tune == PROCESSOR_2097_Z10)
14479 insn_added_p |= s390_z10_optimize_cmp (insn);
14482 /* Adjust branches if we added new instructions. */
14483 if (insn_added_p)
14484 shorten_branches (get_insns ());
14487 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14488 if (hw_after > 0)
14490 rtx_insn *insn;
14492 /* Insert NOPs for hotpatching. */
14493 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14494 /* Emit NOPs
14495 1. inside the area covered by debug information to allow setting
14496 breakpoints at the NOPs,
14497 2. before any insn which results in an asm instruction,
14498 3. before in-function labels to avoid jumping to the NOPs, for
14499 example as part of a loop,
14500 4. before any barrier in case the function is completely empty
14501 (__builtin_unreachable ()) and has neither internal labels nor
14502 active insns.
14504 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14505 break;
14506 /* Output a series of NOPs before the first active insn. */
14507 while (insn && hw_after > 0)
14509 if (hw_after >= 3)
14511 emit_insn_before (gen_nop_6_byte (), insn);
14512 hw_after -= 3;
14514 else if (hw_after >= 2)
14516 emit_insn_before (gen_nop_4_byte (), insn);
14517 hw_after -= 2;
14519 else
14521 emit_insn_before (gen_nop_2_byte (), insn);
14522 hw_after -= 1;
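/* hw_after counts halfwords, so e.g. a remaining value of 5 is covered by
   one 6-byte NOP (3 halfwords) followed by one 4-byte NOP (2 halfwords).  */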
14528 /* Return true if INSN is a fp load insn writing register REGNO. */
14529 static inline bool
14530 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14532 rtx set;
14533 enum attr_type flag = s390_safe_attr_type (insn);
14535 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14536 return false;
14538 set = single_set (insn);
14540 if (set == NULL_RTX)
14541 return false;
14543 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14544 return false;
14546 if (REGNO (SET_DEST (set)) != regno)
14547 return false;
14549 return true;
14552 /* This value describes the distance to be avoided between an
14553 arithmetic fp instruction and an fp load writing the same register.
14554 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14555 fine, but the exact value has to be avoided. Otherwise the FP
14556 pipeline will throw an exception causing a major penalty. */
14557 #define Z10_EARLYLOAD_DISTANCE 7
14559 /* Rearrange the ready list in order to avoid the situation described
14560 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14561 moved to the very end of the ready list. */
14562 static void
14563 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14565 unsigned int regno;
14566 int nready = *nready_p;
14567 rtx_insn *tmp;
14568 int i;
14569 rtx_insn *insn;
14570 rtx set;
14571 enum attr_type flag;
14572 int distance;
14574 /* Skip DISTANCE - 1 active insns. */
14575 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14576 distance > 0 && insn != NULL_RTX;
14577 distance--, insn = prev_active_insn (insn))
14578 if (CALL_P (insn) || JUMP_P (insn))
14579 return;
14581 if (insn == NULL_RTX)
14582 return;
14584 set = single_set (insn);
14586 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14587 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14588 return;
14590 flag = s390_safe_attr_type (insn);
14592 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14593 return;
14595 regno = REGNO (SET_DEST (set));
14596 i = nready - 1;
14598 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14599 i--;
14601 if (!i)
14602 return;
14604 tmp = ready[i];
14605 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14606 ready[0] = tmp;
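/* E.g. if the insn issued Z10_EARLYLOAD_DISTANCE - 1 active insns ago
   computed an FP result into %f2, an FP load of %f2 found in READY is
   rotated down to index 0 (the slot considered last), so it is not
   issued at exactly the problematic distance.  */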
14609 /* Returns TRUE if BB is entered via a fallthru edge and all other
14610 incoming edges are less than likely. */
14611 static bool
14612 s390_bb_fallthru_entry_likely (basic_block bb)
14614 edge e, fallthru_edge;
14615 edge_iterator ei;
14617 if (!bb)
14618 return false;
14620 fallthru_edge = find_fallthru_edge (bb->preds);
14621 if (!fallthru_edge)
14622 return false;
14624 FOR_EACH_EDGE (e, ei, bb->preds)
14625 if (e != fallthru_edge
14626 && e->probability >= profile_probability::likely ())
14627 return false;
14629 return true;
14632 struct s390_sched_state
14634 /* Number of insns in the group. */
14635 int group_state;
14636 /* Execution side of the group. */
14637 int side;
14638 /* Group can only hold two insns. */
14639 bool group_of_two;
14640 } s390_sched_state;
14642 static struct s390_sched_state sched_state = {0, 1, false};
14644 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14645 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14646 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14647 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14648 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14650 static unsigned int
14651 s390_get_sched_attrmask (rtx_insn *insn)
14653 unsigned int mask = 0;
14655 switch (s390_tune)
14657 case PROCESSOR_2827_ZEC12:
14658 if (get_attr_zEC12_cracked (insn))
14659 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14660 if (get_attr_zEC12_expanded (insn))
14661 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14662 if (get_attr_zEC12_endgroup (insn))
14663 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14664 if (get_attr_zEC12_groupalone (insn))
14665 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14666 break;
14667 case PROCESSOR_2964_Z13:
14668 if (get_attr_z13_cracked (insn))
14669 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14670 if (get_attr_z13_expanded (insn))
14671 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14672 if (get_attr_z13_endgroup (insn))
14673 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14674 if (get_attr_z13_groupalone (insn))
14675 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14676 if (get_attr_z13_groupoftwo (insn))
14677 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14678 break;
14679 case PROCESSOR_3906_Z14:
14680 if (get_attr_z14_cracked (insn))
14681 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14682 if (get_attr_z14_expanded (insn))
14683 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14684 if (get_attr_z14_endgroup (insn))
14685 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14686 if (get_attr_z14_groupalone (insn))
14687 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14688 if (get_attr_z14_groupoftwo (insn))
14689 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14690 break;
14691 case PROCESSOR_8561_Z15:
14692 if (get_attr_z15_cracked (insn))
14693 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14694 if (get_attr_z15_expanded (insn))
14695 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14696 if (get_attr_z15_endgroup (insn))
14697 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14698 if (get_attr_z15_groupalone (insn))
14699 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14700 if (get_attr_z15_groupoftwo (insn))
14701 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14702 break;
14703 default:
14704 gcc_unreachable ();
14706 return mask;
14709 static unsigned int
14710 s390_get_unit_mask (rtx_insn *insn, int *units)
14712 unsigned int mask = 0;
14714 switch (s390_tune)
14716 case PROCESSOR_2964_Z13:
14717 *units = 4;
14718 if (get_attr_z13_unit_lsu (insn))
14719 mask |= 1 << 0;
14720 if (get_attr_z13_unit_fxa (insn))
14721 mask |= 1 << 1;
14722 if (get_attr_z13_unit_fxb (insn))
14723 mask |= 1 << 2;
14724 if (get_attr_z13_unit_vfu (insn))
14725 mask |= 1 << 3;
14726 break;
14727 case PROCESSOR_3906_Z14:
14728 *units = 4;
14729 if (get_attr_z14_unit_lsu (insn))
14730 mask |= 1 << 0;
14731 if (get_attr_z14_unit_fxa (insn))
14732 mask |= 1 << 1;
14733 if (get_attr_z14_unit_fxb (insn))
14734 mask |= 1 << 2;
14735 if (get_attr_z14_unit_vfu (insn))
14736 mask |= 1 << 3;
14737 break;
14738 case PROCESSOR_8561_Z15:
14739 *units = 4;
14740 if (get_attr_z15_unit_lsu (insn))
14741 mask |= 1 << 0;
14742 if (get_attr_z15_unit_fxa (insn))
14743 mask |= 1 << 1;
14744 if (get_attr_z15_unit_fxb (insn))
14745 mask |= 1 << 2;
14746 if (get_attr_z15_unit_vfu (insn))
14747 mask |= 1 << 3;
14748 break;
14749 default:
14750 gcc_unreachable ();
14752 return mask;
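/* In the mask returned above bit 0 stands for the LSU, bit 1 for FXA,
   bit 2 for FXB and bit 3 for the VFU; the encoding is the same for
   z13, z14 and z15.  */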
14755 static bool
14756 s390_is_fpd (rtx_insn *insn)
14758 if (insn == NULL_RTX)
14759 return false;
14761 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14762 || get_attr_z15_unit_fpd (insn);
14765 static bool
14766 s390_is_fxd (rtx_insn *insn)
14768 if (insn == NULL_RTX)
14769 return false;
14771 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14772 || get_attr_z15_unit_fxd (insn);
14775 /* Returns TRUE if INSN is a long-running instruction. */
14776 static bool
14777 s390_is_longrunning (rtx_insn *insn)
14779 if (insn == NULL_RTX)
14780 return false;
14782 return s390_is_fxd (insn) || s390_is_fpd (insn);
14786 /* Return the scheduling score for INSN. The higher the score the
14787 better. The score is calculated from the OOO scheduling attributes
14788 of INSN and the scheduling state sched_state. */
14789 static int
14790 s390_sched_score (rtx_insn *insn)
14792 unsigned int mask = s390_get_sched_attrmask (insn);
14793 int score = 0;
14795 switch (sched_state.group_state)
14797 case 0:
14798 /* Try to put insns into the first slot which would otherwise
14799 break a group. */
14800 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14801 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14802 score += 5;
14803 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14804 score += 10;
14805 break;
14806 case 1:
14807 /* Prefer not cracked insns while trying to put together a
14808 group. */
14809 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14810 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14811 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14812 score += 10;
14813 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14814 score += 5;
14815 /* If we are in a group of two already, try to schedule another
14816 group-of-two insn to avoid shortening another group. */
14817 if (sched_state.group_of_two
14818 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14819 score += 15;
14820 break;
14821 case 2:
14822 /* Prefer not cracked insns while trying to put together a
14823 group. */
14824 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14825 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14826 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14827 score += 10;
14828 /* Prefer endgroup insns in the last slot. */
14829 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14830 score += 10;
14831 /* Try to avoid group-of-two insns in the last slot as they will
14832 shorten this group as well as the next one. */
14833 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14834 score = MAX (0, score - 15);
14835 break;
14838 if (s390_tune >= PROCESSOR_2964_Z13)
14840 int units, i;
14841 unsigned unit_mask, m = 1;
14843 unit_mask = s390_get_unit_mask (insn, &units);
14844 gcc_assert (units <= MAX_SCHED_UNITS);
14846 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14847 ago the last insn of this unit type got scheduled. This is
14848 supposed to help provide a proper instruction mix to the
14849 CPU. */
14850 for (i = 0; i < units; i++, m <<= 1)
14851 if (m & unit_mask)
14852 score += (last_scheduled_unit_distance[i][sched_state.side]
14853 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14855 int other_side = 1 - sched_state.side;
14857 /* Try to delay long-running insns when side is busy. */
14858 if (s390_is_longrunning (insn))
14860 if (s390_is_fxd (insn))
14862 if (fxd_longrunning[sched_state.side]
14863 && fxd_longrunning[other_side]
14864 <= fxd_longrunning[sched_state.side])
14865 score = MAX (0, score - 10);
14867 else if (fxd_longrunning[other_side]
14868 >= fxd_longrunning[sched_state.side])
14869 score += 10;
14872 if (s390_is_fpd (insn))
14874 if (fpd_longrunning[sched_state.side]
14875 && fpd_longrunning[other_side]
14876 <= fpd_longrunning[sched_state.side])
14877 score = MAX (0, score - 10);
14879 else if (fpd_longrunning[other_side]
14880 >= fpd_longrunning[sched_state.side])
14881 score += 10;
14886 return score;
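/* For example, while filling the second slot of a group
   (group_state == 1) an insn that is neither cracked, expanded,
   group-alone nor endgroup starts with a score of 15; on z13 and later
   every required execution unit that has been idle on the current side
   for a while adds up to MAX_SCHED_MIX_SCORE on top, while long-running
   FXD/FPD insns lose points if the current side is already at least as
   busy with them as the other side.  */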
14889 /* This function is called via hook TARGET_SCHED_REORDER before
14890 issuing one insn from list READY which contains *NREADYP entries.
14891 For target z10 it reorders load instructions to avoid early load
14892 conflicts in the floating point pipeline */
14893 static int
14894 s390_sched_reorder (FILE *file, int verbose,
14895 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14897 if (s390_tune == PROCESSOR_2097_Z10
14898 && reload_completed
14899 && *nreadyp > 1)
14900 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14902 if (s390_tune >= PROCESSOR_2827_ZEC12
14903 && reload_completed
14904 && *nreadyp > 1)
14906 int i;
14907 int last_index = *nreadyp - 1;
14908 int max_index = -1;
14909 int max_score = -1;
14910 rtx_insn *tmp;
14912 /* Just move the insn with the highest score to the top (the
14913 end) of the list. A full sort is not needed since a conflict
14914 in the hazard recognition cannot happen. So the top insn in
14915 the ready list will always be taken. */
14916 for (i = last_index; i >= 0; i--)
14918 int score;
14920 if (recog_memoized (ready[i]) < 0)
14921 continue;
14923 score = s390_sched_score (ready[i]);
14924 if (score > max_score)
14926 max_score = score;
14927 max_index = i;
14931 if (max_index != -1)
14933 if (max_index != last_index)
14935 tmp = ready[max_index];
14936 ready[max_index] = ready[last_index];
14937 ready[last_index] = tmp;
14939 if (verbose > 5)
14940 fprintf (file,
14941 ";;\t\tBACKEND: move insn %d to the top of list\n",
14942 INSN_UID (ready[last_index]));
14944 else if (verbose > 5)
14945 fprintf (file,
14946 ";;\t\tBACKEND: best insn %d already on top\n",
14947 INSN_UID (ready[last_index]));
14950 if (verbose > 5)
14952 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14953 sched_state.group_state);
14955 for (i = last_index; i >= 0; i--)
14957 unsigned int sched_mask;
14958 rtx_insn *insn = ready[i];
14960 if (recog_memoized (insn) < 0)
14961 continue;
14963 sched_mask = s390_get_sched_attrmask (insn);
14964 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14965 INSN_UID (insn),
14966 s390_sched_score (insn));
14967 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14968 ((M) & sched_mask) ? #ATTR : "");
14969 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14970 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14971 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14972 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14973 #undef PRINT_SCHED_ATTR
14974 if (s390_tune >= PROCESSOR_2964_Z13)
14976 unsigned int unit_mask, m = 1;
14977 int units, j;
14979 unit_mask = s390_get_unit_mask (insn, &units);
14980 fprintf (file, "(units:");
14981 for (j = 0; j < units; j++, m <<= 1)
14982 if (m & unit_mask)
14983 fprintf (file, " u%d", j);
14984 fprintf (file, ")");
14986 fprintf (file, "\n");
14991 return s390_issue_rate ();
14995 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14996 the scheduler has issued INSN. It stores the last issued insn into
14997 last_scheduled_insn in order to make it available for
14998 s390_sched_reorder. */
14999 static int
15000 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15002 last_scheduled_insn = insn;
15004 bool ends_group = false;
15006 if (s390_tune >= PROCESSOR_2827_ZEC12
15007 && reload_completed
15008 && recog_memoized (insn) >= 0)
15010 unsigned int mask = s390_get_sched_attrmask (insn);
15012 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15013 sched_state.group_of_two = true;
15015 /* If this is a group-of-two insn, we actually ended the last group
15016 and this insn is the first one of the new group. */
15017 if (sched_state.group_state == 2 && sched_state.group_of_two)
15019 sched_state.side = sched_state.side ? 0 : 1;
15020 sched_state.group_state = 0;
15023 /* Longrunning and side bookkeeping. */
15024 for (int i = 0; i < 2; i++)
15026 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
15027 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
15030 unsigned latency = insn_default_latency (insn);
15031 if (s390_is_longrunning (insn))
15033 if (s390_is_fxd (insn))
15034 fxd_longrunning[sched_state.side] = latency;
15035 else
15036 fpd_longrunning[sched_state.side] = latency;
15039 if (s390_tune >= PROCESSOR_2964_Z13)
15041 int units, i;
15042 unsigned unit_mask, m = 1;
15044 unit_mask = s390_get_unit_mask (insn, &units);
15045 gcc_assert (units <= MAX_SCHED_UNITS);
15047 for (i = 0; i < units; i++, m <<= 1)
15048 if (m & unit_mask)
15049 last_scheduled_unit_distance[i][sched_state.side] = 0;
15050 else if (last_scheduled_unit_distance[i][sched_state.side]
15051 < MAX_SCHED_MIX_DISTANCE)
15052 last_scheduled_unit_distance[i][sched_state.side]++;
15055 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15056 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15057 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
15058 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15060 sched_state.group_state = 0;
15061 ends_group = true;
15063 else
15065 switch (sched_state.group_state)
15067 case 0:
15068 sched_state.group_state++;
15069 break;
15070 case 1:
15071 sched_state.group_state++;
15072 if (sched_state.group_of_two)
15074 sched_state.group_state = 0;
15075 ends_group = true;
15077 break;
15078 case 2:
15079 sched_state.group_state++;
15080 ends_group = true;
15081 break;
15085 if (verbose > 5)
15087 unsigned int sched_mask;
15089 sched_mask = s390_get_sched_attrmask (insn);
15091 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15092 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15093 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15094 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15095 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15096 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15097 #undef PRINT_SCHED_ATTR
15099 if (s390_tune >= PROCESSOR_2964_Z13)
15101 unsigned int unit_mask, m = 1;
15102 int units, j;
15104 unit_mask = s390_get_unit_mask (insn, &units);
15105 fprintf (file, "(units:");
15106 for (j = 0; j < units; j++, m <<= 1)
15107 if (m & unit_mask)
15108 fprintf (file, " %d", j);
15109 fprintf (file, ")");
15111 fprintf (file, " sched state: %d\n", sched_state.group_state);
15113 if (s390_tune >= PROCESSOR_2964_Z13)
15115 int units, j;
15117 s390_get_unit_mask (insn, &units);
15119 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15120 for (j = 0; j < units; j++)
15121 fprintf (file, "%d:%d ", j,
15122 last_scheduled_unit_distance[j][sched_state.side]);
15123 fprintf (file, "\n");
15127 /* If this insn ended a group, the next will be on the other side. */
15128 if (ends_group)
15130 sched_state.group_state = 0;
15131 sched_state.side = sched_state.side ? 0 : 1;
15132 sched_state.group_of_two = false;
15136 if (GET_CODE (PATTERN (insn)) != USE
15137 && GET_CODE (PATTERN (insn)) != CLOBBER)
15138 return more - 1;
15139 else
15140 return more;
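/* The bookkeeping above is a small state machine: group_state counts the
   insns placed into the current dispatch group, wraps back to 0 when the
   group is complete, and every completed group flips sched_state.side;
   cracked, expanded, group-alone and endgroup insns close the group
   immediately, while a group-of-two closes it after its second member.  */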
15143 static void
15144 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15145 int verbose ATTRIBUTE_UNUSED,
15146 int max_ready ATTRIBUTE_UNUSED)
15148 /* If the next basic block is most likely entered via a fallthru edge
15149 we keep the last sched state. Otherwise we start a new group.
15150 The scheduler traverses basic blocks in "instruction stream" ordering
15151 so if we see a fallthru edge here, sched_state will be of its
15152 source block.
15154 current_sched_info->prev_head is the insn before the first insn of the
15155 block of insns to be scheduled.
15157 rtx_insn *insn = current_sched_info->prev_head
15158 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15159 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15160 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15162 last_scheduled_insn = NULL;
15163 memset (last_scheduled_unit_distance, 0,
15164 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15165 sched_state.group_state = 0;
15166 sched_state.group_of_two = false;
15170 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15171 the number of times struct loop *loop should be unrolled if tuned for cpus with
15172 a built-in stride prefetcher.
15173 The loop is analyzed for memory accesses by calling check_dpu for
15174 each rtx of the loop. Depending on the loop_depth and the number of
15175 memory accesses, a new number <= nunroll is returned to improve the
15176 behavior of the hardware prefetch unit. */
15177 static unsigned
15178 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15180 basic_block *bbs;
15181 rtx_insn *insn;
15182 unsigned i;
15183 unsigned mem_count = 0;
15185 if (s390_tune < PROCESSOR_2097_Z10)
15186 return nunroll;
15188 /* Count the number of memory references within the loop body. */
15189 bbs = get_loop_body (loop);
15190 subrtx_iterator::array_type array;
15191 for (i = 0; i < loop->num_nodes; i++)
15192 FOR_BB_INSNS (bbs[i], insn)
15193 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15195 rtx set;
15197 /* The runtime of small loops with memory block operations
15198 will be determined by the memory operation. Doing
15199 unrolling doesn't help here. Measurements to confirm
15200 this were only done on recent CPU levels. So better do
15201 not change anything for older CPUs. */
15202 if (s390_tune >= PROCESSOR_2964_Z13
15203 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15204 && ((set = single_set (insn)) != NULL_RTX)
15205 && ((GET_MODE (SET_DEST (set)) == BLKmode
15206 && (GET_MODE (SET_SRC (set)) == BLKmode
15207 || SET_SRC (set) == const0_rtx))
15208 || (GET_CODE (SET_SRC (set)) == COMPARE
15209 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15210 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15211 return 1;
15213 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15214 if (MEM_P (*iter))
15215 mem_count += 1;
15217 free (bbs);
15219 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15220 if (mem_count == 0)
15221 return nunroll;
15223 switch (loop_depth(loop))
15225 case 1:
15226 return MIN (nunroll, 28 / mem_count);
15227 case 2:
15228 return MIN (nunroll, 22 / mem_count);
15229 default:
15230 return MIN (nunroll, 16 / mem_count);
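/* E.g. a loop at nesting depth 1 containing four MEM references gets its
   unroll factor capped at 28 / 4 = 7, a depth-2 loop at 22 / 4 = 5 and
   anything deeper at 16 / 4 = 4, while small loops dominated by a
   BLKmode block move, clear or compare are not unrolled at all on z13
   and later.  */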
15234 /* Restore the current options. This is a hook function and also called
15235 internally. */
15237 static void
15238 s390_function_specific_restore (struct gcc_options *opts,
15239 struct gcc_options */* opts_set */,
15240 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15242 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15245 static void
15246 s390_default_align (struct gcc_options *opts)
15248 /* Set the default function alignment to 16 in order to get rid of
15249 some unwanted performance effects. */
15250 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15251 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15252 opts->x_str_align_functions = "16";
15255 static void
15256 s390_override_options_after_change (void)
15258 s390_default_align (&global_options);
15261 static void
15262 s390_option_override_internal (struct gcc_options *opts,
15263 struct gcc_options *opts_set)
15265 /* Architecture mode defaults according to ABI. */
15266 if (!(opts_set->x_target_flags & MASK_ZARCH))
15268 if (TARGET_64BIT)
15269 opts->x_target_flags |= MASK_ZARCH;
15270 else
15271 opts->x_target_flags &= ~MASK_ZARCH;
15274 /* Set the march default in case it hasn't been specified on cmdline. */
15275 if (!opts_set->x_s390_arch)
15276 opts->x_s390_arch = PROCESSOR_2064_Z900;
15278 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15280 /* Determine processor to tune for. */
15281 if (!opts_set->x_s390_tune)
15282 opts->x_s390_tune = opts->x_s390_arch;
15284 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15286 /* Sanity checks. */
15287 if (opts->x_s390_arch == PROCESSOR_NATIVE
15288 || opts->x_s390_tune == PROCESSOR_NATIVE)
15289 gcc_unreachable ();
15290 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15291 error ("64-bit ABI not supported in ESA/390 mode");
15293 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15294 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15295 || opts->x_s390_function_return == indirect_branch_thunk_inline
15296 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15297 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15298 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15300 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15302 if (!opts_set->x_s390_indirect_branch_call)
15303 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15305 if (!opts_set->x_s390_indirect_branch_jump)
15306 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15309 if (opts->x_s390_function_return != indirect_branch_keep)
15311 if (!opts_set->x_s390_function_return_reg)
15312 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15314 if (!opts_set->x_s390_function_return_mem)
15315 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15318 /* Enable hardware transactions if available and not explicitly
15319 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15320 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15322 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15323 opts->x_target_flags |= MASK_OPT_HTM;
15324 else
15325 opts->x_target_flags &= ~MASK_OPT_HTM;
15328 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15330 if (TARGET_OPT_VX_P (opts->x_target_flags))
15332 if (!TARGET_CPU_VX_P (opts))
15333 error ("hardware vector support not available on %s",
15334 processor_table[(int)opts->x_s390_arch].name);
15335 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15336 error ("hardware vector support not available with "
15337 "%<-msoft-float%>");
15340 else
15342 if (TARGET_CPU_VX_P (opts))
15343 /* Enable vector support if available and not explicitly disabled
15344 by user. E.g. with -m31 -march=z13 -mzarch */
15345 opts->x_target_flags |= MASK_OPT_VX;
15346 else
15347 opts->x_target_flags &= ~MASK_OPT_VX;
15350 /* Use hardware DFP if available and not explicitly disabled by
15351 user. E.g. with -m31 -march=z10 -mzarch */
15352 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15354 if (TARGET_DFP_P (opts))
15355 opts->x_target_flags |= MASK_HARD_DFP;
15356 else
15357 opts->x_target_flags &= ~MASK_HARD_DFP;
15360 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15362 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15364 if (!TARGET_CPU_DFP_P (opts))
15365 error ("hardware decimal floating point instructions"
15366 " not available on %s",
15367 processor_table[(int)opts->x_s390_arch].name);
15368 if (!TARGET_ZARCH_P (opts->x_target_flags))
15369 error ("hardware decimal floating point instructions"
15370 " not available in ESA/390 mode");
15372 else
15373 opts->x_target_flags &= ~MASK_HARD_DFP;
15376 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15377 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15379 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15380 && TARGET_HARD_DFP_P (opts->x_target_flags))
15381 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15382 "%<-msoft-float%>");
15384 opts->x_target_flags &= ~MASK_HARD_DFP;
15387 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15388 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15389 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15390 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15391 "supported in combination");
15393 if (opts->x_s390_stack_size)
15395 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15396 error ("stack size must be greater than the stack guard value");
15397 else if (opts->x_s390_stack_size > 1 << 16)
15398 error ("stack size must not be greater than 64k");
15400 else if (opts->x_s390_stack_guard)
15401 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15403 /* Our implementation of the stack probe requires the probe interval
15404 to be used as displacement in an address operand. The maximum
15405 probe interval currently is 64k. This would exceed short
15406 displacements. Trim that value down to 4k if that happens. This
15407 might result in too many probes being generated only on the
15408 oldest supported machine level z900. */
15409 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15410 param_stack_clash_protection_probe_interval = 12;
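/* The parameter holds the log2 of the probe interval, so the check above
   asks whether 1 << interval (up to 64k) still fits into a displacement,
   and the fallback value of 12 corresponds to the 4k probes mentioned
   above.  */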
15412 #if TARGET_TPF != 0
15413 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15414 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15416 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15417 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15419 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15420 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15422 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15423 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15425 if (s390_tpf_trace_skip)
15427 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15428 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15430 #endif
15432 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15433 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15434 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15435 #endif
15437 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15439 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15440 100);
15441 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15442 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15443 2000);
15444 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15445 64);
15448 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15449 256);
15450 /* values for loop prefetching */
15451 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15452 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15453 /* s390 has more than 2 levels and the size is much larger. Since
15455 we are always running virtualized, assume that we only get a small
15455 part of the caches above l1. */
15456 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15457 SET_OPTION_IF_UNSET (opts, opts_set,
15458 param_prefetch_min_insn_to_mem_ratio, 2);
15459 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15461 /* Use the alternative scheduling-pressure algorithm by default. */
15462 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15463 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15465 /* Use aggressive inlining parameters. */
15466 if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15468 SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15469 SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15472 /* Set the default alignment. */
15473 s390_default_align (opts);
15475 /* Call target specific restore function to do post-init work. At the moment,
15476 this just sets opts->x_s390_cost_pointer. */
15477 s390_function_specific_restore (opts, opts_set, NULL);
15479 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15480 because 31-bit PLT stubs assume that %r12 contains the GOT address, which is
15481 not the case when the code runs before the prologue. */
15482 if (opts->x_flag_fentry && !TARGET_64BIT)
15483 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15486 static void
15487 s390_option_override (void)
15489 unsigned int i;
15490 cl_deferred_option *opt;
15491 vec<cl_deferred_option> *v =
15492 (vec<cl_deferred_option> *) s390_deferred_options;
15494 if (v)
15495 FOR_EACH_VEC_ELT (*v, i, opt)
15497 switch (opt->opt_index)
15499 case OPT_mhotpatch_:
15501 int val1;
15502 int val2;
15503 char *s = strtok (ASTRDUP (opt->arg), ",");
15504 char *t = strtok (NULL, "\0");
15506 if (t != NULL)
15508 val1 = integral_argument (s);
15509 val2 = integral_argument (t);
15511 else
15513 val1 = -1;
15514 val2 = -1;
15516 if (val1 == -1 || val2 == -1)
15518 /* argument is not a plain number */
15519 error ("arguments to %qs should be non-negative integers",
15520 "-mhotpatch=n,m");
15521 break;
15523 else if (val1 > s390_hotpatch_hw_max
15524 || val2 > s390_hotpatch_hw_max)
15526 error ("argument to %qs is too large (max. %d)",
15527 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15528 break;
15530 s390_hotpatch_hw_before_label = val1;
15531 s390_hotpatch_hw_after_label = val2;
15532 break;
15534 default:
15535 gcc_unreachable ();
15539 /* Set up function hooks. */
15540 init_machine_status = s390_init_machine_status;
15542 s390_option_override_internal (&global_options, &global_options_set);
15544 /* Save the initial options in case the user does function specific
15545 options. */
15546 target_option_default_node
15547 = build_target_option_node (&global_options, &global_options_set);
15548 target_option_current_node = target_option_default_node;
15550 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15551 requires the arch flags to be evaluated already. Since prefetching
15552 is beneficial on s390, we enable it if available. */
15553 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15554 flag_prefetch_loop_arrays = 1;
15556 if (!s390_pic_data_is_text_relative && !flag_pic)
15557 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15558 "%<-fpic%>/%<-fPIC%>");
15560 if (TARGET_TPF)
15562 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15563 debuggers do not yet support DWARF 3/4. */
15564 if (!global_options_set.x_dwarf_strict)
15565 dwarf_strict = 1;
15566 if (!global_options_set.x_dwarf_version)
15567 dwarf_version = 2;
15571 #if S390_USE_TARGET_ATTRIBUTE
15572 /* Inner function to process the attribute((target(...))), take an argument and
15573 set the current options from the argument. If we have a list, recursively go
15574 over the list. */
15576 static bool
15577 s390_valid_target_attribute_inner_p (tree args,
15578 struct gcc_options *opts,
15579 struct gcc_options *new_opts_set,
15580 bool force_pragma)
15582 char *next_optstr;
15583 bool ret = true;
15585 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15586 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15587 static const struct
15589 const char *string;
15590 size_t len;
15591 int opt;
15592 int has_arg;
15593 int only_as_pragma;
15594 } attrs[] = {
15595 /* enum options */
15596 S390_ATTRIB ("arch=", OPT_march_, 1),
15597 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15598 /* uinteger options */
15599 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15600 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15601 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15602 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15603 /* flag options */
15604 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15605 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15606 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15607 S390_ATTRIB ("htm", OPT_mhtm, 0),
15608 S390_ATTRIB ("vx", OPT_mvx, 0),
15609 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15610 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15611 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15612 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15613 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15614 /* boolean options */
15615 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15617 #undef S390_ATTRIB
15618 #undef S390_PRAGMA
15620 /* If this is a list, recurse to get the options. */
15621 if (TREE_CODE (args) == TREE_LIST)
15623 bool ret = true;
15624 int num_pragma_values;
15625 int i;
15627 /* Note: attribs.c:decl_attributes prepends the values from
15628 current_target_pragma to the list of target attributes. To determine
15629 whether we're looking at a value of the attribute or the pragma we
15630 assume that the first [list_length (current_target_pragma)] values in
15631 the list are the values from the pragma. */
15632 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15633 ? list_length (current_target_pragma) : 0;
15634 for (i = 0; args; args = TREE_CHAIN (args), i++)
15636 bool is_pragma;
15638 is_pragma = (force_pragma || i < num_pragma_values);
15639 if (TREE_VALUE (args)
15640 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15641 opts, new_opts_set,
15642 is_pragma))
15644 ret = false;
15647 return ret;
15650 else if (TREE_CODE (args) != STRING_CST)
15652 error ("attribute %<target%> argument not a string");
15653 return false;
15656 /* Handle multiple arguments separated by commas. */
15657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15659 while (next_optstr && *next_optstr != '\0')
15661 char *p = next_optstr;
15662 char *orig_p = p;
15663 char *comma = strchr (next_optstr, ',');
15664 size_t len, opt_len;
15665 int opt;
15666 bool opt_set_p;
15667 char ch;
15668 unsigned i;
15669 int mask = 0;
15670 enum cl_var_type var_type;
15671 bool found;
15673 if (comma)
15675 *comma = '\0';
15676 len = comma - next_optstr;
15677 next_optstr = comma + 1;
15679 else
15681 len = strlen (p);
15682 next_optstr = NULL;
15685 /* Recognize no-xxx. */
15686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15688 opt_set_p = false;
15689 p += 3;
15690 len -= 3;
15692 else
15693 opt_set_p = true;
15695 /* Find the option. */
15696 ch = *p;
15697 found = false;
15698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15700 opt_len = attrs[i].len;
15701 if (ch == attrs[i].string[0]
15702 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15703 && memcmp (p, attrs[i].string, opt_len) == 0)
15705 opt = attrs[i].opt;
15706 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15707 continue;
15708 mask = cl_options[opt].var_value;
15709 var_type = cl_options[opt].var_type;
15710 found = true;
15711 break;
15715 /* Process the option. */
15716 if (!found)
15718 error ("attribute(target(\"%s\")) is unknown", orig_p);
15719 return false;
15721 else if (attrs[i].only_as_pragma && !force_pragma)
15723 /* Value is not allowed for the target attribute. */
15724 error ("value %qs is not supported by attribute %<target%>",
15725 attrs[i].string);
15726 return false;
15729 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15731 if (var_type == CLVC_BIT_CLEAR)
15732 opt_set_p = !opt_set_p;
15734 if (opt_set_p)
15735 opts->x_target_flags |= mask;
15736 else
15737 opts->x_target_flags &= ~mask;
15738 new_opts_set->x_target_flags |= mask;
15741 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15743 int value;
15745 if (cl_options[opt].cl_uinteger)
15747 /* Unsigned integer argument. Code based on the function
15748 decode_cmdline_option () in opts-common.c. */
15749 value = integral_argument (p + opt_len);
15751 else
15752 value = (opt_set_p) ? 1 : 0;
15754 if (value != -1)
15756 struct cl_decoded_option decoded;
15758 /* Value range check; only implemented for numeric and boolean
15759 options at the moment. */
15760 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15761 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15762 set_option (opts, new_opts_set, opt, value,
15763 p + opt_len, DK_UNSPECIFIED, input_location,
15764 global_dc);
15766 else
15768 error ("attribute(target(\"%s\")) is unknown", orig_p);
15769 ret = false;
15773 else if (cl_options[opt].var_type == CLVC_ENUM)
15775 bool arg_ok;
15776 int value;
15778 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15779 if (arg_ok)
15780 set_option (opts, new_opts_set, opt, value,
15781 p + opt_len, DK_UNSPECIFIED, input_location,
15782 global_dc);
15783 else
15785 error ("attribute(target(\"%s\")) is unknown", orig_p);
15786 ret = false;
15790 else
15791 gcc_unreachable ();
15793 return ret;
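/* An illustrative usage sketch of the strings accepted by the parser above:
   comma-separated enum, uinteger and flag options, with a "no-" prefix
   negating a flag option.  The particular values are only examples.

     __attribute__ ((target ("arch=z13,no-vx,stack-guard=128")))
     static void
     tuned_fn (void)
     {
     }
*/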
15796 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15798 tree
15799 s390_valid_target_attribute_tree (tree args,
15800 struct gcc_options *opts,
15801 const struct gcc_options *opts_set,
15802 bool force_pragma)
15804 tree t = NULL_TREE;
15805 struct gcc_options new_opts_set;
15807 memset (&new_opts_set, 0, sizeof (new_opts_set));
15809 /* Process each of the options on the chain. */
15810 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15811 force_pragma))
15812 return error_mark_node;
15814 /* If some option was set (even if it has not changed), rerun
15815 s390_option_override_internal, and then save the options away. */
15816 if (new_opts_set.x_target_flags
15817 || new_opts_set.x_s390_arch
15818 || new_opts_set.x_s390_tune
15819 || new_opts_set.x_s390_stack_guard
15820 || new_opts_set.x_s390_stack_size
15821 || new_opts_set.x_s390_branch_cost
15822 || new_opts_set.x_s390_warn_framesize
15823 || new_opts_set.x_s390_warn_dynamicstack_p)
15825 const unsigned char *src = (const unsigned char *)opts_set;
15826 unsigned char *dest = (unsigned char *)&new_opts_set;
15827 unsigned int i;
15829 /* Merge the original option flags into the new ones. */
15830 for (i = 0; i < sizeof(*opts_set); i++)
15831 dest[i] |= src[i];
15833 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15834 s390_option_override_internal (opts, &new_opts_set);
15835 /* Save the current options unless we are validating options for
15836 #pragma. */
15837 t = build_target_option_node (opts, &new_opts_set);
15839 return t;
15842 /* Hook to validate attribute((target("string"))). */
15844 static bool
15845 s390_valid_target_attribute_p (tree fndecl,
15846 tree ARG_UNUSED (name),
15847 tree args,
15848 int ARG_UNUSED (flags))
15850 struct gcc_options func_options, func_options_set;
15851 tree new_target, new_optimize;
15852 bool ret = true;
15854 /* attribute((target("default"))) does nothing, beyond
15855 affecting multi-versioning. */
15856 if (TREE_VALUE (args)
15857 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15858 && TREE_CHAIN (args) == NULL_TREE
15859 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15860 return true;
15862 tree old_optimize
15863 = build_optimization_node (&global_options, &global_options_set);
15865 /* Get the optimization options of the current function. */
15866 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15868 if (!func_optimize)
15869 func_optimize = old_optimize;
15871 /* Init func_options. */
15872 memset (&func_options, 0, sizeof (func_options));
15873 init_options_struct (&func_options, NULL);
15874 lang_hooks.init_options_struct (&func_options);
15875 memset (&func_options_set, 0, sizeof (func_options_set));
15877 cl_optimization_restore (&func_options, &func_options_set,
15878 TREE_OPTIMIZATION (func_optimize));
15880 /* Initialize func_options to the default before its target options can
15881 be set. */
15882 cl_target_option_restore (&func_options, &func_options_set,
15883 TREE_TARGET_OPTION (target_option_default_node));
15885 new_target = s390_valid_target_attribute_tree (args, &func_options,
15886 &global_options_set,
15887 (args ==
15888 current_target_pragma));
15889 new_optimize = build_optimization_node (&func_options, &func_options_set);
15890 if (new_target == error_mark_node)
15891 ret = false;
15892 else if (fndecl && new_target)
15894 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15895 if (old_optimize != new_optimize)
15896 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15898 return ret;
15901 /* Hook to determine if one function can safely inline another. */
15903 static bool
15904 s390_can_inline_p (tree caller, tree callee)
15906 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15907 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15909 if (!callee_tree)
15910 callee_tree = target_option_default_node;
15911 if (!caller_tree)
15912 caller_tree = target_option_default_node;
15913 if (callee_tree == caller_tree)
15914 return true;
15916 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15917 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15918 bool ret = true;
15920 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15921 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15922 ret = false;
15924 /* Don't inline functions to be compiled for a more recent arch into a
15925 function for an older arch. */
15926 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15927 ret = false;
15929 /* Inlining a hard float function into a soft float function is only
15930 allowed if the hard float function doesn't actually make use of
15931 floating point.
15933 We are called from FEs for multi-versioning call optimization, so
15934 beware that ipa_fn_summaries may not be available. */
15935 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15936 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15937 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15938 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15939 && (! ipa_fn_summaries
15940 || ipa_fn_summaries->get
15941 (cgraph_node::get (callee))->fp_expressions))
15942 ret = false;
15944 return ret;
15946 #endif
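/* An illustrative sketch of the arch-ordering rule in s390_can_inline_p
   above: a callee compiled for a newer arch is not inlined into a caller
   compiled for an older one (the arch names are only examples).

     __attribute__ ((target ("arch=z14")))
     static int callee (int x) { return x * 2; }

     __attribute__ ((target ("arch=z13")))
     int caller (int x) { return callee (x); }   // callee stays out of line
*/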
15948 /* Set VAL to the correct enum value according to the indirect-branch or
15949 function-return attribute in ATTR. */
15951 static inline void
15952 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15954 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15955 if (strcmp (str, "keep") == 0)
15956 *val = indirect_branch_keep;
15957 else if (strcmp (str, "thunk") == 0)
15958 *val = indirect_branch_thunk;
15959 else if (strcmp (str, "thunk-inline") == 0)
15960 *val = indirect_branch_thunk_inline;
15961 else if (strcmp (str, "thunk-extern") == 0)
15962 *val = indirect_branch_thunk_extern;
15965 /* Record in cfun->machine the settings for -mindirect-branch* and
15966 -mfunction-return*, taken from either the command line or the
15967 function attributes. */
15969 static void
15970 s390_indirect_branch_settings (tree fndecl)
15972 tree attr;
15974 if (!fndecl)
15975 return;
15977 /* Initialize with the cmdline options and let the attributes
15978 override it. */
15979 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15980 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15982 cfun->machine->function_return_reg = s390_function_return_reg;
15983 cfun->machine->function_return_mem = s390_function_return_mem;
15985 if ((attr = lookup_attribute ("indirect_branch",
15986 DECL_ATTRIBUTES (fndecl))))
15988 s390_indirect_branch_attrvalue (attr,
15989 &cfun->machine->indirect_branch_jump);
15990 s390_indirect_branch_attrvalue (attr,
15991 &cfun->machine->indirect_branch_call);
15994 if ((attr = lookup_attribute ("indirect_branch_jump",
15995 DECL_ATTRIBUTES (fndecl))))
15996 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15998 if ((attr = lookup_attribute ("indirect_branch_call",
15999 DECL_ATTRIBUTES (fndecl))))
16000 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16002 if ((attr = lookup_attribute ("function_return",
16003 DECL_ATTRIBUTES (fndecl))))
16005 s390_indirect_branch_attrvalue (attr,
16006 &cfun->machine->function_return_reg);
16007 s390_indirect_branch_attrvalue (attr,
16008 &cfun->machine->function_return_mem);
16011 if ((attr = lookup_attribute ("function_return_reg",
16012 DECL_ATTRIBUTES (fndecl))))
16013 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16015 if ((attr = lookup_attribute ("function_return_mem",
16016 DECL_ATTRIBUTES (fndecl))))
16017 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
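/* An illustrative sketch of overriding the -mindirect-branch* and
   -mfunction-return* defaults per function with the attributes handled
   above (accepted values are keep, thunk, thunk-inline and thunk-extern).

     __attribute__ ((indirect_branch ("thunk"), function_return ("keep")))
     void
     dispatch (void (*fn) (void))
     {
       fn ();
     }
*/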
16020 #if S390_USE_TARGET_ATTRIBUTE
16021 /* Restore target globals from NEW_TREE and invalidate the
16022 s390_previous_fndecl cache. */
16024 void
16025 s390_activate_target_options (tree new_tree)
16027 cl_target_option_restore (&global_options, &global_options_set,
16028 TREE_TARGET_OPTION (new_tree));
16029 if (TREE_TARGET_GLOBALS (new_tree))
16030 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16031 else if (new_tree == target_option_default_node)
16032 restore_target_globals (&default_target_globals);
16033 else
16034 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16035 s390_previous_fndecl = NULL_TREE;
16037 #endif
16039 /* Establish appropriate back-end context for processing the function
16040 FNDECL. The argument might be NULL to indicate processing at top
16041 level, outside of any function scope. */
16042 static void
16043 s390_set_current_function (tree fndecl)
16045 #if S390_USE_TARGET_ATTRIBUTE
16046 /* Only change the context if the function changes. This hook is called
16047 several times in the course of compiling a function, and we don't want to
16048 slow things down too much or call target_reinit when it isn't safe. */
16049 if (fndecl == s390_previous_fndecl)
16051 s390_indirect_branch_settings (fndecl);
16052 return;
16055 tree old_tree;
16056 if (s390_previous_fndecl == NULL_TREE)
16057 old_tree = target_option_current_node;
16058 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16059 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16060 else
16061 old_tree = target_option_default_node;
16063 if (fndecl == NULL_TREE)
16065 if (old_tree != target_option_current_node)
16066 s390_activate_target_options (target_option_current_node);
16067 return;
16070 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16071 if (new_tree == NULL_TREE)
16072 new_tree = target_option_default_node;
16074 if (old_tree != new_tree)
16075 s390_activate_target_options (new_tree);
16076 s390_previous_fndecl = fndecl;
16077 #endif
16078 s390_indirect_branch_settings (fndecl);
16081 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16083 static bool
16084 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16085 unsigned int align ATTRIBUTE_UNUSED,
16086 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16087 bool speed_p ATTRIBUTE_UNUSED)
16089 return (size == 1 || size == 2
16090 || size == 4 || (TARGET_ZARCH && size == 8));
16093 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16095 static void
16096 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16098 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16099 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16100 tree call_efpc = build_call_expr (efpc, 0);
16101 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16103 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16104 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16105 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16106 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16107 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16108 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16110 /* Generates the equivalent of feholdexcept (&fenv_var)
16112 fenv_var = __builtin_s390_efpc ();
16113 __builtin_s390_sfpc (fenv_var & mask) */
16114 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16115 NULL_TREE, NULL_TREE);
16116 tree new_fpc
16117 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16118 build_int_cst (unsigned_type_node,
16119 ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16120 | FPC_EXCEPTION_MASK)));
16121 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16122 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16124 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16126 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16127 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16128 build_int_cst (unsigned_type_node,
16129 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16130 *clear = build_call_expr (sfpc, 1, new_fpc);
16132 /* Generates the equivalent of feupdateenv (fenv_var)
16134 old_fpc = __builtin_s390_efpc ();
16135 __builtin_s390_sfpc (fenv_var);
16136 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16138 old_fpc = create_tmp_var_raw (unsigned_type_node);
16139 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16140 NULL_TREE, NULL_TREE);
16142 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16144 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16145 build_int_cst (unsigned_type_node,
16146 FPC_FLAGS_MASK));
16147 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16148 build_int_cst (unsigned_type_node,
16149 FPC_FLAGS_SHIFT));
16150 tree atomic_feraiseexcept
16151 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16152 raise_old_except = build_call_expr (atomic_feraiseexcept,
16153 1, raise_old_except);
16155 *update = build2 (COMPOUND_EXPR, void_type_node,
16156 build2 (COMPOUND_EXPR, void_type_node,
16157 store_old_fpc, set_new_fpc),
16158 raise_old_except);
16160 #undef FPC_EXCEPTION_MASK
16161 #undef FPC_FLAGS_MASK
16162 #undef FPC_DXC_MASK
16163 #undef FPC_EXCEPTION_MASK_SHIFT
16164 #undef FPC_FLAGS_SHIFT
16165 #undef FPC_DXC_SHIFT
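/* The hold/clear/update sequences built above are used when expanding C11
   atomic compound assignments on floating-point operands, e.g. (an
   illustrative sketch):

     #include <stdatomic.h>

     _Atomic double total;

     void
     add_sample (double x)
     {
       total += x;   // compare-and-swap loop: FP exceptions raised by
                     // failed attempts are discarded (hold/clear); only
                     // the successful attempt's exceptions are raised
                     // afterwards (update).
     }
*/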
16168 /* Return the vector mode to be used for inner mode MODE when doing
16169 vectorization. */
16170 static machine_mode
16171 s390_preferred_simd_mode (scalar_mode mode)
16173 if (TARGET_VXE)
16174 switch (mode)
16176 case E_SFmode:
16177 return V4SFmode;
16178 default:;
16181 if (TARGET_VX)
16182 switch (mode)
16184 case E_DFmode:
16185 return V2DFmode;
16186 case E_DImode:
16187 return V2DImode;
16188 case E_SImode:
16189 return V4SImode;
16190 case E_HImode:
16191 return V8HImode;
16192 case E_QImode:
16193 return V16QImode;
16194 default:;
16196 return word_mode;
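/* An illustrative sketch: with -O3 -march=z13 -mvx the loop below is
   vectorized using V4SImode, i.e. four 32-bit ints per 16-byte vector
   register (V4SFmode for float additionally needs the vector enhancements
   facility, TARGET_VXE).

     void
     add_arrays (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] += b[i];
     }
*/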
16199 /* Our hardware does not require vectors to be strictly aligned. */
16200 static bool
16201 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16202 const_tree type ATTRIBUTE_UNUSED,
16203 int misalignment ATTRIBUTE_UNUSED,
16204 bool is_packed ATTRIBUTE_UNUSED)
16206 if (TARGET_VX)
16207 return true;
16209 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16210 is_packed);
16213 /* The vector ABI requires vector types to be aligned on an 8 byte
16214 boundary (our stack alignment). However, we allow this to be
16215 overridden by the user, although doing so definitely breaks the ABI. */
16216 static HOST_WIDE_INT
16217 s390_vector_alignment (const_tree type)
16219 tree size = TYPE_SIZE (type);
16221 if (!TARGET_VX_ABI)
16222 return default_vector_alignment (type);
16224 if (TYPE_USER_ALIGN (type))
16225 return TYPE_ALIGN (type);
16227 if (tree_fits_uhwi_p (size)
16228 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16229 return tree_to_uhwi (size);
16231 return BIGGEST_ALIGNMENT;
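/* An illustrative sketch of the rule above under the vector ABI
   (BIGGEST_ALIGNMENT is 64 bits on s390):

     typedef int   v4si __attribute__ ((vector_size (16)));
     typedef short v2hi __attribute__ ((vector_size (4)));

     // _Alignof (v4si) == 8   (capped at the 8-byte stack alignment)
     // _Alignof (v2hi) == 4   (smaller vectors keep their natural size)

     typedef int v4si_a16 __attribute__ ((vector_size (16), aligned (16)));
     // a user-specified alignment is honoured, although it breaks the ABI
*/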
16234 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
16235 the LARL instruction. */
16237 static HOST_WIDE_INT
16238 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16240 return MAX (align, 16);
16243 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16244 /* Implement TARGET_ASM_FILE_START. */
16245 static void
16246 s390_asm_file_start (void)
16248 default_file_start ();
16249 s390_asm_output_machine_for_arch (asm_out_file);
16251 #endif
16253 /* Implement TARGET_ASM_FILE_END. */
16254 static void
16255 s390_asm_file_end (void)
16257 #ifdef HAVE_AS_GNU_ATTRIBUTE
16258 varpool_node *vnode;
16259 cgraph_node *cnode;
16261 FOR_EACH_VARIABLE (vnode)
16262 if (TREE_PUBLIC (vnode->decl))
16263 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16265 FOR_EACH_FUNCTION (cnode)
16266 if (TREE_PUBLIC (cnode->decl))
16267 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16270 if (s390_vector_abi != 0)
16271 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16272 s390_vector_abi);
16273 #endif
16274 file_end_indicate_exec_stack ();
16276 if (flag_split_stack)
16277 file_end_indicate_split_stack ();
16280 /* Return true if TYPE is a vector bool type. */
16281 static inline bool
16282 s390_vector_bool_type_p (const_tree type)
16284 return TYPE_VECTOR_OPAQUE (type);
16287 /* Return the diagnostic message string if the binary operation OP is
16288 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16289 static const char*
16290 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16292 bool bool1_p, bool2_p;
16293 bool plusminus_p;
16294 bool muldiv_p;
16295 bool compare_p;
16296 machine_mode mode1, mode2;
16298 if (!TARGET_ZVECTOR)
16299 return NULL;
16301 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16302 return NULL;
16304 bool1_p = s390_vector_bool_type_p (type1);
16305 bool2_p = s390_vector_bool_type_p (type2);
16307 /* Mixing signed and unsigned types is forbidden for all
16308 operators. */
16309 if (!bool1_p && !bool2_p
16310 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16311 return N_("types differ in signedness");
16313 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16314 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16315 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16316 || op == ROUND_DIV_EXPR);
16317 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16318 || op == EQ_EXPR || op == NE_EXPR);
16320 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16321 return N_("binary operator does not support two vector bool operands");
16323 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16324 return N_("binary operator does not support vector bool operand");
16326 mode1 = TYPE_MODE (type1);
16327 mode2 = TYPE_MODE (type2);
16329 if (bool1_p != bool2_p && plusminus_p
16330 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16331 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16332 return N_("binary operator does not support mixing vector "
16333 "bool with floating point vector operands");
16335 return NULL;
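/* Illustrative examples of operand combinations rejected above when
   compiling with -mzvector (using the vector/bool keywords of the
   zvector language extension):

     vector signed int   a;
     vector unsigned int b;
     vector bool int     m1, m2;

     a + b;    // rejected: types differ in signedness
     m1 * m2;  // rejected: two vector bool operands
     a == m1;  // rejected: vector bool operand in a comparison
*/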
16338 /* Implement TARGET_C_EXCESS_PRECISION.
16340 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16341 double on s390, causing operations on float_t to operate in a higher
16342 precision than is necessary. However, it is not the case that SFmode
16343 operations have implicit excess precision, and we generate better
16344 code if we let the compiler know no implicit extra precision is added.
16346 That means when we are compiling with -fexcess-precision=fast, the value
16347 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16348 float_t (though they would be correct for -fexcess-precision=standard).
16350 A complete fix would modify glibc to remove the unnecessary typedef
16351 of float_t to double. */
16353 static enum flt_eval_method
16354 s390_excess_precision (enum excess_precision_type type)
16356 switch (type)
16358 case EXCESS_PRECISION_TYPE_IMPLICIT:
16359 case EXCESS_PRECISION_TYPE_FAST:
16360 /* The fastest type to promote to will always be the native type,
16361 whether that occurs with implicit excess precision or
16362 otherwise. */
16363 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16364 case EXCESS_PRECISION_TYPE_STANDARD:
16365 /* Otherwise, when we are in a standards compliant mode, to
16366 ensure consistency with the implementation in glibc, report that
16367 float is evaluated to the range and precision of double. */
16368 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16369 default:
16370 gcc_unreachable ();
16372 return FLT_EVAL_METHOD_UNPREDICTABLE;
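/* An illustrative consequence of the above (glibc typedefs float_t and
   double_t to double on s390):

     #include <math.h>
     #include <float.h>

     float
     mul (float a, float b)
     {
       float_t t = a * b;   // evaluated and stored in double precision
       return (float) t;
     }
     // With -fexcess-precision=standard, FLT_EVAL_METHOD is 1 here,
     // matching the glibc definition of float_t.
*/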
16375 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16377 static unsigned HOST_WIDE_INT
16378 s390_asan_shadow_offset (void)
16380 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
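/* A sketch of the shadow-address computation that the ASan
   instrumentation performs with the offset returned above, assuming the
   generic mapping with shadow scale 3:

     unsigned long
     shadow_of (unsigned long addr)
     {
       return (addr >> 3) + (1UL << 52);   // 64-bit case
     }
*/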
16383 #ifdef HAVE_GAS_HIDDEN
16384 # define USE_HIDDEN_LINKONCE 1
16385 #else
16386 # define USE_HIDDEN_LINKONCE 0
16387 #endif
16389 /* Output an indirect branch trampoline for target register REGNO. */
16391 static void
16392 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16394 tree decl;
16395 char thunk_label[32];
16396 int i;
16398 if (z10_p)
16399 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16400 else
16401 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16402 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16404 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16405 get_identifier (thunk_label),
16406 build_function_type_list (void_type_node, NULL_TREE));
16407 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16408 NULL_TREE, void_type_node);
16409 TREE_PUBLIC (decl) = 1;
16410 TREE_STATIC (decl) = 1;
16411 DECL_IGNORED_P (decl) = 1;
16413 if (USE_HIDDEN_LINKONCE)
16415 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16417 targetm.asm_out.unique_section (decl, 0);
16418 switch_to_section (get_named_section (decl, NULL, 0));
16420 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16421 fputs ("\t.hidden\t", asm_out_file);
16422 assemble_name (asm_out_file, thunk_label);
16423 putc ('\n', asm_out_file);
16424 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16426 else
16428 switch_to_section (text_section);
16429 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16432 DECL_INITIAL (decl) = make_node (BLOCK);
16433 current_function_decl = decl;
16434 allocate_struct_function (decl, false);
16435 init_function_start (decl);
16436 cfun->is_thunk = true;
16437 first_function_block_is_cold = false;
16438 final_start_function (emit_barrier (), asm_out_file, 1);
16440 /* This makes CFI at least usable for indirect jumps.
16442 Stopping in the thunk: the backtrace will point to the thunk target
16443 as if it was interrupted by a signal. For a call this means that
16444 the call chain will be: caller->callee->thunk */
16445 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16447 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16448 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16449 for (i = 0; i < FPR15_REGNUM; i++)
16450 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16453 if (z10_p)
16455 /* exrl 0,1f */
16457 /* We generate a thunk for z10 compiled code although z10 is
16458 currently not enabled. Tell the assembler to accept the
16459 instruction. */
16460 if (!TARGET_CPU_Z10)
16462 fputs ("\t.machine push\n", asm_out_file);
16463 fputs ("\t.machine z10\n", asm_out_file);
16465 /* We use exrl even if -mzarch hasn't been specified on the
16466 command line so we have to tell the assembler to accept
16467 it. */
16468 if (!TARGET_ZARCH)
16469 fputs ("\t.machinemode zarch\n", asm_out_file);
16471 fputs ("\texrl\t0,1f\n", asm_out_file);
16473 if (!TARGET_ZARCH)
16474 fputs ("\t.machinemode esa\n", asm_out_file);
16476 if (!TARGET_CPU_Z10)
16477 fputs ("\t.machine pop\n", asm_out_file);
16479 else
16481 /* larl %r1,1f */
16482 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16483 INDIRECT_BRANCH_THUNK_REGNUM);
16485 /* ex 0,0(%r1) */
16486 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16487 INDIRECT_BRANCH_THUNK_REGNUM);
16490 /* 0: j 0b */
16491 fputs ("0:\tj\t0b\n", asm_out_file);
16493 /* 1: br <regno> */
16494 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16496 final_end_function ();
16497 init_insn_lengths ();
16498 free_after_compilation (cfun);
16499 set_cfun (NULL);
16500 current_function_decl = NULL;
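/* For reference, the body emitted above for the z10 variant of the thunk
   is roughly the following (the symbol name comes from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL; <n> is the target register):

     <thunk label>:
             exrl    0,1f        # execute the br at label 1
     0:      j       0b          # speculation stopper, never reached
     1:      br      %r<n>

   The pre-z10 variant loads the address of label 1 into
   INDIRECT_BRANCH_THUNK_REGNUM with larl and uses ex instead of exrl.  */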
16503 /* Implement the asm.code_end target hook. */
16505 static void
16506 s390_code_end (void)
16508 int i;
16510 for (i = 1; i < 16; i++)
16512 if (indirect_branch_z10thunk_mask & (1 << i))
16513 s390_output_indirect_thunk_function (i, true);
16515 if (indirect_branch_prez10thunk_mask & (1 << i))
16516 s390_output_indirect_thunk_function (i, false);
16519 if (TARGET_INDIRECT_BRANCH_TABLE)
16521 int o;
16522 int i;
16524 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16526 if (indirect_branch_table_label_no[o] == 0)
16527 continue;
16529 switch_to_section (get_section (indirect_branch_table_name[o],
16531 NULL_TREE));
16532 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16534 char label_start[32];
16536 ASM_GENERATE_INTERNAL_LABEL (label_start,
16537 indirect_branch_table_label[o], i);
16539 fputs ("\t.long\t", asm_out_file);
16540 assemble_name_raw (asm_out_file, label_start);
16541 fputs ("-.\n", asm_out_file);
16543 switch_to_section (current_function_section ());
16548 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16550 unsigned int
16551 s390_case_values_threshold (void)
16553 /* Disabling branch prediction for indirect jumps makes jump tables
16554 much more expensive. */
16555 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16556 return 20;
16558 return default_case_values_threshold ();
16561 /* Evaluate the insns between HEAD and TAIL and install back-end
16562 specific dependencies.
16564 Establish an ANTI dependency between the r11 and r15 restores from FPRs
16565 to prevent the instruction scheduler from reordering them, since
16566 this would break CFI. No further handling in the sched_reorder
16567 hook is required since the r11 and r15 restores will never appear in
16568 the same ready list with that change. */
16569 void
16570 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16572 if (!frame_pointer_needed || !epilogue_completed)
16573 return;
16575 while (head != tail && DEBUG_INSN_P (head))
16576 head = NEXT_INSN (head);
16578 rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16580 for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16582 rtx set = single_set (insn);
16583 if (!INSN_P (insn)
16584 || !RTX_FRAME_RELATED_P (insn)
16585 || set == NULL_RTX
16586 || !REG_P (SET_DEST (set))
16587 || !FP_REG_P (SET_SRC (set)))
16588 continue;
16590 if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16591 r11_restore = insn;
16593 if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16594 r15_restore = insn;
16597 if (r11_restore == NULL || r15_restore == NULL)
16598 return;
16599 add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16602 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16604 static unsigned HOST_WIDE_INT
16605 s390_shift_truncation_mask (machine_mode mode)
16607 return mode == DImode || mode == SImode ? 63 : 0;
16610 /* Initialize GCC target structure. */
16612 #undef TARGET_ASM_ALIGNED_HI_OP
16613 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16614 #undef TARGET_ASM_ALIGNED_DI_OP
16615 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16616 #undef TARGET_ASM_INTEGER
16617 #define TARGET_ASM_INTEGER s390_assemble_integer
16619 #undef TARGET_ASM_OPEN_PAREN
16620 #define TARGET_ASM_OPEN_PAREN ""
16622 #undef TARGET_ASM_CLOSE_PAREN
16623 #define TARGET_ASM_CLOSE_PAREN ""
16625 #undef TARGET_OPTION_OVERRIDE
16626 #define TARGET_OPTION_OVERRIDE s390_option_override
16628 #ifdef TARGET_THREAD_SSP_OFFSET
16629 #undef TARGET_STACK_PROTECT_GUARD
16630 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16631 #endif
16633 #undef TARGET_ENCODE_SECTION_INFO
16634 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16636 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16637 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16639 #ifdef HAVE_AS_TLS
16640 #undef TARGET_HAVE_TLS
16641 #define TARGET_HAVE_TLS true
16642 #endif
16643 #undef TARGET_CANNOT_FORCE_CONST_MEM
16644 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16646 #undef TARGET_DELEGITIMIZE_ADDRESS
16647 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16649 #undef TARGET_LEGITIMIZE_ADDRESS
16650 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16652 #undef TARGET_RETURN_IN_MEMORY
16653 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16655 #undef TARGET_INIT_BUILTINS
16656 #define TARGET_INIT_BUILTINS s390_init_builtins
16657 #undef TARGET_EXPAND_BUILTIN
16658 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16659 #undef TARGET_BUILTIN_DECL
16660 #define TARGET_BUILTIN_DECL s390_builtin_decl
16662 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16663 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16665 #undef TARGET_ASM_OUTPUT_MI_THUNK
16666 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16667 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16668 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16670 #undef TARGET_C_EXCESS_PRECISION
16671 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16673 #undef TARGET_SCHED_ADJUST_PRIORITY
16674 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16675 #undef TARGET_SCHED_ISSUE_RATE
16676 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16680 #undef TARGET_SCHED_VARIABLE_ISSUE
16681 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16682 #undef TARGET_SCHED_REORDER
16683 #define TARGET_SCHED_REORDER s390_sched_reorder
16684 #undef TARGET_SCHED_INIT
16685 #define TARGET_SCHED_INIT s390_sched_init
16687 #undef TARGET_CANNOT_COPY_INSN_P
16688 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16689 #undef TARGET_RTX_COSTS
16690 #define TARGET_RTX_COSTS s390_rtx_costs
16691 #undef TARGET_ADDRESS_COST
16692 #define TARGET_ADDRESS_COST s390_address_cost
16693 #undef TARGET_REGISTER_MOVE_COST
16694 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16695 #undef TARGET_MEMORY_MOVE_COST
16696 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16699 s390_builtin_vectorization_cost
16701 #undef TARGET_MACHINE_DEPENDENT_REORG
16702 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16704 #undef TARGET_VALID_POINTER_MODE
16705 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16707 #undef TARGET_BUILD_BUILTIN_VA_LIST
16708 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16709 #undef TARGET_EXPAND_BUILTIN_VA_START
16710 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16711 #undef TARGET_ASAN_SHADOW_OFFSET
16712 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16713 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16714 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16716 #undef TARGET_PROMOTE_FUNCTION_MODE
16717 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16718 #undef TARGET_PASS_BY_REFERENCE
16719 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16721 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16722 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16724 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16725 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16726 #undef TARGET_FUNCTION_ARG
16727 #define TARGET_FUNCTION_ARG s390_function_arg
16728 #undef TARGET_FUNCTION_ARG_ADVANCE
16729 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16730 #undef TARGET_FUNCTION_ARG_PADDING
16731 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16732 #undef TARGET_FUNCTION_VALUE
16733 #define TARGET_FUNCTION_VALUE s390_function_value
16734 #undef TARGET_LIBCALL_VALUE
16735 #define TARGET_LIBCALL_VALUE s390_libcall_value
16736 #undef TARGET_STRICT_ARGUMENT_NAMING
16737 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16739 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16740 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16742 #undef TARGET_FIXED_CONDITION_CODE_REGS
16743 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16745 #undef TARGET_CC_MODES_COMPATIBLE
16746 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16748 #undef TARGET_INVALID_WITHIN_DOLOOP
16749 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16751 #ifdef HAVE_AS_TLS
16752 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16753 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16754 #endif
16756 #undef TARGET_DWARF_FRAME_REG_MODE
16757 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16759 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16760 #undef TARGET_MANGLE_TYPE
16761 #define TARGET_MANGLE_TYPE s390_mangle_type
16762 #endif
16764 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16765 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16767 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16768 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16770 #undef TARGET_PREFERRED_RELOAD_CLASS
16771 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16773 #undef TARGET_SECONDARY_RELOAD
16774 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16775 #undef TARGET_SECONDARY_MEMORY_NEEDED
16776 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16777 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16778 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16780 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16781 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16783 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16784 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16786 #undef TARGET_LEGITIMATE_ADDRESS_P
16787 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16789 #undef TARGET_LEGITIMATE_CONSTANT_P
16790 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16792 #undef TARGET_LRA_P
16793 #define TARGET_LRA_P s390_lra_p
16795 #undef TARGET_CAN_ELIMINATE
16796 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16798 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16799 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16801 #undef TARGET_LOOP_UNROLL_ADJUST
16802 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16804 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16805 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16806 #undef TARGET_TRAMPOLINE_INIT
16807 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16809 /* PR 79421 */
16810 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16811 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16813 #undef TARGET_UNWIND_WORD_MODE
16814 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16816 #undef TARGET_CANONICALIZE_COMPARISON
16817 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16819 #undef TARGET_HARD_REGNO_SCRATCH_OK
16820 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16822 #undef TARGET_HARD_REGNO_NREGS
16823 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16824 #undef TARGET_HARD_REGNO_MODE_OK
16825 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16826 #undef TARGET_MODES_TIEABLE_P
16827 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16829 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16830 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16831 s390_hard_regno_call_part_clobbered
16833 #undef TARGET_ATTRIBUTE_TABLE
16834 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16836 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16837 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16839 #undef TARGET_SET_UP_BY_PROLOGUE
16840 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16842 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16843 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16845 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16846 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16847 s390_use_by_pieces_infrastructure_p
16849 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16850 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16852 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16853 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16855 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16856 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16858 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16859 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16861 #undef TARGET_VECTOR_ALIGNMENT
16862 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16864 #undef TARGET_INVALID_BINARY_OP
16865 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16867 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16868 #undef TARGET_ASM_FILE_START
16869 #define TARGET_ASM_FILE_START s390_asm_file_start
16870 #endif
16872 #undef TARGET_ASM_FILE_END
16873 #define TARGET_ASM_FILE_END s390_asm_file_end
16875 #undef TARGET_SET_CURRENT_FUNCTION
16876 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16878 #if S390_USE_TARGET_ATTRIBUTE
16879 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16880 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16882 #undef TARGET_CAN_INLINE_P
16883 #define TARGET_CAN_INLINE_P s390_can_inline_p
16884 #endif
16886 #undef TARGET_OPTION_RESTORE
16887 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16889 #undef TARGET_CAN_CHANGE_MODE_CLASS
16890 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16892 #undef TARGET_CONSTANT_ALIGNMENT
16893 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16895 #undef TARGET_ASM_CODE_END
16896 #define TARGET_ASM_CODE_END s390_code_end
16898 #undef TARGET_CASE_VALUES_THRESHOLD
16899 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16901 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16902 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16903 s390_sched_dependencies_evaluation
16905 #undef TARGET_SHIFT_TRUNCATION_MASK
16906 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16908 /* Use only short displacement, since long displacement is not available for
16909 the floating point instructions. */
16910 #undef TARGET_MAX_ANCHOR_OFFSET
16911 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16913 struct gcc_target targetm = TARGET_INITIALIZER;
16915 #include "gt-s390.h"