IBM Z: Prevent mach optimization on doloop patterns
[official-gcc.git] / gcc / config / s390 / s390.c
blob bd49a897c76f2969e14ff97aa36eca4ddfb22f5e
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2020 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #define IN_TARGET_CODE 1
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
89 /* This file should be included last. */
90 #include "target-def.h"
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl;
97 /* Define the specific costs for a given cpu. */
99 struct processor_costs
101 /* multiplication */
102 const int m; /* cost of an M instruction. */
103 const int mghi; /* cost of an MGHI instruction. */
104 const int mh; /* cost of an MH instruction. */
105 const int mhi; /* cost of an MHI instruction. */
106 const int ml; /* cost of an ML instruction. */
107 const int mr; /* cost of an MR instruction. */
108 const int ms; /* cost of an MS instruction. */
109 const int msg; /* cost of an MSG instruction. */
110 const int msgf; /* cost of an MSGF instruction. */
111 const int msgfr; /* cost of an MSGFR instruction. */
112 const int msgr; /* cost of an MSGR instruction. */
113 const int msr; /* cost of an MSR instruction. */
114 const int mult_df; /* cost of multiplication in DFmode. */
115 const int mxbr;
116 /* square root */
117 const int sqxbr; /* cost of square root in TFmode. */
118 const int sqdbr; /* cost of square root in DFmode. */
119 const int sqebr; /* cost of square root in SFmode. */
120 /* multiply and add */
121 const int madbr; /* cost of multiply and add in DFmode. */
122 const int maebr; /* cost of multiply and add in SFmode. */
123 /* division */
124 const int dxbr;
125 const int ddbr;
126 const int debr;
127 const int dlgr;
128 const int dlr;
129 const int dr;
130 const int dsgfr;
131 const int dsgr;
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
136 static const
137 struct processor_costs z900_cost =
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
168 static const
169 struct processor_costs z990_cost =
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
200 static const
201 struct processor_costs z9_109_cost =
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
232 static const
233 struct processor_costs z10_cost =
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1) , /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
264 static const
265 struct processor_costs z196_cost =
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1) , /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
296 static const
297 struct processor_costs zEC12_cost =
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1) , /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
328 const struct s390_processor processor_table[] =
330 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
340 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
343 extern int reload_completed;
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
346 static rtx_insn *last_scheduled_insn;
347 #define NUM_SIDES 2
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
352 /* Estimate of number of cycles a long-running insn occupies an
353 execution unit. */
354 static int fxd_longrunning[NUM_SIDES];
355 static int fpd_longrunning[NUM_SIDES];
357 /* The maximum score added for an instruction whose unit hasn't been
358 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
359 give instruction mix scheduling more priority over instruction
360 grouping. */
361 #define MAX_SCHED_MIX_SCORE 2
363 /* The maximum distance up to which individual scores will be
364 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
365    Increase this with the OOO window size of the machine. */
366 #define MAX_SCHED_MIX_DISTANCE 70
368 /* Structure used to hold the components of a S/390 memory
369 address. A legitimate address on S/390 is of the general
370 form
371 base + index + displacement
372 where any of the components is optional.
374 base and index are registers of the class ADDR_REGS,
375 displacement is an unsigned 12-bit immediate constant. */
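/* For example, in the usual D(X,B) assembler notation an operand such
   as 4092(%r2,%r3) has base register %r3, index register %r2 and
   displacement 4092; the unsigned 12-bit displacement allows values
   0..4095.  */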
377 /* The max number of insns of backend generated memset/memcpy/memcmp
378 loops. This value is used in the unroll adjust hook to detect such
379 loops. Current max is 9 coming from the memcmp loop. */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
382 struct s390_address
384 rtx base;
385 rtx indx;
386 rtx disp;
387 bool pointer;
388 bool literal_pool;
391 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
396 ? cfun_frame_layout.fpr_bitmap & 0x0f \
397 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
401 (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
403 (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405 cfun->machine->frame_layout.gpr_save_slots[REGNO]
407 /* Number of GPRs and FPRs used for argument passing. */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
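/* Under the s390 ELF ABI these counts correspond to GPRs %r2-%r6,
   FPRs %f0, %f2, %f4, %f6 (only %f0 and %f2 on 31 bit) and vector
   registers %v24-%v31.  */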
412 /* A couple of shortcuts. */
413 #define CONST_OK_FOR_J(x) \
414 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
424 #define REGNO_PAIR_OK(REGNO, MODE) \
425 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
427 /* That's the read ahead of the dynamic branch prediction unit in
428 bytes on a z10 (or higher) CPU. */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431 /* Masks per jump target register indicating which thunks need to be
432 generated. */
433 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
434 static GTY(()) int indirect_branch_z10thunk_mask = 0;
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
438 enum s390_indirect_branch_option
440 s390_opt_indirect_branch_jump = 0,
441 s390_opt_indirect_branch_call,
442 s390_opt_function_return_reg,
443 s390_opt_function_return_mem
446 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
447 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
448 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
450 { ".s390_indirect_jump", ".s390_indirect_call",
451 ".s390_return_reg", ".s390_return_mem" };
453 bool
454 s390_return_addr_from_memory ()
456 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
459 /* Indicate which ABI has been used for passing vector args.
460 0 - no vector type arguments have been passed where the ABI is relevant
461 1 - the old ABI has been used
462 2 - a vector type argument has been passed either in a vector register
463 or on the stack by value */
464 static int s390_vector_abi = 0;
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467 switch. The vector ABI affects only vector data types. There are
468 two aspects of the vector ABI relevant here:
470 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471 ABI and natural alignment with the old.
473    2. vectors <= 16 bytes are passed in VRs or by value on the stack
474 with the new ABI but by reference on the stack with the old.
476 If ARG_P is true TYPE is used for a function argument or return
477 value. The ABI marker then is set for all vector data types. If
478 ARG_P is false only type 1 vectors are being checked. */
480 static void
481 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
483 static hash_set<const_tree> visited_types_hash;
485 if (s390_vector_abi)
486 return;
488 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
489 return;
491 if (visited_types_hash.contains (type))
492 return;
494 visited_types_hash.add (type);
496 if (VECTOR_TYPE_P (type))
498 int type_size = int_size_in_bytes (type);
500 /* Outside arguments only the alignment is changing and this
501 only happens for vector types >= 16 bytes. */
502 if (!arg_p && type_size < 16)
503 return;
505       /* In arguments vector types > 16 bytes are passed as before (GCC
506 never enforced the bigger alignment for arguments which was
507 required by the old vector ABI). However, it might still be
508 ABI relevant due to the changed alignment if it is a struct
509 member. */
510 if (arg_p && type_size > 16 && !in_struct_p)
511 return;
513 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
515 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
517 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
518 natural alignment there will never be ABI dependent padding
519 in an array type. That's why we do not set in_struct_p to
520 true here. */
521 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
523 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
525 tree arg_chain;
527 /* Check the return type. */
528 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
530 for (arg_chain = TYPE_ARG_TYPES (type);
531 arg_chain;
532 arg_chain = TREE_CHAIN (arg_chain))
533 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
535 else if (RECORD_OR_UNION_TYPE_P (type))
537 tree field;
539 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
541 if (TREE_CODE (field) != FIELD_DECL)
542 continue;
544 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
550 /* System z builtins. */
552 #include "s390-builtins.h"
554 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
556 #undef B_DEF
557 #undef OB_DEF
558 #undef OB_DEF_VAR
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
560 #define OB_DEF(...)
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
566 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
568 #undef B_DEF
569 #undef OB_DEF
570 #undef OB_DEF_VAR
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
572 #define OB_DEF(...)
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
578 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
590 const unsigned int
591 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
593 #undef B_DEF
594 #undef OB_DEF
595 #undef OB_DEF_VAR
596 #define B_DEF(...)
597 #define OB_DEF(...)
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
603 const unsigned int
604 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
606 #undef B_DEF
607 #undef OB_DEF
608 #undef OB_DEF_VAR
609 #define B_DEF(...)
610 #define OB_DEF(...)
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
616 tree s390_builtin_types[BT_MAX];
617 tree s390_builtin_fn_types[BT_FN_MAX];
618 tree s390_builtin_decls[S390_BUILTIN_MAX +
619 S390_OVERLOADED_BUILTIN_MAX +
620 S390_OVERLOADED_BUILTIN_VAR_MAX];
622 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
623 #undef B_DEF
624 #undef OB_DEF
625 #undef OB_DEF_VAR
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
627 #define OB_DEF(...)
628 #define OB_DEF_VAR(...)
630 #include "s390-builtins.def"
631 CODE_FOR_nothing
634 static void
635 s390_init_builtins (void)
637 /* These definitions are being used in s390-builtins.def. */
638 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
639 NULL, NULL);
640 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
641 tree c_uint64_type_node;
643   /* The uint64_type_node from tree.c is not compatible with the C99
644 uint64_t data type. What we want is c_uint64_type_node from
645 c-common.c. But since backend code is not supposed to interface
646 with the frontend we recreate it here. */
647 if (TARGET_64BIT)
648 c_uint64_type_node = long_unsigned_type_node;
649 else
650 c_uint64_type_node = long_long_unsigned_type_node;
652 #undef DEF_TYPE
653 #define DEF_TYPE(INDEX, NODE, CONST_P) \
654 if (s390_builtin_types[INDEX] == NULL) \
655 s390_builtin_types[INDEX] = (!CONST_P) ? \
656 (NODE) : build_type_variant ((NODE), 1, 0);
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
660 if (s390_builtin_types[INDEX] == NULL) \
661 s390_builtin_types[INDEX] = \
662 build_pointer_type (s390_builtin_types[INDEX_BASE]);
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
666 if (s390_builtin_types[INDEX] == NULL) \
667 s390_builtin_types[INDEX] = \
668 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
672 if (s390_builtin_types[INDEX] == NULL) \
673 s390_builtin_types[INDEX] = \
674 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
678 if (s390_builtin_types[INDEX] == NULL) \
679 s390_builtin_types[INDEX] = \
680 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
682 #undef DEF_FN_TYPE
683 #define DEF_FN_TYPE(INDEX, args...) \
684 if (s390_builtin_fn_types[INDEX] == NULL) \
685 s390_builtin_fn_types[INDEX] = \
686 build_function_type_list (args, NULL_TREE);
687 #undef DEF_OV_TYPE
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
691 #undef B_DEF
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
693 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
694 s390_builtin_decls[S390_BUILTIN_##NAME] = \
695 add_builtin_function ("__builtin_" #NAME, \
696 s390_builtin_fn_types[FNTYPE], \
697 S390_BUILTIN_##NAME, \
698 BUILT_IN_MD, \
699 NULL, \
700 ATTRS);
701 #undef OB_DEF
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
703 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
704 == NULL) \
705 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
709 BUILT_IN_MD, \
710 NULL, \
712 #undef OB_DEF_VAR
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719 builtin DECL. The operand flags from s390-builtins.def have to
720    be passed as OP_FLAGS. */
721 bool
722 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
724 if (O_UIMM_P (op_flags))
726 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727 int bitwidth = bitwidths[op_flags - O_U1];
729 if (!tree_fits_uhwi_p (arg)
730 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
732 error ("constant argument %d for builtin %qF is out of range "
733 "(0..%wu)", argnum, decl,
734 (HOST_WIDE_INT_1U << bitwidth) - 1);
735 return false;
739 if (O_SIMM_P (op_flags))
741 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742 int bitwidth = bitwidths[op_flags - O_S2];
744 if (!tree_fits_shwi_p (arg)
745 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
746 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
748 error ("constant argument %d for builtin %qF is out of range "
749 "(%wd..%wd)", argnum, decl,
750 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
751 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
752 return false;
755 return true;
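/* For instance (derived from the bitwidth tables above, assuming the
   usual O_U<n>/O_S<n> flag naming): an operand flagged O_U4 accepts
   the unsigned range 0..15 and an operand flagged O_S8 the signed
   range -128..127; anything else triggers the error calls above.  */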
758 /* Expand an expression EXP that calls a built-in function,
759 with result going to TARGET if that's convenient
760 (and in mode MODE if that's convenient).
761 SUBTARGET may be used as the target for computing one of EXP's operands.
762 IGNORE is nonzero if the value is to be ignored. */
764 static rtx
765 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
766 machine_mode mode ATTRIBUTE_UNUSED,
767 int ignore ATTRIBUTE_UNUSED)
769 #define MAX_ARGS 6
771 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
772 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
773 enum insn_code icode;
774 rtx op[MAX_ARGS], pat;
775 int arity;
776 bool nonvoid;
777 tree arg;
778 call_expr_arg_iterator iter;
779 unsigned int all_op_flags = opflags_for_builtin (fcode);
780 machine_mode last_vec_mode = VOIDmode;
782 if (TARGET_DEBUG_ARG)
784 fprintf (stderr,
785 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
787 bflags_for_builtin (fcode));
790 if (S390_USE_TARGET_ATTRIBUTE)
792 unsigned int bflags;
794 bflags = bflags_for_builtin (fcode);
795 if ((bflags & B_HTM) && !TARGET_HTM)
797 error ("builtin %qF is not supported without %<-mhtm%> "
798 "(default with %<-march=zEC12%> and higher).", fndecl);
799 return const0_rtx;
801 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
803 error ("builtin %qF requires %<-mvx%> "
804 "(default with %<-march=z13%> and higher).", fndecl);
805 return const0_rtx;
808 if ((bflags & B_VXE) && !TARGET_VXE)
810 error ("Builtin %qF requires z14 or higher.", fndecl);
811 return const0_rtx;
814 if ((bflags & B_VXE2) && !TARGET_VXE2)
816 error ("Builtin %qF requires z15 or higher.", fndecl);
817 return const0_rtx;
820 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821 && fcode < S390_ALL_BUILTIN_MAX)
823 gcc_unreachable ();
825 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
827 icode = code_for_builtin[fcode];
828 /* Set a flag in the machine specific cfun part in order to support
829 saving/restoring of FPRs. */
830 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
831 cfun->machine->tbegin_p = true;
833 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
835 error ("unresolved overloaded builtin");
836 return const0_rtx;
838 else
839 internal_error ("bad builtin fcode");
841 if (icode == 0)
842 internal_error ("bad builtin icode");
844 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
846 if (nonvoid)
848 machine_mode tmode = insn_data[icode].operand[0].mode;
849 if (!target
850 || GET_MODE (target) != tmode
851 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
852 target = gen_reg_rtx (tmode);
854 /* There are builtins (e.g. vec_promote) with no vector
855 arguments but an element selector. So we have to also look
856 at the vector return type when emitting the modulo
857 operation. */
858 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
859 last_vec_mode = insn_data[icode].operand[0].mode;
862 arity = 0;
863 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
865 rtx tmp_rtx;
866 const struct insn_operand_data *insn_op;
867 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
869 all_op_flags = all_op_flags >> O_SHIFT;
871 if (arg == error_mark_node)
872 return NULL_RTX;
873 if (arity >= MAX_ARGS)
874 return NULL_RTX;
876 if (O_IMM_P (op_flags)
877 && TREE_CODE (arg) != INTEGER_CST)
879 error ("constant value required for builtin %qF argument %d",
880 fndecl, arity + 1);
881 return const0_rtx;
884 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
885 return const0_rtx;
887 insn_op = &insn_data[icode].operand[arity + nonvoid];
888 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
890 /* expand_expr truncates constants to the target mode only if it
891 is "convenient". However, our checks below rely on this
892 being done. */
893 if (CONST_INT_P (op[arity])
894 && SCALAR_INT_MODE_P (insn_op->mode)
895 && GET_MODE (op[arity]) != insn_op->mode)
896 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
897 insn_op->mode));
899 /* Wrap the expanded RTX for pointer types into a MEM expr with
900 the proper mode. This allows us to use e.g. (match_operand
901 "memory_operand"..) in the insn patterns instead of (mem
902 	 (match_operand "address_operand")). This is helpful for
903 patterns not just accepting MEMs. */
904 if (POINTER_TYPE_P (TREE_TYPE (arg))
905 && insn_op->predicate != address_operand)
906 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
908       /* Expand the modulo operation required on element selectors. */
909 if (op_flags == O_ELEM)
911 gcc_assert (last_vec_mode != VOIDmode);
912 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
913 op[arity],
914 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
915 NULL_RTX, 1, OPTAB_DIRECT);
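	  /* E.g. with last_vec_mode == V4SImode an element selector of 5
	     is reduced to 5 & 3 == 1 by the AND emitted above.  */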
918 /* Record the vector mode used for an element selector. This assumes:
919 1. There is no builtin with two different vector modes and an element selector
920 2. The element selector comes after the vector type it is referring to.
921 	 This is currently true for all the builtins but FIXME we
922 	 should check for that explicitly. */
923 if (VECTOR_MODE_P (insn_op->mode))
924 last_vec_mode = insn_op->mode;
926 if (insn_op->predicate (op[arity], insn_op->mode))
928 arity++;
929 continue;
932 /* A memory operand is rejected by the memory_operand predicate.
933 Try making the address legal by copying it into a register. */
934 if (MEM_P (op[arity])
935 && insn_op->predicate == memory_operand
936 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
937 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
939 op[arity] = replace_equiv_address (op[arity],
940 copy_to_mode_reg (Pmode,
941 XEXP (op[arity], 0)));
943 /* Some of the builtins require different modes/types than the
944 pattern in order to implement a specific API. Instead of
945 adding many expanders which do the mode change we do it here.
946 	 E.g. s390_vec_add_u128, which is required to take vector unsigned char
947 	 arguments, is mapped to addti3. */
948 else if (insn_op->mode != VOIDmode
949 && GET_MODE (op[arity]) != VOIDmode
950 && GET_MODE (op[arity]) != insn_op->mode
951 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
952 GET_MODE (op[arity]), 0))
953 != NULL_RTX))
955 op[arity] = tmp_rtx;
958 /* The predicate rejects the operand although the mode is fine.
959 	 Copy the operand to a register. */
960 if (!insn_op->predicate (op[arity], insn_op->mode)
961 && (GET_MODE (op[arity]) == insn_op->mode
962 || GET_MODE (op[arity]) == VOIDmode
963 || (insn_op->predicate == address_operand
964 && GET_MODE (op[arity]) == Pmode)))
966 /* An address_operand usually has VOIDmode in the expander
967 so we cannot use this. */
968 machine_mode target_mode =
969 (insn_op->predicate == address_operand
970 ? (machine_mode) Pmode : insn_op->mode);
971 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
974 if (!insn_op->predicate (op[arity], insn_op->mode))
976 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
977 return const0_rtx;
979 arity++;
982 switch (arity)
984 case 0:
985 pat = GEN_FCN (icode) (target);
986 break;
987 case 1:
988 if (nonvoid)
989 pat = GEN_FCN (icode) (target, op[0]);
990 else
991 pat = GEN_FCN (icode) (op[0]);
992 break;
993 case 2:
994 if (nonvoid)
995 pat = GEN_FCN (icode) (target, op[0], op[1]);
996 else
997 pat = GEN_FCN (icode) (op[0], op[1]);
998 break;
999 case 3:
1000 if (nonvoid)
1001 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1002 else
1003 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1004 break;
1005 case 4:
1006 if (nonvoid)
1007 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1008 else
1009 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1010 break;
1011 case 5:
1012 if (nonvoid)
1013 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1014 else
1015 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1016 break;
1017 case 6:
1018 if (nonvoid)
1019 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1020 else
1021 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1022 break;
1023 default:
1024 gcc_unreachable ();
1026 if (!pat)
1027 return NULL_RTX;
1028 emit_insn (pat);
1030 if (nonvoid)
1031 return target;
1032 else
1033 return const0_rtx;
1037 static const int s390_hotpatch_hw_max = 1000000;
1038 static int s390_hotpatch_hw_before_label = 0;
1039 static int s390_hotpatch_hw_after_label = 0;
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042 an argument, the argument is valid. */
1044 static tree
1045 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1046 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1048 tree expr;
1049 tree expr2;
1050 int err;
1052 if (TREE_CODE (*node) != FUNCTION_DECL)
1054 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1055 name);
1056 *no_add_attrs = true;
1058 if (args != NULL && TREE_CHAIN (args) != NULL)
1060 expr = TREE_VALUE (args);
1061 expr2 = TREE_VALUE (TREE_CHAIN (args));
1063 if (args == NULL || TREE_CHAIN (args) == NULL)
1064 err = 1;
1065 else if (TREE_CODE (expr) != INTEGER_CST
1066 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1067 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1068 err = 1;
1069 else if (TREE_CODE (expr2) != INTEGER_CST
1070 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1071 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1072 err = 1;
1073 else
1074 err = 0;
1075 if (err)
1077 error ("requested %qE attribute is not a comma separated pair of"
1078 " non-negative integer constants or too large (max. %d)", name,
1079 s390_hotpatch_hw_max);
1080 *no_add_attrs = true;
1083 return NULL_TREE;
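/* Usage sketch (illustrative): the attribute is attached to a function
   declaration as e.g.
     void foo (void) __attribute__ ((hotpatch (1, 2)));
   where the two constants give the number of halfwords of padding
   emitted before the function label and at the start of the function
   body; the -mhotpatch=1,2 option requests the same globally.  */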
1086 /* Expand the s390_vector_bool type attribute. */
1088 static tree
1089 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1090 tree args ATTRIBUTE_UNUSED,
1091 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1093 tree type = *node, result = NULL_TREE;
1094 machine_mode mode;
1096 while (POINTER_TYPE_P (type)
1097 || TREE_CODE (type) == FUNCTION_TYPE
1098 || TREE_CODE (type) == METHOD_TYPE
1099 || TREE_CODE (type) == ARRAY_TYPE)
1100 type = TREE_TYPE (type);
1102 mode = TYPE_MODE (type);
1103 switch (mode)
1105 case E_DImode: case E_V2DImode:
1106 result = s390_builtin_types[BT_BV2DI];
1107 break;
1108 case E_SImode: case E_V4SImode:
1109 result = s390_builtin_types[BT_BV4SI];
1110 break;
1111 case E_HImode: case E_V8HImode:
1112 result = s390_builtin_types[BT_BV8HI];
1113 break;
1114 case E_QImode: case E_V16QImode:
1115 result = s390_builtin_types[BT_BV16QI];
1116 break;
1117 default:
1118 break;
1121 *no_add_attrs = true; /* No need to hang on to the attribute. */
1123 if (result)
1124 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1126 return NULL_TREE;
1129 /* Check syntax of function decl attributes having a string type value. */
1131 static tree
1132 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 tree args ATTRIBUTE_UNUSED,
1134 int flags ATTRIBUTE_UNUSED,
1135 bool *no_add_attrs)
1137 tree cst;
1139 if (TREE_CODE (*node) != FUNCTION_DECL)
1141 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1142 name);
1143 *no_add_attrs = true;
1146 cst = TREE_VALUE (args);
1148 if (TREE_CODE (cst) != STRING_CST)
1150 warning (OPT_Wattributes,
1151 "%qE attribute requires a string constant argument",
1152 name);
1153 *no_add_attrs = true;
1156 if (is_attribute_p ("indirect_branch", name)
1157 || is_attribute_p ("indirect_branch_call", name)
1158 || is_attribute_p ("function_return", name)
1159 || is_attribute_p ("function_return_reg", name)
1160 || is_attribute_p ("function_return_mem", name))
1162 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1163 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1164 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1166 warning (OPT_Wattributes,
1167 "argument to %qE attribute is not "
1168 "(keep|thunk|thunk-extern)", name);
1169 *no_add_attrs = true;
1173 if (is_attribute_p ("indirect_branch_jump", name)
1174 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1175 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1176 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1177 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1179 warning (OPT_Wattributes,
1180 "argument to %qE attribute is not "
1181 "(keep|thunk|thunk-inline|thunk-extern)", name);
1182 *no_add_attrs = true;
1185 return NULL_TREE;
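/* Usage sketch (illustrative): these attributes mirror the corresponding
   -mindirect-branch*/-mfunction-return* command line options on a
   per-function basis, e.g.
     void bar (void) __attribute__ ((indirect_branch ("thunk")));
   makes indirect jumps and calls in bar go through a thunk.  */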
1188 static const struct attribute_spec s390_attribute_table[] = {
1189 { "hotpatch", 2, 2, true, false, false, false,
1190 s390_handle_hotpatch_attribute, NULL },
1191 { "s390_vector_bool", 0, 0, false, true, false, true,
1192 s390_handle_vectorbool_attribute, NULL },
1193 { "indirect_branch", 1, 1, true, false, false, false,
1194 s390_handle_string_attribute, NULL },
1195 { "indirect_branch_jump", 1, 1, true, false, false, false,
1196 s390_handle_string_attribute, NULL },
1197 { "indirect_branch_call", 1, 1, true, false, false, false,
1198 s390_handle_string_attribute, NULL },
1199 { "function_return", 1, 1, true, false, false, false,
1200 s390_handle_string_attribute, NULL },
1201 { "function_return_reg", 1, 1, true, false, false, false,
1202 s390_handle_string_attribute, NULL },
1203 { "function_return_mem", 1, 1, true, false, false, false,
1204 s390_handle_string_attribute, NULL },
1206 /* End element. */
1207 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1210 /* Return the alignment for LABEL. We default to the -falign-labels
1211 value except for the literal pool base label. */
1213 s390_label_align (rtx_insn *label)
1215 rtx_insn *prev_insn = prev_active_insn (label);
1216 rtx set, src;
1218 if (prev_insn == NULL_RTX)
1219 goto old;
1221 set = single_set (prev_insn);
1223 if (set == NULL_RTX)
1224 goto old;
1226 src = SET_SRC (set);
1228 /* Don't align literal pool base labels. */
1229 if (GET_CODE (src) == UNSPEC
1230 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1231 return 0;
1233 old:
1234 return align_labels.levels[0].log;
1237 static GTY(()) rtx got_symbol;
1239 /* Return the GOT table symbol. The symbol will be created when the
1240 function is invoked for the first time. */
1242 static rtx
1243 s390_got_symbol (void)
1245 if (!got_symbol)
1247 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1248 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1251 return got_symbol;
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1257 return TARGET_64BIT ? DImode : SImode;
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1263 return TARGET_64BIT ? DImode : SImode;
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1269 return TARGET_64BIT ? DImode : SImode;
1272 /* Return true if the back end supports mode MODE. */
1273 static bool
1274 s390_scalar_mode_supported_p (scalar_mode mode)
1276 /* In contrast to the default implementation reject TImode constants on 31bit
1277 TARGET_ZARCH for ABI compliance. */
1278 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1279 return false;
1281 if (DECIMAL_FLOAT_MODE_P (mode))
1282 return default_decimal_float_supported_p ();
1284 return default_scalar_mode_supported_p (mode);
1287 /* Return true if the back end supports vector mode MODE. */
1288 static bool
1289 s390_vector_mode_supported_p (machine_mode mode)
1291 machine_mode inner;
1293 if (!VECTOR_MODE_P (mode)
1294 || !TARGET_VX
1295 || GET_MODE_SIZE (mode) > 16)
1296 return false;
1298 inner = GET_MODE_INNER (mode);
1300 switch (inner)
1302 case E_QImode:
1303 case E_HImode:
1304 case E_SImode:
1305 case E_DImode:
1306 case E_TImode:
1307 case E_SFmode:
1308 case E_DFmode:
1309 case E_TFmode:
1310 return true;
1311 default:
1312 return false;
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1318 void
1319 s390_set_has_landing_pad_p (bool value)
1321 cfun->machine->has_landing_pad_p = value;
1324 /* If two condition code modes are compatible, return a condition code
1325 mode which is compatible with both. Otherwise, return
1326 VOIDmode. */
1328 static machine_mode
1329 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1331 if (m1 == m2)
1332 return m1;
1334 switch (m1)
1336 case E_CCZmode:
1337 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1338 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1339 return m2;
1340 return VOIDmode;
1342 case E_CCSmode:
1343 case E_CCUmode:
1344 case E_CCTmode:
1345 case E_CCSRmode:
1346 case E_CCURmode:
1347 case E_CCZ1mode:
1348 if (m2 == CCZmode)
1349 return m1;
1351 return VOIDmode;
1353 default:
1354 return VOIDmode;
1356 return VOIDmode;
1359 /* Return true if SET either doesn't set the CC register, or else
1360 the source and destination have matching CC modes and that
1361 CC mode is at least as constrained as REQ_MODE. */
1363 static bool
1364 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1366 machine_mode set_mode;
1368 gcc_assert (GET_CODE (set) == SET);
1370 /* These modes are supposed to be used only in CC consumer
1371 patterns. */
1372 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1373 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1375 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1376 return 1;
1378 set_mode = GET_MODE (SET_DEST (set));
1379 switch (set_mode)
1381 case E_CCZ1mode:
1382 case E_CCSmode:
1383 case E_CCSRmode:
1384 case E_CCSFPSmode:
1385 case E_CCUmode:
1386 case E_CCURmode:
1387 case E_CCOmode:
1388 case E_CCLmode:
1389 case E_CCL1mode:
1390 case E_CCL2mode:
1391 case E_CCL3mode:
1392 case E_CCT1mode:
1393 case E_CCT2mode:
1394 case E_CCT3mode:
1395 case E_CCVEQmode:
1396 case E_CCVIHmode:
1397 case E_CCVIHUmode:
1398 case E_CCVFHmode:
1399 case E_CCVFHEmode:
1400 if (req_mode != set_mode)
1401 return 0;
1402 break;
1404 case E_CCZmode:
1405 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1406 && req_mode != CCSRmode && req_mode != CCURmode
1407 && req_mode != CCZ1mode)
1408 return 0;
1409 break;
1411 case E_CCAPmode:
1412 case E_CCANmode:
1413 if (req_mode != CCAmode)
1414 return 0;
1415 break;
1417 default:
1418 gcc_unreachable ();
1421 return (GET_MODE (SET_SRC (set)) == set_mode);
1424 /* Return true if every SET in INSN that sets the CC register
1425 has source and destination with matching CC modes and that
1426 CC mode is at least as constrained as REQ_MODE.
1427 If REQ_MODE is VOIDmode, always return false. */
1429 bool
1430 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1432 int i;
1434 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1435 if (req_mode == VOIDmode)
1436 return false;
1438 if (GET_CODE (PATTERN (insn)) == SET)
1439 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1441 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1442 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1444 rtx set = XVECEXP (PATTERN (insn), 0, i);
1445 if (GET_CODE (set) == SET)
1446 if (!s390_match_ccmode_set (set, req_mode))
1447 return false;
1450 return true;
1453 /* If a test-under-mask instruction can be used to implement
1454 (compare (and ... OP1) OP2), return the CC mode required
1455 to do that. Otherwise, return VOIDmode.
1456 MIXED is true if the instruction can distinguish between
1457    CC1 and CC2 for mixed selected bits (TMxx); it is false
1458 if the instruction cannot (TM). */
1460 machine_mode
1461 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1463 int bit0, bit1;
1465 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1466 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1467 return VOIDmode;
1469 /* Selected bits all zero: CC0.
1470 e.g.: int a; if ((a & (16 + 128)) == 0) */
1471 if (INTVAL (op2) == 0)
1472 return CCTmode;
1474 /* Selected bits all one: CC3.
1475 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476 if (INTVAL (op2) == INTVAL (op1))
1477 return CCT3mode;
1479 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1480 int a;
1481 if ((a & (16 + 128)) == 16) -> CCT1
1482 if ((a & (16 + 128)) == 128) -> CCT2 */
1483 if (mixed)
1485 bit1 = exact_log2 (INTVAL (op2));
1486 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1487 if (bit0 != -1 && bit1 != -1)
1488 return bit0 > bit1 ? CCT1mode : CCT2mode;
1491 return VOIDmode;
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495 OP0 and OP1 of a COMPARE, return the mode to be used for the
1496 comparison. */
1498 machine_mode
1499 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1501 switch (code)
1503 case EQ:
1504 case NE:
1505 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1506 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1507 return CCAPmode;
1508 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1509 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1510 return CCAPmode;
1511 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1512 || GET_CODE (op1) == NEG)
1513 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1514 return CCLmode;
1516 if (GET_CODE (op0) == AND)
1518 /* Check whether we can potentially do it via TM. */
1519 machine_mode ccmode;
1520 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1521 if (ccmode != VOIDmode)
1523 /* Relax CCTmode to CCZmode to allow fall-back to AND
1524 if that turns out to be beneficial. */
1525 return ccmode == CCTmode ? CCZmode : ccmode;
1529 if (register_operand (op0, HImode)
1530 && GET_CODE (op1) == CONST_INT
1531 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1532 return CCT3mode;
1533 if (register_operand (op0, QImode)
1534 && GET_CODE (op1) == CONST_INT
1535 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1536 return CCT3mode;
1538 return CCZmode;
1540 case LE:
1541 case LT:
1542 case GE:
1543 case GT:
1544 /* The only overflow condition of NEG and ABS happens when
1545 	 INT_MIN is used as parameter, which stays negative. So
1546 we have an overflow from a positive value to a negative.
1547 Using CCAP mode the resulting cc can be used for comparisons. */
1548 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 return CCAPmode;
1552 /* If constants are involved in an add instruction it is possible to use
1553 the resulting cc for comparisons with zero. Knowing the sign of the
1554 constant the overflow behavior gets predictable. e.g.:
1555 int a, b; if ((b = a + c) > 0)
1556 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1557 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560 /* Avoid INT32_MIN on 32 bit. */
1561 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1563 if (INTVAL (XEXP((op0), 1)) < 0)
1564 return CCANmode;
1565 else
1566 return CCAPmode;
1569 /* Fall through. */
1570 case LTGT:
1571 if (HONOR_NANS (op0) || HONOR_NANS (op1))
1572 return CCSFPSmode;
1574 /* Fall through. */
1575 case UNORDERED:
1576 case ORDERED:
1577 case UNEQ:
1578 case UNLE:
1579 case UNLT:
1580 case UNGE:
1581 case UNGT:
1582 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1583 && GET_CODE (op1) != CONST_INT)
1584 return CCSRmode;
1585 return CCSmode;
1587 case LTU:
1588 case GEU:
1589 if (GET_CODE (op0) == PLUS
1590 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1591 return CCL1mode;
1593 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1594 && GET_CODE (op1) != CONST_INT)
1595 return CCURmode;
1596 return CCUmode;
1598 case LEU:
1599 case GTU:
1600 if (GET_CODE (op0) == MINUS
1601 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1602 return CCL2mode;
1604 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1605 && GET_CODE (op1) != CONST_INT)
1606 return CCURmode;
1607 return CCUmode;
1609 default:
1610 gcc_unreachable ();
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615 that we can implement more efficiently. */
1617 static void
1618 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1619 bool op0_preserve_value)
1621 if (op0_preserve_value)
1622 return;
1624 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1625 if ((*code == EQ || *code == NE)
1626 && *op1 == const0_rtx
1627 && GET_CODE (*op0) == ZERO_EXTRACT
1628 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1629 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1630 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1632 rtx inner = XEXP (*op0, 0);
1633 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1634 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1635 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1637 if (len > 0 && len < modesize
1638 && pos >= 0 && pos + len <= modesize
1639 && modesize <= HOST_BITS_PER_WIDE_INT)
1641 unsigned HOST_WIDE_INT block;
1642 block = (HOST_WIDE_INT_1U << len) - 1;
1643 block <<= modesize - pos - len;
1645 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1646 gen_int_mode (block, GET_MODE (inner)));
1650 /* Narrow AND of memory against immediate to enable TM. */
1651 if ((*code == EQ || *code == NE)
1652 && *op1 == const0_rtx
1653 && GET_CODE (*op0) == AND
1654 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1655 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1657 rtx inner = XEXP (*op0, 0);
1658 rtx mask = XEXP (*op0, 1);
1660 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1661 if (GET_CODE (inner) == SUBREG
1662 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1663 && (GET_MODE_SIZE (GET_MODE (inner))
1664 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1665 && ((INTVAL (mask)
1666 & GET_MODE_MASK (GET_MODE (inner))
1667 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1668 == 0))
1669 inner = SUBREG_REG (inner);
1671 /* Do not change volatile MEMs. */
1672 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1674 int part = s390_single_part (XEXP (*op0, 1),
1675 GET_MODE (inner), QImode, 0);
1676 if (part >= 0)
1678 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1679 inner = adjust_address_nv (inner, QImode, part);
1680 *op0 = gen_rtx_AND (QImode, inner, mask);
1685 /* Narrow comparisons against 0xffff to HImode if possible. */
1686 if ((*code == EQ || *code == NE)
1687 && GET_CODE (*op1) == CONST_INT
1688 && INTVAL (*op1) == 0xffff
1689 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1690 && (nonzero_bits (*op0, GET_MODE (*op0))
1691 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1693 *op0 = gen_lowpart (HImode, *op0);
1694 *op1 = constm1_rtx;
1697 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1698 if (GET_CODE (*op0) == UNSPEC
1699 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1700 && XVECLEN (*op0, 0) == 1
1701 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1702 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1703 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1704 && *op1 == const0_rtx)
1706 enum rtx_code new_code = UNKNOWN;
1707 switch (*code)
1709 case EQ: new_code = EQ; break;
1710 case NE: new_code = NE; break;
1711 case LT: new_code = GTU; break;
1712 case GT: new_code = LTU; break;
1713 case LE: new_code = GEU; break;
1714 case GE: new_code = LEU; break;
1715 default: break;
1718 if (new_code != UNKNOWN)
1720 *op0 = XVECEXP (*op0, 0, 0);
1721 *code = new_code;
1725 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1726 if (GET_CODE (*op0) == UNSPEC
1727 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1728 && XVECLEN (*op0, 0) == 1
1729 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1730 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1731 && CONST_INT_P (*op1))
1733 enum rtx_code new_code = UNKNOWN;
1734 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1736 case E_CCZmode:
1737 case E_CCRAWmode:
1738 switch (*code)
1740 case EQ: new_code = EQ; break;
1741 case NE: new_code = NE; break;
1742 default: break;
1744 break;
1745 default: break;
1748 if (new_code != UNKNOWN)
1750 /* For CCRAWmode put the required cc mask into the second
1751 operand. */
1752 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1753 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1754 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1755 *op0 = XVECEXP (*op0, 0, 0);
1756 *code = new_code;
1760 /* Simplify cascaded EQ, NE with const0_rtx. */
1761 if ((*code == NE || *code == EQ)
1762 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1763 && GET_MODE (*op0) == SImode
1764 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1765 && REG_P (XEXP (*op0, 0))
1766 && XEXP (*op0, 1) == const0_rtx
1767 && *op1 == const0_rtx)
1769 if ((*code == EQ && GET_CODE (*op0) == NE)
1770 || (*code == NE && GET_CODE (*op0) == EQ))
1771 *code = EQ;
1772 else
1773 *code = NE;
1774 *op0 = XEXP (*op0, 0);
1777 /* Prefer register over memory as first operand. */
1778 if (MEM_P (*op0) && REG_P (*op1))
1780 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1781 *code = (int)swap_condition ((enum rtx_code)*code);
1784 /* A comparison result is compared against zero. Replace it with
1785 the (perhaps inverted) original comparison.
1786 This probably should be done by simplify_relational_operation. */
1787 if ((*code == EQ || *code == NE)
1788 && *op1 == const0_rtx
1789 && COMPARISON_P (*op0)
1790 && CC_REG_P (XEXP (*op0, 0)))
1792 enum rtx_code new_code;
1794 if (*code == EQ)
1795 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1796 XEXP (*op0, 0),
1797 XEXP (*op0, 1), NULL);
1798 else
1799 new_code = GET_CODE (*op0);
1801 if (new_code != UNKNOWN)
1803 *code = new_code;
1804 *op1 = XEXP (*op0, 1);
1805 *op0 = XEXP (*op0, 0);
1809 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1810 if (TARGET_Z15
1811 && (*code == EQ || *code == NE)
1812 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1813 && GET_CODE (*op0) == NOT)
1815 machine_mode mode = GET_MODE (*op0);
1816 *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1817 *op0 = gen_rtx_NOT (mode, *op0);
1818 *op1 = const0_rtx;
1821 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1822 if (TARGET_Z15
1823 && (*code == EQ || *code == NE)
1824 && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1825 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1826 && CONST_INT_P (*op1)
1827 && *op1 == constm1_rtx)
1829 machine_mode mode = GET_MODE (*op0);
1830 rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1831 rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1833 if (GET_CODE (*op0) == AND)
1834 *op0 = gen_rtx_IOR (mode, op00, op01);
1835 else
1836 *op0 = gen_rtx_AND (mode, op00, op01);
1838 *op1 = const0_rtx;
1843 /* Emit a compare instruction suitable to implement the comparison
1844 OP0 CODE OP1. Return the correct condition RTL to be placed in
1845 the IF_THEN_ELSE of the conditional branch testing the result. */
1848 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1850 machine_mode mode = s390_select_ccmode (code, op0, op1);
1851 rtx cc;
1853 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1855 /* Do not output a redundant compare instruction if a
1856 compare_and_swap pattern already computed the result and the
1857 machine modes are compatible. */
1858 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859 == GET_MODE (op0));
1860 cc = op0;
1862 else
1864 cc = gen_rtx_REG (mode, CC_REGNUM);
1865 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1868 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
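/* A typical use (sketch): the returned condition can be handed straight
   to s390_emit_jump below, e.g.
     s390_emit_jump (loop_label, s390_emit_compare (LT, idx, limit));
   emits the compare followed by a conditional branch to loop_label.
   (loop_label, idx and limit are made-up names for illustration.)  */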
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872 MEM, whose address is a pseudo containing the original MEM's address. */
1874 static rtx
1875 s390_legitimize_cs_operand (rtx mem)
1877 rtx tmp;
1879 if (!contains_symbol_ref_p (mem))
1880 return mem;
1881 tmp = gen_reg_rtx (Pmode);
1882 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883 return change_address (mem, VOIDmode, tmp);
1886 /* Emit a compare-and-swap instruction setting MEM to NEW_RTX if OLD
1887 matches CMP.
1888 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889 conditional branch testing the result. */
1891 static rtx
1892 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893 rtx cmp, rtx new_rtx, machine_mode ccmode)
1895 rtx cc;
1897 mem = s390_legitimize_cs_operand (mem);
1898 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899 switch (GET_MODE (mem))
1901 case E_SImode:
1902 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903 new_rtx, cc));
1904 break;
1905 case E_DImode:
1906 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907 new_rtx, cc));
1908 break;
1909 case E_TImode:
1910 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911 new_rtx, cc));
1912 break;
1913 case E_QImode:
1914 case E_HImode:
1915 default:
1916 gcc_unreachable ();
1918 return s390_emit_compare (code, cc, const0_rtx);
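/* The SImode/DImode/TImode patterns used above are expected to expand to
   the CS, CSG and CDSG compare-and-swap instructions respectively; the
   condition code they set (CC0: stored, CC1: mismatch) is what the
   returned comparison tests.  */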
1921 /* Emit a jump instruction to TARGET and return it. If COND is
1922 NULL_RTX, emit an unconditional jump, else a conditional jump under
1923 condition COND. */
1925 rtx_insn *
1926 s390_emit_jump (rtx target, rtx cond)
1928 rtx insn;
1930 target = gen_rtx_LABEL_REF (VOIDmode, target);
1931 if (cond)
1932 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1934 insn = gen_rtx_SET (pc_rtx, target);
1935 return emit_jump_insn (insn);
1938 /* Return branch condition mask to implement a branch
1939 specified by CODE. Return -1 for invalid comparisons. */
1942 s390_branch_condition_mask (rtx code)
1944 const int CC0 = 1 << 3;
1945 const int CC1 = 1 << 2;
1946 const int CC2 = 1 << 1;
1947 const int CC3 = 1 << 0;
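  /* These four values mirror the 4-bit condition-code mask field of the
     branch-on-condition instructions: the most significant mask bit
     selects CC 0 and the least significant one CC 3.  The cases below
     simply OR together the CCs for which the branch should be taken.  */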
1949 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951 gcc_assert (XEXP (code, 1) == const0_rtx
1952 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953 && CONST_INT_P (XEXP (code, 1))));
1956 switch (GET_MODE (XEXP (code, 0)))
1958 case E_CCZmode:
1959 case E_CCZ1mode:
1960 switch (GET_CODE (code))
1962 case EQ: return CC0;
1963 case NE: return CC1 | CC2 | CC3;
1964 default: return -1;
1966 break;
1968 case E_CCT1mode:
1969 switch (GET_CODE (code))
1971 case EQ: return CC1;
1972 case NE: return CC0 | CC2 | CC3;
1973 default: return -1;
1975 break;
1977 case E_CCT2mode:
1978 switch (GET_CODE (code))
1980 case EQ: return CC2;
1981 case NE: return CC0 | CC1 | CC3;
1982 default: return -1;
1984 break;
1986 case E_CCT3mode:
1987 switch (GET_CODE (code))
1989 case EQ: return CC3;
1990 case NE: return CC0 | CC1 | CC2;
1991 default: return -1;
1993 break;
1995 case E_CCLmode:
1996 switch (GET_CODE (code))
1998 case EQ: return CC0 | CC2;
1999 case NE: return CC1 | CC3;
2000 default: return -1;
2002 break;
2004 case E_CCL1mode:
2005 switch (GET_CODE (code))
2007 case LTU: return CC2 | CC3; /* carry */
2008 case GEU: return CC0 | CC1; /* no carry */
2009 default: return -1;
2011 break;
2013 case E_CCL2mode:
2014 switch (GET_CODE (code))
2016 case GTU: return CC0 | CC1; /* borrow */
2017 case LEU: return CC2 | CC3; /* no borrow */
2018 default: return -1;
2020 break;
2022 case E_CCL3mode:
2023 switch (GET_CODE (code))
2025 case EQ: return CC0 | CC2;
2026 case NE: return CC1 | CC3;
2027 case LTU: return CC1;
2028 case GTU: return CC3;
2029 case LEU: return CC1 | CC2;
2030 case GEU: return CC2 | CC3;
2031 default: return -1;
2034 case E_CCUmode:
2035 switch (GET_CODE (code))
2037 case EQ: return CC0;
2038 case NE: return CC1 | CC2 | CC3;
2039 case LTU: return CC1;
2040 case GTU: return CC2;
2041 case LEU: return CC0 | CC1;
2042 case GEU: return CC0 | CC2;
2043 default: return -1;
2045 break;
2047 case E_CCURmode:
2048 switch (GET_CODE (code))
2050 case EQ: return CC0;
2051 case NE: return CC2 | CC1 | CC3;
2052 case LTU: return CC2;
2053 case GTU: return CC1;
2054 case LEU: return CC0 | CC2;
2055 case GEU: return CC0 | CC1;
2056 default: return -1;
2058 break;
2060 case E_CCAPmode:
2061 switch (GET_CODE (code))
2063 case EQ: return CC0;
2064 case NE: return CC1 | CC2 | CC3;
2065 case LT: return CC1 | CC3;
2066 case GT: return CC2;
2067 case LE: return CC0 | CC1 | CC3;
2068 case GE: return CC0 | CC2;
2069 default: return -1;
2071 break;
2073 case E_CCANmode:
2074 switch (GET_CODE (code))
2076 case EQ: return CC0;
2077 case NE: return CC1 | CC2 | CC3;
2078 case LT: return CC1;
2079 case GT: return CC2 | CC3;
2080 case LE: return CC0 | CC1;
2081 case GE: return CC0 | CC2 | CC3;
2082 default: return -1;
2084 break;
2086 case E_CCOmode:
2087 switch (GET_CODE (code))
2089 case EQ: return CC0 | CC1 | CC2;
2090 case NE: return CC3;
2091 default: return -1;
2093 break;
2095 case E_CCSmode:
2096 case E_CCSFPSmode:
2097 switch (GET_CODE (code))
2099 case EQ: return CC0;
2100 case NE: return CC1 | CC2 | CC3;
2101 case LT: return CC1;
2102 case GT: return CC2;
2103 case LE: return CC0 | CC1;
2104 case GE: return CC0 | CC2;
2105 case UNORDERED: return CC3;
2106 case ORDERED: return CC0 | CC1 | CC2;
2107 case UNEQ: return CC0 | CC3;
2108 case UNLT: return CC1 | CC3;
2109 case UNGT: return CC2 | CC3;
2110 case UNLE: return CC0 | CC1 | CC3;
2111 case UNGE: return CC0 | CC2 | CC3;
2112 case LTGT: return CC1 | CC2;
2113 default: return -1;
2115 break;
2117 case E_CCSRmode:
2118 switch (GET_CODE (code))
2120 case EQ: return CC0;
2121 case NE: return CC2 | CC1 | CC3;
2122 case LT: return CC2;
2123 case GT: return CC1;
2124 case LE: return CC0 | CC2;
2125 case GE: return CC0 | CC1;
2126 case UNORDERED: return CC3;
2127 case ORDERED: return CC0 | CC2 | CC1;
2128 case UNEQ: return CC0 | CC3;
2129 case UNLT: return CC2 | CC3;
2130 case UNGT: return CC1 | CC3;
2131 case UNLE: return CC0 | CC2 | CC3;
2132 case UNGE: return CC0 | CC1 | CC3;
2133 case LTGT: return CC2 | CC1;
2134 default: return -1;
2136 break;
2138 /* Vector comparison modes. */
2139 /* CC2 will never be set. It is, however, part of the negated
2140 masks. */
2141 case E_CCVIALLmode:
2142 switch (GET_CODE (code))
2144 case EQ:
2145 case GTU:
2146 case GT:
2147 case GE: return CC0;
2148 /* The inverted modes are in fact *any* modes. */
2149 case NE:
2150 case LEU:
2151 case LE:
2152 case LT: return CC3 | CC1 | CC2;
2153 default: return -1;
2156 case E_CCVIANYmode:
2157 switch (GET_CODE (code))
2159 case EQ:
2160 case GTU:
2161 case GT:
2162 case GE: return CC0 | CC1;
2163 /* The inverted modes are in fact *all* modes. */
2164 case NE:
2165 case LEU:
2166 case LE:
2167 case LT: return CC3 | CC2;
2168 default: return -1;
2170 case E_CCVFALLmode:
2171 switch (GET_CODE (code))
2173 case EQ:
2174 case GT:
2175 case GE: return CC0;
2176 /* The inverted modes are in fact *any* modes. */
2177 case NE:
2178 case UNLE:
2179 case UNLT: return CC3 | CC1 | CC2;
2180 default: return -1;
2183 case E_CCVFANYmode:
2184 switch (GET_CODE (code))
2186 case EQ:
2187 case GT:
2188 case GE: return CC0 | CC1;
2189 /* The inverted modes are in fact *all* modes. */
2190 case NE:
2191 case UNLE:
2192 case UNLT: return CC3 | CC2;
2193 default: return -1;
2196 case E_CCRAWmode:
2197 switch (GET_CODE (code))
2199 case EQ:
2200 return INTVAL (XEXP (code, 1));
2201 case NE:
2202 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203 default:
2204 gcc_unreachable ();
2207 default:
2208 return -1;
2213 /* Return branch condition mask to implement a compare and branch
2214 specified by CODE. Return -1 for invalid comparisons. */
2217 s390_compare_and_branch_condition_mask (rtx code)
2219 const int CC0 = 1 << 3;
2220 const int CC1 = 1 << 2;
2221 const int CC2 = 1 << 1;
2223 switch (GET_CODE (code))
2225 case EQ:
2226 return CC0;
2227 case NE:
2228 return CC1 | CC2;
2229 case LT:
2230 case LTU:
2231 return CC1;
2232 case GT:
2233 case GTU:
2234 return CC2;
2235 case LE:
2236 case LEU:
2237 return CC0 | CC1;
2238 case GE:
2239 case GEU:
2240 return CC0 | CC2;
2241 default:
2242 gcc_unreachable ();
2244 return -1;
2247 /* If INV is false, return assembler mnemonic string to implement
2248 a branch specified by CODE. If INV is true, return mnemonic
2249 for the corresponding inverted branch. */
2251 static const char *
2252 s390_branch_condition_mnemonic (rtx code, int inv)
2254 int mask;
2256 static const char *const mnemonic[16] =
2258 NULL, "o", "h", "nle",
2259 "l", "nhe", "lh", "ne",
2260 "e", "nlh", "he", "nl",
2261 "le", "nh", "no", NULL
2264 if (GET_CODE (XEXP (code, 0)) == REG
2265 && REGNO (XEXP (code, 0)) == CC_REGNUM
2266 && (XEXP (code, 1) == const0_rtx
2267 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268 && CONST_INT_P (XEXP (code, 1)))))
2269 mask = s390_branch_condition_mask (code);
2270 else
2271 mask = s390_compare_and_branch_condition_mask (code);
2273 gcc_assert (mask >= 0);
2275 if (inv)
2276 mask ^= 15;
2278 gcc_assert (mask >= 1 && mask <= 14);
2280 return mnemonic[mask];
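/* For example, for (eq (reg:CCZ CC_REGNUM) (const_int 0)) the mask is
   CC0 = 8 and the table yields "e"; with INV set the mask becomes 7 and
   the mnemonic "ne".  */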
2283 /* Return the part of OP which has a value different from DEF.
2284 The size of the part is determined by MODE.
2285 Use this function only if you already know that OP really
2286 contains such a part. */
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op, machine_mode mode, int def)
2291 unsigned HOST_WIDE_INT value = 0;
2292 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293 int part_bits = GET_MODE_BITSIZE (mode);
2294 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295 int i;
2297 for (i = 0; i < max_parts; i++)
2299 if (i == 0)
2300 value = UINTVAL (op);
2301 else
2302 value >>= part_bits;
2304 if ((value & part_mask) != (def & part_mask))
2305 return value & part_mask;
2308 gcc_unreachable ();
2311 /* If OP is an integer constant of mode MODE with exactly one
2312 part of mode PART_MODE unequal to DEF, return the number of that
2313 part. Otherwise, return -1. */
2316 s390_single_part (rtx op,
2317 machine_mode mode,
2318 machine_mode part_mode,
2319 int def)
2321 unsigned HOST_WIDE_INT value = 0;
2322 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323 unsigned HOST_WIDE_INT part_mask
2324 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325 int i, part = -1;
2327 if (GET_CODE (op) != CONST_INT)
2328 return -1;
2330 for (i = 0; i < n_parts; i++)
2332 if (i == 0)
2333 value = UINTVAL (op);
2334 else
2335 value >>= GET_MODE_BITSIZE (part_mode);
2337 if ((value & part_mask) != (def & part_mask))
2339 if (part != -1)
2340 return -1;
2341 else
2342 part = i;
2345 return part == -1 ? -1 : n_parts - 1 - part;
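/* As an illustration of the two helpers above: for the DImode constant
   0x0000ffff00000000 with PART_MODE == HImode and DEF == 0,
   s390_extract_part returns 0xffff and s390_single_part returns 1, i.e.
   the second-most-significant halfword is the only part differing from
   DEF (parts are numbered starting from the most significant one).  */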
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349 bits and no other bits are set in (the lower SIZE bits of) IN.
2351 PSTART and PEND can be used to obtain the start and end
2352 position (inclusive) of the bitfield relative to 64
2353 bits. *PSTART / *PEND gives the position of the first/last bit
2354 of the bitfield counting from the highest order bit starting
2355 with zero. */
2357 bool
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359 int *pstart, int *pend)
2361 int start;
2362 int end = -1;
2363 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364 int highbit = HOST_BITS_PER_WIDE_INT - size;
2365 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2367 gcc_assert (!!pstart == !!pend);
2368 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369 if (end == -1)
2371 /* Look for the rightmost bit of a contiguous range of ones. */
2372 if (bitmask & in)
2373 /* Found it. */
2374 end = start;
2376 else
2378 /* Look for the first zero bit after the range of ones. */
2379 if (! (bitmask & in))
2380 /* Found it. */
2381 break;
2383 /* We're one past the last one-bit. */
2384 start++;
2386 if (end == -1)
2387 /* No one bits found. */
2388 return false;
2390 if (start > highbit)
2392 unsigned HOST_WIDE_INT mask;
2394 /* Calculate a mask for all bits beyond the contiguous bits. */
2395 mask = ((~HOST_WIDE_INT_0U >> highbit)
2396 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397 if (mask & in)
2398 /* There are more bits set beyond the first range of one bits. */
2399 return false;
2402 if (pstart)
2404 *pstart = start;
2405 *pend = end;
2408 return true;
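/* Example: IN == 0xff00 with SIZE == 64 is accepted and yields
   *PSTART == 48 and *PEND == 55 (bit 0 being the most significant bit,
   as described above).  */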
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412 if ~IN contains a contiguous bitfield. In that case, *END is <
2413 *START.
2415 If WRAP_P is true, a bitmask that wraps around is also tested.
2416 When a wraparound occurs *START is greater than *END (in
2417 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418 part of the range. If WRAP_P is false, no wraparound is
2419 tested. */
2421 bool
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423 int size, int *start, int *end)
2425 int bs = HOST_BITS_PER_WIDE_INT;
2426 bool b;
2428 gcc_assert (!!start == !!end);
2429 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430 /* This cannot be expressed as a contiguous bitmask. Exit early because
2431 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432 a valid bitmask. */
2433 return false;
2434 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435 if (b)
2436 return true;
2437 if (! wrap_p)
2438 return false;
2439 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440 if (b && start)
2442 int s = *start;
2443 int e = *end;
2445 gcc_assert (s >= 1);
2446 *start = ((e + 1) & (bs - 1));
2447 *end = ((s - 1 + bs) & (bs - 1));
2450 return b;
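/* Sketch of the wrap-around case: IN == 0x8000000000000001 with
   WRAP_P == true and SIZE == 64 is accepted with *START == 63 and
   *END == 0, i.e. the two set bits form a field that starts at the last
   bit position and wraps around to end at the first one.  */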
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454 its elements. START and END can be used to obtain the start and
2455 end position of the bitfield.
2457 START/END give the position of the first/last bit of the bitfield
2458 counting from the lowest order bit starting with zero. In order to
2459 use these values for S/390 instructions this has to be converted to
2460 "bits big endian" style. */
2462 bool
2463 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2465 unsigned HOST_WIDE_INT mask;
2466 int size;
2467 rtx elt;
2468 bool b;
2470 gcc_assert (!!start == !!end);
2471 if (!const_vec_duplicate_p (op, &elt)
2472 || !CONST_INT_P (elt))
2473 return false;
2475 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2477 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2478 if (size > 64)
2479 return false;
2481 mask = UINTVAL (elt);
2483 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2484 if (b)
2486 if (start)
2488 *start -= (HOST_BITS_PER_WIDE_INT - size);
2489 *end -= (HOST_BITS_PER_WIDE_INT - size);
2491 return true;
2493 else
2494 return false;
2497 /* Return true if OP consists only of byte chunks that are either 0 or
2498 0xff. If MASK is non-NULL a byte mask is generated which is
2499 appropriate for the vector generate byte mask instruction. */
2501 bool
2502 s390_bytemask_vector_p (rtx op, unsigned *mask)
2504 int i;
2505 unsigned tmp_mask = 0;
2506 int nunit, unit_size;
2508 if (!VECTOR_MODE_P (GET_MODE (op))
2509 || GET_CODE (op) != CONST_VECTOR
2510 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2511 return false;
2513 nunit = GET_MODE_NUNITS (GET_MODE (op));
2514 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2516 for (i = 0; i < nunit; i++)
2518 unsigned HOST_WIDE_INT c;
2519 int j;
2521 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2522 return false;
2524 c = UINTVAL (XVECEXP (op, 0, i));
2525 for (j = 0; j < unit_size; j++)
2527 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2528 return false;
2529 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2530 c = c >> BITS_PER_UNIT;
2534 if (mask != NULL)
2535 *mask = tmp_mask;
2537 return true;
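/* Example: a V4SImode constant with every element equal to 0x000000ff
   satisfies this predicate and produces the mask 0x1111 -- one mask bit
   per vector byte, most significant bit first, set exactly for the 0xff
   bytes.  This is the operand layout expected by the vector generate
   byte mask (vgbm) instruction.  */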
2540 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541 equivalent to a shift followed by the AND. In particular, CONTIG
2542 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2543 for ROTL indicate a rotate to the right. */
2545 bool
2546 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2548 int start, end;
2549 bool ok;
2551 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2552 gcc_assert (ok);
2554 if (rotl >= 0)
2555 return (64 - end >= rotl);
2556 else
2558 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2559 DImode. */
2560 rotl = -rotl + (64 - bitsize);
2561 return (start >= rotl);
2565 /* Check whether we can (and want to) split a double-word
2566 move in mode MODE from SRC to DST into two single-word
2567 moves, moving the subword FIRST_SUBWORD first. */
2569 bool
2570 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2572 /* Floating point and vector registers cannot be split. */
2573 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2574 return false;
2576 /* Non-offsettable memory references cannot be split. */
2577 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2578 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2579 return false;
2581 /* Moving the first subword must not clobber a register
2582 needed to move the second subword. */
2583 if (register_operand (dst, mode))
2585 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2586 if (reg_overlap_mentioned_p (subreg, src))
2587 return false;
2590 return true;
2593 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594 and [MEM2, MEM2 + SIZE] do overlap and false
2595 otherwise. */
2597 bool
2598 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2600 rtx addr1, addr2, addr_delta;
2601 HOST_WIDE_INT delta;
2603 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2604 return true;
2606 if (size == 0)
2607 return false;
2609 addr1 = XEXP (mem1, 0);
2610 addr2 = XEXP (mem2, 0);
2612 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2614 /* This overlapping check is used by peepholes merging memory block operations.
2615 Overlapping operations would otherwise be recognized by the S/390 hardware
2616 and would fall back to a slower implementation. Allowing overlapping
2617 operations would lead to slow code but not to wrong code. Therefore we are
2618 somewhat optimistic if we cannot prove that the memory blocks are
2619 overlapping.
2620 That's why we return false here although this may accept operations on
2621 overlapping memory areas. */
2622 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2623 return false;
2625 delta = INTVAL (addr_delta);
2627 if (delta == 0
2628 || (delta > 0 && delta < size)
2629 || (delta < 0 && -delta < size))
2630 return true;
2632 return false;
2635 /* Check whether the address of memory reference MEM2 equals exactly
2636 the address of memory reference MEM1 plus DELTA. Return true if
2637 we can prove this to be the case, false otherwise. */
2639 bool
2640 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2642 rtx addr1, addr2, addr_delta;
2644 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2645 return false;
2647 addr1 = XEXP (mem1, 0);
2648 addr2 = XEXP (mem2, 0);
2650 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2651 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2652 return false;
2654 return true;
2657 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2659 void
2660 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2661 rtx *operands)
2663 machine_mode wmode = mode;
2664 rtx dst = operands[0];
2665 rtx src1 = operands[1];
2666 rtx src2 = operands[2];
2667 rtx op, clob, tem;
2669 /* If we cannot handle the operation directly, use a temp register. */
2670 if (!s390_logical_operator_ok_p (operands))
2671 dst = gen_reg_rtx (mode);
2673 /* QImode and HImode patterns make sense only if we have a destination
2674 in memory. Otherwise perform the operation in SImode. */
2675 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2676 wmode = SImode;
2678 /* Widen operands if required. */
2679 if (mode != wmode)
2681 if (GET_CODE (dst) == SUBREG
2682 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2683 dst = tem;
2684 else if (REG_P (dst))
2685 dst = gen_rtx_SUBREG (wmode, dst, 0);
2686 else
2687 dst = gen_reg_rtx (wmode);
2689 if (GET_CODE (src1) == SUBREG
2690 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2691 src1 = tem;
2692 else if (GET_MODE (src1) != VOIDmode)
2693 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2695 if (GET_CODE (src2) == SUBREG
2696 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2697 src2 = tem;
2698 else if (GET_MODE (src2) != VOIDmode)
2699 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2702 /* Emit the instruction. */
2703 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2704 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2705 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2707 /* Fix up the destination if needed. */
2708 if (dst != operands[0])
2709 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2712 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2714 bool
2715 s390_logical_operator_ok_p (rtx *operands)
2717 /* If the destination operand is in memory, it needs to coincide
2718 with one of the source operands. After reload, it has to be
2719 the first source operand. */
2720 if (GET_CODE (operands[0]) == MEM)
2721 return rtx_equal_p (operands[0], operands[1])
2722 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2724 return true;
2727 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2728 operand IMMOP to switch from SS to SI type instructions. */
2730 void
2731 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2733 int def = code == AND ? -1 : 0;
2734 HOST_WIDE_INT mask;
2735 int part;
2737 gcc_assert (GET_CODE (*memop) == MEM);
2738 gcc_assert (!MEM_VOLATILE_P (*memop));
2740 mask = s390_extract_part (*immop, QImode, def);
2741 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2742 gcc_assert (part >= 0);
2744 *memop = adjust_address (*memop, QImode, part);
2745 *immop = gen_int_mode (mask, QImode);
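/* For instance, an SImode "mem &= 0xfffffeff" has only one byte that
   differs from the AND default of 0xff; the code above rewrites it as a
   QImode AND of the byte at offset 2 with 0xfe, which can then be
   emitted as a single NI instruction.  */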
2749 /* How to allocate a 'struct machine_function'. */
2751 static struct machine_function *
2752 s390_init_machine_status (void)
2754 return ggc_cleared_alloc<machine_function> ();
2757 /* Map for smallest class containing reg regno. */
2759 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2760 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2761 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2762 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2763 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2764 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2765 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2766 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2767 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2768 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2769 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2770 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2771 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2772 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2773 VEC_REGS, VEC_REGS /* 52 */
2776 /* Return attribute type of insn. */
2778 static enum attr_type
2779 s390_safe_attr_type (rtx_insn *insn)
2781 if (recog_memoized (insn) >= 0)
2782 return get_attr_type (insn);
2783 else
2784 return TYPE_NONE;
2787 /* Return attribute relative_long of insn. */
2789 static bool
2790 s390_safe_relative_long_p (rtx_insn *insn)
2792 if (recog_memoized (insn) >= 0)
2793 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2794 else
2795 return false;
2798 /* Return true if DISP is a valid short displacement. */
2800 static bool
2801 s390_short_displacement (rtx disp)
2803 /* No displacement is OK. */
2804 if (!disp)
2805 return true;
2807 /* Without the long displacement facility we don't need to
2808 distinguish between long and short displacement. */
2809 if (!TARGET_LONG_DISPLACEMENT)
2810 return true;
2812 /* Integer displacement in range. */
2813 if (GET_CODE (disp) == CONST_INT)
2814 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2816 /* GOT offset is not OK, the GOT can be large. */
2817 if (GET_CODE (disp) == CONST
2818 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2819 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2820 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2821 return false;
2823 /* All other symbolic constants are literal pool references,
2824 which are OK as the literal pool must be small. */
2825 if (GET_CODE (disp) == CONST)
2826 return true;
2828 return false;
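/* A "short" displacement is the 12-bit unsigned D field of the classic
   base+displacement instruction formats (0..4095, as checked above).
   With the long-displacement facility the *Y instruction forms accept a
   20-bit signed displacement instead, which is what DISP_IN_RANGE
   checks elsewhere.  */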
2831 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832 If successful, also determines the
2833 following characteristics of `ref': `is_ptr' - whether it can be an
2834 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2835 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2836 considered a literal pool pointer for purposes of avoiding two different
2837 literal pool pointers per insn during or after reload (`B' constraint). */
2838 static bool
2839 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2840 bool *is_base_ptr, bool *is_pool_ptr)
2842 if (!*ref)
2843 return true;
2845 if (GET_CODE (*ref) == UNSPEC)
2846 switch (XINT (*ref, 1))
2848 case UNSPEC_LTREF:
2849 if (!*disp)
2850 *disp = gen_rtx_UNSPEC (Pmode,
2851 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2852 UNSPEC_LTREL_OFFSET);
2853 else
2854 return false;
2856 *ref = XVECEXP (*ref, 0, 1);
2857 break;
2859 default:
2860 return false;
2863 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2864 return false;
2866 if (REGNO (*ref) == STACK_POINTER_REGNUM
2867 || REGNO (*ref) == FRAME_POINTER_REGNUM
2868 || ((reload_completed || reload_in_progress)
2869 && frame_pointer_needed
2870 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2871 || REGNO (*ref) == ARG_POINTER_REGNUM
2872 || (flag_pic
2873 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2874 *is_ptr = *is_base_ptr = true;
2876 if ((reload_completed || reload_in_progress)
2877 && *ref == cfun->machine->base_reg)
2878 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2880 return true;
2883 /* Decompose a RTL expression ADDR for a memory address into
2884 its components, returned in OUT.
2886 Returns false if ADDR is not a valid memory address, true
2887 otherwise. If OUT is NULL, don't return the components,
2888 but check for validity only.
2890 Note: Only addresses in canonical form are recognized.
2891 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892 canonical form so that they will be recognized. */
2894 static int
2895 s390_decompose_address (rtx addr, struct s390_address *out)
2897 HOST_WIDE_INT offset = 0;
2898 rtx base = NULL_RTX;
2899 rtx indx = NULL_RTX;
2900 rtx disp = NULL_RTX;
2901 rtx orig_disp;
2902 bool pointer = false;
2903 bool base_ptr = false;
2904 bool indx_ptr = false;
2905 bool literal_pool = false;
2907 /* We may need to substitute the literal pool base register into the address
2908 below. However, at this point we do not know which register is going to
2909 be used as base, so we substitute the arg pointer register. This is going
2910 to be treated as holding a pointer below -- it shouldn't be used for any
2911 other purpose. */
2912 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2914 /* Decompose address into base + index + displacement. */
2916 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2917 base = addr;
2919 else if (GET_CODE (addr) == PLUS)
2921 rtx op0 = XEXP (addr, 0);
2922 rtx op1 = XEXP (addr, 1);
2923 enum rtx_code code0 = GET_CODE (op0);
2924 enum rtx_code code1 = GET_CODE (op1);
2926 if (code0 == REG || code0 == UNSPEC)
2928 if (code1 == REG || code1 == UNSPEC)
2930 indx = op0; /* index + base */
2931 base = op1;
2934 else
2936 base = op0; /* base + displacement */
2937 disp = op1;
2941 else if (code0 == PLUS)
2943 indx = XEXP (op0, 0); /* index + base + disp */
2944 base = XEXP (op0, 1);
2945 disp = op1;
2948 else
2950 return false;
2954 else
2955 disp = addr; /* displacement */
2957 /* Extract integer part of displacement. */
2958 orig_disp = disp;
2959 if (disp)
2961 if (GET_CODE (disp) == CONST_INT)
2963 offset = INTVAL (disp);
2964 disp = NULL_RTX;
2966 else if (GET_CODE (disp) == CONST
2967 && GET_CODE (XEXP (disp, 0)) == PLUS
2968 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2970 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2971 disp = XEXP (XEXP (disp, 0), 0);
2975 /* Strip off CONST here to avoid special case tests later. */
2976 if (disp && GET_CODE (disp) == CONST)
2977 disp = XEXP (disp, 0);
2979 /* We can convert literal pool addresses to
2980 displacements by basing them off the base register. */
2981 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2983 if (base || indx)
2984 return false;
2986 base = fake_pool_base, literal_pool = true;
2988 /* Mark up the displacement. */
2989 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2990 UNSPEC_LTREL_OFFSET);
2993 /* Validate base register. */
2994 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2995 &literal_pool))
2996 return false;
2998 /* Validate index register. */
2999 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3000 &literal_pool))
3001 return false;
3003 /* Prefer to use pointer as base, not index. */
3004 if (base && indx && !base_ptr
3005 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3007 rtx tmp = base;
3008 base = indx;
3009 indx = tmp;
3012 /* Validate displacement. */
3013 if (!disp)
3015 /* If virtual registers are involved, the displacement will change later
3016 anyway as the virtual registers get eliminated. This could make a
3017 valid displacement invalid, but it is more likely to make an invalid
3018 displacement valid, because we sometimes access the register save area
3019 via negative offsets to one of those registers.
3020 Thus we don't check the displacement for validity here. If after
3021 elimination the displacement turns out to be invalid after all,
3022 this is fixed up by reload in any case. */
3023 /* LRA always keeps displacements up to date, and we need to know
3024 that the displacement is right during all of LRA, not only at the
3025 final elimination. */
3026 if (lra_in_progress
3027 || (base != arg_pointer_rtx
3028 && indx != arg_pointer_rtx
3029 && base != return_address_pointer_rtx
3030 && indx != return_address_pointer_rtx
3031 && base != frame_pointer_rtx
3032 && indx != frame_pointer_rtx
3033 && base != virtual_stack_vars_rtx
3034 && indx != virtual_stack_vars_rtx))
3035 if (!DISP_IN_RANGE (offset))
3036 return false;
3038 else
3040 /* All the special cases are pointers. */
3041 pointer = true;
3043 /* In the small-PIC case, the linker converts @GOT
3044 and @GOTNTPOFF offsets to possible displacements. */
3045 if (GET_CODE (disp) == UNSPEC
3046 && (XINT (disp, 1) == UNSPEC_GOT
3047 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3048 && flag_pic == 1)
3053 /* Accept pool label offsets. */
3054 else if (GET_CODE (disp) == UNSPEC
3055 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3058 /* Accept literal pool references. */
3059 else if (GET_CODE (disp) == UNSPEC
3060 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3062 /* In case CSE pulled a non literal pool reference out of
3063 the pool we have to reject the address. This is
3064 especially important when loading the GOT pointer on non
3065 zarch CPUs. In this case the literal pool contains an lt
3066 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067 will most likely exceed the displacement. */
3068 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3069 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3070 return false;
3072 orig_disp = gen_rtx_CONST (Pmode, disp);
3073 if (offset)
3075 /* If we have an offset, make sure it does not
3076 exceed the size of the constant pool entry.
3077 Otherwise we might generate an out-of-range
3078 displacement for the base register form. */
3079 rtx sym = XVECEXP (disp, 0, 0);
3080 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3081 return false;
3083 orig_disp = plus_constant (Pmode, orig_disp, offset);
3087 else
3088 return false;
3091 if (!base && !indx)
3092 pointer = true;
3094 if (out)
3096 out->base = base;
3097 out->indx = indx;
3098 out->disp = orig_disp;
3099 out->pointer = pointer;
3100 out->literal_pool = literal_pool;
3103 return true;
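/* Two examples of the decomposition performed above, assuming %r1 and
   %r2 are plain registers:
     (plus (reg %r2) (const_int 4000))
       -> base %r2, no index, offset 4000;
     (plus (plus (reg %r1) (reg %r2)) (const_int 8))
       -> index %r1, base %r2, offset 8 (possibly swapped afterwards if
          only one of the two is known to be a pointer).  */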
3106 /* Decompose a RTL expression OP for an address style operand into its
3107 components, and return the base register in BASE and the offset in
3108 OFFSET. While OP looks like an address it is never supposed to be
3109 used as such.
3111 Return true if OP is a valid address operand, false if not. */
3113 bool
3114 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3115 HOST_WIDE_INT *offset)
3117 rtx off = NULL_RTX;
3119 /* We can have an integer constant, an address register,
3120 or a sum of the two. */
3121 if (CONST_SCALAR_INT_P (op))
3123 off = op;
3124 op = NULL_RTX;
3126 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3128 off = XEXP (op, 1);
3129 op = XEXP (op, 0);
3131 while (op && GET_CODE (op) == SUBREG)
3132 op = SUBREG_REG (op);
3134 if (op && GET_CODE (op) != REG)
3135 return false;
3137 if (offset)
3139 if (off == NULL_RTX)
3140 *offset = 0;
3141 else if (CONST_INT_P (off))
3142 *offset = INTVAL (off);
3143 else if (CONST_WIDE_INT_P (off))
3144 /* The offset will be cut down to 12 bits anyway, so take just
3145 the lowest-order chunk of the wide int. */
3146 *offset = CONST_WIDE_INT_ELT (off, 0);
3147 else
3148 gcc_unreachable ();
3150 if (base)
3151 *base = op;
3153 return true;
3156 /* Check that OP is a valid shift count operand.
3157 It should be of the following structure:
3158 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159 where subreg, and and plus are optional.
3161 If IMPLICIT_MASK is > 0 and OP contains and
3162 (AND ... immediate)
3163 it is checked whether IMPLICIT_MASK and the immediate match.
3164 Otherwise, no checking is performed. */
3166 bool
3167 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3169 /* Strip subreg. */
3170 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3171 op = XEXP (op, 0);
3173 /* Check for an and with proper constant. */
3174 if (GET_CODE (op) == AND)
3176 rtx op1 = XEXP (op, 0);
3177 rtx imm = XEXP (op, 1);
3179 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3180 op1 = XEXP (op1, 0);
3182 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3183 return false;
3185 if (!immediate_operand (imm, GET_MODE (imm)))
3186 return false;
3188 HOST_WIDE_INT val = INTVAL (imm);
3189 if (implicit_mask > 0
3190 && (val & implicit_mask) != implicit_mask)
3191 return false;
3193 op = op1;
3196 /* Check the rest. */
3197 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3200 /* Return true if the address of memory operand OP is valid and has no index. */
3202 bool
3203 s390_legitimate_address_without_index_p (rtx op)
3205 struct s390_address addr;
3207 if (!s390_decompose_address (XEXP (op, 0), &addr))
3208 return false;
3209 if (addr.indx)
3210 return false;
3212 return true;
3216 /* Return TRUE if ADDR is an operand valid for a load/store relative
3217 instruction. Be aware that the alignment of the operand needs to
3218 be checked separately.
3219 Valid addresses are single references or a sum of a reference and a
3220 constant integer. Return these parts in SYMREF and ADDEND. You can
3221 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3222 values. */
3224 static bool
3225 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3227 HOST_WIDE_INT tmpaddend = 0;
3229 if (GET_CODE (addr) == CONST)
3230 addr = XEXP (addr, 0);
3232 if (GET_CODE (addr) == PLUS)
3234 if (!CONST_INT_P (XEXP (addr, 1)))
3235 return false;
3237 tmpaddend = INTVAL (XEXP (addr, 1));
3238 addr = XEXP (addr, 0);
3241 if (GET_CODE (addr) == SYMBOL_REF
3242 || (GET_CODE (addr) == UNSPEC
3243 && (XINT (addr, 1) == UNSPEC_GOTENT
3244 || XINT (addr, 1) == UNSPEC_PLT)))
3246 if (symref)
3247 *symref = addr;
3248 if (addend)
3249 *addend = tmpaddend;
3251 return true;
3253 return false;
3256 /* Return true if the address in OP is valid for constraint letter C
3257 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3258 pool MEMs should be accepted. Only the Q, R, S, T constraint
3259 letters are allowed for C. */
3261 static int
3262 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3264 rtx symref;
3265 struct s390_address addr;
3266 bool decomposed = false;
3268 if (!address_operand (op, GET_MODE (op)))
3269 return 0;
3271 /* This check makes sure that no symbolic address (except literal
3272 pool references) are accepted by the R or T constraints. */
3273 if (s390_loadrelative_operand_p (op, &symref, NULL)
3274 && (!lit_pool_ok
3275 || !SYMBOL_REF_P (symref)
3276 || !CONSTANT_POOL_ADDRESS_P (symref)))
3277 return 0;
3279 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3280 if (!lit_pool_ok)
3282 if (!s390_decompose_address (op, &addr))
3283 return 0;
3284 if (addr.literal_pool)
3285 return 0;
3286 decomposed = true;
3289 /* With reload, we sometimes get intermediate address forms that are
3290 actually invalid as-is, but we need to accept them in the most
3291 generic cases below ('R' or 'T'), since reload will in fact fix
3292 them up. LRA behaves differently here; we never see such forms,
3293 but on the other hand, we need to strictly reject every invalid
3294 address form. After both reload and LRA invalid address forms
3295 must be rejected, because nothing will fix them up later. Perform
3296 this check right up front. */
3297 if (lra_in_progress || reload_completed)
3299 if (!decomposed && !s390_decompose_address (op, &addr))
3300 return 0;
3301 decomposed = true;
3304 switch (c)
3306 case 'Q': /* no index short displacement */
3307 if (!decomposed && !s390_decompose_address (op, &addr))
3308 return 0;
3309 if (addr.indx)
3310 return 0;
3311 if (!s390_short_displacement (addr.disp))
3312 return 0;
3313 break;
3315 case 'R': /* with index short displacement */
3316 if (TARGET_LONG_DISPLACEMENT)
3318 if (!decomposed && !s390_decompose_address (op, &addr))
3319 return 0;
3320 if (!s390_short_displacement (addr.disp))
3321 return 0;
3323 /* Any invalid address here will be fixed up by reload,
3324 so accept it for the most generic constraint. */
3325 break;
3327 case 'S': /* no index long displacement */
3328 if (!decomposed && !s390_decompose_address (op, &addr))
3329 return 0;
3330 if (addr.indx)
3331 return 0;
3332 break;
3334 case 'T': /* with index long displacement */
3335 /* Any invalid address here will be fixed up by reload,
3336 so accept it for the most generic constraint. */
3337 break;
3339 default:
3340 return 0;
3342 return 1;
3346 /* Evaluates constraint strings described by the regular expression
3347 ((A|B|Z)(Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348 the constraint given in STR, and 0 otherwise. */
3351 s390_mem_constraint (const char *str, rtx op)
3353 char c = str[0];
3355 switch (c)
3357 case 'A':
3358 /* Check for offsettable variants of memory constraints. */
3359 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3360 return 0;
3361 if ((reload_completed || reload_in_progress)
3362 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3363 return 0;
3364 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3365 case 'B':
3366 /* Check for non-literal-pool variants of memory constraints. */
3367 if (!MEM_P (op))
3368 return 0;
3369 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3370 case 'Q':
3371 case 'R':
3372 case 'S':
3373 case 'T':
3374 if (GET_CODE (op) != MEM)
3375 return 0;
3376 return s390_check_qrst_address (c, XEXP (op, 0), true);
3377 case 'Y':
3378 /* Simply check for the basic form of a shift count. Reload will
3379 take care of making sure we have a proper base register. */
3380 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3381 return 0;
3382 break;
3383 case 'Z':
3384 return s390_check_qrst_address (str[1], op, true);
3385 default:
3386 return 0;
3388 return 1;
3392 /* Evaluates constraint strings starting with letter O. Input
3393 parameter C is the letter following the "O" in the constraint
3394 string. Returns 1 if VALUE meets the respective constraint and 0
3395 otherwise. */
3398 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3400 if (!TARGET_EXTIMM)
3401 return 0;
3403 switch (c)
3405 case 's':
3406 return trunc_int_for_mode (value, SImode) == value;
3408 case 'p':
3409 return value == 0
3410 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3412 case 'n':
3413 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3415 default:
3416 gcc_unreachable ();
3421 /* Evaluates constraint strings starting with letter N. Parameter STR
3422 contains the letters following letter "N" in the constraint string.
3423 Returns true if VALUE matches the constraint. */
3426 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3428 machine_mode mode, part_mode;
3429 int def;
3430 int part, part_goal;
3433 if (str[0] == 'x')
3434 part_goal = -1;
3435 else
3436 part_goal = str[0] - '0';
3438 switch (str[1])
3440 case 'Q':
3441 part_mode = QImode;
3442 break;
3443 case 'H':
3444 part_mode = HImode;
3445 break;
3446 case 'S':
3447 part_mode = SImode;
3448 break;
3449 default:
3450 return 0;
3453 switch (str[2])
3455 case 'H':
3456 mode = HImode;
3457 break;
3458 case 'S':
3459 mode = SImode;
3460 break;
3461 case 'D':
3462 mode = DImode;
3463 break;
3464 default:
3465 return 0;
3468 switch (str[3])
3470 case '0':
3471 def = 0;
3472 break;
3473 case 'F':
3474 def = -1;
3475 break;
3476 default:
3477 return 0;
3480 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3481 return 0;
3483 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3484 if (part < 0)
3485 return 0;
3486 if (part_goal != -1 && part_goal != part)
3487 return 0;
3489 return 1;
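/* Example: the constraint "NxHD0" (any part, HImode parts, DImode value,
   default 0) accepts every DImode constant with exactly one nonzero
   halfword -- i.e. the values loadable with a single lliXX
   (llihh/llihl/llilh/llill) instruction.  */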
3493 /* Returns true if the input parameter VALUE is a float zero. */
3496 s390_float_const_zero_p (rtx value)
3498 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3499 && value == CONST0_RTX (GET_MODE (value)));
3502 /* Implement TARGET_REGISTER_MOVE_COST. */
3504 static int
3505 s390_register_move_cost (machine_mode mode,
3506 reg_class_t from, reg_class_t to)
3508 /* On s390, copy between fprs and gprs is expensive. */
3510 /* It becomes somewhat faster when ldgr/lgdr are available. */
3511 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3513 /* ldgr is single cycle. */
3514 if (reg_classes_intersect_p (from, GENERAL_REGS)
3515 && reg_classes_intersect_p (to, FP_REGS))
3516 return 1;
3517 /* lgdr needs 3 cycles. */
3518 if (reg_classes_intersect_p (to, GENERAL_REGS)
3519 && reg_classes_intersect_p (from, FP_REGS))
3520 return 3;
3523 /* Otherwise copying is done via memory. */
3524 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3525 && reg_classes_intersect_p (to, FP_REGS))
3526 || (reg_classes_intersect_p (from, FP_REGS)
3527 && reg_classes_intersect_p (to, GENERAL_REGS)))
3528 return 10;
3530 /* We usually do not want to copy via CC. */
3531 if (reg_classes_intersect_p (from, CC_REGS)
3532 || reg_classes_intersect_p (to, CC_REGS))
3533 return 5;
3535 return 1;
3538 /* Implement TARGET_MEMORY_MOVE_COST. */
3540 static int
3541 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3542 reg_class_t rclass ATTRIBUTE_UNUSED,
3543 bool in ATTRIBUTE_UNUSED)
3545 return 2;
3548 /* Compute a (partial) cost for rtx X. Return true if the complete
3549 cost has been computed, and false if subexpressions should be
3550 scanned. In either case, *TOTAL contains the cost result. The
3551 initial value of *TOTAL is the default value computed by
3552 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3553 code of the superexpression of x. */
3555 static bool
3556 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3557 int opno ATTRIBUTE_UNUSED,
3558 int *total, bool speed ATTRIBUTE_UNUSED)
3560 int code = GET_CODE (x);
3561 switch (code)
3563 case CONST:
3564 case CONST_INT:
3565 case LABEL_REF:
3566 case SYMBOL_REF:
3567 case CONST_DOUBLE:
3568 case CONST_WIDE_INT:
3569 case MEM:
3570 *total = 0;
3571 return true;
3573 case SET:
3575 /* Without this a conditional move instruction would be
3576 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577 comparison operator). That's a bit pessimistic. */
3579 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3580 return false;
3582 rtx cond = XEXP (SET_SRC (x), 0);
3584 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3585 return false;
3587 /* It is going to be a load/store on condition. Make it
3588 slightly more expensive than a normal load. */
3589 *total = COSTS_N_INSNS (1) + 1;
3591 rtx dst = SET_DEST (x);
3592 rtx then = XEXP (SET_SRC (x), 1);
3593 rtx els = XEXP (SET_SRC (x), 2);
3595 /* It is a real IF-THEN-ELSE. An additional move will be
3596 needed to implement that. */
3597 if (!TARGET_Z15
3598 && reload_completed
3599 && !rtx_equal_p (dst, then)
3600 && !rtx_equal_p (dst, els))
3601 *total += COSTS_N_INSNS (1) / 2;
3603 /* A minor penalty for constants we cannot directly handle. */
3604 if ((CONST_INT_P (then) || CONST_INT_P (els))
3605 && (!TARGET_Z13 || MEM_P (dst)
3606 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3607 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3608 *total += COSTS_N_INSNS (1) / 2;
3610 /* A store on condition can only handle register src operands. */
3611 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3612 *total += COSTS_N_INSNS (1) / 2;
3614 return true;
3616 case IOR:
3618 /* nnrk, nngrk */
3619 if (TARGET_Z15
3620 && (mode == SImode || mode == DImode)
3621 && GET_CODE (XEXP (x, 0)) == NOT
3622 && GET_CODE (XEXP (x, 1)) == NOT)
3624 *total = COSTS_N_INSNS (1);
3625 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3626 *total += 1;
3627 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3628 *total += 1;
3629 return true;
3632 /* risbg */
3633 if (GET_CODE (XEXP (x, 0)) == AND
3634 && GET_CODE (XEXP (x, 1)) == ASHIFT
3635 && REG_P (XEXP (XEXP (x, 0), 0))
3636 && REG_P (XEXP (XEXP (x, 1), 0))
3637 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3638 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3639 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3640 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3642 *total = COSTS_N_INSNS (2);
3643 return true;
3646 /* ~AND on a 128 bit mode. This can be done using a vector
3647 instruction. */
3648 if (TARGET_VXE
3649 && GET_CODE (XEXP (x, 0)) == NOT
3650 && GET_CODE (XEXP (x, 1)) == NOT
3651 && REG_P (XEXP (XEXP (x, 0), 0))
3652 && REG_P (XEXP (XEXP (x, 1), 0))
3653 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3654 && s390_hard_regno_mode_ok (VR0_REGNUM,
3655 GET_MODE (XEXP (XEXP (x, 0), 0))))
3657 *total = COSTS_N_INSNS (1);
3658 return true;
3661 *total = COSTS_N_INSNS (1);
3662 return false;
3664 case AND:
3665 /* nork, nogrk */
3666 if (TARGET_Z15
3667 && (mode == SImode || mode == DImode)
3668 && GET_CODE (XEXP (x, 0)) == NOT
3669 && GET_CODE (XEXP (x, 1)) == NOT)
3671 *total = COSTS_N_INSNS (1);
3672 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3673 *total += 1;
3674 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3675 *total += 1;
3676 return true;
3678 /* fallthrough */
3679 case ASHIFT:
3680 case ASHIFTRT:
3681 case LSHIFTRT:
3682 case ROTATE:
3683 case ROTATERT:
3684 case XOR:
3685 case NEG:
3686 case NOT:
3687 case PLUS:
3688 case MINUS:
3689 *total = COSTS_N_INSNS (1);
3690 return false;
3692 case MULT:
3693 switch (mode)
3695 case E_SImode:
3697 rtx left = XEXP (x, 0);
3698 rtx right = XEXP (x, 1);
3699 if (GET_CODE (right) == CONST_INT
3700 && CONST_OK_FOR_K (INTVAL (right)))
3701 *total = s390_cost->mhi;
3702 else if (GET_CODE (left) == SIGN_EXTEND)
3703 *total = s390_cost->mh;
3704 else
3705 *total = s390_cost->ms; /* msr, ms, msy */
3706 break;
3708 case E_DImode:
3710 rtx left = XEXP (x, 0);
3711 rtx right = XEXP (x, 1);
3712 if (TARGET_ZARCH)
3714 if (GET_CODE (right) == CONST_INT
3715 && CONST_OK_FOR_K (INTVAL (right)))
3716 *total = s390_cost->mghi;
3717 else if (GET_CODE (left) == SIGN_EXTEND)
3718 *total = s390_cost->msgf;
3719 else
3720 *total = s390_cost->msg; /* msgr, msg */
3722 else /* TARGET_31BIT */
3724 if (GET_CODE (left) == SIGN_EXTEND
3725 && GET_CODE (right) == SIGN_EXTEND)
3726 /* mulsidi case: mr, m */
3727 *total = s390_cost->m;
3728 else if (GET_CODE (left) == ZERO_EXTEND
3729 && GET_CODE (right) == ZERO_EXTEND)
3730 /* umulsidi case: ml, mlr */
3731 *total = s390_cost->ml;
3732 else
3733 /* Complex calculation is required. */
3734 *total = COSTS_N_INSNS (40);
3736 break;
3738 case E_SFmode:
3739 case E_DFmode:
3740 *total = s390_cost->mult_df;
3741 break;
3742 case E_TFmode:
3743 *total = s390_cost->mxbr;
3744 break;
3745 default:
3746 return false;
3748 return false;
3750 case FMA:
3751 switch (mode)
3753 case E_DFmode:
3754 *total = s390_cost->madbr;
3755 break;
3756 case E_SFmode:
3757 *total = s390_cost->maebr;
3758 break;
3759 default:
3760 return false;
3762 /* A negation of the third argument is free: FMSUB. */
3763 if (GET_CODE (XEXP (x, 2)) == NEG)
3765 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3766 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3767 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3768 return true;
3770 return false;
3772 case UDIV:
3773 case UMOD:
3774 if (mode == TImode) /* 128 bit division */
3775 *total = s390_cost->dlgr;
3776 else if (mode == DImode)
3778 rtx right = XEXP (x, 1);
3779 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3780 *total = s390_cost->dlr;
3781 else /* 64 by 64 bit division */
3782 *total = s390_cost->dlgr;
3784 else if (mode == SImode) /* 32 bit division */
3785 *total = s390_cost->dlr;
3786 return false;
3788 case DIV:
3789 case MOD:
3790 if (mode == DImode)
3792 rtx right = XEXP (x, 1);
3793 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3794 if (TARGET_ZARCH)
3795 *total = s390_cost->dsgfr;
3796 else
3797 *total = s390_cost->dr;
3798 else /* 64 by 64 bit division */
3799 *total = s390_cost->dsgr;
3801 else if (mode == SImode) /* 32 bit division */
3802 *total = s390_cost->dlr;
3803 else if (mode == SFmode)
3805 *total = s390_cost->debr;
3807 else if (mode == DFmode)
3809 *total = s390_cost->ddbr;
3811 else if (mode == TFmode)
3813 *total = s390_cost->dxbr;
3815 return false;
3817 case SQRT:
3818 if (mode == SFmode)
3819 *total = s390_cost->sqebr;
3820 else if (mode == DFmode)
3821 *total = s390_cost->sqdbr;
3822 else /* TFmode */
3823 *total = s390_cost->sqxbr;
3824 return false;
3826 case SIGN_EXTEND:
3827 case ZERO_EXTEND:
3828 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3829 || outer_code == PLUS || outer_code == MINUS
3830 || outer_code == COMPARE)
3831 *total = 0;
3832 return false;
3834 case COMPARE:
3835 *total = COSTS_N_INSNS (1);
3837 /* nxrk, nxgrk ~(a^b)==0 */
3838 if (TARGET_Z15
3839 && GET_CODE (XEXP (x, 0)) == NOT
3840 && XEXP (x, 1) == const0_rtx
3841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3842 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3843 && mode == CCZmode)
3845 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3846 *total += 1;
3847 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3848 *total += 1;
3849 return true;
3852 /* nnrk, nngrk, nork, nogrk */
3853 if (TARGET_Z15
3854 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3855 && XEXP (x, 1) == const0_rtx
3856 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3858 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3859 && mode == CCZmode)
3861 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3862 *total += 1;
3863 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3864 *total += 1;
3865 return true;
3868 if (GET_CODE (XEXP (x, 0)) == AND
3869 && GET_CODE (XEXP (x, 1)) == CONST_INT
3870 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3872 rtx op0 = XEXP (XEXP (x, 0), 0);
3873 rtx op1 = XEXP (XEXP (x, 0), 1);
3874 rtx op2 = XEXP (x, 1);
3876 if (memory_operand (op0, GET_MODE (op0))
3877 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3878 return true;
3879 if (register_operand (op0, GET_MODE (op0))
3880 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3881 return true;
3883 return false;
3885 default:
3886 return false;
3890 /* Return the cost of an address rtx ADDR. */
3892 static int
3893 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3894 addr_space_t as ATTRIBUTE_UNUSED,
3895 bool speed ATTRIBUTE_UNUSED)
3897 struct s390_address ad;
3898 if (!s390_decompose_address (addr, &ad))
3899 return 1000;
3901 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3904 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3905 static int
3906 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3907 tree vectype,
3908 int misalign ATTRIBUTE_UNUSED)
3910 switch (type_of_cost)
3912 case scalar_stmt:
3913 case scalar_load:
3914 case scalar_store:
3915 case vector_stmt:
3916 case vector_load:
3917 case vector_store:
3918 case vector_gather_load:
3919 case vector_scatter_store:
3920 case vec_to_scalar:
3921 case scalar_to_vec:
3922 case cond_branch_not_taken:
3923 case vec_perm:
3924 case vec_promote_demote:
3925 case unaligned_load:
3926 case unaligned_store:
3927 return 1;
3929 case cond_branch_taken:
3930 return 3;
3932 case vec_construct:
3933 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3935 default:
3936 gcc_unreachable ();
3940 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941 otherwise return 0. */
3944 tls_symbolic_operand (rtx op)
3946 if (GET_CODE (op) != SYMBOL_REF)
3947 return 0;
3948 return SYMBOL_REF_TLS_MODEL (op);
3951 /* Split DImode access register reference REG (on 64-bit) into its constituent
3952 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3953 gen_highpart cannot be used as they assume all registers are word-sized,
3954 while our access registers have only half that size. */
3956 void
3957 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3959 gcc_assert (TARGET_64BIT);
3960 gcc_assert (ACCESS_REG_P (reg));
3961 gcc_assert (GET_MODE (reg) == DImode);
3962 gcc_assert (!(REGNO (reg) & 1));
3964 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3965 *hi = gen_rtx_REG (SImode, REGNO (reg));
3968 /* Return true if OP contains a symbol reference */
3970 bool
3971 symbolic_reference_mentioned_p (rtx op)
3973 const char *fmt;
3974 int i;
3976 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3977 return 1;
3979 fmt = GET_RTX_FORMAT (GET_CODE (op));
3980 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3982 if (fmt[i] == 'E')
3984 int j;
3986 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3988 return 1;
3991 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3992 return 1;
3995 return 0;
3998 /* Return true if OP contains a reference to a thread-local symbol. */
4000 bool
4001 tls_symbolic_reference_mentioned_p (rtx op)
4003 const char *fmt;
4004 int i;
4006 if (GET_CODE (op) == SYMBOL_REF)
4007 return tls_symbolic_operand (op);
4009 fmt = GET_RTX_FORMAT (GET_CODE (op));
4010 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4012 if (fmt[i] == 'E')
4014 int j;
4016 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4017 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4018 return true;
4021 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4022 return true;
4025 return false;
4029 /* Return true if OP is a legitimate general operand when
4030 generating PIC code. It is given that flag_pic is on
4031 and that OP satisfies CONSTANT_P. */
4034 legitimate_pic_operand_p (rtx op)
4036 /* Accept all non-symbolic constants. */
4037 if (!SYMBOLIC_CONST (op))
4038 return 1;
4040 /* Accept addresses that can be expressed relative to (pc). */
4041 if (larl_operand (op, VOIDmode))
4042 return 1;
4044 /* Reject everything else; must be handled
4045 via emit_symbolic_move. */
4046 return 0;
4049 /* Returns true if the constant value OP is a legitimate general operand.
4050 It is given that OP satisfies CONSTANT_P. */
4052 static bool
4053 s390_legitimate_constant_p (machine_mode mode, rtx op)
4055 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4057 if (GET_MODE_SIZE (mode) != 16)
4058 return 0;
4060 if (!satisfies_constraint_j00 (op)
4061 && !satisfies_constraint_jm1 (op)
4062 && !satisfies_constraint_jKK (op)
4063 && !satisfies_constraint_jxx (op)
4064 && !satisfies_constraint_jyy (op))
4065 return 0;
4068 /* Accept all non-symbolic constants. */
4069 if (!SYMBOLIC_CONST (op))
4070 return 1;
4072 /* Accept immediate LARL operands. */
4073 if (larl_operand (op, mode))
4074 return 1;
4076 /* Thread-local symbols are never legal constants. This is
4077 so that emit_call knows that computing such addresses
4078 might require a function call. */
4079 if (TLS_SYMBOLIC_CONST (op))
4080 return 0;
4082 /* In the PIC case, symbolic constants must *not* be
4083 forced into the literal pool. We accept them here,
4084 so that they will be handled by emit_symbolic_move. */
4085 if (flag_pic)
4086 return 1;
4088 /* All remaining non-PIC symbolic constants are
4089 forced into the literal pool. */
4090 return 0;
4093 /* Determine if it's legal to put X into the constant pool. This
4094 is not possible if X contains the address of a symbol that is
4095 not constant (TLS) or not known at final link time (PIC). */
4097 static bool
4098 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4100 switch (GET_CODE (x))
4102 case CONST_INT:
4103 case CONST_DOUBLE:
4104 case CONST_WIDE_INT:
4105 case CONST_VECTOR:
4106 /* Accept all non-symbolic constants. */
4107 return false;
4109 case LABEL_REF:
4110 /* Labels are OK iff we are non-PIC. */
4111 return flag_pic != 0;
4113 case SYMBOL_REF:
4114 /* 'Naked' TLS symbol references are never OK,
4115 non-TLS symbols are OK iff we are non-PIC. */
4116 if (tls_symbolic_operand (x))
4117 return true;
4118 else
4119 return flag_pic != 0;
4121 case CONST:
4122 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4123 case PLUS:
4124 case MINUS:
4125 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4126 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4128 case UNSPEC:
4129 switch (XINT (x, 1))
4131 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4132 case UNSPEC_LTREL_OFFSET:
4133 case UNSPEC_GOT:
4134 case UNSPEC_GOTOFF:
4135 case UNSPEC_PLTOFF:
4136 case UNSPEC_TLSGD:
4137 case UNSPEC_TLSLDM:
4138 case UNSPEC_NTPOFF:
4139 case UNSPEC_DTPOFF:
4140 case UNSPEC_GOTNTPOFF:
4141 case UNSPEC_INDNTPOFF:
4142 return false;
4144 /* If the literal pool shares the code section, execute template
4145 placeholders would end up in the pool as well; don't allow that here. */
4146 case UNSPEC_INSN:
4147 default:
4148 return true;
4150 break;
4152 default:
4153 gcc_unreachable ();
4157 /* Returns true if the constant value OP is a legitimate general
4158 operand during and after reload. The difference to
4159 legitimate_constant_p is that this function will not accept
4160 a constant that would need to be forced to the literal pool
4161 before it can be used as operand.
4162 This function accepts all constants which can be loaded directly
4163 into a GPR. */
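/* A few examples of what is accepted here (illustrative, not
   exhaustive): 4000 (la/lay), -30000 (l(g)hi), a 32-bit immediate such
   as 0x12345678 (l(g)fi), a constant with a single non-zero halfword
   such as 0x12340000 (one of the lliXX instructions), and the address
   of a symbol (larl). */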
4165 bool
4166 legitimate_reload_constant_p (rtx op)
4168 /* Accept la(y) operands. */
4169 if (GET_CODE (op) == CONST_INT
4170 && DISP_IN_RANGE (INTVAL (op)))
4171 return true;
4173 /* Accept l(g)hi/l(g)fi operands. */
4174 if (GET_CODE (op) == CONST_INT
4175 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4176 return true;
4178 /* Accept lliXX operands. */
4179 if (TARGET_ZARCH
4180 && GET_CODE (op) == CONST_INT
4181 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4182 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4183 return true;
4185 if (TARGET_EXTIMM
4186 && GET_CODE (op) == CONST_INT
4187 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4188 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4189 return true;
4191 /* Accept larl operands. */
4192 if (larl_operand (op, VOIDmode))
4193 return true;
4195 /* Accept floating-point zero operands that fit into a single GPR. */
4196 if (GET_CODE (op) == CONST_DOUBLE
4197 && s390_float_const_zero_p (op)
4198 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4199 return true;
4201 /* Accept double-word operands that can be split. */
4202 if (GET_CODE (op) == CONST_WIDE_INT
4203 || (GET_CODE (op) == CONST_INT
4204 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4206 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4207 rtx hi = operand_subword (op, 0, 0, dword_mode);
4208 rtx lo = operand_subword (op, 1, 0, dword_mode);
4209 return legitimate_reload_constant_p (hi)
4210 && legitimate_reload_constant_p (lo);
4213 /* Everything else cannot be handled without reload. */
4214 return false;
4217 /* Returns true if the constant value OP is a legitimate fp operand
4218 during and after reload.
4219 This function accepts all constants which can be loaded directly
4220 into an FPR. */
4222 static bool
4223 legitimate_reload_fp_constant_p (rtx op)
4225 /* Accept floating-point zero operands if the load zero instruction
4226 can be used. Prior to z196 the load fp zero instruction caused a
4227 performance penalty if the result is used as a BFP number. */
4228 if (TARGET_Z196
4229 && GET_CODE (op) == CONST_DOUBLE
4230 && s390_float_const_zero_p (op))
4231 return true;
4233 return false;
4236 /* Returns true if the constant value OP is a legitimate vector operand
4237 during and after reload.
4238 This function accepts all constants which can be loaded directly
4239 into a VR. */
4241 static bool
4242 legitimate_reload_vector_constant_p (rtx op)
4244 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4245 && (satisfies_constraint_j00 (op)
4246 || satisfies_constraint_jm1 (op)
4247 || satisfies_constraint_jKK (op)
4248 || satisfies_constraint_jxx (op)
4249 || satisfies_constraint_jyy (op)))
4250 return true;
4252 return false;
4255 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256 return the class of reg to actually use. */
4258 static reg_class_t
4259 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4261 switch (GET_CODE (op))
4263 /* Constants we cannot reload into general registers
4264 must be forced into the literal pool. */
4265 case CONST_VECTOR:
4266 case CONST_DOUBLE:
4267 case CONST_INT:
4268 case CONST_WIDE_INT:
4269 if (reg_class_subset_p (GENERAL_REGS, rclass)
4270 && legitimate_reload_constant_p (op))
4271 return GENERAL_REGS;
4272 else if (reg_class_subset_p (ADDR_REGS, rclass)
4273 && legitimate_reload_constant_p (op))
4274 return ADDR_REGS;
4275 else if (reg_class_subset_p (FP_REGS, rclass)
4276 && legitimate_reload_fp_constant_p (op))
4277 return FP_REGS;
4278 else if (reg_class_subset_p (VEC_REGS, rclass)
4279 && legitimate_reload_vector_constant_p (op))
4280 return VEC_REGS;
4282 return NO_REGS;
4284 /* If a symbolic constant or a PLUS is reloaded,
4285 it is most likely being used as an address, so
4286 prefer ADDR_REGS. If 'class' is not a superset
4287 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4288 case CONST:
4289 /* Symrefs cannot be pushed into the literal pool with -fPIC
4290 so we *MUST NOT* return NO_REGS for these cases
4291 (s390_cannot_force_const_mem will return true).
4293 On the other hand we MUST return NO_REGS for symrefs with an
4294 invalid addend which might have been pushed to the literal
4295 pool (no -fPIC). Usually we would expect them to be
4296 handled via secondary reload but this does not happen if
4297 they are used as literal pool slot replacement in reload
4298 inheritance (see emit_input_reload_insns). */
4299 if (GET_CODE (XEXP (op, 0)) == PLUS
4300 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4301 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4303 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4304 return ADDR_REGS;
4305 else
4306 return NO_REGS;
4308 /* fallthrough */
4309 case LABEL_REF:
4310 case SYMBOL_REF:
4311 if (!legitimate_reload_constant_p (op))
4312 return NO_REGS;
4313 /* fallthrough */
4314 case PLUS:
4315 /* load address will be used. */
4316 if (reg_class_subset_p (ADDR_REGS, rclass))
4317 return ADDR_REGS;
4318 else
4319 return NO_REGS;
4321 default:
4322 break;
4325 return rclass;
4328 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4330 aligned. */
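/* Example (illustrative): a load-relative access to sym + 6 with
   ALIGNMENT == 4 is rejected (the addend is not a multiple of 4),
   while sym + 8 is accepted as long as sym itself is not flagged as
   misaligned (SYMBOL_FLAG_NOTALIGN4_P). */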
4332 bool
4333 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4335 HOST_WIDE_INT addend;
4336 rtx symref;
4338 /* The "required alignment" might be 0 (e.g. for certain structs
4339 accessed via BLKmode). Early abort in this case, as well as when
4340 an alignment > 8 is required. */
4341 if (alignment < 2 || alignment > 8)
4342 return false;
4344 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4345 return false;
4347 if (addend & (alignment - 1))
4348 return false;
4350 if (GET_CODE (symref) == SYMBOL_REF)
4352 /* s390_encode_section_info is not called for anchors, since they don't
4353 have corresponding VAR_DECLs. Therefore, we cannot rely on
4354 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4355 if (SYMBOL_REF_ANCHOR_P (symref))
4357 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4358 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4359 / BITS_PER_UNIT);
4361 gcc_assert (block_offset >= 0);
4362 return ((block_offset & (alignment - 1)) == 0
4363 && block_alignment >= alignment);
4366 /* We have load-relative instructions for 2-byte, 4-byte, and
4367 8-byte alignment so allow only these. */
4368 switch (alignment)
4370 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4371 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4372 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4373 default: return false;
4377 if (GET_CODE (symref) == UNSPEC
4378 && alignment <= UNITS_PER_LONG)
4379 return true;
4381 return false;
4384 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4385 operand, SCRATCH is used to load the even part of the address and
4386 one is then added. */
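/* For instance (illustrative only): reloading sym + 5 into %r2 with
   %r3 as scratch yields roughly larl %r3,sym+4 followed by
   la %r2,1(%r3). */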
4388 void
4389 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4391 HOST_WIDE_INT addend;
4392 rtx symref;
4394 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4395 gcc_unreachable ();
4397 if (!(addend & 1))
4398 /* Easy case. The addend is even so larl will do fine. */
4399 emit_move_insn (reg, addr);
4400 else
4402 /* We can leave the scratch register untouched if the target
4403 register is a valid base register. */
4404 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4405 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4406 scratch = reg;
4408 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4409 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4411 if (addend != 1)
4412 emit_move_insn (scratch,
4413 gen_rtx_CONST (Pmode,
4414 gen_rtx_PLUS (Pmode, symref,
4415 GEN_INT (addend - 1))));
4416 else
4417 emit_move_insn (scratch, symref);
4419 /* Increment the address using la in order to avoid clobbering cc. */
4420 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4424 /* Generate what is necessary to move between REG and MEM using
4425 SCRATCH. The direction is given by TOMEM. */
4427 void
4428 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4430 /* Reload might have pulled a constant out of the literal pool.
4431 Force it back in. */
4432 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4433 || GET_CODE (mem) == CONST_WIDE_INT
4434 || GET_CODE (mem) == CONST_VECTOR
4435 || GET_CODE (mem) == CONST)
4436 mem = force_const_mem (GET_MODE (reg), mem);
4438 gcc_assert (MEM_P (mem));
4440 /* For a load from memory we can leave the scratch register
4441 untouched if the target register is a valid base register. */
4442 if (!tomem
4443 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4444 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4445 && GET_MODE (reg) == GET_MODE (scratch))
4446 scratch = reg;
4448 /* Load address into scratch register. Since we can't have a
4449 secondary reload for a secondary reload we have to cover the case
4450 where larl would need a secondary reload here as well. */
4451 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4453 /* Now we can use a standard load/store to do the move. */
4454 if (tomem)
4455 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4456 else
4457 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4460 /* Inform reload about cases where moving X with a mode MODE to a register in
4461 RCLASS requires an extra scratch or immediate register. Return the class
4462 needed for the immediate register. */
4464 static reg_class_t
4465 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4466 machine_mode mode, secondary_reload_info *sri)
4468 enum reg_class rclass = (enum reg_class) rclass_i;
4470 /* Intermediate register needed. */
4471 if (reg_classes_intersect_p (CC_REGS, rclass))
4472 return GENERAL_REGS;
4474 if (TARGET_VX)
4476 /* The vst/vl vector move instructions allow only for short
4477 displacements. */
4478 if (MEM_P (x)
4479 && GET_CODE (XEXP (x, 0)) == PLUS
4480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4481 && !SHORT_DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)))
4482 && reg_class_subset_p (rclass, VEC_REGS)
4483 && (!reg_class_subset_p (rclass, FP_REGS)
4484 || (GET_MODE_SIZE (mode) > 8
4485 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4487 if (in_p)
4488 sri->icode = (TARGET_64BIT ?
4489 CODE_FOR_reloaddi_la_in :
4490 CODE_FOR_reloadsi_la_in);
4491 else
4492 sri->icode = (TARGET_64BIT ?
4493 CODE_FOR_reloaddi_la_out :
4494 CODE_FOR_reloadsi_la_out);
4498 if (TARGET_Z10)
4500 HOST_WIDE_INT offset;
4501 rtx symref;
4503 /* On z10 several optimizer steps may generate larl operands with
4504 an odd addend. */
4505 if (in_p
4506 && s390_loadrelative_operand_p (x, &symref, &offset)
4507 && mode == Pmode
4508 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4509 && (offset & 1) == 1)
4510 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4513 /* Handle all the (mem (symref)) accesses we cannot use the z10
4514 instructions for. */
4515 if (MEM_P (x)
4516 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4517 && (mode == QImode
4518 || !reg_class_subset_p (rclass, GENERAL_REGS)
4519 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4520 || !s390_check_symref_alignment (XEXP (x, 0),
4521 GET_MODE_SIZE (mode))))
4523 #define __SECONDARY_RELOAD_CASE(M,m) \
4524 case E_##M##mode: \
4525 if (TARGET_64BIT) \
4526 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4527 CODE_FOR_reload##m##di_tomem_z10; \
4528 else \
4529 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4530 CODE_FOR_reload##m##si_tomem_z10; \
4531 break;
4533 switch (GET_MODE (x))
4535 __SECONDARY_RELOAD_CASE (QI, qi);
4536 __SECONDARY_RELOAD_CASE (HI, hi);
4537 __SECONDARY_RELOAD_CASE (SI, si);
4538 __SECONDARY_RELOAD_CASE (DI, di);
4539 __SECONDARY_RELOAD_CASE (TI, ti);
4540 __SECONDARY_RELOAD_CASE (SF, sf);
4541 __SECONDARY_RELOAD_CASE (DF, df);
4542 __SECONDARY_RELOAD_CASE (TF, tf);
4543 __SECONDARY_RELOAD_CASE (SD, sd);
4544 __SECONDARY_RELOAD_CASE (DD, dd);
4545 __SECONDARY_RELOAD_CASE (TD, td);
4546 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4547 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4548 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4549 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4550 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4551 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4552 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4553 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4554 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4555 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4556 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4557 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4558 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4559 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4560 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4561 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4562 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4563 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4564 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4565 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4566 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4567 default:
4568 gcc_unreachable ();
4570 #undef __SECONDARY_RELOAD_CASE
4574 /* We need a scratch register when loading a PLUS expression which
4575 is not a legitimate operand of the LOAD ADDRESS instruction. */
4576 /* LRA can deal with transformation of plus op very well -- so we
4577 don't need to prompt LRA in this case. */
4578 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4579 sri->icode = (TARGET_64BIT ?
4580 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4582 /* Performing a multiword move from or to memory we have to make sure the
4583 second chunk in memory is addressable without causing a displacement
4584 overflow. If that would be the case we calculate the address in
4585 a scratch register. */
4586 if (MEM_P (x)
4587 && GET_CODE (XEXP (x, 0)) == PLUS
4588 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4589 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4590 + GET_MODE_SIZE (mode) - 1))
4592 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4593 in an s_operand address since we may fall back to lm/stm. So we only
4594 have to care about overflows in the b+i+d case. */
4595 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4596 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4598 /* For FP_REGS no lm/stm is available so this check is triggered
4599 for displacement overflows in b+i+d and b+d like addresses. */
4600 || (reg_classes_intersect_p (FP_REGS, rclass)
4601 && s390_class_max_nregs (FP_REGS, mode) > 1))
4603 if (in_p)
4604 sri->icode = (TARGET_64BIT ?
4605 CODE_FOR_reloaddi_la_in :
4606 CODE_FOR_reloadsi_la_in);
4607 else
4608 sri->icode = (TARGET_64BIT ?
4609 CODE_FOR_reloaddi_la_out :
4610 CODE_FOR_reloadsi_la_out);
4614 /* A scratch address register is needed when a symbolic constant is
4615 copied to r0 when compiling with -fPIC. In other cases the target
4616 register might be used as temporary (see legitimize_pic_address). */
4617 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4618 sri->icode = (TARGET_64BIT ?
4619 CODE_FOR_reloaddi_PIC_addr :
4620 CODE_FOR_reloadsi_PIC_addr);
4622 /* Either scratch or no register needed. */
4623 return NO_REGS;
4626 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4628 We need secondary memory to move data between GPRs and FPRs.
4630 - With DFP the ldgr/lgdr instructions are available. Due to the
4631 different alignment we cannot use them for SFmode. For 31 bit a
4632 64 bit value in GPR would be a register pair so here we still
4633 need to go via memory.
4635 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4636 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637 in full VRs, so as before these moves are done via memory on
4638 z13 as well.
4640 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4642 static bool
4643 s390_secondary_memory_needed (machine_mode mode,
4644 reg_class_t class1, reg_class_t class2)
4646 return (((reg_classes_intersect_p (class1, VEC_REGS)
4647 && reg_classes_intersect_p (class2, GENERAL_REGS))
4648 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4649 && reg_classes_intersect_p (class2, VEC_REGS)))
4650 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4651 || GET_MODE_SIZE (mode) != 8)
4652 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4653 && GET_MODE_SIZE (mode) > 8)));
4656 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4658 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4659 because the movsi and movsf patterns don't handle r/f moves. */
4661 static machine_mode
4662 s390_secondary_memory_needed_mode (machine_mode mode)
4664 if (GET_MODE_BITSIZE (mode) < 32)
4665 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4666 return mode;
4669 /* Generate code to load SRC, which is PLUS that is not a
4670 legitimate operand for the LA instruction, into TARGET.
4671 SCRATCH may be used as scratch register. */
4673 void
4674 s390_expand_plus_operand (rtx target, rtx src,
4675 rtx scratch)
4677 rtx sum1, sum2;
4678 struct s390_address ad;
4680 /* src must be a PLUS; get its two operands. */
4681 gcc_assert (GET_CODE (src) == PLUS);
4682 gcc_assert (GET_MODE (src) == Pmode);
4684 /* Check if either of the two operands is already scheduled
4685 for replacement by reload. This can happen e.g. when
4686 float registers occur in an address. */
4687 sum1 = find_replacement (&XEXP (src, 0));
4688 sum2 = find_replacement (&XEXP (src, 1));
4689 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4691 /* If the address is already strictly valid, there's nothing to do. */
4692 if (!s390_decompose_address (src, &ad)
4693 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4694 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4696 /* Otherwise, one of the operands cannot be an address register;
4697 we reload its value into the scratch register. */
4698 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4700 emit_move_insn (scratch, sum1);
4701 sum1 = scratch;
4703 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4705 emit_move_insn (scratch, sum2);
4706 sum2 = scratch;
4709 /* According to the way these invalid addresses are generated
4710 in reload.c, it should never happen (at least on s390) that
4711 *neither* of the PLUS components, after find_replacements
4712 was applied, is an address register. */
4713 if (sum1 == scratch && sum2 == scratch)
4715 debug_rtx (src);
4716 gcc_unreachable ();
4719 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4722 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4723 is only ever performed on addresses, so we can mark the
4724 sum as legitimate for LA in any case. */
4725 s390_load_address (target, src);
4729 /* Return true if ADDR is a valid memory address.
4730 STRICT specifies whether strict register checking applies. */
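/* Background (illustrative): s390 addresses have the
   base+index+displacement form, e.g. 8(%r3,%r2); displacements are
   unsigned 12-bit values, or signed 20-bit values when the
   long-displacement facility is available. */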
4732 static bool
4733 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4735 struct s390_address ad;
4737 if (TARGET_Z10
4738 && larl_operand (addr, VOIDmode)
4739 && (mode == VOIDmode
4740 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4741 return true;
4743 if (!s390_decompose_address (addr, &ad))
4744 return false;
4746 /* The vector memory instructions only support short displacements.
4747 Reject invalid displacements early to prevent plenty of lay
4748 instructions from being generated later which then cannot be merged
4749 properly. */
4750 if (TARGET_VX
4751 && VECTOR_MODE_P (mode)
4752 && ad.disp != NULL_RTX
4753 && CONST_INT_P (ad.disp)
4754 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4755 return false;
4757 if (strict)
4759 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4760 return false;
4762 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4763 return false;
4765 else
4767 if (ad.base
4768 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4769 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4770 return false;
4772 if (ad.indx
4773 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4774 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4775 return false;
4777 return true;
4780 /* Return true if OP is a valid operand for the LA instruction.
4781 In 31-bit, we need to prove that the result is used as an
4782 address, as LA performs only a 31-bit addition. */
4784 bool
4785 legitimate_la_operand_p (rtx op)
4787 struct s390_address addr;
4788 if (!s390_decompose_address (op, &addr))
4789 return false;
4791 return (TARGET_64BIT || addr.pointer);
4794 /* Return true if it is valid *and* preferable to use LA to
4795 compute the sum of OP1 and OP2. */
4797 bool
4798 preferred_la_operand_p (rtx op1, rtx op2)
4800 struct s390_address addr;
4802 if (op2 != const0_rtx)
4803 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4805 if (!s390_decompose_address (op1, &addr))
4806 return false;
4807 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4808 return false;
4809 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4810 return false;
4812 /* Avoid LA instructions with index (and base) register on z196 or
4813 later; it is preferable to use regular add instructions when
4814 possible. Starting with zEC12 the la with index register is
4815 "uncracked" again but still slower than a regular add. */
4816 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4817 return false;
4819 if (!TARGET_64BIT && !addr.pointer)
4820 return false;
4822 if (addr.pointer)
4823 return true;
4825 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4826 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4827 return true;
4829 return false;
4832 /* Emit a forced load-address operation to load SRC into DST.
4833 This will use the LOAD ADDRESS instruction even in situations
4834 where legitimate_la_operand_p (SRC) returns false. */
4836 void
4837 s390_load_address (rtx dst, rtx src)
4839 if (TARGET_64BIT)
4840 emit_move_insn (dst, src);
4841 else
4842 emit_insn (gen_force_la_31 (dst, src));
4845 /* Return true if it is OK to use SYMBOL_REF in a relative address. */
4847 bool
4848 s390_rel_address_ok_p (rtx symbol_ref)
4850 tree decl;
4852 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4853 return true;
4855 decl = SYMBOL_REF_DECL (symbol_ref);
4857 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4858 return (s390_pic_data_is_text_relative
4859 || (decl
4860 && TREE_CODE (decl) == FUNCTION_DECL));
4862 return false;
4865 /* Return a legitimate reference for ORIG (an address) using the
4866 register REG. If REG is 0, a new pseudo is generated.
4868 There are two types of references that must be handled:
4870 1. Global data references must load the address from the GOT, via
4871 the PIC reg. An insn is emitted to do this load, and the reg is
4872 returned.
4874 2. Static data references, constant pool addresses, and code labels
4875 compute the address as an offset from the GOT, whose base is in
4876 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4877 differentiate them from global data objects. The returned
4878 address is the PIC reg + an unspec constant.
4880 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881 reg also appears in the address. */
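/* A few typical outcomes (illustrative only):
   - local symbol, even addend: a single larl.
   - local symbol, small odd addend, e.g. sym+3: larl <tmp>,sym
     followed by la <reg>,3(<tmp>).
   - global symbol with TARGET_Z10: lgrl <reg>,sym@GOTENT. */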
4884 legitimize_pic_address (rtx orig, rtx reg)
4886 rtx addr = orig;
4887 rtx addend = const0_rtx;
4888 rtx new_rtx = orig;
4890 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4892 if (GET_CODE (addr) == CONST)
4893 addr = XEXP (addr, 0);
4895 if (GET_CODE (addr) == PLUS)
4897 addend = XEXP (addr, 1);
4898 addr = XEXP (addr, 0);
4901 if ((GET_CODE (addr) == LABEL_REF
4902 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4903 || (GET_CODE (addr) == UNSPEC &&
4904 (XINT (addr, 1) == UNSPEC_GOTENT
4905 || XINT (addr, 1) == UNSPEC_PLT)))
4906 && GET_CODE (addend) == CONST_INT)
4908 /* This can be locally addressed. */
4910 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4911 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4912 gen_rtx_CONST (Pmode, addr) : addr);
4914 if (larl_operand (const_addr, VOIDmode)
4915 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4916 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4918 if (INTVAL (addend) & 1)
4920 /* LARL can't handle odd offsets, so emit a pair of LARL
4921 and LA. */
4922 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4924 if (!DISP_IN_RANGE (INTVAL (addend)))
4926 HOST_WIDE_INT even = INTVAL (addend) - 1;
4927 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4928 addr = gen_rtx_CONST (Pmode, addr);
4929 addend = const1_rtx;
4932 emit_move_insn (temp, addr);
4933 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4935 if (reg != 0)
4937 s390_load_address (reg, new_rtx);
4938 new_rtx = reg;
4941 else
4943 /* If the offset is even, we can just use LARL. This
4944 will happen automatically. */
4947 else
4949 /* No larl - Access local symbols relative to the GOT. */
4951 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4953 if (reload_in_progress || reload_completed)
4954 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4956 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4957 if (addend != const0_rtx)
4958 addr = gen_rtx_PLUS (Pmode, addr, addend);
4959 addr = gen_rtx_CONST (Pmode, addr);
4960 addr = force_const_mem (Pmode, addr);
4961 emit_move_insn (temp, addr);
4963 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4964 if (reg != 0)
4966 s390_load_address (reg, new_rtx);
4967 new_rtx = reg;
4971 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4973 /* A non-local symbol reference without addend.
4975 The symbol ref is wrapped into an UNSPEC to make sure the
4976 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977 This will tell the linker to put the symbol into the GOT.
4979 Additionally the code dereferencing the GOT slot is emitted here.
4981 An addend to the symref needs to be added afterwards.
4982 legitimize_pic_address calls itself recursively to handle
4983 that case. So no need to do it here. */
4985 if (reg == 0)
4986 reg = gen_reg_rtx (Pmode);
4988 if (TARGET_Z10)
4990 /* Use load relative if possible.
4991 lgrl <target>, sym@GOTENT */
4992 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4993 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4994 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4996 emit_move_insn (reg, new_rtx);
4997 new_rtx = reg;
4999 else if (flag_pic == 1)
5001 /* Assume GOT offset is a valid displacement operand (< 4k
5002 or < 512k with z990). This is handled the same way in
5003 both 31- and 64-bit code (@GOT).
5004 lg <target>, sym@GOT(r12) */
5006 if (reload_in_progress || reload_completed)
5007 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5009 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5012 new_rtx = gen_const_mem (Pmode, new_rtx);
5013 emit_move_insn (reg, new_rtx);
5014 new_rtx = reg;
5016 else
5018 /* If the GOT offset might be >= 4k, we determine the position
5019 of the GOT entry via a PC-relative LARL (@GOTENT).
5020 larl temp, sym@GOTENT
5021 lg <target>, 0(temp) */
5023 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5025 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5026 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5028 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5029 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5030 emit_move_insn (temp, new_rtx);
5031 new_rtx = gen_const_mem (Pmode, temp);
5032 emit_move_insn (reg, new_rtx);
5034 new_rtx = reg;
5037 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5039 gcc_assert (XVECLEN (addr, 0) == 1);
5040 switch (XINT (addr, 1))
5042 /* These UNSPECs address symbols (or PLT slots) relative to the
5043 GOT (not GOT slots!). In general this will exceed the
5044 displacement range, so these values belong in the literal
5045 pool. */
5046 case UNSPEC_GOTOFF:
5047 case UNSPEC_PLTOFF:
5048 new_rtx = force_const_mem (Pmode, orig);
5049 break;
5051 /* For -fPIC the GOT size might exceed the displacement
5052 range so make sure the value is in the literal pool. */
5053 case UNSPEC_GOT:
5054 if (flag_pic == 2)
5055 new_rtx = force_const_mem (Pmode, orig);
5056 break;
5058 /* For @GOTENT larl is used. This is handled like local
5059 symbol refs. */
5060 case UNSPEC_GOTENT:
5061 gcc_unreachable ();
5062 break;
5064 /* For @PLT larl is used. This is handled like local
5065 symbol refs. */
5066 case UNSPEC_PLT:
5067 gcc_unreachable ();
5068 break;
5070 /* Everything else cannot happen. */
5071 default:
5072 gcc_unreachable ();
5075 else if (addend != const0_rtx)
5077 /* Otherwise, compute the sum. */
5079 rtx base = legitimize_pic_address (addr, reg);
5080 new_rtx = legitimize_pic_address (addend,
5081 base == reg ? NULL_RTX : reg);
5082 if (GET_CODE (new_rtx) == CONST_INT)
5083 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5084 else
5086 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5088 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5089 new_rtx = XEXP (new_rtx, 1);
5091 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5094 if (GET_CODE (new_rtx) == CONST)
5095 new_rtx = XEXP (new_rtx, 0);
5096 new_rtx = force_operand (new_rtx, 0);
5099 return new_rtx;
5102 /* Load the thread pointer into a register. */
5105 s390_get_thread_pointer (void)
5107 rtx tp = gen_reg_rtx (Pmode);
5109 emit_insn (gen_get_thread_pointer (Pmode, tp));
5111 mark_reg_pointer (tp, BITS_PER_WORD);
5113 return tp;
5116 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5117 in s390_tls_symbol which always refers to __tls_get_offset.
5118 The returned offset is written to RESULT_REG and a USE rtx is
5119 generated for TLS_CALL. */
5121 static GTY(()) rtx s390_tls_symbol;
5123 static void
5124 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5126 rtx insn;
5128 if (!flag_pic)
5129 emit_insn (s390_load_got ());
5131 if (!s390_tls_symbol)
5132 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5134 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5135 gen_rtx_REG (Pmode, RETURN_REGNUM));
5137 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5138 RTL_CONST_CALL_P (insn) = 1;
5141 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5142 this (thread-local) address. REG may be used as temporary. */
5144 static rtx
5145 legitimize_tls_address (rtx addr, rtx reg)
5147 rtx new_rtx, tls_call, temp, base, r2;
5148 rtx_insn *insn;
5150 if (GET_CODE (addr) == SYMBOL_REF)
5151 switch (tls_symbolic_operand (addr))
5153 case TLS_MODEL_GLOBAL_DYNAMIC:
5154 start_sequence ();
5155 r2 = gen_rtx_REG (Pmode, 2);
5156 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5157 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5158 new_rtx = force_const_mem (Pmode, new_rtx);
5159 emit_move_insn (r2, new_rtx);
5160 s390_emit_tls_call_insn (r2, tls_call);
5161 insn = get_insns ();
5162 end_sequence ();
5164 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5165 temp = gen_reg_rtx (Pmode);
5166 emit_libcall_block (insn, temp, r2, new_rtx);
5168 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5169 if (reg != 0)
5171 s390_load_address (reg, new_rtx);
5172 new_rtx = reg;
5174 break;
5176 case TLS_MODEL_LOCAL_DYNAMIC:
5177 start_sequence ();
5178 r2 = gen_rtx_REG (Pmode, 2);
5179 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5180 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5181 new_rtx = force_const_mem (Pmode, new_rtx);
5182 emit_move_insn (r2, new_rtx);
5183 s390_emit_tls_call_insn (r2, tls_call);
5184 insn = get_insns ();
5185 end_sequence ();
5187 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5188 temp = gen_reg_rtx (Pmode);
5189 emit_libcall_block (insn, temp, r2, new_rtx);
5191 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5192 base = gen_reg_rtx (Pmode);
5193 s390_load_address (base, new_rtx);
5195 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5196 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5197 new_rtx = force_const_mem (Pmode, new_rtx);
5198 temp = gen_reg_rtx (Pmode);
5199 emit_move_insn (temp, new_rtx);
5201 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5202 if (reg != 0)
5204 s390_load_address (reg, new_rtx);
5205 new_rtx = reg;
5207 break;
5209 case TLS_MODEL_INITIAL_EXEC:
5210 if (flag_pic == 1)
5212 /* Assume GOT offset < 4k. This is handled the same way
5213 in both 31- and 64-bit code. */
5215 if (reload_in_progress || reload_completed)
5216 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5218 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5219 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5220 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5221 new_rtx = gen_const_mem (Pmode, new_rtx);
5222 temp = gen_reg_rtx (Pmode);
5223 emit_move_insn (temp, new_rtx);
5225 else
5227 /* If the GOT offset might be >= 4k, we determine the position
5228 of the GOT entry via a PC-relative LARL. */
5230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5231 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5232 temp = gen_reg_rtx (Pmode);
5233 emit_move_insn (temp, new_rtx);
5235 new_rtx = gen_const_mem (Pmode, temp);
5236 temp = gen_reg_rtx (Pmode);
5237 emit_move_insn (temp, new_rtx);
5240 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5241 if (reg != 0)
5243 s390_load_address (reg, new_rtx);
5244 new_rtx = reg;
5246 break;
5248 case TLS_MODEL_LOCAL_EXEC:
5249 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5250 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5251 new_rtx = force_const_mem (Pmode, new_rtx);
5252 temp = gen_reg_rtx (Pmode);
5253 emit_move_insn (temp, new_rtx);
5255 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256 if (reg != 0)
5258 s390_load_address (reg, new_rtx);
5259 new_rtx = reg;
5261 break;
5263 default:
5264 gcc_unreachable ();
5267 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5269 switch (XINT (XEXP (addr, 0), 1))
5271 case UNSPEC_INDNTPOFF:
5272 new_rtx = addr;
5273 break;
5275 default:
5276 gcc_unreachable ();
5280 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5281 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5283 new_rtx = XEXP (XEXP (addr, 0), 0);
5284 if (GET_CODE (new_rtx) != SYMBOL_REF)
5285 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5287 new_rtx = legitimize_tls_address (new_rtx, reg);
5288 new_rtx = plus_constant (Pmode, new_rtx,
5289 INTVAL (XEXP (XEXP (addr, 0), 1)));
5290 new_rtx = force_operand (new_rtx, 0);
5293 else
5294 gcc_unreachable (); /* for now ... */
5296 return new_rtx;
5299 /* Emit insns making the address in operands[1] valid for a standard
5300 move to operands[0]. operands[1] is replaced by an address which
5301 should be used instead of the former RTX to emit the move
5302 pattern. */
5304 void
5305 emit_symbolic_move (rtx *operands)
5307 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5309 if (GET_CODE (operands[0]) == MEM)
5310 operands[1] = force_reg (Pmode, operands[1]);
5311 else if (TLS_SYMBOLIC_CONST (operands[1]))
5312 operands[1] = legitimize_tls_address (operands[1], temp);
5313 else if (flag_pic)
5314 operands[1] = legitimize_pic_address (operands[1], temp);
5317 /* Try machine-dependent ways of modifying an illegitimate address X
5318 to be legitimate. If we find one, return the new, valid address.
5320 OLDX is the address as it was before break_out_memory_refs was called.
5321 In some cases it is useful to look at this to decide what needs to be done.
5323 MODE is the mode of the operand pointed to by X.
5325 When -fpic is used, special handling is needed for symbolic references.
5326 See comments by legitimize_pic_address for details. */
5328 static rtx
5329 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5330 machine_mode mode ATTRIBUTE_UNUSED)
5332 rtx constant_term = const0_rtx;
5334 if (TLS_SYMBOLIC_CONST (x))
5336 x = legitimize_tls_address (x, 0);
5338 if (s390_legitimate_address_p (mode, x, FALSE))
5339 return x;
5341 else if (GET_CODE (x) == PLUS
5342 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5343 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5345 return x;
5347 else if (flag_pic)
5349 if (SYMBOLIC_CONST (x)
5350 || (GET_CODE (x) == PLUS
5351 && (SYMBOLIC_CONST (XEXP (x, 0))
5352 || SYMBOLIC_CONST (XEXP (x, 1)))))
5353 x = legitimize_pic_address (x, 0);
5355 if (s390_legitimate_address_p (mode, x, FALSE))
5356 return x;
5359 x = eliminate_constant_term (x, &constant_term);
5361 /* Optimize loading of large displacements by splitting them
5362 into the multiple of 4K and the rest; this allows the
5363 former to be CSE'd if possible.
5365 Don't do this if the displacement is added to a register
5366 pointing into the stack frame, as the offsets will
5367 change later anyway. */
5369 if (GET_CODE (constant_term) == CONST_INT
5370 && !TARGET_LONG_DISPLACEMENT
5371 && !DISP_IN_RANGE (INTVAL (constant_term))
5372 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5374 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5375 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5377 rtx temp = gen_reg_rtx (Pmode);
5378 rtx val = force_operand (GEN_INT (upper), temp);
5379 if (val != temp)
5380 emit_move_insn (temp, val);
5382 x = gen_rtx_PLUS (Pmode, x, temp);
5383 constant_term = GEN_INT (lower);
5386 if (GET_CODE (x) == PLUS)
5388 if (GET_CODE (XEXP (x, 0)) == REG)
5390 rtx temp = gen_reg_rtx (Pmode);
5391 rtx val = force_operand (XEXP (x, 1), temp);
5392 if (val != temp)
5393 emit_move_insn (temp, val);
5395 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5398 else if (GET_CODE (XEXP (x, 1)) == REG)
5400 rtx temp = gen_reg_rtx (Pmode);
5401 rtx val = force_operand (XEXP (x, 0), temp);
5402 if (val != temp)
5403 emit_move_insn (temp, val);
5405 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5409 if (constant_term != const0_rtx)
5410 x = gen_rtx_PLUS (Pmode, x, constant_term);
5412 return x;
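/* Example of the 4K splitting above (illustrative): a constant term of
   0x12345 is split into upper = 0x12000 and lower = 0x345; the upper
   part is loaded into a register (and may be CSEd), while 0x345 stays
   as the displacement. */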
5415 /* Try a machine-dependent way of reloading an illegitimate address AD
5416 operand. If we find one, push the reload and return the new address.
5418 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5419 and TYPE is the reload type of the current reload. */
5422 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5423 int opnum, int type)
5425 if (!optimize || TARGET_LONG_DISPLACEMENT)
5426 return NULL_RTX;
5428 if (GET_CODE (ad) == PLUS)
5430 rtx tem = simplify_binary_operation (PLUS, Pmode,
5431 XEXP (ad, 0), XEXP (ad, 1));
5432 if (tem)
5433 ad = tem;
5436 if (GET_CODE (ad) == PLUS
5437 && GET_CODE (XEXP (ad, 0)) == REG
5438 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5439 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5441 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5442 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5443 rtx cst, tem, new_rtx;
5445 cst = GEN_INT (upper);
5446 if (!legitimate_reload_constant_p (cst))
5447 cst = force_const_mem (Pmode, cst);
5449 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5450 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5452 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5453 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5454 opnum, (enum reload_type) type);
5455 return new_rtx;
5458 return NULL_RTX;
5461 /* Emit code to move LEN bytes from SRC to DST. */
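/* For example (illustrative, assuming the mvcle path is not used): a
   constant length of 700 bytes expands into three mvc blocks of 256,
   256, and 188 bytes at offsets 0, 256, and 512. */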
5463 bool
5464 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5466 /* When tuning for z10 or higher we rely on the Glibc functions to
5467 do the right thing. Only for constant lengths below 64k do we
5468 generate inline code. */
5469 if (s390_tune >= PROCESSOR_2097_Z10
5470 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5471 return false;
5473 /* Expand memcpy for constant length operands without a loop if it
5474 is shorter that way.
5476 With a constant length argument a
5477 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5478 if (GET_CODE (len) == CONST_INT
5479 && INTVAL (len) >= 0
5480 && INTVAL (len) <= 256 * 6
5481 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5483 HOST_WIDE_INT o, l;
5485 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5487 rtx newdst = adjust_address (dst, BLKmode, o);
5488 rtx newsrc = adjust_address (src, BLKmode, o);
5489 emit_insn (gen_cpymem_short (newdst, newsrc,
5490 GEN_INT (l > 256 ? 255 : l - 1)));
5494 else if (TARGET_MVCLE)
5496 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5499 else
5501 rtx dst_addr, src_addr, count, blocks, temp;
5502 rtx_code_label *loop_start_label = gen_label_rtx ();
5503 rtx_code_label *loop_end_label = gen_label_rtx ();
5504 rtx_code_label *end_label = gen_label_rtx ();
5505 machine_mode mode;
5507 mode = GET_MODE (len);
5508 if (mode == VOIDmode)
5509 mode = Pmode;
5511 dst_addr = gen_reg_rtx (Pmode);
5512 src_addr = gen_reg_rtx (Pmode);
5513 count = gen_reg_rtx (mode);
5514 blocks = gen_reg_rtx (mode);
5516 convert_move (count, len, 1);
5517 emit_cmp_and_jump_insns (count, const0_rtx,
5518 EQ, NULL_RTX, mode, 1, end_label);
5520 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5521 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5522 dst = change_address (dst, VOIDmode, dst_addr);
5523 src = change_address (src, VOIDmode, src_addr);
5525 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5526 OPTAB_DIRECT);
5527 if (temp != count)
5528 emit_move_insn (count, temp);
5530 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5531 OPTAB_DIRECT);
5532 if (temp != blocks)
5533 emit_move_insn (blocks, temp);
5535 emit_cmp_and_jump_insns (blocks, const0_rtx,
5536 EQ, NULL_RTX, mode, 1, loop_end_label);
5538 emit_label (loop_start_label);
5540 if (TARGET_Z10
5541 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5543 rtx prefetch;
5545 /* Issue a read prefetch for the +3 cache line. */
5546 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5547 const0_rtx, const0_rtx);
5548 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5549 emit_insn (prefetch);
5551 /* Issue a write prefetch for the +3 cache line. */
5552 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5553 const1_rtx, const0_rtx);
5554 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5555 emit_insn (prefetch);
5558 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5559 s390_load_address (dst_addr,
5560 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561 s390_load_address (src_addr,
5562 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5564 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5565 OPTAB_DIRECT);
5566 if (temp != blocks)
5567 emit_move_insn (blocks, temp);
5569 emit_cmp_and_jump_insns (blocks, const0_rtx,
5570 EQ, NULL_RTX, mode, 1, loop_end_label);
5572 emit_jump (loop_start_label);
5573 emit_label (loop_end_label);
5575 emit_insn (gen_cpymem_short (dst, src,
5576 convert_to_mode (Pmode, count, 1)));
5577 emit_label (end_label);
5579 return true;
5582 /* Emit code to set LEN bytes at DST to VAL.
5583 Make use of clrmem if VAL is zero. */
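/* Sketch of the non-zero case (illustrative): the first byte is stored
   with mvi/stc and then propagated with an overlapping mvc, e.g.
   mvi 0(%r1),42 followed by mvc 1(255,%r1),0(%r1) sets 256 bytes
   to 42. */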
5585 void
5586 s390_expand_setmem (rtx dst, rtx len, rtx val)
5588 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5589 return;
5591 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5593 /* Expand setmem/clrmem for a constant length operand without a
5594 loop if it will be shorter that way.
5595 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5596 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5598 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5599 if (GET_CODE (len) == CONST_INT
5600 && ((val == const0_rtx
5601 && (INTVAL (len) <= 256 * 4
5602 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
5603 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5604 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5606 HOST_WIDE_INT o, l;
5608 if (val == const0_rtx)
5609 /* clrmem: emit 256 byte blockwise XCs. */
5610 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5612 rtx newdst = adjust_address (dst, BLKmode, o);
5613 emit_insn (gen_clrmem_short (newdst,
5614 GEN_INT (l > 256 ? 255 : l - 1)));
5616 else
5617 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5618 setting first byte to val and using a 256 byte mvc with one
5619 byte overlap to propagate the byte. */
5620 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5622 rtx newdst = adjust_address (dst, BLKmode, o);
5623 emit_move_insn (adjust_address (dst, QImode, o), val);
5624 if (l > 1)
5626 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5627 emit_insn (gen_cpymem_short (newdstp1, newdst,
5628 GEN_INT (l > 257 ? 255 : l - 2)));
5633 else if (TARGET_MVCLE)
5635 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5636 if (TARGET_64BIT)
5637 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5638 val));
5639 else
5640 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5641 val));
5644 else
5646 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5647 rtx_code_label *loop_start_label = gen_label_rtx ();
5648 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5649 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5650 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5651 machine_mode mode;
5653 mode = GET_MODE (len);
5654 if (mode == VOIDmode)
5655 mode = Pmode;
5657 dst_addr = gen_reg_rtx (Pmode);
5658 count = gen_reg_rtx (mode);
5659 blocks = gen_reg_rtx (mode);
5661 convert_move (count, len, 1);
5662 emit_cmp_and_jump_insns (count, const0_rtx,
5663 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5664 profile_probability::very_unlikely ());
5666 /* We need to make a copy of the target address since memset is
5667 supposed to return it unmodified. We have to do this here
5668 already since the new reg is used at onebyte_end_label. */
5669 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5670 dst = change_address (dst, VOIDmode, dst_addr);
5672 if (val != const0_rtx)
5674 /* When using the overlapping mvc the original target
5675 address is only accessed as a single-byte entity (even by
5676 the mvc reading this value). */
5677 set_mem_size (dst, 1);
5678 dstp1 = adjust_address (dst, VOIDmode, 1);
5679 emit_cmp_and_jump_insns (count,
5680 const1_rtx, EQ, NULL_RTX, mode, 1,
5681 onebyte_end_label,
5682 profile_probability::very_unlikely ());
5685 /* There is one unconditional (mvi+mvc)/xc after the loop
5686 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5687 or one (xc) here leaves that number of bytes to be handled by
5688 it. */
5689 temp = expand_binop (mode, add_optab, count,
5690 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5691 count, 1, OPTAB_DIRECT);
5692 if (temp != count)
5693 emit_move_insn (count, temp);
5695 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5696 OPTAB_DIRECT);
5697 if (temp != blocks)
5698 emit_move_insn (blocks, temp);
5700 emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5703 emit_jump (loop_start_label);
5705 if (val != const0_rtx)
5707 /* The 1 byte != 0 special case. Not handled efficiently
5708 since we require two jumps for that. However, this
5709 should be very rare. */
5710 emit_label (onebyte_end_label);
5711 emit_move_insn (adjust_address (dst, QImode, 0), val);
5712 emit_jump (zerobyte_end_label);
5715 emit_label (loop_start_label);
5717 if (TARGET_SETMEM_PFD (val, len))
5719 /* Issue a write prefetch. */
5720 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5721 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5722 const1_rtx, const0_rtx);
5723 emit_insn (prefetch);
5724 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5727 if (val == const0_rtx)
5728 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5729 else
5731 /* Set the first byte in the block to the value and use an
5732 overlapping mvc for the block. */
5733 emit_move_insn (adjust_address (dst, QImode, 0), val);
5734 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5736 s390_load_address (dst_addr,
5737 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5739 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5740 OPTAB_DIRECT);
5741 if (temp != blocks)
5742 emit_move_insn (blocks, temp);
5744 emit_cmp_and_jump_insns (blocks, const0_rtx,
5745 NE, NULL_RTX, mode, 1, loop_start_label);
5747 emit_label (restbyte_end_label);
5749 if (val == const0_rtx)
5750 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5751 else
5753 /* Set the first byte in the block to the value and use an
5754 overlapping mvc for the block. */
5755 emit_move_insn (adjust_address (dst, QImode, 0), val);
5756 /* execute only uses the lowest 8 bits of count, which is
5757 exactly what we need here. */
5758 emit_insn (gen_cpymem_short (dstp1, dst,
5759 convert_to_mode (Pmode, count, 1)));
5762 emit_label (zerobyte_end_label);
5766 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5767 and return the result in TARGET. */
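/* Illustrative summary: for short constant lengths this boils down to
   a single clc followed by cmpint, which converts the resulting
   condition code into the usual memcmp-style integer result. */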
5769 bool
5770 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5772 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5773 rtx tmp;
5775 /* When tuning for z10 or higher we rely on the Glibc functions to
5776 do the right thing. Only for constant lengths below 64k do we
5777 generate inline code. */
5778 if (s390_tune >= PROCESSOR_2097_Z10
5779 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5780 return false;
5782 /* As the result of CMPINT is inverted compared to what we need,
5783 we have to swap the operands. */
5784 tmp = op0; op0 = op1; op1 = tmp;
5786 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5788 if (INTVAL (len) > 0)
5790 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5791 emit_insn (gen_cmpint (target, ccreg));
5793 else
5794 emit_move_insn (target, const0_rtx);
5796 else if (TARGET_MVCLE)
5798 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5799 emit_insn (gen_cmpint (target, ccreg));
5801 else
5803 rtx addr0, addr1, count, blocks, temp;
5804 rtx_code_label *loop_start_label = gen_label_rtx ();
5805 rtx_code_label *loop_end_label = gen_label_rtx ();
5806 rtx_code_label *end_label = gen_label_rtx ();
5807 machine_mode mode;
5809 mode = GET_MODE (len);
5810 if (mode == VOIDmode)
5811 mode = Pmode;
5813 addr0 = gen_reg_rtx (Pmode);
5814 addr1 = gen_reg_rtx (Pmode);
5815 count = gen_reg_rtx (mode);
5816 blocks = gen_reg_rtx (mode);
5818 convert_move (count, len, 1);
5819 emit_cmp_and_jump_insns (count, const0_rtx,
5820 EQ, NULL_RTX, mode, 1, end_label);
5822 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5823 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5824 op0 = change_address (op0, VOIDmode, addr0);
5825 op1 = change_address (op1, VOIDmode, addr1);
5827 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5828 OPTAB_DIRECT);
5829 if (temp != count)
5830 emit_move_insn (count, temp);
5832 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5833 OPTAB_DIRECT);
5834 if (temp != blocks)
5835 emit_move_insn (blocks, temp);
5837 emit_cmp_and_jump_insns (blocks, const0_rtx,
5838 EQ, NULL_RTX, mode, 1, loop_end_label);
5840 emit_label (loop_start_label);
5842 if (TARGET_Z10
5843 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5845 rtx prefetch;
5847 /* Issue a read prefetch for the +2 cache line of operand 1. */
5848 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5849 const0_rtx, const0_rtx);
5850 emit_insn (prefetch);
5851 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5853 /* Issue a read prefetch for the +2 cache line of operand 2. */
5854 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5855 const0_rtx, const0_rtx);
5856 emit_insn (prefetch);
5857 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5860 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5861 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5862 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5863 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5864 temp = gen_rtx_SET (pc_rtx, temp);
5865 emit_jump_insn (temp);
5867 s390_load_address (addr0,
5868 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5869 s390_load_address (addr1,
5870 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5872 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5873 OPTAB_DIRECT);
5874 if (temp != blocks)
5875 emit_move_insn (blocks, temp);
5877 emit_cmp_and_jump_insns (blocks, const0_rtx,
5878 EQ, NULL_RTX, mode, 1, loop_end_label);
5880 emit_jump (loop_start_label);
5881 emit_label (loop_end_label);
5883 emit_insn (gen_cmpmem_short (op0, op1,
5884 convert_to_mode (Pmode, count, 1)));
5885 emit_label (end_label);
5887 emit_insn (gen_cmpint (target, ccreg));
5889 return true;
5892 /* Emit a conditional jump to LABEL for condition code mask MASK using
5893 comparison operator COMPARISON. Return the emitted jump insn. */
5895 static rtx_insn *
5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5898 rtx temp;
5900 gcc_assert (comparison == EQ || comparison == NE);
5901 gcc_assert (mask > 0 && mask < 15);
5903 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5904 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5905 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5906 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5907 temp = gen_rtx_SET (pc_rtx, temp);
5908 return emit_jump_insn (temp);
5911 /* Emit the instructions to implement strlen of STRING and store the
5912 result in TARGET. The string has the known ALIGNMENT. This
5913 version uses vector instructions and is therefore not appropriate
5914 for targets prior to z13. */
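/* Rough sketch of the emitted loop (illustrative): 16-byte chunks are
   loaded with vl (vll for a possibly unaligned head), a vfenez
   instruction searches each chunk for a zero byte, and the loop
   iterates until one is found; the byte index within the final chunk
   plus the accumulated offset gives the length. */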
5916 void
5917 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5919 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5920 rtx str_reg = gen_reg_rtx (V16QImode);
5921 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5922 rtx str_idx_reg = gen_reg_rtx (Pmode);
5923 rtx result_reg = gen_reg_rtx (V16QImode);
5924 rtx is_aligned_label = gen_label_rtx ();
5925 rtx into_loop_label = NULL_RTX;
5926 rtx loop_start_label = gen_label_rtx ();
5927 rtx temp;
5928 rtx len = gen_reg_rtx (QImode);
5929 rtx cond;
5931 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5932 emit_move_insn (str_idx_reg, const0_rtx);
5934 if (INTVAL (alignment) < 16)
5936 /* Check whether the address happens to be aligned properly, so we
5937 can jump directly to the aligned loop. */
5938 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5939 str_addr_base_reg, GEN_INT (15)),
5940 const0_rtx, EQ, NULL_RTX,
5941 Pmode, 1, is_aligned_label);
5943 temp = gen_reg_rtx (Pmode);
5944 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5945 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5946 gcc_assert (REG_P (temp));
5947 highest_index_to_load_reg =
5948 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5949 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5950 gcc_assert (REG_P (highest_index_to_load_reg));
5951 emit_insn (gen_vllv16qi (str_reg,
5952 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5953 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5955 into_loop_label = gen_label_rtx ();
5956 s390_emit_jump (into_loop_label, NULL_RTX);
5957 emit_barrier ();
5960 emit_label (is_aligned_label);
5961 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5963 /* When this point is reached we only perform 16-byte aligned
5964 loads. */
5965 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5967 emit_label (loop_start_label);
5968 LABEL_NUSES (loop_start_label) = 1;
5970 /* Load 16 bytes of the string into VR. */
5971 emit_move_insn (str_reg,
5972 gen_rtx_MEM (V16QImode,
5973 gen_rtx_PLUS (Pmode, str_idx_reg,
5974 str_addr_base_reg)));
5975 if (into_loop_label != NULL_RTX)
5977 emit_label (into_loop_label);
5978 LABEL_NUSES (into_loop_label) = 1;
5981 /* Increment string index by 16 bytes. */
5982 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5983 str_idx_reg, 1, OPTAB_DIRECT);
5985 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5986 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5988 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5989 REG_BR_PROB,
5990 profile_probability::very_likely ().to_reg_br_prob_note ());
5991 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5993 /* If the string pointer wasn't aligned we have loaded less than 16
5994 bytes and the remaining bytes were filled with zeros (by vll).
5995 Now we have to check whether the resulting index lies within the
5996 bytes that are actually part of the string. */
5998 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5999 highest_index_to_load_reg);
6000 s390_load_address (highest_index_to_load_reg,
6001 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6002 const1_rtx));
6003 if (TARGET_64BIT)
6004 emit_insn (gen_movdicc (str_idx_reg, cond,
6005 highest_index_to_load_reg, str_idx_reg));
6006 else
6007 emit_insn (gen_movsicc (str_idx_reg, cond,
6008 highest_index_to_load_reg, str_idx_reg));
6010 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6011 profile_probability::very_unlikely ());
6013 expand_binop (Pmode, add_optab, str_idx_reg,
6014 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6015 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6016 here. */
6017 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6018 convert_to_mode (Pmode, len, 1),
6019 target, 1, OPTAB_DIRECT);
6020 if (temp != target)
6021 emit_move_insn (target, temp);
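/* Expand a vector-instruction based copy of the zero-terminated string
   at SRC to DST.  RESULT is set to the address of the copied
   terminating zero byte within DST (stpcpy-style semantics, as can be
   seen from the RESULT computations below).  This uses the z13 vector
   string instructions (vlbb, vfenez, vstl) and is therefore not
   appropriate for older targets.  */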
6024 void
6025 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6027 rtx temp = gen_reg_rtx (Pmode);
6028 rtx src_addr = XEXP (src, 0);
6029 rtx dst_addr = XEXP (dst, 0);
6030 rtx src_addr_reg = gen_reg_rtx (Pmode);
6031 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6032 rtx offset = gen_reg_rtx (Pmode);
6033 rtx vsrc = gen_reg_rtx (V16QImode);
6034 rtx vpos = gen_reg_rtx (V16QImode);
6035 rtx loadlen = gen_reg_rtx (SImode);
6036 rtx gpos_qi = gen_reg_rtx(QImode);
6037 rtx gpos = gen_reg_rtx (SImode);
6038 rtx done_label = gen_label_rtx ();
6039 rtx loop_label = gen_label_rtx ();
6040 rtx exit_label = gen_label_rtx ();
6041 rtx full_label = gen_label_rtx ();
6043 /* Perform a quick check for a string end within the first (at most) 16
6044 bytes and exit early if one is found. */
6046 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6047 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6048 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6049 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6050 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6051 /* gpos is the byte index if a zero was found and 16 otherwise.
6052 So if it is lower than the number of loaded bytes we have a hit. */
6053 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6054 full_label);
6055 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6057 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6058 1, OPTAB_DIRECT);
6059 emit_jump (exit_label);
6060 emit_barrier ();
6062 emit_label (full_label);
6063 LABEL_NUSES (full_label) = 1;
6065 /* Calculate `offset' so that src + offset points to the last byte
6066 before the next 16-byte alignment boundary. */
6068 /* temp = src_addr & 0xf */
6069 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6070 1, OPTAB_DIRECT);
6072 /* offset = 0xf - temp */
6073 emit_move_insn (offset, GEN_INT (15));
6074 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6075 1, OPTAB_DIRECT);
6077 /* Store `offset' bytes in the destination string. The quick check
6078 has loaded at least `offset' bytes into vsrc. */
6080 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6082 /* Advance to the next byte to be loaded. */
6083 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6084 1, OPTAB_DIRECT);
6086 /* Make sure the addresses are single regs which can be used as a
6087 base. */
6088 emit_move_insn (src_addr_reg, src_addr);
6089 emit_move_insn (dst_addr_reg, dst_addr);
6091 /* MAIN LOOP */
6093 emit_label (loop_label);
6094 LABEL_NUSES (loop_label) = 1;
6096 emit_move_insn (vsrc,
6097 gen_rtx_MEM (V16QImode,
6098 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6100 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6101 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6102 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6103 REG_BR_PROB, profile_probability::very_unlikely ()
6104 .to_reg_br_prob_note ());
6106 emit_move_insn (gen_rtx_MEM (V16QImode,
6107 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6108 vsrc);
6109 /* offset += 16 */
6110 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6111 offset, 1, OPTAB_DIRECT);
6113 emit_jump (loop_label);
6114 emit_barrier ();
6116 /* REGULAR EXIT */
6118 /* We are done. Add the offset of the zero character to the dst_addr
6119 pointer to get the result. */
6121 emit_label (done_label);
6122 LABEL_NUSES (done_label) = 1;
6124 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6125 1, OPTAB_DIRECT);
6127 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6128 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6130 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6132 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6133 1, OPTAB_DIRECT);
6135 /* EARLY EXIT */
6137 emit_label (exit_label);
6138 LABEL_NUSES (exit_label) = 1;
6142 /* Expand conditional increment or decrement using alc/slb instructions.
6143 Should generate code setting DST to either SRC or SRC + INCREMENT,
6144 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145 Returns true if successful, false otherwise.
6147 That makes it possible to implement some if-constructs without jumps e.g.:
6148 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149 unsigned int a, b, c;
6150 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6151 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6152 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6153 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6155 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6157 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6159 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
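/* As a concrete illustration of the table above (cond_inc is a
   made-up example, not emitted code), a function like

     unsigned int
     cond_inc (unsigned int a, unsigned int b, unsigned int c)
     {
       if (a < b)
	 c++;
       return c;
     }

   can be compiled without a branch: the unsigned compare leaves the
   carry in the condition code and an add-logical-with-carry folds it
   into c.  The expander below emits the corresponding compare and
   alc/slb RTL directly.  */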
6161 bool
6162 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6163 rtx dst, rtx src, rtx increment)
6165 machine_mode cmp_mode;
6166 machine_mode cc_mode;
6167 rtx op_res;
6168 rtx insn;
6169 rtvec p;
6170 int ret;
6172 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6173 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6174 cmp_mode = SImode;
6175 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6176 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6177 cmp_mode = DImode;
6178 else
6179 return false;
6181 /* Try ADD LOGICAL WITH CARRY. */
6182 if (increment == const1_rtx)
6184 /* Determine CC mode to use. */
6185 if (cmp_code == EQ || cmp_code == NE)
6187 if (cmp_op1 != const0_rtx)
6189 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6190 NULL_RTX, 0, OPTAB_WIDEN);
6191 cmp_op1 = const0_rtx;
6194 cmp_code = cmp_code == EQ ? LEU : GTU;
6197 if (cmp_code == LTU || cmp_code == LEU)
6199 rtx tem = cmp_op0;
6200 cmp_op0 = cmp_op1;
6201 cmp_op1 = tem;
6202 cmp_code = swap_condition (cmp_code);
6205 switch (cmp_code)
6207 case GTU:
6208 cc_mode = CCUmode;
6209 break;
6211 case GEU:
6212 cc_mode = CCL3mode;
6213 break;
6215 default:
6216 return false;
6219 /* Emit comparison instruction pattern. */
6220 if (!register_operand (cmp_op0, cmp_mode))
6221 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6223 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6224 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6225 /* We use insn_invalid_p here to add clobbers if required. */
6226 ret = insn_invalid_p (emit_insn (insn), false);
6227 gcc_assert (!ret);
6229 /* Emit ALC instruction pattern. */
6230 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6231 gen_rtx_REG (cc_mode, CC_REGNUM),
6232 const0_rtx);
6234 if (src != const0_rtx)
6236 if (!register_operand (src, GET_MODE (dst)))
6237 src = force_reg (GET_MODE (dst), src);
6239 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6240 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6243 p = rtvec_alloc (2);
6244 RTVEC_ELT (p, 0) =
6245 gen_rtx_SET (dst, op_res);
6246 RTVEC_ELT (p, 1) =
6247 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6248 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6250 return true;
6253 /* Try SUBTRACT LOGICAL WITH BORROW. */
6254 if (increment == constm1_rtx)
6256 /* Determine CC mode to use. */
6257 if (cmp_code == EQ || cmp_code == NE)
6259 if (cmp_op1 != const0_rtx)
6261 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6262 NULL_RTX, 0, OPTAB_WIDEN);
6263 cmp_op1 = const0_rtx;
6266 cmp_code = cmp_code == EQ ? LEU : GTU;
6269 if (cmp_code == GTU || cmp_code == GEU)
6271 rtx tem = cmp_op0;
6272 cmp_op0 = cmp_op1;
6273 cmp_op1 = tem;
6274 cmp_code = swap_condition (cmp_code);
6277 switch (cmp_code)
6279 case LEU:
6280 cc_mode = CCUmode;
6281 break;
6283 case LTU:
6284 cc_mode = CCL3mode;
6285 break;
6287 default:
6288 return false;
6291 /* Emit comparison instruction pattern. */
6292 if (!register_operand (cmp_op0, cmp_mode))
6293 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6295 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6296 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6297 /* We use insn_invalid_p here to add clobbers if required. */
6298 ret = insn_invalid_p (emit_insn (insn), false);
6299 gcc_assert (!ret);
6301 /* Emit SLB instruction pattern. */
6302 if (!register_operand (src, GET_MODE (dst)))
6303 src = force_reg (GET_MODE (dst), src);
6305 op_res = gen_rtx_MINUS (GET_MODE (dst),
6306 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6307 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6308 gen_rtx_REG (cc_mode, CC_REGNUM),
6309 const0_rtx));
6310 p = rtvec_alloc (2);
6311 RTVEC_ELT (p, 0) =
6312 gen_rtx_SET (dst, op_res);
6313 RTVEC_ELT (p, 1) =
6314 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6315 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6317 return true;
6320 return false;
6323 /* Expand code for the insv template. Return true if successful. */
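/* For illustration (a made-up example), a C bit-field store such as

     struct s { unsigned long hi : 16; unsigned long lo : 48; };
     void set_hi (struct s *p, unsigned int v) { p->hi = v; }

   may reach this expander as a (set (zero_extract ...) ...) pattern;
   the cases below try to map such inserts onto insert-immediate,
   insert/store-characters-under-mask, or risbg instructions.  */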
6325 bool
6326 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6328 int bitsize = INTVAL (op1);
6329 int bitpos = INTVAL (op2);
6330 machine_mode mode = GET_MODE (dest);
6331 machine_mode smode;
6332 int smode_bsize, mode_bsize;
6333 rtx op, clobber;
6335 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6336 return false;
6338 /* Generate INSERT IMMEDIATE (IILL et al). */
6339 /* (set (ze (reg)) (const_int)). */
6340 if (TARGET_ZARCH
6341 && register_operand (dest, word_mode)
6342 && (bitpos % 16) == 0
6343 && (bitsize % 16) == 0
6344 && const_int_operand (src, VOIDmode))
6346 HOST_WIDE_INT val = INTVAL (src);
6347 int regpos = bitpos + bitsize;
6349 while (regpos > bitpos)
6351 machine_mode putmode;
6352 int putsize;
6354 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6355 putmode = SImode;
6356 else
6357 putmode = HImode;
6359 putsize = GET_MODE_BITSIZE (putmode);
6360 regpos -= putsize;
6361 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6362 GEN_INT (putsize),
6363 GEN_INT (regpos)),
6364 gen_int_mode (val, putmode));
6365 val >>= putsize;
6367 gcc_assert (regpos == bitpos);
6368 return true;
6371 smode = smallest_int_mode_for_size (bitsize);
6372 smode_bsize = GET_MODE_BITSIZE (smode);
6373 mode_bsize = GET_MODE_BITSIZE (mode);
6375 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6376 if (bitpos == 0
6377 && (bitsize % BITS_PER_UNIT) == 0
6378 && MEM_P (dest)
6379 && (register_operand (src, word_mode)
6380 || const_int_operand (src, VOIDmode)))
6382 /* Emit standard pattern if possible. */
6383 if (smode_bsize == bitsize)
6385 emit_move_insn (adjust_address (dest, smode, 0),
6386 gen_lowpart (smode, src));
6387 return true;
6390 /* (set (ze (mem)) (const_int)). */
6391 else if (const_int_operand (src, VOIDmode))
6393 int size = bitsize / BITS_PER_UNIT;
6394 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6395 BLKmode,
6396 UNITS_PER_WORD - size);
6398 dest = adjust_address (dest, BLKmode, 0);
6399 set_mem_size (dest, size);
6400 s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6401 return true;
6404 /* (set (ze (mem)) (reg)). */
6405 else if (register_operand (src, word_mode))
6407 if (bitsize <= 32)
6408 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6409 const0_rtx), src);
6410 else
6412 /* Emit st,stcmh sequence. */
6413 int stcmh_width = bitsize - 32;
6414 int size = stcmh_width / BITS_PER_UNIT;
6416 emit_move_insn (adjust_address (dest, SImode, size),
6417 gen_lowpart (SImode, src));
6418 set_mem_size (dest, size);
6419 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6420 GEN_INT (stcmh_width),
6421 const0_rtx),
6422 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6424 return true;
6428 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6429 if ((bitpos % BITS_PER_UNIT) == 0
6430 && (bitsize % BITS_PER_UNIT) == 0
6431 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6432 && MEM_P (src)
6433 && (mode == DImode || mode == SImode)
6434 && register_operand (dest, mode))
6436 /* Emit a strict_low_part pattern if possible. */
6437 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6439 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6440 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6441 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6442 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6443 return true;
6446 /* ??? There are more powerful versions of ICM that are not
6447 completely represented in the md file. */
6450 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6451 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6453 machine_mode mode_s = GET_MODE (src);
6455 if (CONSTANT_P (src))
6457 /* For constant zero values the representation with AND
6458 appears to be folded in more situations than the (set
6459 (zero_extract) ...).
6460 We only do this when the start and end of the bitfield
6461 remain in the same SImode chunk. That way nihf or nilf
6462 can be used.
6463 The AND patterns might still generate a risbg for this. */
6464 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6465 return false;
6466 else
6467 src = force_reg (mode, src);
6469 else if (mode_s != mode)
6471 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6472 src = force_reg (mode_s, src);
6473 src = gen_lowpart (mode, src);
6476 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6477 op = gen_rtx_SET (op, src);
6479 if (!TARGET_ZEC12)
6481 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6482 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6484 emit_insn (op);
6486 return true;
6489 return false;
6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493 register that holds VAL of mode MODE shifted by COUNT bits. */
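/* In other words, the returned register contains
     (val & GET_MODE_MASK (mode)) << count
   computed in SImode; for QImode VAL this is (val & 0xff) << count.  */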
6495 static inline rtx
6496 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6498 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6499 NULL_RTX, 1, OPTAB_DIRECT);
6500 return expand_simple_binop (SImode, ASHIFT, val, count,
6501 NULL_RTX, 1, OPTAB_DIRECT);
6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505 the result in TARGET. */
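/* Only EQ, GT and GTU (plus a few unordered FP variants) have direct
   vector compare patterns; everything else is derived below by
   swapping the operands and/or negating the result, e.g. integer
   LE a,b becomes NOT (GT a,b) and LT a,b becomes GT b,a.  */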
6507 void
6508 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6509 rtx cmp_op1, rtx cmp_op2)
6511 machine_mode mode = GET_MODE (target);
6512 bool neg_p = false, swap_p = false;
6513 rtx tmp;
6515 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6517 switch (cond)
6519 /* NE a != b -> !(a == b) */
6520 case NE: cond = EQ; neg_p = true; break;
6521 case UNGT:
6522 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6523 return;
6524 case UNGE:
6525 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6526 return;
6527 case LE: cond = GE; swap_p = true; break;
6528 /* UNLE: (a u<= b) -> (b u>= a). */
6529 case UNLE:
6530 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6531 return;
6532 /* LT: a < b -> b > a */
6533 case LT: cond = GT; swap_p = true; break;
6534 /* UNLT: (a u< b) -> (b u> a). */
6535 case UNLT:
6536 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6537 return;
6538 case UNEQ:
6539 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6540 return;
6541 case LTGT:
6542 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6543 return;
6544 case ORDERED:
6545 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6546 return;
6547 case UNORDERED:
6548 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6549 return;
6550 default: break;
6553 else
6555 switch (cond)
6557 /* NE: a != b -> !(a == b) */
6558 case NE: cond = EQ; neg_p = true; break;
6559 /* GE: a >= b -> !(b > a) */
6560 case GE: cond = GT; neg_p = true; swap_p = true; break;
6561 /* GEU: a >= b -> !(b > a) */
6562 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6563 /* LE: a <= b -> !(a > b) */
6564 case LE: cond = GT; neg_p = true; break;
6565 /* LEU: a <= b -> !(a > b) */
6566 case LEU: cond = GTU; neg_p = true; break;
6567 /* LT: a < b -> b > a */
6568 case LT: cond = GT; swap_p = true; break;
6569 /* LTU: a < b -> b > a */
6570 case LTU: cond = GTU; swap_p = true; break;
6571 default: break;
6575 if (swap_p)
6577 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6580 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6581 mode,
6582 cmp_op1, cmp_op2)));
6583 if (neg_p)
6584 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6587 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589 elements in CMP1 and CMP2 fulfill the comparison.
6590 This function is only used to emit patterns for the vx builtins and
6591 therefore only handles comparison codes required by the
6592 builtins. */
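/* Typical users are predicate intrinsics such as vec_all_eq (a, b)
   and vec_any_eq (a, b): both compare element-wise with CODE == EQ
   and differ only in ALL_P, which selects between the "all elements"
   and "any element" CC consumer modes below.  */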
6593 void
6594 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6595 rtx cmp1, rtx cmp2, bool all_p)
6597 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6598 rtx tmp_reg = gen_reg_rtx (SImode);
6599 bool swap_p = false;
6601 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6603 switch (code)
6605 case EQ:
6606 case NE:
6607 cc_producer_mode = CCVEQmode;
6608 break;
6609 case GE:
6610 case LT:
6611 code = swap_condition (code);
6612 swap_p = true;
6613 /* fallthrough */
6614 case GT:
6615 case LE:
6616 cc_producer_mode = CCVIHmode;
6617 break;
6618 case GEU:
6619 case LTU:
6620 code = swap_condition (code);
6621 swap_p = true;
6622 /* fallthrough */
6623 case GTU:
6624 case LEU:
6625 cc_producer_mode = CCVIHUmode;
6626 break;
6627 default:
6628 gcc_unreachable ();
6631 scratch_mode = GET_MODE (cmp1);
6632 /* These codes represent inverted CC interpretations. Inverting
6633 an ALL CC mode results in an ANY CC mode and the other way
6634 around. Invert the all_p flag here to compensate for
6635 that. */
6636 if (code == NE || code == LE || code == LEU)
6637 all_p = !all_p;
6639 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6641 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6643 bool inv_p = false;
6645 switch (code)
6647 case EQ: cc_producer_mode = CCVEQmode; break;
6648 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6649 case GT: cc_producer_mode = CCVFHmode; break;
6650 case GE: cc_producer_mode = CCVFHEmode; break;
6651 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6652 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6653 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6654 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6655 default: gcc_unreachable ();
6657 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6659 if (inv_p)
6660 all_p = !all_p;
6662 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6664 else
6665 gcc_unreachable ();
6667 if (swap_p)
6669 rtx tmp = cmp2;
6670 cmp2 = cmp1;
6671 cmp1 = tmp;
6674 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6675 gen_rtvec (2, gen_rtx_SET (
6676 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6677 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6678 gen_rtx_CLOBBER (VOIDmode,
6679 gen_rtx_SCRATCH (scratch_mode)))));
6680 emit_move_insn (target, const0_rtx);
6681 emit_move_insn (tmp_reg, const1_rtx);
6683 emit_move_insn (target,
6684 gen_rtx_IF_THEN_ELSE (SImode,
6685 gen_rtx_fmt_ee (code, VOIDmode,
6686 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6687 const0_rtx),
6688 tmp_reg, target));
6691 /* Invert the comparison CODE applied to a CC mode. This is only safe
6692 if we know whether the result was created by a floating point
6693 compare or not. For the CCV modes this is encoded as part of the
6694 mode. */
6695 enum rtx_code
6696 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6698 /* Reversal of FP compares takes the unordered case into account --
6699 an ordered compare becomes an unordered compare and vice versa. */
6700 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6701 return reverse_condition_maybe_unordered (code);
6702 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6703 return reverse_condition (code);
6704 else
6705 gcc_unreachable ();
6708 /* Generate a vector comparison expression loading either elements of
6709 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6710 and CMP_OP2. */
6712 void
6713 s390_expand_vcond (rtx target, rtx then, rtx els,
6714 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6716 rtx tmp;
6717 machine_mode result_mode;
6718 rtx result_target;
6720 machine_mode target_mode = GET_MODE (target);
6721 machine_mode cmp_mode = GET_MODE (cmp_op1);
6722 rtx op = (cond == LT) ? els : then;
6724 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6726 for short and byte (x >> 15 and x >> 7 respectively). */
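/* E.g. for 32-bit elements the identities used here are
     (x < 0) ? -1 : 0  ==  (int32_t) x >> 31    (arithmetic shift)
     (x < 0) ?  1 : 0  ==  (uint32_t) x >> 31   (logical shift)
   because the shift either replicates or isolates the sign bit.  */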
6727 if ((cond == LT || cond == GE)
6728 && target_mode == cmp_mode
6729 && cmp_op2 == CONST0_RTX (cmp_mode)
6730 && op == CONST0_RTX (target_mode)
6731 && s390_vector_mode_supported_p (target_mode)
6732 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6734 rtx negop = (cond == LT) ? then : els;
6736 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6738 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739 if (negop == CONST1_RTX (target_mode))
6741 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6742 GEN_INT (shift), target,
6743 1, OPTAB_DIRECT);
6744 if (res != target)
6745 emit_move_insn (target, res);
6746 return;
6749 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750 else if (all_ones_operand (negop, target_mode))
6752 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6753 GEN_INT (shift), target,
6754 0, OPTAB_DIRECT);
6755 if (res != target)
6756 emit_move_insn (target, res);
6757 return;
6761 /* We always use an integral type vector to hold the comparison
6762 result. */
6763 result_mode = related_int_vector_mode (cmp_mode).require ();
6764 result_target = gen_reg_rtx (result_mode);
6766 /* We allow vector immediates as comparison operands that
6767 can be handled by the optimization above but not by the
6768 following code. Hence, force them into registers here. */
6769 if (!REG_P (cmp_op1))
6770 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6772 if (!REG_P (cmp_op2))
6773 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6775 s390_expand_vec_compare (result_target, cond,
6776 cmp_op1, cmp_op2);
6778 /* If the results are supposed to be either -1 or 0 we are done
6779 since this is what our compare instructions generate anyway. */
6780 if (all_ones_operand (then, GET_MODE (then))
6781 && const0_operand (els, GET_MODE (els)))
6783 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6784 result_target, 0));
6785 return;
6788 /* Otherwise we will do a vsel afterwards. */
6789 /* This gets triggered e.g.
6790 with gcc.c-torture/compile/pr53410-1.c */
6791 if (!REG_P (then))
6792 then = force_reg (target_mode, then);
6794 if (!REG_P (els))
6795 els = force_reg (target_mode, els);
6797 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6798 result_target,
6799 CONST0_RTX (result_mode));
6801 /* We compared the result against zero above so we have to swap then
6802 and els here. */
6803 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6805 gcc_assert (target_mode == GET_MODE (then));
6806 emit_insn (gen_rtx_SET (target, tmp));
6809 /* Emit the RTX necessary to initialize the vector TARGET with values
6810 in VALS. */
6811 void
6812 s390_expand_vec_init (rtx target, rtx vals)
6814 machine_mode mode = GET_MODE (target);
6815 machine_mode inner_mode = GET_MODE_INNER (mode);
6816 int n_elts = GET_MODE_NUNITS (mode);
6817 bool all_same = true, all_regs = true, all_const_int = true;
6818 rtx x;
6819 int i;
6821 for (i = 0; i < n_elts; ++i)
6823 x = XVECEXP (vals, 0, i);
6825 if (!CONST_INT_P (x))
6826 all_const_int = false;
6828 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6829 all_same = false;
6831 if (!REG_P (x))
6832 all_regs = false;
6835 /* Use vector gen mask or vector gen byte mask if possible. */
6836 if (all_same && all_const_int
6837 && (XVECEXP (vals, 0, 0) == const0_rtx
6838 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6839 NULL, NULL)
6840 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6842 emit_insn (gen_rtx_SET (target,
6843 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6844 return;
6847 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6848 if (all_same)
6850 rtx elem = XVECEXP (vals, 0, 0);
6852 /* vec_splats accepts general_operand as source. */
6853 if (!general_operand (elem, GET_MODE (elem)))
6854 elem = force_reg (inner_mode, elem);
6856 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6857 return;
6860 if (all_regs
6861 && REG_P (target)
6862 && n_elts == 2
6863 && GET_MODE_SIZE (inner_mode) == 8)
6865 /* Use vector load pair. */
6866 emit_insn (gen_rtx_SET (target,
6867 gen_rtx_VEC_CONCAT (mode,
6868 XVECEXP (vals, 0, 0),
6869 XVECEXP (vals, 0, 1))));
6870 return;
6873 /* Use vector load logical element and zero. */
6874 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6876 bool found = true;
6878 x = XVECEXP (vals, 0, 0);
6879 if (memory_operand (x, inner_mode))
6881 for (i = 1; i < n_elts; ++i)
6882 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6884 if (found)
6886 machine_mode half_mode = (inner_mode == SFmode
6887 ? V2SFmode : V2SImode);
6888 emit_insn (gen_rtx_SET (target,
6889 gen_rtx_VEC_CONCAT (mode,
6890 gen_rtx_VEC_CONCAT (half_mode,
6892 const0_rtx),
6893 gen_rtx_VEC_CONCAT (half_mode,
6894 const0_rtx,
6895 const0_rtx))));
6896 return;
6901 /* We are about to set the vector elements one by one. Zero out the
6902 full register first in order to help the data flow framework to
6903 detect it as a full VR set. */
6904 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6906 /* Unfortunately the vec_init expander is not allowed to fail. So
6907 we have to implement the fallback ourselves. */
6908 for (i = 0; i < n_elts; i++)
6910 rtx elem = XVECEXP (vals, 0, i);
6911 if (!general_operand (elem, GET_MODE (elem)))
6912 elem = force_reg (inner_mode, elem);
6914 emit_insn (gen_rtx_SET (target,
6915 gen_rtx_UNSPEC (mode,
6916 gen_rtvec (3, elem,
6917 GEN_INT (i), target),
6918 UNSPEC_VEC_SET)));
6922 /* Structure to hold the initial parameters for a compare_and_swap operation
6923 in HImode and QImode. */
6925 struct alignment_context
6927 rtx memsi; /* SI aligned memory location. */
6928 rtx shift; /* Bit offset with regard to lsb. */
6929 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6930 rtx modemaski; /* ~modemask */
6931 bool aligned; /* True if memory is aligned, false otherwise. */
6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6935 structure AC for transparent simplification, if the memory alignment is known
6936 to be at least 32 bits. MEM is the memory location for the actual operation
6937 and MODE its mode. */
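/* Worked example (big-endian): for a QImode access at address A,
   memsi covers the word at A & ~3, the byte offset within that word
   is A & 3, the shift of the field relative to the word's least
   significant bit is (3 - (A & 3)) * 8 bits, and modemask is
   0xff << shift.  */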
6939 static void
6940 init_alignment_context (struct alignment_context *ac, rtx mem,
6941 machine_mode mode)
6943 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6944 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6946 if (ac->aligned)
6947 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6948 else
6950 /* Alignment is unknown. */
6951 rtx byteoffset, addr, align;
6953 /* Force the address into a register. */
6954 addr = force_reg (Pmode, XEXP (mem, 0));
6956 /* Align it to SImode. */
6957 align = expand_simple_binop (Pmode, AND, addr,
6958 GEN_INT (-GET_MODE_SIZE (SImode)),
6959 NULL_RTX, 1, OPTAB_DIRECT);
6960 /* Generate MEM. */
6961 ac->memsi = gen_rtx_MEM (SImode, align);
6962 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6963 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6964 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6966 /* Calculate shiftcount. */
6967 byteoffset = expand_simple_binop (Pmode, AND, addr,
6968 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6969 NULL_RTX, 1, OPTAB_DIRECT);
6970 /* As we already have some offset, evaluate the remaining distance. */
6971 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6972 NULL_RTX, 1, OPTAB_DIRECT);
6975 /* Shift is the byte count, but we need the bitcount. */
6976 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6977 NULL_RTX, 1, OPTAB_DIRECT);
6979 /* Calculate masks. */
6980 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6981 GEN_INT (GET_MODE_MASK (mode)),
6982 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6983 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6984 NULL_RTX, 1);
6987 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6988 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6989 perform the merge in SEQ2. */
6991 static rtx
6992 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6993 machine_mode mode, rtx val, rtx ins)
6995 rtx tmp;
6997 if (ac->aligned)
6999 start_sequence ();
7000 tmp = copy_to_mode_reg (SImode, val);
7001 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7002 const0_rtx, ins))
7004 *seq1 = NULL;
7005 *seq2 = get_insns ();
7006 end_sequence ();
7007 return tmp;
7009 end_sequence ();
7012 /* Failed to use insv. Generate a two part shift and mask. */
7013 start_sequence ();
7014 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7015 *seq1 = get_insns ();
7016 end_sequence ();
7018 start_sequence ();
7019 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7020 *seq2 = get_insns ();
7021 end_sequence ();
7023 return tmp;
7026 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7027 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028 value to set if CMP == MEM. */
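/* In effect this implements __atomic_compare_exchange for a byte or
   halfword object: the containing aligned 32-bit word is loaded, CMP
   and NEW_RTX are inserted at the proper bit position, and a full
   word compare-and-swap is retried as long as only bytes outside the
   accessed field change underneath us.  */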
7030 static void
7031 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7032 rtx cmp, rtx new_rtx, bool is_weak)
7034 struct alignment_context ac;
7035 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7036 rtx res = gen_reg_rtx (SImode);
7037 rtx_code_label *csloop = NULL, *csend = NULL;
7039 gcc_assert (MEM_P (mem));
7041 init_alignment_context (&ac, mem, mode);
7043 /* Load full word. Subsequent loads are performed by CS. */
7044 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7045 NULL_RTX, 1, OPTAB_DIRECT);
7047 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7048 possible, we try to use insv to make this happen efficiently. If
7049 that fails we'll generate code both inside and outside the loop. */
7050 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7051 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7053 if (seq0)
7054 emit_insn (seq0);
7055 if (seq1)
7056 emit_insn (seq1);
7058 /* Start CS loop. */
7059 if (!is_weak)
7061 /* Begin assuming success. */
7062 emit_move_insn (btarget, const1_rtx);
7064 csloop = gen_label_rtx ();
7065 csend = gen_label_rtx ();
7066 emit_label (csloop);
7069 /* val = "<mem>00..0<mem>"
7070 * cmp = "00..0<cmp>00..0"
7071 * new = "00..0<new>00..0"
7074 emit_insn (seq2);
7075 emit_insn (seq3);
7077 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7078 if (is_weak)
7079 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7080 else
7082 rtx tmp;
7084 /* Jump to end if we're done (likely?). */
7085 s390_emit_jump (csend, cc);
7087 /* Check for changes outside the accessed mode, and loop internally if so.
7088 Arrange the moves so that the compare is adjacent to the
7089 branch so that we can generate CRJ. */
7090 tmp = copy_to_reg (val);
7091 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7092 1, OPTAB_DIRECT);
7093 cc = s390_emit_compare (NE, val, tmp);
7094 s390_emit_jump (csloop, cc);
7096 /* Failed. */
7097 emit_move_insn (btarget, const0_rtx);
7098 emit_label (csend);
7101 /* Return the correct part of the bitfield. */
7102 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7103 NULL_RTX, 1, OPTAB_DIRECT), 1);
7106 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7107 static void
7108 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 rtx cmp, rtx new_rtx, bool is_weak)
7111 rtx output = vtarget;
7112 rtx_code_label *skip_cs_label = NULL;
7113 bool do_const_opt = false;
7115 if (!register_operand (output, mode))
7116 output = gen_reg_rtx (mode);
7118 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7119 with the constant first and skip the compare_and_swap because it is very
7120 expensive and likely to fail anyway.
7121 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7122 cause spurious failures in that case.
7123 Note 2: It may be useful to do this also for non-constant INPUT.
7124 Note 3: Currently only targets with "load on condition" are supported
7125 (z196 and newer). */
7127 if (TARGET_Z196
7128 && (mode == SImode || mode == DImode))
7129 do_const_opt = (is_weak && CONST_INT_P (cmp));
7131 if (do_const_opt)
7133 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7135 skip_cs_label = gen_label_rtx ();
7136 emit_move_insn (btarget, const0_rtx);
7137 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7139 rtvec lt = rtvec_alloc (2);
7141 /* Load-and-test + conditional jump. */
7142 RTVEC_ELT (lt, 0)
7143 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7144 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7145 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7147 else
7149 emit_move_insn (output, mem);
7150 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7152 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7153 add_reg_br_prob_note (get_last_insn (),
7154 profile_probability::very_unlikely ());
7155 /* If the jump is not taken, OUTPUT is the expected value. */
7156 cmp = output;
7157 /* Reload newval to a register manually, *after* the compare and jump
7158 above. Otherwise Reload might place it before the jump. */
7160 else
7161 cmp = force_reg (mode, cmp);
7162 new_rtx = force_reg (mode, new_rtx);
7163 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7164 (do_const_opt) ? CCZmode : CCZ1mode);
7165 if (skip_cs_label != NULL)
7166 emit_label (skip_cs_label);
7168 /* We deliberately accept non-register operands in the predicate
7169 to ensure the write back to the output operand happens *before*
7170 the store-flags code below. This makes it easier for combine
7171 to merge the store-flags code with a potential test-and-branch
7172 pattern following (immediately!) afterwards. */
7173 if (output != vtarget)
7174 emit_move_insn (vtarget, output);
7176 if (do_const_opt)
7178 rtx cc, cond, ite;
7180 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181 btarget has already been initialized with 0 above. */
7182 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7183 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7184 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7185 emit_insn (gen_rtx_SET (btarget, ite));
7187 else
7189 rtx cc, cond;
7191 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7192 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7193 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7197 /* Expand an atomic compare and swap operation. MEM is the memory location,
7198 CMP the old value to compare MEM with and NEW_RTX the value to set if
7199 CMP == MEM. */
7201 void
7202 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7203 rtx cmp, rtx new_rtx, bool is_weak)
7205 switch (mode)
7207 case E_TImode:
7208 case E_DImode:
7209 case E_SImode:
7210 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7211 break;
7212 case E_HImode:
7213 case E_QImode:
7214 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7215 break;
7216 default:
7217 gcc_unreachable ();
7221 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7223 of MEM. */
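/* E.g. for __atomic_exchange_n the loop below keeps retrying
   "compare against OUTPUT, swap in INPUT" until the value read really
   was the current memory contents; exchanging in a constant zero on
   z196+ is special-cased as an atomic fetch-and-AND with zero.  */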
7225 void
7226 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7228 machine_mode mode = GET_MODE (mem);
7229 rtx_code_label *csloop;
7231 if (TARGET_Z196
7232 && (mode == DImode || mode == SImode)
7233 && CONST_INT_P (input) && INTVAL (input) == 0)
7235 emit_move_insn (output, const0_rtx);
7236 if (mode == DImode)
7237 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7238 else
7239 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7240 return;
7243 input = force_reg (mode, input);
7244 emit_move_insn (output, mem);
7245 csloop = gen_label_rtx ();
7246 emit_label (csloop);
7247 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7248 input, CCZ1mode));
7251 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7252 and VAL the value to play with. If AFTER is true then store the value
7253 MEM holds after the operation, if AFTER is false then store the value MEM
7254 holds before the operation. If TARGET is zero then discard that value, else
7255 store it to TARGET. */
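/* For example, an __atomic_fetch_add on a halfword is implemented by
   loading the containing aligned word, patching the incremented
   halfword into a copy, and retrying a full word compare-and-swap
   until no other update intervened; TARGET then receives the
   halfword extracted from either the old or the new word value,
   depending on AFTER.  */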
7257 void
7258 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7259 rtx target, rtx mem, rtx val, bool after)
7261 struct alignment_context ac;
7262 rtx cmp;
7263 rtx new_rtx = gen_reg_rtx (SImode);
7264 rtx orig = gen_reg_rtx (SImode);
7265 rtx_code_label *csloop = gen_label_rtx ();
7267 gcc_assert (!target || register_operand (target, VOIDmode));
7268 gcc_assert (MEM_P (mem));
7270 init_alignment_context (&ac, mem, mode);
7272 /* Shift val to the correct bit positions.
7273 Preserve "icm", but prevent "ex icm". */
7274 if (!(ac.aligned && code == SET && MEM_P (val)))
7275 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7277 /* Further preparation insns. */
7278 if (code == PLUS || code == MINUS)
7279 emit_move_insn (orig, val);
7280 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7281 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7282 NULL_RTX, 1, OPTAB_DIRECT);
7284 /* Load full word. Subsequent loads are performed by CS. */
7285 cmp = force_reg (SImode, ac.memsi);
7287 /* Start CS loop. */
7288 emit_label (csloop);
7289 emit_move_insn (new_rtx, cmp);
7291 /* Patch new with val at correct position. */
7292 switch (code)
7294 case PLUS:
7295 case MINUS:
7296 val = expand_simple_binop (SImode, code, new_rtx, orig,
7297 NULL_RTX, 1, OPTAB_DIRECT);
7298 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7299 NULL_RTX, 1, OPTAB_DIRECT);
7300 /* FALLTHRU */
7301 case SET:
7302 if (ac.aligned && MEM_P (val))
7303 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7304 0, 0, SImode, val, false);
7305 else
7307 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7308 NULL_RTX, 1, OPTAB_DIRECT);
7309 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7310 NULL_RTX, 1, OPTAB_DIRECT);
7312 break;
7313 case AND:
7314 case IOR:
7315 case XOR:
7316 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7317 NULL_RTX, 1, OPTAB_DIRECT);
7318 break;
7319 case MULT: /* NAND */
7320 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7321 NULL_RTX, 1, OPTAB_DIRECT);
7322 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7323 NULL_RTX, 1, OPTAB_DIRECT);
7324 break;
7325 default:
7326 gcc_unreachable ();
7329 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7330 ac.memsi, cmp, new_rtx,
7331 CCZ1mode));
7333 /* Return the correct part of the bitfield. */
7334 if (target)
7335 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7336 after ? new_rtx : cmp, ac.shift,
7337 NULL_RTX, 1, OPTAB_DIRECT), 1);
7340 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341 We need to emit DTP-relative relocations. */
7343 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7345 static void
7346 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7348 switch (size)
7350 case 4:
7351 fputs ("\t.long\t", file);
7352 break;
7353 case 8:
7354 fputs ("\t.quad\t", file);
7355 break;
7356 default:
7357 gcc_unreachable ();
7359 output_addr_const (file, x);
7360 fputs ("@DTPOFF", file);
7363 /* Return the proper mode for REGNO being represented in the dwarf
7364 unwind table. */
7365 machine_mode
7366 s390_dwarf_frame_reg_mode (int regno)
7368 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7370 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7371 if (GENERAL_REGNO_P (regno))
7372 save_mode = Pmode;
7374 /* The rightmost 64 bits of vector registers are call-clobbered. */
7375 if (GET_MODE_SIZE (save_mode) > 8)
7376 save_mode = DImode;
7378 return save_mode;
7381 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382 /* Implement TARGET_MANGLE_TYPE. */
7384 static const char *
7385 s390_mangle_type (const_tree type)
7387 type = TYPE_MAIN_VARIANT (type);
7389 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7390 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7391 return NULL;
7393 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7394 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7395 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7396 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7398 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7399 && TARGET_LONG_DOUBLE_128)
7400 return "g";
7402 /* For all other types, use normal C++ mangling. */
7403 return NULL;
7405 #endif
7407 /* In the name of slightly smaller debug output, and to cater to
7408 general assembler lossage, recognize various UNSPEC sequences
7409 and turn them back into a direct symbol reference. */
7411 static rtx
7412 s390_delegitimize_address (rtx orig_x)
7414 rtx x, y;
7416 orig_x = delegitimize_mem_from_attrs (orig_x);
7417 x = orig_x;
7419 /* Extract the symbol ref from:
7420 (plus:SI (reg:SI 12 %r12)
7421 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422 UNSPEC_GOTOFF/PLTOFF)))
7424 (plus:SI (reg:SI 12 %r12)
7425 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426 UNSPEC_GOTOFF/PLTOFF)
7427 (const_int 4 [0x4])))) */
7428 if (GET_CODE (x) == PLUS
7429 && REG_P (XEXP (x, 0))
7430 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7431 && GET_CODE (XEXP (x, 1)) == CONST)
7433 HOST_WIDE_INT offset = 0;
7435 /* The const operand. */
7436 y = XEXP (XEXP (x, 1), 0);
7438 if (GET_CODE (y) == PLUS
7439 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7441 offset = INTVAL (XEXP (y, 1));
7442 y = XEXP (y, 0);
7445 if (GET_CODE (y) == UNSPEC
7446 && (XINT (y, 1) == UNSPEC_GOTOFF
7447 || XINT (y, 1) == UNSPEC_PLTOFF))
7448 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7451 if (GET_CODE (x) != MEM)
7452 return orig_x;
7454 x = XEXP (x, 0);
7455 if (GET_CODE (x) == PLUS
7456 && GET_CODE (XEXP (x, 1)) == CONST
7457 && GET_CODE (XEXP (x, 0)) == REG
7458 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7460 y = XEXP (XEXP (x, 1), 0);
7461 if (GET_CODE (y) == UNSPEC
7462 && XINT (y, 1) == UNSPEC_GOT)
7463 y = XVECEXP (y, 0, 0);
7464 else
7465 return orig_x;
7467 else if (GET_CODE (x) == CONST)
7469 /* Extract the symbol ref from:
7470 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471 UNSPEC_PLT/GOTENT))) */
7473 y = XEXP (x, 0);
7474 if (GET_CODE (y) == UNSPEC
7475 && (XINT (y, 1) == UNSPEC_GOTENT
7476 || XINT (y, 1) == UNSPEC_PLT))
7477 y = XVECEXP (y, 0, 0);
7478 else
7479 return orig_x;
7481 else
7482 return orig_x;
7484 if (GET_MODE (orig_x) != Pmode)
7486 if (GET_MODE (orig_x) == BLKmode)
7487 return orig_x;
7488 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7489 if (y == NULL_RTX)
7490 return orig_x;
7492 return y;
7495 /* Output operand OP to stdio stream FILE.
7496 OP is an address (register + offset) which is not used to address data;
7497 instead the rightmost bits are interpreted as the value. */
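/* E.g. for (plus (reg %r3) (const_int 7)) this prints "7(%r3)", and
   for a plain (const_int 7) just "7"; only the low 12 bits of the
   offset are printed.  */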
7499 static void
7500 print_addrstyle_operand (FILE *file, rtx op)
7502 HOST_WIDE_INT offset;
7503 rtx base;
7505 /* Extract base register and offset. */
7506 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7507 gcc_unreachable ();
7509 /* Sanity check. */
7510 if (base)
7512 gcc_assert (GET_CODE (base) == REG);
7513 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7514 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7517 /* Offsets are restricted to twelve bits. */
7518 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7519 if (base)
7520 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7523 /* Print the shift count operand OP to FILE.
7524 OP is an address-style operand in a form which
7525 s390_valid_shift_count permits. Subregs and no-op
7526 and-masking of the operand are stripped. */
7528 static void
7529 print_shift_count_operand (FILE *file, rtx op)
7531 /* No checking of the and mask required here. */
7532 if (!s390_valid_shift_count (op, 0))
7533 gcc_unreachable ();
7535 while (op && GET_CODE (op) == SUBREG)
7536 op = SUBREG_REG (op);
7538 if (GET_CODE (op) == AND)
7539 op = XEXP (op, 0);
7541 print_addrstyle_operand (file, op);
7544 /* Assigns the number of NOP halfwords to be emitted before and after the
7545 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
7546 If hotpatching is disabled for the function, the values are set to zero.
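/* For example, a function declared with
     __attribute__ ((hotpatch (1, 2)))
   gets *HW_BEFORE set to 1 and *HW_AFTER set to 2, overriding any
   -mhotpatch= value given on the command line.  */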
7549 static void
7550 s390_function_num_hotpatch_hw (tree decl,
7551 int *hw_before,
7552 int *hw_after)
7554 tree attr;
7556 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7558 /* Handle the arguments of the hotpatch attribute. The values
7559 specified via attribute might override the cmdline argument
7560 values. */
7561 if (attr)
7563 tree args = TREE_VALUE (attr);
7565 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7566 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7568 else
7570 /* Use the values specified by the cmdline arguments. */
7571 *hw_before = s390_hotpatch_hw_before_label;
7572 *hw_after = s390_hotpatch_hw_after_label;
7576 /* Write the current .machine and .machinemode specification to the assembler
7577 file. */
7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7580 static void
7581 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7583 fprintf (asm_out_file, "\t.machinemode %s\n",
7584 (TARGET_ZARCH) ? "zarch" : "esa");
7585 fprintf (asm_out_file, "\t.machine \"%s",
7586 processor_table[s390_arch].binutils_name);
7587 if (S390_USE_ARCHITECTURE_MODIFIERS)
7589 int cpu_flags;
7591 cpu_flags = processor_flags_table[(int) s390_arch];
7592 if (TARGET_HTM && !(cpu_flags & PF_TX))
7593 fprintf (asm_out_file, "+htm");
7594 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7595 fprintf (asm_out_file, "+nohtm");
7596 if (TARGET_VX && !(cpu_flags & PF_VX))
7597 fprintf (asm_out_file, "+vx");
7598 else if (!TARGET_VX && (cpu_flags & PF_VX))
7599 fprintf (asm_out_file, "+novx");
7601 fprintf (asm_out_file, "\"\n");
7604 /* Write an extra function header before the very start of the function. */
7606 void
7607 s390_asm_output_function_prefix (FILE *asm_out_file,
7608 const char *fnname ATTRIBUTE_UNUSED)
7610 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7611 return;
7612 /* Since only the function specific options are saved but not the indications
7613 which options are set, it's too much work here to figure out which options
7614 have actually changed. Thus, generate .machine and .machinemode whenever a
7615 function has the target attribute or pragma. */
7616 fprintf (asm_out_file, "\t.machinemode push\n");
7617 fprintf (asm_out_file, "\t.machine push\n");
7618 s390_asm_output_machine_for_arch (asm_out_file);
7621 /* Write an extra function footer after the very end of the function. */
7623 void
7624 s390_asm_declare_function_size (FILE *asm_out_file,
7625 const char *fnname, tree decl)
7627 if (!flag_inhibit_size_directive)
7628 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7629 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7630 return;
7631 fprintf (asm_out_file, "\t.machine pop\n");
7632 fprintf (asm_out_file, "\t.machinemode pop\n");
7634 #endif
7636 /* Write the extra assembler code needed to declare a function properly. */
7638 void
7639 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7640 tree decl)
7642 int hw_before, hw_after;
7644 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7645 if (hw_before > 0)
7647 unsigned int function_alignment;
7648 int i;
7650 /* Add a trampoline code area before the function label and initialize it
7651 with two-byte nop instructions. This area can be overwritten with code
7652 that jumps to a patched version of the function. */
7653 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7654 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7655 hw_before);
7656 for (i = 1; i < hw_before; i++)
7657 fputs ("\tnopr\t%r0\n", asm_out_file);
7659 /* Note: The function label must be aligned so that (a) the bytes of the
7660 following nop do not cross a cacheline boundary, and (b) a jump address
7661 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7662 stored directly before the label without crossing a cacheline
7663 boundary. All this is necessary to make sure the trampoline code can
7664 be changed atomically.
7665 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7666 if there are NOPs before the function label, the alignment is placed
7667 before them. So it is necessary to duplicate the alignment after the
7668 NOPs. */
7669 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7670 if (! DECL_USER_ALIGN (decl))
7671 function_alignment
7672 = MAX (function_alignment,
7673 (unsigned int) align_functions.levels[0].get_value ());
7674 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7675 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7678 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7680 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7681 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7682 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7683 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7684 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7685 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7686 s390_warn_framesize);
7687 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7688 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7689 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7690 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7691 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7692 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7693 TARGET_PACKED_STACK);
7694 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7695 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7696 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7697 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7698 s390_warn_dynamicstack_p);
7700 ASM_OUTPUT_LABEL (asm_out_file, fname);
7701 if (hw_after > 0)
7702 asm_fprintf (asm_out_file,
7703 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7704 hw_after);
7707 /* Output machine-dependent UNSPECs occurring in address constant X
7708 in assembler syntax to stdio stream FILE. Returns true if the
7709 constant X could be recognized, false otherwise. */
7711 static bool
7712 s390_output_addr_const_extra (FILE *file, rtx x)
7714 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7715 switch (XINT (x, 1))
7717 case UNSPEC_GOTENT:
7718 output_addr_const (file, XVECEXP (x, 0, 0));
7719 fprintf (file, "@GOTENT");
7720 return true;
7721 case UNSPEC_GOT:
7722 output_addr_const (file, XVECEXP (x, 0, 0));
7723 fprintf (file, "@GOT");
7724 return true;
7725 case UNSPEC_GOTOFF:
7726 output_addr_const (file, XVECEXP (x, 0, 0));
7727 fprintf (file, "@GOTOFF");
7728 return true;
7729 case UNSPEC_PLT:
7730 output_addr_const (file, XVECEXP (x, 0, 0));
7731 fprintf (file, "@PLT");
7732 return true;
7733 case UNSPEC_PLTOFF:
7734 output_addr_const (file, XVECEXP (x, 0, 0));
7735 fprintf (file, "@PLTOFF");
7736 return true;
7737 case UNSPEC_TLSGD:
7738 output_addr_const (file, XVECEXP (x, 0, 0));
7739 fprintf (file, "@TLSGD");
7740 return true;
7741 case UNSPEC_TLSLDM:
7742 assemble_name (file, get_some_local_dynamic_name ());
7743 fprintf (file, "@TLSLDM");
7744 return true;
7745 case UNSPEC_DTPOFF:
7746 output_addr_const (file, XVECEXP (x, 0, 0));
7747 fprintf (file, "@DTPOFF");
7748 return true;
7749 case UNSPEC_NTPOFF:
7750 output_addr_const (file, XVECEXP (x, 0, 0));
7751 fprintf (file, "@NTPOFF");
7752 return true;
7753 case UNSPEC_GOTNTPOFF:
7754 output_addr_const (file, XVECEXP (x, 0, 0));
7755 fprintf (file, "@GOTNTPOFF");
7756 return true;
7757 case UNSPEC_INDNTPOFF:
7758 output_addr_const (file, XVECEXP (x, 0, 0));
7759 fprintf (file, "@INDNTPOFF");
7760 return true;
7763 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7764 switch (XINT (x, 1))
7766 case UNSPEC_POOL_OFFSET:
7767 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7768 output_addr_const (file, x);
7769 return true;
7771 return false;
7774 /* Output address operand ADDR in assembler syntax to
7775 stdio stream FILE. */
7777 void
7778 print_operand_address (FILE *file, rtx addr)
7780 struct s390_address ad;
7781 memset (&ad, 0, sizeof (s390_address));
7783 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7785 if (!TARGET_Z10)
7787 output_operand_lossage ("symbolic memory references are "
7788 "only supported on z10 or later");
7789 return;
7791 output_addr_const (file, addr);
7792 return;
7795 if (!s390_decompose_address (addr, &ad)
7796 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7797 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7798 output_operand_lossage ("cannot decompose address");
7800 if (ad.disp)
7801 output_addr_const (file, ad.disp);
7802 else
7803 fprintf (file, "0");
7805 if (ad.base && ad.indx)
7806 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7807 reg_names[REGNO (ad.base)]);
7808 else if (ad.base)
7809 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
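/* Illustration (not from the sources): with a decomposed address whose
   displacement is 8, index register %r2 and base register %r11, the code
   above emits the usual D(X,B) assembler form

       8(%r2,%r11)

   and a base-only address with no displacement is printed as 0(%r11).  */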
7812 /* Output operand X in assembler syntax to stdio stream FILE.
7813 CODE specifies the format flag. The following format flags
7814 are recognized:
7816 'A': On z14 or higher: if the operand is a MEM, print the alignment
7817 hint usable with vl/vst, prefixed by a comma.
7818 'C': print opcode suffix for branch condition.
7819 'D': print opcode suffix for inverse branch condition.
7820 'E': print opcode suffix for branch on index instruction.
7821 'G': print the size of the operand in bytes.
7822 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7823 'M': print the second word of a TImode operand.
7824 'N': print the second word of a DImode operand.
7825 'O': print only the displacement of a memory reference or address.
7826 'R': print only the base register of a memory reference or address.
7827 'S': print S-type memory reference (base+displacement).
7828 'Y': print address style operand without index (e.g. shift count or setmem
7829 operand).
7831 'b': print integer X as if it's an unsigned byte.
7832 'c': print integer X as if it's a signed byte.
7833 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7834 'f': "end" of contiguous bitmask X in SImode.
7835 'h': print integer X as if it's a signed halfword.
7836 'i': print the first nonzero HImode part of X.
7837 'j': print the first HImode part unequal to -1 of X.
7838 'k': print the first nonzero SImode part of X.
7839 'm': print the first SImode part unequal to -1 of X.
7840 'o': print integer X as if it's an unsigned 32-bit word.
7841 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844 'x': print integer X as if it's an unsigned halfword.
7845 'v': print register number as vector register (v1 instead of f1).
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7851 HOST_WIDE_INT ival;
7853 switch (code)
7855 case 'A':
7856 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7858 if (MEM_ALIGN (x) >= 128)
7859 fprintf (file, ",4");
7860 else if (MEM_ALIGN (x) == 64)
7861 fprintf (file, ",3");
7863 return;
7864 case 'C':
7865 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7866 return;
7868 case 'D':
7869 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7870 return;
7872 case 'E':
7873 if (GET_CODE (x) == LE)
7874 fprintf (file, "l");
7875 else if (GET_CODE (x) == GT)
7876 fprintf (file, "h");
7877 else
7878 output_operand_lossage ("invalid comparison operator "
7879 "for 'E' output modifier");
7880 return;
7882 case 'J':
7883 if (GET_CODE (x) == SYMBOL_REF)
7885 fprintf (file, "%s", ":tls_load:");
7886 output_addr_const (file, x);
7888 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7890 fprintf (file, "%s", ":tls_gdcall:");
7891 output_addr_const (file, XVECEXP (x, 0, 0));
7893 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7895 fprintf (file, "%s", ":tls_ldcall:");
7896 const char *name = get_some_local_dynamic_name ();
7897 gcc_assert (name);
7898 assemble_name (file, name);
7900 else
7901 output_operand_lossage ("invalid reference for 'J' output modifier");
7902 return;
7904 case 'G':
7905 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7906 return;
7908 case 'O':
7910 struct s390_address ad;
7911 int ret;
7913 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7915 if (!ret
7916 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7917 || ad.indx)
7919 output_operand_lossage ("invalid address for 'O' output modifier");
7920 return;
7923 if (ad.disp)
7924 output_addr_const (file, ad.disp);
7925 else
7926 fprintf (file, "0");
7928 return;
7930 case 'R':
7932 struct s390_address ad;
7933 int ret;
7935 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7937 if (!ret
7938 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7939 || ad.indx)
7941 output_operand_lossage ("invalid address for 'R' output modifier");
7942 return;
7945 if (ad.base)
7946 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7947 else
7948 fprintf (file, "0");
7950 return;
7952 case 'S':
7954 struct s390_address ad;
7955 int ret;
7957 if (!MEM_P (x))
7959 output_operand_lossage ("memory reference expected for "
7960 "'S' output modifier");
7961 return;
7963 ret = s390_decompose_address (XEXP (x, 0), &ad);
7965 if (!ret
7966 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7967 || ad.indx)
7969 output_operand_lossage ("invalid address for 'S' output modifier");
7970 return;
7973 if (ad.disp)
7974 output_addr_const (file, ad.disp);
7975 else
7976 fprintf (file, "0");
7978 if (ad.base)
7979 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7981 return;
7983 case 'N':
7984 if (GET_CODE (x) == REG)
7985 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7986 else if (GET_CODE (x) == MEM)
7987 x = change_address (x, VOIDmode,
7988 plus_constant (Pmode, XEXP (x, 0), 4));
7989 else
7990 output_operand_lossage ("register or memory expression expected "
7991 "for 'N' output modifier");
7992 break;
7994 case 'M':
7995 if (GET_CODE (x) == REG)
7996 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7997 else if (GET_CODE (x) == MEM)
7998 x = change_address (x, VOIDmode,
7999 plus_constant (Pmode, XEXP (x, 0), 8));
8000 else
8001 output_operand_lossage ("register or memory expression expected "
8002 "for 'M' output modifier");
8003 break;
8005 case 'Y':
8006 print_shift_count_operand (file, x);
8007 return;
8010 switch (GET_CODE (x))
8012 case REG:
8013 /* Print FP regs as fx instead of vx when they are accessed
8014 through non-vector mode. */
8015 if (code == 'v'
8016 || VECTOR_NOFP_REG_P (x)
8017 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8018 || (VECTOR_REG_P (x)
8019 && (GET_MODE_SIZE (GET_MODE (x)) /
8020 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8021 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8022 else
8023 fprintf (file, "%s", reg_names[REGNO (x)]);
8024 break;
8026 case MEM:
8027 output_address (GET_MODE (x), XEXP (x, 0));
8028 break;
8030 case CONST:
8031 case CODE_LABEL:
8032 case LABEL_REF:
8033 case SYMBOL_REF:
8034 output_addr_const (file, x);
8035 break;
8037 case CONST_INT:
8038 ival = INTVAL (x);
8039 switch (code)
8041 case 0:
8042 break;
8043 case 'b':
8044 ival &= 0xff;
8045 break;
8046 case 'c':
8047 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8048 break;
8049 case 'x':
8050 ival &= 0xffff;
8051 break;
8052 case 'h':
8053 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8054 break;
8055 case 'i':
8056 ival = s390_extract_part (x, HImode, 0);
8057 break;
8058 case 'j':
8059 ival = s390_extract_part (x, HImode, -1);
8060 break;
8061 case 'k':
8062 ival = s390_extract_part (x, SImode, 0);
8063 break;
8064 case 'm':
8065 ival = s390_extract_part (x, SImode, -1);
8066 break;
8067 case 'o':
8068 ival &= 0xffffffff;
8069 break;
8070 case 'e': case 'f':
8071 case 's': case 't':
8073 int start, end;
8074 int len;
8075 bool ok;
8077 len = (code == 's' || code == 'e' ? 64 : 32);
8078 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8079 gcc_assert (ok);
8080 if (code == 's' || code == 't')
8081 ival = start;
8082 else
8083 ival = end;
8085 break;
8086 default:
8087 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8090 break;
8092 case CONST_WIDE_INT:
8093 if (code == 'b')
8094 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8095 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8096 else if (code == 'x')
8097 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8098 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8099 else if (code == 'h')
8100 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8101 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8102 else
8104 if (code == 0)
8105 output_operand_lossage ("invalid constant - try using "
8106 "an output modifier");
8107 else
8108 output_operand_lossage ("invalid constant for output modifier '%c'",
8109 code);
8111 break;
8112 case CONST_VECTOR:
8113 switch (code)
8115 case 'h':
8116 gcc_assert (const_vec_duplicate_p (x));
8117 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8118 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8119 break;
8120 case 'e':
8121 case 's':
8123 int start, end;
8124 bool ok;
8126 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8127 gcc_assert (ok);
8128 ival = (code == 's') ? start : end;
8129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8131 break;
8132 case 't':
8134 unsigned mask;
8135 bool ok = s390_bytemask_vector_p (x, &mask);
8136 gcc_assert (ok);
8137 fprintf (file, "%u", mask);
8139 break;
8141 default:
8142 output_operand_lossage ("invalid constant vector for output "
8143 "modifier '%c'", code);
8145 break;
8147 default:
8148 if (code == 0)
8149 output_operand_lossage ("invalid expression - try using "
8150 "an output modifier");
8151 else
8152 output_operand_lossage ("invalid expression for output "
8153 "modifier '%c'", code);
8154 break;
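/* A minimal standalone sketch (not part of the backend, kept under #if 0)
   of the masking idiom used by the 'b', 'c', 'x' and 'h' modifiers in the
   CONST_INT handling above: AND selects the low-order bits, and the
   XOR/subtract pair sign-extends them.  The sample value is made up.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long ival = 0x1234ff80L;

  long b = ival & 0xff;                           /* unsigned byte: 128 */
  long c = ((ival & 0xff) ^ 0x80) - 0x80;         /* signed byte:  -128 */
  long x = ival & 0xffff;                         /* unsigned halfword: 65408 */
  long h = ((ival & 0xffff) ^ 0x8000) - 0x8000;   /* signed halfword:   -128 */

  printf ("%ld %ld %ld %ld\n", b, c, x, h);
  return 0;
}
#endif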
8158 /* Target hook for assembling integer objects. We need to define it
8159 here to work around a bug in some versions of GAS, which couldn't
8160 handle values smaller than INT_MIN when printed in decimal. */
8162 static bool
8163 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8165 if (size == 8 && aligned_p
8166 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8168 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8169 INTVAL (x));
8170 return true;
8172 return default_assemble_integer (x, size, aligned_p);
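/* Standalone sketch (not part of the backend, kept under #if 0) of the
   workaround above: for an aligned 8-byte constant just below INT_MIN the
   directive is emitted in the hex form, which the affected GAS versions
   accept, rather than in decimal.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long long v = -2147483649LL;    /* one below INT_MIN */

  printf ("\t.quad\t%lld\n", v);                        /* problematic decimal form */
  printf ("\t.quad\t%#llx\n", (unsigned long long) v);  /* hex form emitted above */
  return 0;
}
#endif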
8175 /* Returns true if register REGNO is used for forming
8176 a memory address in expression X. */
8178 static bool
8179 reg_used_in_mem_p (int regno, rtx x)
8181 enum rtx_code code = GET_CODE (x);
8182 int i, j;
8183 const char *fmt;
8185 if (code == MEM)
8187 if (refers_to_regno_p (regno, XEXP (x, 0)))
8188 return true;
8190 else if (code == SET
8191 && GET_CODE (SET_DEST (x)) == PC)
8193 if (refers_to_regno_p (regno, SET_SRC (x)))
8194 return true;
8197 fmt = GET_RTX_FORMAT (code);
8198 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8200 if (fmt[i] == 'e'
8201 && reg_used_in_mem_p (regno, XEXP (x, i)))
8202 return true;
8204 else if (fmt[i] == 'E')
8205 for (j = 0; j < XVECLEN (x, i); j++)
8206 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8207 return true;
8209 return false;
8212 /* Returns true if expression DEP_RTX sets an address register
8213 used by instruction INSN to address memory. */
8215 static bool
8216 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8218 rtx target, pat;
8220 if (NONJUMP_INSN_P (dep_rtx))
8221 dep_rtx = PATTERN (dep_rtx);
8223 if (GET_CODE (dep_rtx) == SET)
8225 target = SET_DEST (dep_rtx);
8226 if (GET_CODE (target) == STRICT_LOW_PART)
8227 target = XEXP (target, 0);
8228 while (GET_CODE (target) == SUBREG)
8229 target = SUBREG_REG (target);
8231 if (GET_CODE (target) == REG)
8233 int regno = REGNO (target);
8235 if (s390_safe_attr_type (insn) == TYPE_LA)
8237 pat = PATTERN (insn);
8238 if (GET_CODE (pat) == PARALLEL)
8240 gcc_assert (XVECLEN (pat, 0) == 2);
8241 pat = XVECEXP (pat, 0, 0);
8243 gcc_assert (GET_CODE (pat) == SET);
8244 return refers_to_regno_p (regno, SET_SRC (pat));
8246 else if (get_attr_atype (insn) == ATYPE_AGEN)
8247 return reg_used_in_mem_p (regno, PATTERN (insn));
8250 return false;
8253 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
8256 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8258 rtx dep_rtx = PATTERN (dep_insn);
8259 int i;
8261 if (GET_CODE (dep_rtx) == SET
8262 && addr_generation_dependency_p (dep_rtx, insn))
8263 return 1;
8264 else if (GET_CODE (dep_rtx) == PARALLEL)
8266 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8268 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8269 return 1;
8272 return 0;
8276 /* A C statement (sans semicolon) to update the integer scheduling priority
8277 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8278 reduce the priority to execute INSN later. Do not define this macro if
8279 you do not need to adjust the scheduling priorities of insns.
8281 A STD instruction should be scheduled earlier,
8282 in order to use the bypass. */
8283 static int
8284 s390_adjust_priority (rtx_insn *insn, int priority)
8286 if (! INSN_P (insn))
8287 return priority;
8289 if (s390_tune <= PROCESSOR_2064_Z900)
8290 return priority;
8292 switch (s390_safe_attr_type (insn))
8294 case TYPE_FSTOREDF:
8295 case TYPE_FSTORESF:
8296 priority = priority << 3;
8297 break;
8298 case TYPE_STORE:
8299 case TYPE_STM:
8300 priority = priority << 1;
8301 break;
8302 default:
8303 break;
8305 return priority;
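/* For instance, with the scaling above an FSTOREDF/FSTORESF insn of
   priority 5 is boosted to 40 and a STORE/STM insn to 10, so the
   floating-point stores are picked up earlier by the scheduler.  */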
8309 /* The number of instructions that can be issued per cycle. */
8311 static int
8312 s390_issue_rate (void)
8314 switch (s390_tune)
8316 case PROCESSOR_2084_Z990:
8317 case PROCESSOR_2094_Z9_109:
8318 case PROCESSOR_2094_Z9_EC:
8319 case PROCESSOR_2817_Z196:
8320 return 3;
8321 case PROCESSOR_2097_Z10:
8322 return 2;
8323 case PROCESSOR_2064_Z900:
8324 /* Starting with EC12 we use the sched_reorder hook to take care
8325 of instruction dispatch constraints. The algorithm only
8326 picks the best instruction and assumes only a single
8327 instruction gets issued per cycle. */
8328 case PROCESSOR_2827_ZEC12:
8329 case PROCESSOR_2964_Z13:
8330 case PROCESSOR_3906_Z14:
8331 default:
8332 return 1;
8336 static int
8337 s390_first_cycle_multipass_dfa_lookahead (void)
8339 return 4;
8342 static void
8343 annotate_constant_pool_refs_1 (rtx *x)
8345 int i, j;
8346 const char *fmt;
8348 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8349 || !CONSTANT_POOL_ADDRESS_P (*x));
8351 /* Literal pool references can only occur inside a MEM ... */
8352 if (GET_CODE (*x) == MEM)
8354 rtx memref = XEXP (*x, 0);
8356 if (GET_CODE (memref) == SYMBOL_REF
8357 && CONSTANT_POOL_ADDRESS_P (memref))
8359 rtx base = cfun->machine->base_reg;
8360 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8361 UNSPEC_LTREF);
8363 *x = replace_equiv_address (*x, addr);
8364 return;
8367 if (GET_CODE (memref) == CONST
8368 && GET_CODE (XEXP (memref, 0)) == PLUS
8369 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8370 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8371 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8373 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8374 rtx sym = XEXP (XEXP (memref, 0), 0);
8375 rtx base = cfun->machine->base_reg;
8376 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8377 UNSPEC_LTREF);
8379 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8380 return;
8384 /* ... or a load-address type pattern. */
8385 if (GET_CODE (*x) == SET)
8387 rtx addrref = SET_SRC (*x);
8389 if (GET_CODE (addrref) == SYMBOL_REF
8390 && CONSTANT_POOL_ADDRESS_P (addrref))
8392 rtx base = cfun->machine->base_reg;
8393 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8394 UNSPEC_LTREF);
8396 SET_SRC (*x) = addr;
8397 return;
8400 if (GET_CODE (addrref) == CONST
8401 && GET_CODE (XEXP (addrref, 0)) == PLUS
8402 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8403 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8404 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8406 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8407 rtx sym = XEXP (XEXP (addrref, 0), 0);
8408 rtx base = cfun->machine->base_reg;
8409 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8410 UNSPEC_LTREF);
8412 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8413 return;
8417 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8418 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8420 if (fmt[i] == 'e')
8422 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8424 else if (fmt[i] == 'E')
8426 for (j = 0; j < XVECLEN (*x, i); j++)
8427 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
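/* Illustration (not from the sources): a literal-pool MEM such as

       (mem (symbol_ref .LC0))

   is rewritten by the routine above into

       (mem (unspec [(symbol_ref .LC0) (reg base)] UNSPEC_LTREF))

   and a "symbol + offset" CONST keeps its offset outside the unspec:

       (mem (plus (unspec [(symbol_ref .LC0) (reg base)] UNSPEC_LTREF)
                  (const_int off)))

   The label .LC0 and the offset are made-up placeholders.  */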
8432 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433 Fix up MEMs as required.
8434 Skip insns which support relative addressing, because they do not use a base
8435 register. */
8437 static void
8438 annotate_constant_pool_refs (rtx_insn *insn)
8440 if (s390_safe_relative_long_p (insn))
8441 return;
8442 annotate_constant_pool_refs_1 (&PATTERN (insn));
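/* Helper for find_constant_pool_ref.  Recursively look for an annotated
   (UNSPEC_LTREF) literal pool symbol within X and store it at REF.  */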
8445 static void
8446 find_constant_pool_ref_1 (rtx x, rtx *ref)
8448 int i, j;
8449 const char *fmt;
8451 /* Skip POOL_ENTRY insns. */
8452 if (GET_CODE (x) == UNSPEC_VOLATILE
8453 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8454 return;
8456 gcc_assert (GET_CODE (x) != SYMBOL_REF
8457 || !CONSTANT_POOL_ADDRESS_P (x));
8459 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8461 rtx sym = XVECEXP (x, 0, 0);
8462 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8463 && CONSTANT_POOL_ADDRESS_P (sym));
8465 if (*ref == NULL_RTX)
8466 *ref = sym;
8467 else
8468 gcc_assert (*ref == sym);
8470 return;
8473 fmt = GET_RTX_FORMAT (GET_CODE (x));
8474 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8476 if (fmt[i] == 'e')
8478 find_constant_pool_ref_1 (XEXP (x, i), ref);
8480 else if (fmt[i] == 'E')
8482 for (j = 0; j < XVECLEN (x, i); j++)
8483 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8488 /* Find an annotated literal pool symbol referenced in INSN,
8489 and store it at REF. Will abort if INSN contains references to
8490 more than one such pool symbol; multiple references to the same
8491 symbol are allowed, however.
8493 The rtx pointed to by REF must be initialized to NULL_RTX
8494 by the caller before calling this routine.
8496 Skip insns which support relative addressing, because they do not use a base
8497 register. */
8499 static void
8500 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8502 if (s390_safe_relative_long_p (insn))
8503 return;
8504 find_constant_pool_ref_1 (PATTERN (insn), ref);
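/* Helper for replace_constant_pool_ref.  Recursively replace the
   UNSPEC_LTREF annotation for REF within *X by its base register plus
   OFFSET.  */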
8507 static void
8508 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8510 int i, j;
8511 const char *fmt;
8513 gcc_assert (*x != ref);
8515 if (GET_CODE (*x) == UNSPEC
8516 && XINT (*x, 1) == UNSPEC_LTREF
8517 && XVECEXP (*x, 0, 0) == ref)
8519 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8520 return;
8523 if (GET_CODE (*x) == PLUS
8524 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8525 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8526 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8527 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8529 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8530 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8531 return;
8534 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8535 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8537 if (fmt[i] == 'e')
8539 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8541 else if (fmt[i] == 'E')
8543 for (j = 0; j < XVECLEN (*x, i); j++)
8544 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8549 /* Replace every reference to the annotated literal pool
8550 symbol REF in INSN by its base plus OFFSET.
8551 Skip insns which support relative addressing, because they do not use a base
8552 register. */
8554 static void
8555 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8557 if (s390_safe_relative_long_p (insn))
8558 return;
8559 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
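/* Illustration (not from the sources): after the pool has been laid out,
   the annotation is resolved again by the routine above, e.g.

       (unspec [(symbol_ref .LC0) (reg base)] UNSPEC_LTREF)

   becomes

       (plus (reg base) offset)

   where OFFSET is the pool-relative distance supplied by the caller,
   typically an UNSPEC_POOL_OFFSET expression from s390_find_constant.  */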
8562 /* We keep a list of constants which we have to add to internal
8563 constant tables in the middle of large functions. */
8565 #define NR_C_MODES 32
8566 machine_mode constant_modes[NR_C_MODES] =
8568 TFmode, TImode, TDmode,
8569 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8570 V4SFmode, V2DFmode, V1TFmode,
8571 DFmode, DImode, DDmode,
8572 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8573 SFmode, SImode, SDmode,
8574 V4QImode, V2HImode, V1SImode, V1SFmode,
8575 HImode,
8576 V2QImode, V1HImode,
8577 QImode,
8578 V1QImode
8581 struct constant
8583 struct constant *next;
8584 rtx value;
8585 rtx_code_label *label;
8588 struct constant_pool
8590 struct constant_pool *next;
8591 rtx_insn *first_insn;
8592 rtx_insn *pool_insn;
8593 bitmap insns;
8594 rtx_insn *emit_pool_after;
8596 struct constant *constants[NR_C_MODES];
8597 struct constant *execute;
8598 rtx_code_label *label;
8599 int size;
8602 /* Allocate new constant_pool structure. */
8604 static struct constant_pool *
8605 s390_alloc_pool (void)
8607 struct constant_pool *pool;
8608 int i;
8610 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8611 pool->next = NULL;
8612 for (i = 0; i < NR_C_MODES; i++)
8613 pool->constants[i] = NULL;
8615 pool->execute = NULL;
8616 pool->label = gen_label_rtx ();
8617 pool->first_insn = NULL;
8618 pool->pool_insn = NULL;
8619 pool->insns = BITMAP_ALLOC (NULL);
8620 pool->size = 0;
8621 pool->emit_pool_after = NULL;
8623 return pool;
8626 /* Create new constant pool covering instructions starting at INSN
8627 and chain it to the end of POOL_LIST. */
8629 static struct constant_pool *
8630 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8632 struct constant_pool *pool, **prev;
8634 pool = s390_alloc_pool ();
8635 pool->first_insn = insn;
8637 for (prev = pool_list; *prev; prev = &(*prev)->next)
8639 *prev = pool;
8641 return pool;
8644 /* End range of instructions covered by POOL at INSN and emit
8645 placeholder insn representing the pool. */
8647 static void
8648 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8650 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8652 if (!insn)
8653 insn = get_last_insn ();
8655 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8656 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8659 /* Add INSN to the list of insns covered by POOL. */
8661 static void
8662 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8664 bitmap_set_bit (pool->insns, INSN_UID (insn));
8667 /* Return pool out of POOL_LIST that covers INSN. */
8669 static struct constant_pool *
8670 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8672 struct constant_pool *pool;
8674 for (pool = pool_list; pool; pool = pool->next)
8675 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8676 break;
8678 return pool;
8681 /* Add constant VAL of mode MODE to the constant pool POOL. */
8683 static void
8684 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8686 struct constant *c;
8687 int i;
8689 for (i = 0; i < NR_C_MODES; i++)
8690 if (constant_modes[i] == mode)
8691 break;
8692 gcc_assert (i != NR_C_MODES);
8694 for (c = pool->constants[i]; c != NULL; c = c->next)
8695 if (rtx_equal_p (val, c->value))
8696 break;
8698 if (c == NULL)
8700 c = (struct constant *) xmalloc (sizeof *c);
8701 c->value = val;
8702 c->label = gen_label_rtx ();
8703 c->next = pool->constants[i];
8704 pool->constants[i] = c;
8705 pool->size += GET_MODE_SIZE (mode);
8709 /* Return an rtx that represents the offset of X from the start of
8710 pool POOL. */
8712 static rtx
8713 s390_pool_offset (struct constant_pool *pool, rtx x)
8715 rtx label;
8717 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8718 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8719 UNSPEC_POOL_OFFSET);
8720 return gen_rtx_CONST (GET_MODE (x), x);
8723 /* Find constant VAL of mode MODE in the constant pool POOL.
8724 Return an RTX describing the distance from the start of
8725 the pool to the location of the new constant. */
8727 static rtx
8728 s390_find_constant (struct constant_pool *pool, rtx val,
8729 machine_mode mode)
8731 struct constant *c;
8732 int i;
8734 for (i = 0; i < NR_C_MODES; i++)
8735 if (constant_modes[i] == mode)
8736 break;
8737 gcc_assert (i != NR_C_MODES);
8739 for (c = pool->constants[i]; c != NULL; c = c->next)
8740 if (rtx_equal_p (val, c->value))
8741 break;
8743 gcc_assert (c);
8745 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8748 /* Check whether INSN is an execute. Return the label_ref to its
8749 execute target template if so, NULL_RTX otherwise. */
8751 static rtx
8752 s390_execute_label (rtx insn)
8754 if (INSN_P (insn)
8755 && GET_CODE (PATTERN (insn)) == PARALLEL
8756 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8757 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8758 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8760 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8761 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8762 else
8764 gcc_assert (JUMP_P (insn));
8765 /* For jump insns as execute target:
8766 - There is one operand less in the parallel (the
8767 modification register of the execute is always 0).
8768 - The execute target label is wrapped into an
8769 if_then_else in order to hide it from jump analysis. */
8770 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8774 return NULL_RTX;
8777 /* Find execute target for INSN in the constant pool POOL.
8778 Return an RTX describing the distance from the start of
8779 the pool to the location of the execute target. */
8781 static rtx
8782 s390_find_execute (struct constant_pool *pool, rtx insn)
8784 struct constant *c;
8786 for (c = pool->execute; c != NULL; c = c->next)
8787 if (INSN_UID (insn) == INSN_UID (c->value))
8788 break;
8790 gcc_assert (c);
8792 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8795 /* For an execute INSN, extract the execute target template. */
8797 static rtx
8798 s390_execute_target (rtx insn)
8800 rtx pattern = PATTERN (insn);
8801 gcc_assert (s390_execute_label (insn));
8803 if (XVECLEN (pattern, 0) == 2)
8805 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8807 else
8809 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8810 int i;
8812 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8813 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8815 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8818 return pattern;
8821 /* Indicate that INSN cannot be duplicated. This is the case for
8822 execute insns that carry a unique label. */
8824 static bool
8825 s390_cannot_copy_insn_p (rtx_insn *insn)
8827 rtx label = s390_execute_label (insn);
8828 return label && label != const0_rtx;
8831 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8832 do not emit the pool base label. */
8834 static void
8835 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8837 struct constant *c;
8838 rtx_insn *insn = pool->pool_insn;
8839 int i;
8841 /* Switch to rodata section. */
8842 insn = emit_insn_after (gen_pool_section_start (), insn);
8843 INSN_ADDRESSES_NEW (insn, -1);
8845 /* Ensure minimum pool alignment. */
8846 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8847 INSN_ADDRESSES_NEW (insn, -1);
8849 /* Emit pool base label. */
8850 if (!remote_label)
8852 insn = emit_label_after (pool->label, insn);
8853 INSN_ADDRESSES_NEW (insn, -1);
8856 /* Dump constants in descending alignment requirement order,
8857 ensuring proper alignment for every constant. */
8858 for (i = 0; i < NR_C_MODES; i++)
8859 for (c = pool->constants[i]; c; c = c->next)
8861 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8862 rtx value = copy_rtx (c->value);
8863 if (GET_CODE (value) == CONST
8864 && GET_CODE (XEXP (value, 0)) == UNSPEC
8865 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8866 && XVECLEN (XEXP (value, 0), 0) == 1)
8867 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8869 insn = emit_label_after (c->label, insn);
8870 INSN_ADDRESSES_NEW (insn, -1);
8872 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8873 gen_rtvec (1, value),
8874 UNSPECV_POOL_ENTRY);
8875 insn = emit_insn_after (value, insn);
8876 INSN_ADDRESSES_NEW (insn, -1);
8879 /* Ensure minimum alignment for instructions. */
8880 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8881 INSN_ADDRESSES_NEW (insn, -1);
8883 /* Output in-pool execute template insns. */
8884 for (c = pool->execute; c; c = c->next)
8886 insn = emit_label_after (c->label, insn);
8887 INSN_ADDRESSES_NEW (insn, -1);
8889 insn = emit_insn_after (s390_execute_target (c->value), insn);
8890 INSN_ADDRESSES_NEW (insn, -1);
8893 /* Switch back to previous section. */
8894 insn = emit_insn_after (gen_pool_section_end (), insn);
8895 INSN_ADDRESSES_NEW (insn, -1);
8897 insn = emit_barrier_after (insn);
8898 INSN_ADDRESSES_NEW (insn, -1);
8900 /* Remove placeholder insn. */
8901 remove_insn (pool->pool_insn);
8904 /* Free all memory used by POOL. */
8906 static void
8907 s390_free_pool (struct constant_pool *pool)
8909 struct constant *c, *next;
8910 int i;
8912 for (i = 0; i < NR_C_MODES; i++)
8913 for (c = pool->constants[i]; c; c = next)
8915 next = c->next;
8916 free (c);
8919 for (c = pool->execute; c; c = next)
8921 next = c->next;
8922 free (c);
8925 BITMAP_FREE (pool->insns);
8926 free (pool);
8930 /* Collect main literal pool. Return NULL on overflow. */
8932 static struct constant_pool *
8933 s390_mainpool_start (void)
8935 struct constant_pool *pool;
8936 rtx_insn *insn;
8938 pool = s390_alloc_pool ();
8940 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8942 if (NONJUMP_INSN_P (insn)
8943 && GET_CODE (PATTERN (insn)) == SET
8944 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8945 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8947 /* There might be two main_pool instructions if base_reg
8948 is call-clobbered; one for shrink-wrapped code and one
8949 for the rest. We want to keep the first. */
8950 if (pool->pool_insn)
8952 insn = PREV_INSN (insn);
8953 delete_insn (NEXT_INSN (insn));
8954 continue;
8956 pool->pool_insn = insn;
8959 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8961 rtx pool_ref = NULL_RTX;
8962 find_constant_pool_ref (insn, &pool_ref);
8963 if (pool_ref)
8965 rtx constant = get_pool_constant (pool_ref);
8966 machine_mode mode = get_pool_mode (pool_ref);
8967 s390_add_constant (pool, constant, mode);
8971 /* If hot/cold partitioning is enabled we have to make sure that
8972 the literal pool is emitted in the same section where the
8973 initialization of the literal pool base pointer takes place.
8974 emit_pool_after is only used in the non-overflow case on non
8975 Z cpus where we can emit the literal pool at the end of the
8976 function body within the text section. */
8977 if (NOTE_P (insn)
8978 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979 && !pool->emit_pool_after)
8980 pool->emit_pool_after = PREV_INSN (insn);
8983 gcc_assert (pool->pool_insn || pool->size == 0);
8985 if (pool->size >= 4096)
8987 /* We're going to chunkify the pool, so remove the main
8988 pool placeholder insn. */
8989 remove_insn (pool->pool_insn);
8991 s390_free_pool (pool);
8992 pool = NULL;
8995 /* If the function ends with the section where the literal pool
8996 should be emitted set the marker to its end. */
8997 if (pool && !pool->emit_pool_after)
8998 pool->emit_pool_after = get_last_insn ();
9000 return pool;
9003 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9004 Modify the current function to output the pool constants as well as
9005 the pool register setup instruction. */
9007 static void
9008 s390_mainpool_finish (struct constant_pool *pool)
9010 rtx base_reg = cfun->machine->base_reg;
9011 rtx set;
9012 rtx_insn *insn;
9014 /* If the pool is empty, we're done. */
9015 if (pool->size == 0)
9017 /* We don't actually need a base register after all. */
9018 cfun->machine->base_reg = NULL_RTX;
9020 if (pool->pool_insn)
9021 remove_insn (pool->pool_insn);
9022 s390_free_pool (pool);
9023 return;
9026 /* We need correct insn addresses. */
9027 shorten_branches (get_insns ());
9029 /* Use a LARL to load the pool register. The pool is
9030 located in the .rodata section, so we emit it after the function. */
9031 set = gen_main_base_64 (base_reg, pool->label);
9032 insn = emit_insn_after (set, pool->pool_insn);
9033 INSN_ADDRESSES_NEW (insn, -1);
9034 remove_insn (pool->pool_insn);
9036 insn = get_last_insn ();
9037 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9038 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9040 s390_dump_pool (pool, 0);
9042 /* Replace all literal pool references. */
9044 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9046 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9048 rtx addr, pool_ref = NULL_RTX;
9049 find_constant_pool_ref (insn, &pool_ref);
9050 if (pool_ref)
9052 if (s390_execute_label (insn))
9053 addr = s390_find_execute (pool, insn);
9054 else
9055 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9056 get_pool_mode (pool_ref));
9058 replace_constant_pool_ref (insn, pool_ref, addr);
9059 INSN_CODE (insn) = -1;
9065 /* Free the pool. */
9066 s390_free_pool (pool);
9069 /* Chunkify the literal pool. */
9071 #define S390_POOL_CHUNK_MIN 0xc00
9072 #define S390_POOL_CHUNK_MAX 0xe00
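/* Both limits lie below the 4 KB reach of short-displacement addressing
   (compare the 4096-byte overflow check in s390_mainpool_start above),
   presumably so that every constant in a chunk stays addressable from the
   chunk's base register with some headroom to spare.  */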
9074 static struct constant_pool *
9075 s390_chunkify_start (void)
9077 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9078 bitmap far_labels;
9079 rtx_insn *insn;
9081 /* We need correct insn addresses. */
9083 shorten_branches (get_insns ());
9085 /* Scan all insns and move literals to pool chunks. */
9087 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9089 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9091 rtx pool_ref = NULL_RTX;
9092 find_constant_pool_ref (insn, &pool_ref);
9093 if (pool_ref)
9095 rtx constant = get_pool_constant (pool_ref);
9096 machine_mode mode = get_pool_mode (pool_ref);
9098 if (!curr_pool)
9099 curr_pool = s390_start_pool (&pool_list, insn);
9101 s390_add_constant (curr_pool, constant, mode);
9102 s390_add_pool_insn (curr_pool, insn);
9106 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9108 if (curr_pool)
9109 s390_add_pool_insn (curr_pool, insn);
9112 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9113 continue;
9115 if (!curr_pool
9116 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9117 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9118 continue;
9120 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9121 continue;
9123 s390_end_pool (curr_pool, NULL);
9124 curr_pool = NULL;
9127 if (curr_pool)
9128 s390_end_pool (curr_pool, NULL);
9130 /* Find all labels that are branched into
9131 from an insn belonging to a different chunk. */
9133 far_labels = BITMAP_ALLOC (NULL);
9135 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9137 rtx_jump_table_data *table;
9139 /* Labels marked with LABEL_PRESERVE_P can be target
9140 of non-local jumps, so we have to mark them.
9141 The same holds for named labels.
9143 Don't do that, however, if it is the label before
9144 a jump table. */
9146 if (LABEL_P (insn)
9147 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9149 rtx_insn *vec_insn = NEXT_INSN (insn);
9150 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9151 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9153 /* Check potential targets in a table jump (casesi_jump). */
9154 else if (tablejump_p (insn, NULL, &table))
9156 rtx vec_pat = PATTERN (table);
9157 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9159 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9161 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9163 if (s390_find_pool (pool_list, label)
9164 != s390_find_pool (pool_list, insn))
9165 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9168 /* If we have a direct jump (conditional or unconditional),
9169 check all potential targets. */
9170 else if (JUMP_P (insn))
9172 rtx pat = PATTERN (insn);
9174 if (GET_CODE (pat) == PARALLEL)
9175 pat = XVECEXP (pat, 0, 0);
9177 if (GET_CODE (pat) == SET)
9179 rtx label = JUMP_LABEL (insn);
9180 if (label && !ANY_RETURN_P (label))
9182 if (s390_find_pool (pool_list, label)
9183 != s390_find_pool (pool_list, insn))
9184 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9190 /* Insert base register reload insns before every pool. */
9192 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9194 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9195 curr_pool->label);
9196 rtx_insn *insn = curr_pool->first_insn;
9197 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9200 /* Insert base register reload insns at every far label. */
9202 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203 if (LABEL_P (insn)
9204 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9206 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9207 if (pool)
9209 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9210 pool->label);
9211 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9216 BITMAP_FREE (far_labels);
9219 /* Recompute insn addresses. */
9221 init_insn_lengths ();
9222 shorten_branches (get_insns ());
9224 return pool_list;
9227 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228 After we have decided to use this list, finish implementing
9229 all changes to the current function as required. */
9231 static void
9232 s390_chunkify_finish (struct constant_pool *pool_list)
9234 struct constant_pool *curr_pool = NULL;
9235 rtx_insn *insn;
9238 /* Replace all literal pool references. */
9240 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9242 curr_pool = s390_find_pool (pool_list, insn);
9243 if (!curr_pool)
9244 continue;
9246 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9248 rtx addr, pool_ref = NULL_RTX;
9249 find_constant_pool_ref (insn, &pool_ref);
9250 if (pool_ref)
9252 if (s390_execute_label (insn))
9253 addr = s390_find_execute (curr_pool, insn);
9254 else
9255 addr = s390_find_constant (curr_pool,
9256 get_pool_constant (pool_ref),
9257 get_pool_mode (pool_ref));
9259 replace_constant_pool_ref (insn, pool_ref, addr);
9260 INSN_CODE (insn) = -1;
9265 /* Dump out all literal pools. */
9267 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9268 s390_dump_pool (curr_pool, 0);
9270 /* Free pool list. */
9272 while (pool_list)
9274 struct constant_pool *next = pool_list->next;
9275 s390_free_pool (pool_list);
9276 pool_list = next;
9280 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9282 void
9283 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9285 switch (GET_MODE_CLASS (mode))
9287 case MODE_FLOAT:
9288 case MODE_DECIMAL_FLOAT:
9289 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9291 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9292 as_a <scalar_float_mode> (mode), align);
9293 break;
9295 case MODE_INT:
9296 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9297 mark_symbol_refs_as_used (exp);
9298 break;
9300 case MODE_VECTOR_INT:
9301 case MODE_VECTOR_FLOAT:
9303 int i;
9304 machine_mode inner_mode;
9305 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9307 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9308 for (i = 0; i < XVECLEN (exp, 0); i++)
9309 s390_output_pool_entry (XVECEXP (exp, 0, i),
9310 inner_mode,
9311 i == 0
9312 ? align
9313 : GET_MODE_BITSIZE (inner_mode));
9315 break;
9317 default:
9318 gcc_unreachable ();
9323 /* Return an RTL expression representing the value of the return address
9324 for the frame COUNT steps up from the current frame. FRAME is the
9325 frame pointer of that frame. */
9328 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9330 int offset;
9331 rtx addr;
9333 /* Without backchain, we fail for all but the current frame. */
9335 if (!TARGET_BACKCHAIN && count > 0)
9336 return NULL_RTX;
9338 /* For the current frame, we need to make sure the initial
9339 value of RETURN_REGNUM is actually saved. */
9341 if (count == 0)
9342 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9344 if (TARGET_PACKED_STACK)
9345 offset = -2 * UNITS_PER_LONG;
9346 else
9347 offset = RETURN_REGNUM * UNITS_PER_LONG;
9349 addr = plus_constant (Pmode, frame, offset);
9350 addr = memory_address (Pmode, addr);
9351 return gen_rtx_MEM (Pmode, addr);
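/* Worked example (not from the sources): for COUNT > 0 with the default
   64-bit layout, UNITS_PER_LONG is 8 and RETURN_REGNUM is 14, so the
   return address is loaded from FRAME + 112 (the usual r14 save slot),
   while TARGET_PACKED_STACK reads it from FRAME - 16 instead.  */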
9354 /* Return an RTL expression representing the back chain stored in
9355 the current stack frame. */
9358 s390_back_chain_rtx (void)
9360 rtx chain;
9362 gcc_assert (TARGET_BACKCHAIN);
9364 if (TARGET_PACKED_STACK)
9365 chain = plus_constant (Pmode, stack_pointer_rtx,
9366 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9367 else
9368 chain = stack_pointer_rtx;
9370 chain = gen_rtx_MEM (Pmode, chain);
9371 return chain;
9374 /* Find first call clobbered register unused in a function.
9375 This could be used as base register in a leaf function
9376 or for holding the return address before epilogue. */
9378 static int
9379 find_unused_clobbered_reg (void)
9381 int i;
9382 for (i = 0; i < 6; i++)
9383 if (!df_regs_ever_live_p (i))
9384 return i;
9385 return 0;
9389 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9390 clobbered hard regs in SETREG. */
9392 static void
9393 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9395 char *regs_ever_clobbered = (char *)data;
9396 unsigned int i, regno;
9397 machine_mode mode = GET_MODE (setreg);
9399 if (GET_CODE (setreg) == SUBREG)
9401 rtx inner = SUBREG_REG (setreg);
9402 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9403 return;
9404 regno = subreg_regno (setreg);
9406 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9407 regno = REGNO (setreg);
9408 else
9409 return;
9411 for (i = regno;
9412 i < end_hard_regno (mode, regno);
9413 i++)
9414 regs_ever_clobbered[i] = 1;
9417 /* Walks through all basic blocks of the current function looking
9418 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9419 of the passed char array REGS_EVER_CLOBBERED are set to one for
9420 each of those regs. */
9422 static void
9423 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9425 basic_block cur_bb;
9426 rtx_insn *cur_insn;
9427 unsigned int i;
9429 memset (regs_ever_clobbered, 0, 32);
9431 /* For non-leaf functions we have to consider all call clobbered regs to be
9432 clobbered. */
9433 if (!crtl->is_leaf)
9435 for (i = 0; i < 32; i++)
9436 regs_ever_clobbered[i] = call_used_regs[i];
9439 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9440 this work is done by liveness analysis (mark_regs_live_at_end).
9441 Special care is needed for functions containing landing pads. Landing pads
9442 may use the eh registers, but the code which sets these registers is not
9443 contained in that function. Hence s390_regs_ever_clobbered is not able to
9444 deal with this automatically. */
9445 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9446 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9447 if (crtl->calls_eh_return
9448 || (cfun->machine->has_landing_pad_p
9449 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9450 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9452 /* For nonlocal gotos all call-saved registers have to be saved.
9453 This flag is also set for the unwinding code in libgcc.
9454 See expand_builtin_unwind_init. For regs_ever_live this is done by
9455 reload. */
9456 if (crtl->saves_all_registers)
9457 for (i = 0; i < 32; i++)
9458 if (!call_used_regs[i])
9459 regs_ever_clobbered[i] = 1;
9461 FOR_EACH_BB_FN (cur_bb, cfun)
9463 FOR_BB_INSNS (cur_bb, cur_insn)
9465 rtx pat;
9467 if (!INSN_P (cur_insn))
9468 continue;
9470 pat = PATTERN (cur_insn);
9472 /* Ignore GPR restore insns. */
9473 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9475 if (GET_CODE (pat) == SET
9476 && GENERAL_REG_P (SET_DEST (pat)))
9478 /* lgdr */
9479 if (GET_MODE (SET_SRC (pat)) == DImode
9480 && FP_REG_P (SET_SRC (pat)))
9481 continue;
9483 /* l / lg */
9484 if (GET_CODE (SET_SRC (pat)) == MEM)
9485 continue;
9488 /* lm / lmg */
9489 if (GET_CODE (pat) == PARALLEL
9490 && load_multiple_operation (pat, VOIDmode))
9491 continue;
9494 note_stores (cur_insn,
9495 s390_reg_clobbered_rtx,
9496 regs_ever_clobbered);
9501 /* Determine the frame area which actually has to be accessed
9502 in the function epilogue. The values are stored at the
9503 given pointers AREA_BOTTOM (address of the lowest used stack
9504 address) and AREA_TOP (address of the first item which does
9505 not belong to the stack frame). */
9507 static void
9508 s390_frame_area (int *area_bottom, int *area_top)
9510 int b, t;
9512 b = INT_MAX;
9513 t = INT_MIN;
9515 if (cfun_frame_layout.first_restore_gpr != -1)
9517 b = (cfun_frame_layout.gprs_offset
9518 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9519 t = b + (cfun_frame_layout.last_restore_gpr
9520 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9523 if (TARGET_64BIT && cfun_save_high_fprs_p)
9525 b = MIN (b, cfun_frame_layout.f8_offset);
9526 t = MAX (t, (cfun_frame_layout.f8_offset
9527 + cfun_frame_layout.high_fprs * 8));
9530 if (!TARGET_64BIT)
9532 if (cfun_fpr_save_p (FPR4_REGNUM))
9534 b = MIN (b, cfun_frame_layout.f4_offset);
9535 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9537 if (cfun_fpr_save_p (FPR6_REGNUM))
9539 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9540 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9543 *area_bottom = b;
9544 *area_top = t;
9546 /* Update gpr_save_slots in the frame layout trying to make use of
9547 FPRs as GPR save slots.
9548 This is a helper routine of s390_register_info. */
9550 static void
9551 s390_register_info_gprtofpr ()
9553 int save_reg_slot = FPR0_REGNUM;
9554 int i, j;
9556 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9557 return;
9559 /* builtin_eh_return needs to be able to modify the return address
9560 on the stack. It could also adjust the FPR save slot instead but
9561 is it worth the trouble?! */
9562 if (crtl->calls_eh_return)
9563 return;
9565 for (i = 15; i >= 6; i--)
9567 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9568 continue;
9570 /* Advance to the next FP register which can be used as a
9571 GPR save slot. */
9572 while ((!call_used_regs[save_reg_slot]
9573 || df_regs_ever_live_p (save_reg_slot)
9574 || cfun_fpr_save_p (save_reg_slot))
9575 && FP_REGNO_P (save_reg_slot))
9576 save_reg_slot++;
9577 if (!FP_REGNO_P (save_reg_slot))
9579 /* We only want to use ldgr/lgdr if we can get rid of
9580 stm/lm entirely. So undo the gpr slot allocation in
9581 case we ran out of FPR save slots. */
9582 for (j = 6; j <= 15; j++)
9583 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9584 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9585 break;
9587 cfun_gpr_save_slot (i) = save_reg_slot++;
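/* Hypothetical example: in a leaf function compiled for z10 or later with
   hardware floating point, where r12 is clobbered but f0 is otherwise
   unused, the loop above records f0 as r12's save slot, so the
   prologue/epilogue can use ldgr/lgdr instead of a stack save.  */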
9591 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9592 stdarg.
9593 This is a helper routine for s390_register_info. */
9595 static void
9596 s390_register_info_stdarg_fpr ()
9598 int i;
9599 int min_fpr;
9600 int max_fpr;
9602 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9603 f0-f4 for 64 bit. */
9604 if (!cfun->stdarg
9605 || !TARGET_HARD_FLOAT
9606 || !cfun->va_list_fpr_size
9607 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9608 return;
9610 min_fpr = crtl->args.info.fprs;
9611 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9612 if (max_fpr >= FP_ARG_NUM_REG)
9613 max_fpr = FP_ARG_NUM_REG - 1;
9615 /* FPR argument regs start at f0. */
9616 min_fpr += FPR0_REGNUM;
9617 max_fpr += FPR0_REGNUM;
9619 for (i = min_fpr; i <= max_fpr; i++)
9620 cfun_set_fpr_save (i);
9623 /* Reserve the GPR save slots for GPRs which need to be saved due to
9624 stdarg.
9625 This is a helper routine for s390_register_info. */
9627 static void
9628 s390_register_info_stdarg_gpr ()
9630 int i;
9631 int min_gpr;
9632 int max_gpr;
9634 if (!cfun->stdarg
9635 || !cfun->va_list_gpr_size
9636 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9637 return;
9639 min_gpr = crtl->args.info.gprs;
9640 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9641 if (max_gpr >= GP_ARG_NUM_REG)
9642 max_gpr = GP_ARG_NUM_REG - 1;
9644 /* GPR argument regs start at r2. */
9645 min_gpr += GPR2_REGNUM;
9646 max_gpr += GPR2_REGNUM;
9648 /* If r6 was supposed to be saved into an FPR and now needs to go to
9649 the stack for vararg we have to adjust the restore range to make
9650 sure that the restore is done from stack as well. */
9651 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9652 && min_gpr <= GPR6_REGNUM
9653 && max_gpr >= GPR6_REGNUM)
9655 if (cfun_frame_layout.first_restore_gpr == -1
9656 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9657 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9658 if (cfun_frame_layout.last_restore_gpr == -1
9659 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9660 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9663 if (cfun_frame_layout.first_save_gpr == -1
9664 || cfun_frame_layout.first_save_gpr > min_gpr)
9665 cfun_frame_layout.first_save_gpr = min_gpr;
9667 if (cfun_frame_layout.last_save_gpr == -1
9668 || cfun_frame_layout.last_save_gpr < max_gpr)
9669 cfun_frame_layout.last_save_gpr = max_gpr;
9671 for (i = min_gpr; i <= max_gpr; i++)
9672 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9675 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676 prologue and epilogue. */
9678 static void
9679 s390_register_info_set_ranges ()
9681 int i, j;
9683 /* Find the first and the last save slot supposed to use the stack
9684 to set the restore range.
9685 Vararg regs might be marked as save to stack but only the
9686 call-saved regs really need restoring (i.e. r6). This code
9687 assumes that the vararg regs have not yet been recorded in
9688 cfun_gpr_save_slot. */
9689 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9690 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9691 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9692 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9693 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9694 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
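/* Hypothetical example: if only r11 and r14 end up marked
   SAVE_SLOT_STACK, the loops above yield first = 11 and last = 14, so a
   single stm(g)/lm(g) pair covering r11..r14 is used even though r12 and
   r13 would not strictly need saving.  */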
9697 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698 for registers which need to be saved in function prologue.
9699 This function can be used until the insns emitted for save/restore
9700 of the regs are visible in the RTL stream. */
9702 static void
9703 s390_register_info ()
9705 int i;
9706 char clobbered_regs[32];
9708 gcc_assert (!epilogue_completed);
9710 if (reload_completed)
9711 /* After reload we rely on our own routine to determine which
9712 registers need saving. */
9713 s390_regs_ever_clobbered (clobbered_regs);
9714 else
9715 /* During reload we use regs_ever_live as a base since reload
9716 does changes in there which we otherwise would not be aware
9717 of. */
9718 for (i = 0; i < 32; i++)
9719 clobbered_regs[i] = df_regs_ever_live_p (i);
9721 for (i = 0; i < 32; i++)
9722 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9724 /* Mark the call-saved FPRs which need to be saved.
9725 This needs to be done before checking the special GPRs since the
9726 stack pointer usage depends on whether high FPRs have to be saved
9727 or not. */
9728 cfun_frame_layout.fpr_bitmap = 0;
9729 cfun_frame_layout.high_fprs = 0;
9730 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9731 if (clobbered_regs[i] && !call_used_regs[i])
9733 cfun_set_fpr_save (i);
9734 if (i >= FPR8_REGNUM)
9735 cfun_frame_layout.high_fprs++;
9738 /* Register 12 is used for GOT address, but also as temp in prologue
9739 for split-stack stdarg functions (unless r14 is available). */
9740 clobbered_regs[12]
9741 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9742 || (flag_split_stack && cfun->stdarg
9743 && (crtl->is_leaf || TARGET_TPF_PROFILING
9744 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9746 clobbered_regs[BASE_REGNUM]
9747 |= (cfun->machine->base_reg
9748 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9750 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9751 |= !!frame_pointer_needed;
9753 /* On pre z900 machines this might take until machine dependent
9754 reorg to decide.
9755 save_return_addr_p will only be set on non-zarch machines so
9756 there is no risk that r14 goes into an FPR instead of a stack
9757 slot. */
9758 clobbered_regs[RETURN_REGNUM]
9759 |= (!crtl->is_leaf
9760 || TARGET_TPF_PROFILING
9761 || cfun_frame_layout.save_return_addr_p
9762 || crtl->calls_eh_return);
9764 clobbered_regs[STACK_POINTER_REGNUM]
9765 |= (!crtl->is_leaf
9766 || TARGET_TPF_PROFILING
9767 || cfun_save_high_fprs_p
9768 || get_frame_size () > 0
9769 || (reload_completed && cfun_frame_layout.frame_size > 0)
9770 || cfun->calls_alloca);
9772 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9774 for (i = 6; i < 16; i++)
9775 if (clobbered_regs[i])
9776 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9778 s390_register_info_stdarg_fpr ();
9779 s390_register_info_gprtofpr ();
9780 s390_register_info_set_ranges ();
9781 /* stdarg functions might need to save GPRs 2 to 6. This might
9782 override the GPR->FPR save decision made by
9783 s390_register_info_gprtofpr for r6 since vararg regs must go to
9784 the stack. */
9785 s390_register_info_stdarg_gpr ();
9788 /* Return true if REGNO is a global register, but not one
9789 of the special ones that need to be saved/restored anyway. */
9791 static inline bool
9792 global_not_special_regno_p (int regno)
9794 return (global_regs[regno]
9795 /* These registers are special and need to be
9796 restored in any case. */
9797 && !(regno == STACK_POINTER_REGNUM
9798 || regno == RETURN_REGNUM
9799 || regno == BASE_REGNUM
9800 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9803 /* This function is called by s390_optimize_prologue in order to get
9804 rid of unnecessary GPR save/restore instructions. The register info
9805 for the GPRs is re-computed and the ranges are re-calculated. */
9807 static void
9808 s390_optimize_register_info ()
9810 char clobbered_regs[32];
9811 int i;
9813 gcc_assert (epilogue_completed);
9815 s390_regs_ever_clobbered (clobbered_regs);
9817 /* Global registers do not need to be saved and restored unless it
9818 is one of our special regs (r12, r13, r14, or r15). */
9819 for (i = 0; i < 32; i++)
9820 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9822 /* There is still special treatment needed for cases invisible to
9823 s390_regs_ever_clobbered. */
9824 clobbered_regs[RETURN_REGNUM]
9825 |= (TARGET_TPF_PROFILING
9826 /* When expanding builtin_return_addr in ESA mode we do not
9827 know whether r14 will later be needed as scratch reg when
9828 doing branch splitting. So the builtin always accesses the
9829 r14 save slot and we need to stick to the save/restore
9830 decision for r14 even if it turns out that it didn't get
9831 clobbered. */
9832 || cfun_frame_layout.save_return_addr_p
9833 || crtl->calls_eh_return);
9835 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9837 for (i = 6; i < 16; i++)
9838 if (!clobbered_regs[i])
9839 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9841 s390_register_info_set_ranges ();
9842 s390_register_info_stdarg_gpr ();
9845 /* Fill cfun->machine with info about frame of current function. */
9847 static void
9848 s390_frame_info (void)
9850 HOST_WIDE_INT lowest_offset;
9852 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9853 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9855 /* The va_arg builtin uses a constant distance of 16 *
9856 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857 pointer. So even if we are going to save the stack pointer in an
9858 FPR we need the stack space in order to keep the offsets
9859 correct. */
9860 if (cfun->stdarg && cfun_save_arg_fprs_p)
9862 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9864 if (cfun_frame_layout.first_save_gpr_slot == -1)
9865 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9868 cfun_frame_layout.frame_size = get_frame_size ();
9869 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9870 fatal_error (input_location,
9871 "total size of local variables exceeds architecture limit");
9873 if (!TARGET_PACKED_STACK)
9875 /* Fixed stack layout. */
9876 cfun_frame_layout.backchain_offset = 0;
9877 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9878 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9879 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9880 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9881 * UNITS_PER_LONG);
9883 else if (TARGET_BACKCHAIN)
9885 /* Kernel stack layout - packed stack, backchain, no float */
9886 gcc_assert (TARGET_SOFT_FLOAT);
9887 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9888 - UNITS_PER_LONG);
9890 /* The distance between the backchain and the return address
9891 save slot must not change. So we always need a slot for the
9892 stack pointer which resides in between. */
9893 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9895 cfun_frame_layout.gprs_offset
9896 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9898 /* FPRs will not be saved. Nevertheless pick sane values to
9899 keep area calculations valid. */
9900 cfun_frame_layout.f0_offset =
9901 cfun_frame_layout.f4_offset =
9902 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9904 else
9906 int num_fprs;
9908 /* Packed stack layout without backchain. */
9910 /* With stdarg FPRs need their dedicated slots. */
9911 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9912 : (cfun_fpr_save_p (FPR4_REGNUM) +
9913 cfun_fpr_save_p (FPR6_REGNUM)));
9914 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9916 num_fprs = (cfun->stdarg ? 2
9917 : (cfun_fpr_save_p (FPR0_REGNUM)
9918 + cfun_fpr_save_p (FPR2_REGNUM)));
9919 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9921 cfun_frame_layout.gprs_offset
9922 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9924 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9925 - cfun_frame_layout.high_fprs * 8);
9928 if (cfun_save_high_fprs_p)
9929 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9931 if (!crtl->is_leaf)
9932 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9934 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935 sized area at the bottom of the stack. This is required also for
9936 leaf functions. When GCC generates a local stack reference it
9937 will always add STACK_POINTER_OFFSET to all these references. */
9938 if (crtl->is_leaf
9939 && !TARGET_TPF_PROFILING
9940 && cfun_frame_layout.frame_size == 0
9941 && !cfun->calls_alloca)
9942 return;
9944 /* Calculate the number of bytes we have used in our own register
9945 save area. With the packed stack layout we can re-use the
9946 remaining bytes for normal stack elements. */
9948 if (TARGET_PACKED_STACK)
9949 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9950 cfun_frame_layout.f4_offset),
9951 cfun_frame_layout.gprs_offset);
9952 else
9953 lowest_offset = 0;
9955 if (TARGET_BACKCHAIN)
9956 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9958 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9960 /* On 31 bit, if an odd number of GPRs has to be saved, we have to
9961 adjust the frame size to maintain 8-byte alignment of stack
9962 frames. */
9963 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9964 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9965 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9968 /* Generate frame layout. Fills in register and frame data for the current
9969 function in cfun->machine. This routine can be called multiple times;
9970 it will re-do the complete frame layout every time. */
9972 static void
9973 s390_init_frame_layout (void)
9975 HOST_WIDE_INT frame_size;
9976 int base_used;
9978 /* After LRA the frame layout is supposed to be read-only and should
9979 not be re-computed. */
9980 if (reload_completed)
9981 return;
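/* Whether a literal-pool base register is needed depends on the frame
   size, and reserving the base register in turn changes which registers
   have to be saved and hence the frame size, so the layout computation
   below is repeated until it converges. */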
9985 frame_size = cfun_frame_layout.frame_size;
9987 /* Try to predict whether we'll need the base register. */
9988 base_used = crtl->uses_const_pool
9989 || (!DISP_IN_RANGE (frame_size)
9990 && !CONST_OK_FOR_K (frame_size));
9992 /* Decide which register to use as literal pool base. In small
9993 leaf functions, try to use an unused call-clobbered register
9994 as base register to avoid save/restore overhead. */
9995 if (!base_used)
9996 cfun->machine->base_reg = NULL_RTX;
9997 else
9999 int br = 0;
10001 if (crtl->is_leaf)
10002 /* Prefer r5 (most likely to be free). */
10003 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10005 cfun->machine->base_reg =
10006 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10009 s390_register_info ();
10010 s390_frame_info ();
10012 while (frame_size != cfun_frame_layout.frame_size);
10015 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10016 the TX is nonescaping. A transaction is considered escaping if
10017 there is at least one path from tbegin returning CC0 to the
10018 function exit block without a tend.
10020 The check so far has some limitations:
10021 - only single tbegin/tend BBs are supported
10022 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10023 - when CC is copied to a GPR and the CC0 check is done with the GPR
10024 this is not supported
10027 static void
10028 s390_optimize_nonescaping_tx (void)
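/* In CCRAWmode compares the condition code is matched against a 4-bit
   mask; bit 3 (value 8) selects CC0, the condition code left behind by
   a successfully started transaction. */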
10030 const unsigned int CC0 = 1 << 3;
10031 basic_block tbegin_bb = NULL;
10032 basic_block tend_bb = NULL;
10033 basic_block bb;
10034 rtx_insn *insn;
10035 bool result = true;
10036 int bb_index;
10037 rtx_insn *tbegin_insn = NULL;
10039 if (!cfun->machine->tbegin_p)
10040 return;
10042 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10044 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10046 if (!bb)
10047 continue;
10049 FOR_BB_INSNS (bb, insn)
10051 rtx ite, cc, pat, target;
10052 unsigned HOST_WIDE_INT mask;
10054 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10055 continue;
10057 pat = PATTERN (insn);
10059 if (GET_CODE (pat) == PARALLEL)
10060 pat = XVECEXP (pat, 0, 0);
10062 if (GET_CODE (pat) != SET
10063 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10064 continue;
10066 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10068 rtx_insn *tmp;
10070 tbegin_insn = insn;
10072 /* Just return if the tbegin doesn't have clobbers. */
10073 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10074 return;
10076 if (tbegin_bb != NULL)
10077 return;
10079 /* Find the next conditional jump. */
10080 for (tmp = NEXT_INSN (insn);
10081 tmp != NULL_RTX;
10082 tmp = NEXT_INSN (tmp))
10084 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10085 return;
10086 if (!JUMP_P (tmp))
10087 continue;
10089 ite = SET_SRC (PATTERN (tmp));
10090 if (GET_CODE (ite) != IF_THEN_ELSE)
10091 continue;
10093 cc = XEXP (XEXP (ite, 0), 0);
10094 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10095 || GET_MODE (cc) != CCRAWmode
10096 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10097 return;
10099 if (bb->succs->length () != 2)
10100 return;
10102 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
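/* Normalize to the EQ sense: for an NE comparison the branch is taken
   on the complement of the CC mask.  If the normalized mask selects
   exactly CC0, the "then" arm of the IF_THEN_ELSE is the CC0
   (transaction started) path; if it selects everything but CC0, the
   "else" arm is. */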
10103 if (GET_CODE (XEXP (ite, 0)) == NE)
10104 mask ^= 0xf;
10106 if (mask == CC0)
10107 target = XEXP (ite, 1);
10108 else if (mask == (CC0 ^ 0xf))
10109 target = XEXP (ite, 2);
10110 else
10111 return;
10114 edge_iterator ei;
10115 edge e1, e2;
10117 ei = ei_start (bb->succs);
10118 e1 = ei_safe_edge (ei);
10119 ei_next (&ei);
10120 e2 = ei_safe_edge (ei);
10122 if (e2->flags & EDGE_FALLTHRU)
10124 e2 = e1;
10125 e1 = ei_safe_edge (ei);
10128 if (!(e1->flags & EDGE_FALLTHRU))
10129 return;
10131 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10133 if (tmp == BB_END (bb))
10134 break;
10138 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10140 if (tend_bb != NULL)
10141 return;
10142 tend_bb = bb;
10147 /* Either we successfully remove the FPR clobbers here or we are not
10148 able to do anything for this TX. Both cases don't qualify for
10149 another look. */
10150 cfun->machine->tbegin_p = false;
10152 if (tbegin_bb == NULL || tend_bb == NULL)
10153 return;
10155 calculate_dominance_info (CDI_POST_DOMINATORS);
10156 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10157 free_dominance_info (CDI_POST_DOMINATORS);
10159 if (!result)
10160 return;
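/* Keep only the first two elements of the original tbegin PARALLEL and
   drop the trailing FPR clobbers.  Clearing INSN_CODE forces the new
   pattern to be re-recognized. */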
10162 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10163 gen_rtvec (2,
10164 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10165 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10166 INSN_CODE (tbegin_insn) = -1;
10167 df_insn_rescan (tbegin_insn);
10169 return;
10172 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10173 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10175 static unsigned int
10176 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10178 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10181 /* Implement TARGET_HARD_REGNO_MODE_OK.
10183 Integer modes <= word size fit into any GPR.
10184 Integer modes > word size fit into successive GPRs, starting with
10185 an even-numbered register.
10186 SImode and DImode fit into FPRs as well.
10188 Floating point modes <= word size fit into any FPR or GPR.
10189 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190 into any FPR, or an even-odd GPR pair.
10191 TFmode fits only into an even-odd FPR pair.
10193 Complex floating point modes fit either into two FPRs, or into
10194 successive GPRs (again starting with an even number).
10195 TCmode fits only into two successive even-odd FPR pairs.
10197 Condition code modes fit only into the CC register. */
10199 static bool
10200 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10202 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10203 return false;
10205 switch (REGNO_REG_CLASS (regno))
10207 case VEC_REGS:
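/* SFmode is allowed in the full vector register set only with the
   vector enhancements facility (TARGET_VXE), which added
   single-precision vector support. */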
10208 return ((GET_MODE_CLASS (mode) == MODE_INT
10209 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10210 || mode == DFmode
10211 || (TARGET_VXE && mode == SFmode)
10212 || s390_vector_mode_supported_p (mode));
10213 break;
10214 case FP_REGS:
10215 if (TARGET_VX
10216 && ((GET_MODE_CLASS (mode) == MODE_INT
10217 && s390_class_max_nregs (FP_REGS, mode) == 1)
10218 || mode == DFmode
10219 || s390_vector_mode_supported_p (mode)))
10220 return true;
10222 if (REGNO_PAIR_OK (regno, mode))
10224 if (mode == SImode || mode == DImode)
10225 return true;
10227 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10228 return true;
10230 break;
10231 case ADDR_REGS:
10232 if (FRAME_REGNO_P (regno) && mode == Pmode)
10233 return true;
10235 /* fallthrough */
10236 case GENERAL_REGS:
10237 if (REGNO_PAIR_OK (regno, mode))
10239 if (TARGET_ZARCH
10240 || (mode != TFmode && mode != TCmode && mode != TDmode))
10241 return true;
10243 break;
10244 case CC_REGS:
10245 if (GET_MODE_CLASS (mode) == MODE_CC)
10246 return true;
10247 break;
10248 case ACCESS_REGS:
10249 if (REGNO_PAIR_OK (regno, mode))
10251 if (mode == SImode || mode == Pmode)
10252 return true;
10254 break;
10255 default:
10256 return false;
10259 return false;
10262 /* Implement TARGET_MODES_TIEABLE_P. */
10264 static bool
10265 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10267 return ((mode1 == SFmode || mode1 == DFmode)
10268 == (mode2 == SFmode || mode2 == DFmode));
10271 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10273 bool
10274 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10276 /* Once we've decided upon a register to use as base register, it must
10277 no longer be used for any other purpose. */
10278 if (cfun->machine->base_reg)
10279 if (REGNO (cfun->machine->base_reg) == old_reg
10280 || REGNO (cfun->machine->base_reg) == new_reg)
10281 return false;
10283 /* Prevent regrename from using call-saved regs which haven't
10284 actually been saved. This is necessary since regrename assumes
10285 the backend save/restore decisions are based on
10286 df_regs_ever_live. Since we have our own routine we have to tell
10287 regrename manually about it. */
10288 if (GENERAL_REGNO_P (new_reg)
10289 && !call_used_regs[new_reg]
10290 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10291 return false;
10293 return true;
10296 /* Return nonzero if register REGNO can be used as a scratch register
10297 in peephole2. */
10299 static bool
10300 s390_hard_regno_scratch_ok (unsigned int regno)
10302 /* See s390_hard_regno_rename_ok. */
10303 if (GENERAL_REGNO_P (regno)
10304 && !call_used_regs[regno]
10305 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10306 return false;
10308 return true;
10311 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10312 code that runs in z/Architecture mode, but conforms to the 31-bit
10313 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314 bytes are saved across calls, however. */
10316 static bool
10317 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10318 machine_mode mode)
10320 if (!TARGET_64BIT
10321 && TARGET_ZARCH
10322 && GET_MODE_SIZE (mode) > 4
10323 && ((regno >= 6 && regno <= 15) || regno == 32))
10324 return true;
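/* With the vector facility the call-saved FPRs (f8-f15 on 64 bit,
   f4/f6 on 31 bit) overlap vector registers, but only the 8-byte FPR
   part is preserved across calls, so wider modes are partially
   clobbered. */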
10326 if (TARGET_VX
10327 && GET_MODE_SIZE (mode) > 8
10328 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10329 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10330 return true;
10332 return false;
10335 /* Maximum number of registers to represent a value of mode MODE
10336 in a register of class RCLASS. */
10339 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10341 int reg_size;
10342 bool reg_pair_required_p = false;
10344 switch (rclass)
10346 case FP_REGS:
10347 case VEC_REGS:
10348 reg_size = TARGET_VX ? 16 : 8;
10350 /* TF and TD modes would fit into a VR but we put them into a
10351 register pair since we do not have 128bit FP instructions on
10352 full VRs. */
10353 if (TARGET_VX
10354 && SCALAR_FLOAT_MODE_P (mode)
10355 && GET_MODE_SIZE (mode) >= 16)
10356 reg_pair_required_p = true;
10358 /* Even if complex types would fit into a single FPR/VR we force
10359 them into a register pair to deal with the parts more easily.
10360 (FIXME: What about complex ints?) */
10361 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10362 reg_pair_required_p = true;
10363 break;
10364 case ACCESS_REGS:
10365 reg_size = 4;
10366 break;
10367 default:
10368 reg_size = UNITS_PER_WORD;
10369 break;
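/* If a register pair is required, each half of the value is rounded up
   to a whole number of registers and the result doubled; otherwise the
   mode size is simply rounded up to a multiple of the register size. */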
10372 if (reg_pair_required_p)
10373 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10375 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10378 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10380 static bool
10381 s390_can_change_mode_class (machine_mode from_mode,
10382 machine_mode to_mode,
10383 reg_class_t rclass)
10385 machine_mode small_mode;
10386 machine_mode big_mode;
10388 /* V1TF and TF have different representations in vector
10389 registers. */
10390 if (reg_classes_intersect_p (VEC_REGS, rclass)
10391 && ((from_mode == V1TFmode && to_mode == TFmode)
10392 || (from_mode == TFmode && to_mode == V1TFmode)))
10393 return false;
10395 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10396 return true;
10398 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10400 small_mode = from_mode;
10401 big_mode = to_mode;
10403 else
10405 small_mode = to_mode;
10406 big_mode = from_mode;
10409 /* Values residing in VRs are little-endian style. All modes are
10410 placed left-aligned in a VR. This means that we cannot allow
10411 switching between modes with differing sizes. Also if the vector
10412 facility is available we still place TFmode values in VR register
10413 pairs, since the only instructions we have operating on TFmodes
10414 only deal with register pairs. Therefore we have to allow DFmode
10415 subregs of TFmodes to enable the TFmode splitters. */
10416 if (reg_classes_intersect_p (VEC_REGS, rclass)
10417 && (GET_MODE_SIZE (small_mode) < 8
10418 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10419 return false;
10421 /* Likewise for access registers, since they have only half the
10422 word size on 64-bit. */
10423 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10424 return false;
10426 return true;
10429 /* Return true if we use LRA instead of reload pass. */
10430 static bool
10431 s390_lra_p (void)
10433 return s390_lra_flag;
10436 /* Return true if register FROM can be eliminated via register TO. */
10438 static bool
10439 s390_can_eliminate (const int from, const int to)
10441 /* We have not marked the base register as fixed.
10442 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443 If a function requires the base register, we say here that this
10444 elimination cannot be performed. This will cause reload to free
10445 up the base register (as if it were fixed). On the other hand,
10446 if the current function does *not* require the base register, we
10447 say here the elimination succeeds, which in turn allows reload
10448 to allocate the base register for any other purpose. */
10449 if (from == BASE_REGNUM && to == BASE_REGNUM)
10451 s390_init_frame_layout ();
10452 return cfun->machine->base_reg == NULL_RTX;
10455 /* Everything else must point into the stack frame. */
10456 gcc_assert (to == STACK_POINTER_REGNUM
10457 || to == HARD_FRAME_POINTER_REGNUM);
10459 gcc_assert (from == FRAME_POINTER_REGNUM
10460 || from == ARG_POINTER_REGNUM
10461 || from == RETURN_ADDRESS_POINTER_REGNUM);
10463 /* Make sure we actually saved the return address. */
10464 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10465 if (!crtl->calls_eh_return
10466 && !cfun->stdarg
10467 && !cfun_frame_layout.save_return_addr_p)
10468 return false;
10470 return true;
10473 /* Return offset between register FROM and TO initially after prolog. */
10475 HOST_WIDE_INT
10476 s390_initial_elimination_offset (int from, int to)
10478 HOST_WIDE_INT offset;
10480 /* ??? Why are we called for non-eliminable pairs? */
10481 if (!s390_can_eliminate (from, to))
10482 return 0;
10484 switch (from)
10486 case FRAME_POINTER_REGNUM:
10487 offset = (get_frame_size()
10488 + STACK_POINTER_OFFSET
10489 + crtl->outgoing_args_size);
10490 break;
10492 case ARG_POINTER_REGNUM:
10493 s390_init_frame_layout ();
10494 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10495 break;
10497 case RETURN_ADDRESS_POINTER_REGNUM:
10498 s390_init_frame_layout ();
10500 if (cfun_frame_layout.first_save_gpr_slot == -1)
10502 /* If it turns out that for stdarg nothing went into the reg
10503 save area we also do not need the return address
10504 pointer. */
10505 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10506 return 0;
10508 gcc_unreachable ();
10511 /* In order to make the following work it is not necessary for
10512 r14 to have a save slot. It is sufficient if one other GPR
10513 got one. Since the GPRs are always stored without gaps we
10514 are able to calculate where the r14 save slot would
10515 reside. */
10516 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10517 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10518 UNITS_PER_LONG);
10519 break;
10521 case BASE_REGNUM:
10522 offset = 0;
10523 break;
10525 default:
10526 gcc_unreachable ();
10529 return offset;
10532 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10533 to register BASE. Return generated insn. */
10535 static rtx
10536 save_fpr (rtx base, int offset, int regnum)
10538 rtx addr;
10539 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
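/* The FPRs that can carry floating-point arguments may be saved into
   the varargs register save area; give those slots the varargs alias
   set, everything else the normal frame alias set. */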
10541 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10542 set_mem_alias_set (addr, get_varargs_alias_set ());
10543 else
10544 set_mem_alias_set (addr, get_frame_alias_set ());
10546 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10549 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550 to register BASE. Return generated insn. */
10552 static rtx
10553 restore_fpr (rtx base, int offset, int regnum)
10555 rtx addr;
10556 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10557 set_mem_alias_set (addr, get_frame_alias_set ());
10559 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10562 /* Generate insn to save registers FIRST to LAST into
10563 the register save area located at offset OFFSET
10564 relative to register BASE. */
10566 static rtx
10567 save_gprs (rtx base, int offset, int first, int last)
10569 rtx addr, insn, note;
10570 int i;
10572 addr = plus_constant (Pmode, base, offset);
10573 addr = gen_rtx_MEM (Pmode, addr);
10575 set_mem_alias_set (addr, get_frame_alias_set ());
10577 /* Special-case single register. */
10578 if (first == last)
10580 if (TARGET_64BIT)
10581 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10582 else
10583 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10585 if (!global_not_special_regno_p (first))
10586 RTX_FRAME_RELATED_P (insn) = 1;
10587 return insn;
10591 insn = gen_store_multiple (addr,
10592 gen_rtx_REG (Pmode, first),
10593 GEN_INT (last - first + 1));
10595 if (first <= 6 && cfun->stdarg)
10596 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10598 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10600 if (first + i <= 6)
10601 set_mem_alias_set (mem, get_varargs_alias_set ());
10604 /* We need to set the FRAME_RELATED flag on all SETs
10605 inside the store-multiple pattern.
10607 However, we must not emit DWARF records for registers 2..5
10608 if they are stored for use by variable arguments ...
10610 ??? Unfortunately, it is not enough to simply not set the
10611 FRAME_RELATED flags for those SETs, because the first SET
10612 of the PARALLEL is always treated as if it had the flag
10613 set, even if it does not. Therefore we emit a new pattern
10614 without those registers as a REG_FRAME_RELATED_EXPR note. */
10616 if (first >= 6 && !global_not_special_regno_p (first))
10618 rtx pat = PATTERN (insn);
10620 for (i = 0; i < XVECLEN (pat, 0); i++)
10621 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10622 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10623 0, i)))))
10624 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10626 RTX_FRAME_RELATED_P (insn) = 1;
10628 else if (last >= 6)
10630 int start;
10632 for (start = first >= 6 ? first : 6; start <= last; start++)
10633 if (!global_not_special_regno_p (start))
10634 break;
10636 if (start > last)
10637 return insn;
10639 addr = plus_constant (Pmode, base,
10640 offset + (start - first) * UNITS_PER_LONG);
10642 if (start == last)
10644 if (TARGET_64BIT)
10645 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10646 gen_rtx_REG (Pmode, start));
10647 else
10648 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10649 gen_rtx_REG (Pmode, start));
10650 note = PATTERN (note);
10652 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10655 return insn;
10658 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10659 gen_rtx_REG (Pmode, start),
10660 GEN_INT (last - start + 1));
10661 note = PATTERN (note);
10663 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10665 for (i = 0; i < XVECLEN (note, 0); i++)
10666 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10667 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10668 0, i)))))
10669 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10671 RTX_FRAME_RELATED_P (insn) = 1;
10674 return insn;
10677 /* Generate insn to restore registers FIRST to LAST from
10678 the register save area located at offset OFFSET
10679 relative to register BASE. */
10681 static rtx
10682 restore_gprs (rtx base, int offset, int first, int last)
10684 rtx addr, insn;
10686 addr = plus_constant (Pmode, base, offset);
10687 addr = gen_rtx_MEM (Pmode, addr);
10688 set_mem_alias_set (addr, get_frame_alias_set ());
10690 /* Special-case single register. */
10691 if (first == last)
10693 if (TARGET_64BIT)
10694 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10695 else
10696 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10698 RTX_FRAME_RELATED_P (insn) = 1;
10699 return insn;
10702 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10703 addr,
10704 GEN_INT (last - first + 1));
10705 RTX_FRAME_RELATED_P (insn) = 1;
10706 return insn;
10709 /* Return insn sequence to load the GOT register. */
10711 rtx_insn *
10712 s390_load_got (void)
10714 rtx_insn *insns;
10716 /* We cannot use pic_offset_table_rtx here since we use this
10717 function also for non-pic if __tls_get_offset is called and in
10718 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10719 aren't usable. */
10720 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10722 start_sequence ();
10724 emit_move_insn (got_rtx, s390_got_symbol ());
10726 insns = get_insns ();
10727 end_sequence ();
10728 return insns;
10731 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732 and the change to the stack pointer. */
10734 static void
10735 s390_emit_stack_tie (void)
10737 rtx mem = gen_frame_mem (BLKmode,
10738 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10740 emit_insn (gen_stack_tie (mem));
10743 /* Copy GPRS into FPR save slots. */
10745 static void
10746 s390_save_gprs_to_fprs (void)
10748 int i;
10750 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10751 return;
10753 for (i = 6; i < 16; i++)
10755 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10757 rtx_insn *insn =
10758 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10759 gen_rtx_REG (DImode, i));
10760 RTX_FRAME_RELATED_P (insn) = 1;
10761 /* This prevents dwarf2cfi from interpreting the set. Otherwise
10762 it might emit def_cfa_register infos setting an FPR as the
10763 new CFA. */
10764 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10769 /* Restore GPRs from FPR save slots. */
10771 static void
10772 s390_restore_gprs_from_fprs (void)
10774 int i;
10776 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10777 return;
10779 /* Restore the GPRs starting with the stack pointer. That way the
10780 stack pointer already has its original value when it comes to
10781 restoring the hard frame pointer. So we can set the cfa reg back
10782 to the stack pointer. */
10783 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10785 rtx_insn *insn;
10787 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10788 continue;
10790 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10792 if (i == STACK_POINTER_REGNUM)
10793 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10794 else
10795 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10797 df_set_regs_ever_live (i, true);
10798 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10800 /* If either the stack pointer or the frame pointer gets restored,
10801 set the CFA value to its value at function start. Doing this
10802 for the frame pointer results in .cfi_def_cfa_register 15,
10803 which is ok since, if the stack pointer got modified, it has
10804 been restored already. */
10805 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10806 add_reg_note (insn, REG_CFA_DEF_CFA,
10807 plus_constant (Pmode, stack_pointer_rtx,
10808 STACK_POINTER_OFFSET));
10809 RTX_FRAME_RELATED_P (insn) = 1;
10814 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10815 generation. */
10817 namespace {
10819 const pass_data pass_data_s390_early_mach =
10821 RTL_PASS, /* type */
10822 "early_mach", /* name */
10823 OPTGROUP_NONE, /* optinfo_flags */
10824 TV_MACH_DEP, /* tv_id */
10825 0, /* properties_required */
10826 0, /* properties_provided */
10827 0, /* properties_destroyed */
10828 0, /* todo_flags_start */
10829 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10832 class pass_s390_early_mach : public rtl_opt_pass
10834 public:
10835 pass_s390_early_mach (gcc::context *ctxt)
10836 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10839 /* opt_pass methods: */
10840 virtual unsigned int execute (function *);
10842 }; // class pass_s390_early_mach
10844 unsigned int
10845 pass_s390_early_mach::execute (function *fun)
10847 rtx_insn *insn;
10849 /* Try to get rid of the FPR clobbers. */
10850 s390_optimize_nonescaping_tx ();
10852 /* Re-compute register info. */
10853 s390_register_info ();
10855 /* If we're using a base register, ensure that it is always valid for
10856 the first non-prologue instruction. */
10857 if (fun->machine->base_reg)
10858 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10860 /* Annotate all constant pool references to let the scheduler know
10861 they implicitly use the base register. */
10862 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10863 if (INSN_P (insn))
10865 annotate_constant_pool_refs (insn);
10866 df_insn_rescan (insn);
10868 return 0;
10871 } // anon namespace
10873 rtl_opt_pass *
10874 make_pass_s390_early_mach (gcc::context *ctxt)
10876 return new pass_s390_early_mach (ctxt);
10879 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10880 - push too big immediates to the literal pool and annotate the refs
10881 - emit frame related notes for stack pointer changes. */
10883 static rtx
10884 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10886 rtx_insn *insn;
10887 rtx orig_offset = offset;
10889 gcc_assert (REG_P (target));
10890 gcc_assert (REG_P (reg));
10891 gcc_assert (CONST_INT_P (offset));
10893 if (offset == const0_rtx) /* lr/lgr */
10895 insn = emit_move_insn (target, reg);
10897 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
10899 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10900 offset));
10902 else
10904 if (!satisfies_constraint_K (offset) /* ahi/aghi */
10905 && (!TARGET_EXTIMM
10906 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
10907 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10908 offset = force_const_mem (Pmode, offset);
10910 if (target != reg)
10912 insn = emit_move_insn (target, reg);
10913 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10916 insn = emit_insn (gen_add2_insn (target, offset));
10918 if (!CONST_INT_P (offset))
10920 annotate_constant_pool_refs (insn);
10922 if (frame_related_p)
10923 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924 gen_rtx_SET (target,
10925 gen_rtx_PLUS (Pmode, target,
10926 orig_offset)));
10930 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10932 /* If this is a stack adjustment and we are generating a stack clash
10933 prologue, then add a REG_STACK_CHECK note to signal that this insn
10934 should be left alone. */
10935 if (flag_stack_clash_protection && target == stack_pointer_rtx)
10936 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10938 return insn;
10941 /* Emit a compare instruction with a volatile memory access as stack
10942 probe. It does not waste store tags and does not clobber any
10943 registers apart from the condition code. */
10944 static void
10945 s390_emit_stack_probe (rtx addr)
10947 rtx mem = gen_rtx_MEM (Pmode, addr);
10948 MEM_VOLATILE_P (mem) = 1;
10949 emit_insn (gen_probe_stack (mem));
10952 /* Use a runtime loop if we have to emit more probes than this. */
10953 #define MIN_UNROLL_PROBES 3
10955 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10956 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
10957 probe relative to the stack pointer.
10959 Note that SIZE is negative.
10961 The return value is true if TEMP_REG has been clobbered. */
10962 static bool
10963 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10964 rtx temp_reg)
10966 bool temp_reg_clobbered_p = false;
10967 HOST_WIDE_INT probe_interval
10968 = 1 << param_stack_clash_protection_probe_interval;
10969 HOST_WIDE_INT guard_size
10970 = 1 << param_stack_clash_protection_guard_size;
10972 if (flag_stack_clash_protection)
10974 if (last_probe_offset + -INTVAL (size) < guard_size)
10975 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10976 else
10978 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
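/* probe_interval is a power of two, so the AND below rounds the
   (positive) allocation size down to a whole number of probe
   intervals; the remainder is handled as RESIDUAL further down. */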
10979 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10980 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10981 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10983 if (num_probes < MIN_UNROLL_PROBES)
10985 /* Emit unrolled probe statements. */
10987 for (unsigned int i = 0; i < num_probes; i++)
10989 s390_prologue_plus_offset (stack_pointer_rtx,
10990 stack_pointer_rtx,
10991 GEN_INT (-probe_interval), true);
10992 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10993 stack_pointer_rtx,
10994 offset));
10996 if (num_probes > 0)
10997 last_probe_offset = INTVAL (offset);
10998 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11000 else
11002 /* Emit a loop probing the pages. */
11004 rtx_code_label *loop_start_label = gen_label_rtx ();
11006 /* From now on temp_reg will be the CFA register. */
11007 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11008 GEN_INT (-rounded_size), true);
11009 emit_label (loop_start_label);
11011 s390_prologue_plus_offset (stack_pointer_rtx,
11012 stack_pointer_rtx,
11013 GEN_INT (-probe_interval), false);
11014 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11015 stack_pointer_rtx,
11016 offset));
11017 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11018 GT, NULL_RTX,
11019 Pmode, 1, loop_start_label);
11021 /* Without this make_edges ICEs. */
11022 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11023 LABEL_NUSES (loop_start_label) = 1;
11025 /* That's going to be a NOP since stack pointer and
11026 temp_reg are supposed to be the same here. We just
11027 emit it to set the CFA reg back to r15. */
11028 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11029 const0_rtx, true);
11030 temp_reg_clobbered_p = true;
11031 last_probe_offset = INTVAL (offset);
11032 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11035 /* Handle any residual allocation request. */
11036 s390_prologue_plus_offset (stack_pointer_rtx,
11037 stack_pointer_rtx,
11038 GEN_INT (-residual), true);
11039 last_probe_offset += residual;
11040 if (last_probe_offset >= probe_interval)
11041 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11042 stack_pointer_rtx,
11043 GEN_INT (residual
11044 - UNITS_PER_LONG)));
11046 return temp_reg_clobbered_p;
11050 /* Subtract frame size from stack pointer. */
11051 s390_prologue_plus_offset (stack_pointer_rtx,
11052 stack_pointer_rtx,
11053 size, true);
11055 return temp_reg_clobbered_p;
11058 /* Expand the prologue into a bunch of separate insns. */
11060 void
11061 s390_emit_prologue (void)
11063 rtx insn, addr;
11064 rtx temp_reg;
11065 int i;
11066 int offset;
11067 int next_fpr = 0;
11069 /* Choose best register to use for temp use within prologue.
11070 TPF with profiling must avoid the register 14 - the tracing function
11071 needs the original contents of r14 to be preserved. */
11073 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11074 && !crtl->is_leaf
11075 && !TARGET_TPF_PROFILING)
11076 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11077 else if (flag_split_stack && cfun->stdarg)
11078 temp_reg = gen_rtx_REG (Pmode, 12);
11079 else
11080 temp_reg = gen_rtx_REG (Pmode, 1);
11082 /* When probing for stack-clash mitigation, we have to track the distance
11083 between the stack pointer and closest known reference.
11085 Most of the time we have to make a worst case assumption. The
11086 only exception is when TARGET_BACKCHAIN is active, in which case
11087 we know *sp (offset 0) was written. */
11088 HOST_WIDE_INT probe_interval
11089 = 1 << param_stack_clash_protection_probe_interval;
11090 HOST_WIDE_INT last_probe_offset
11091 = (TARGET_BACKCHAIN
11092 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11093 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11095 s390_save_gprs_to_fprs ();
11097 /* Save call saved gprs. */
11098 if (cfun_frame_layout.first_save_gpr != -1)
11100 insn = save_gprs (stack_pointer_rtx,
11101 cfun_frame_layout.gprs_offset +
11102 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11103 - cfun_frame_layout.first_save_gpr_slot),
11104 cfun_frame_layout.first_save_gpr,
11105 cfun_frame_layout.last_save_gpr);
11107 /* This is not 100% correct. If we have more than one register saved,
11108 then LAST_PROBE_OFFSET can move even closer to sp. */
11109 last_probe_offset
11110 = (cfun_frame_layout.gprs_offset +
11111 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11112 - cfun_frame_layout.first_save_gpr_slot));
11114 emit_insn (insn);
11117 /* Dummy insn to mark literal pool slot. */
11119 if (cfun->machine->base_reg)
11120 emit_insn (gen_main_pool (cfun->machine->base_reg));
11122 offset = cfun_frame_layout.f0_offset;
11124 /* Save f0 and f2. */
11125 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11127 if (cfun_fpr_save_p (i))
11129 save_fpr (stack_pointer_rtx, offset, i);
11130 if (offset < last_probe_offset)
11131 last_probe_offset = offset;
11132 offset += 8;
11134 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11135 offset += 8;
11138 /* Save f4 and f6. */
11139 offset = cfun_frame_layout.f4_offset;
11140 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11142 if (cfun_fpr_save_p (i))
11144 insn = save_fpr (stack_pointer_rtx, offset, i);
11145 if (offset < last_probe_offset)
11146 last_probe_offset = offset;
11147 offset += 8;
11149 /* If f4 and f6 are call clobbered they are saved due to
11150 stdargs and therefore are not frame related. */
11151 if (!call_used_regs[i])
11152 RTX_FRAME_RELATED_P (insn) = 1;
11154 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11155 offset += 8;
11158 if (TARGET_PACKED_STACK
11159 && cfun_save_high_fprs_p
11160 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11162 offset = (cfun_frame_layout.f8_offset
11163 + (cfun_frame_layout.high_fprs - 1) * 8);
11165 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11166 if (cfun_fpr_save_p (i))
11168 insn = save_fpr (stack_pointer_rtx, offset, i);
11169 if (offset < last_probe_offset)
11170 last_probe_offset = offset;
11172 RTX_FRAME_RELATED_P (insn) = 1;
11173 offset -= 8;
11175 if (offset >= cfun_frame_layout.f8_offset)
11176 next_fpr = i;
11179 if (!TARGET_PACKED_STACK)
11180 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11182 if (flag_stack_usage_info)
11183 current_function_static_stack_size = cfun_frame_layout.frame_size;
11185 /* Decrement stack pointer. */
11187 if (cfun_frame_layout.frame_size > 0)
11189 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11190 rtx_insn *stack_pointer_backup_loc;
11191 bool temp_reg_clobbered_p;
11193 if (s390_stack_size)
11195 HOST_WIDE_INT stack_guard;
11197 if (s390_stack_guard)
11198 stack_guard = s390_stack_guard;
11199 else
11201 /* If no value for stack guard is provided the smallest power of 2
11202 larger than the current frame size is chosen. */
11203 stack_guard = 1;
11204 while (stack_guard < cfun_frame_layout.frame_size)
11205 stack_guard <<= 1;
11208 if (cfun_frame_layout.frame_size >= s390_stack_size)
11210 warning (0, "frame size of function %qs is %wd"
11211 " bytes exceeding user provided stack limit of "
11212 "%d bytes. "
11213 "An unconditional trap is added.",
11214 current_function_name(), cfun_frame_layout.frame_size,
11215 s390_stack_size);
11216 emit_insn (gen_trap ());
11217 emit_barrier ();
11219 else
11221 /* stack_guard has to be smaller than s390_stack_size.
11222 Otherwise we would emit an AND with zero which would
11223 not match the test under mask pattern. */
11224 if (stack_guard >= s390_stack_size)
11226 warning (0, "frame size of function %qs is %wd"
11227 " bytes which is more than half the stack size. "
11228 "The dynamic check would not be reliable. "
11229 "No check emitted for this function.",
11230 current_function_name(),
11231 cfun_frame_layout.frame_size);
11233 else
11235 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11236 & ~(stack_guard - 1));
11238 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11239 GEN_INT (stack_check_mask));
11240 if (TARGET_64BIT)
11241 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11242 t, const0_rtx),
11243 t, const0_rtx, const0_rtx));
11244 else
11245 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11246 t, const0_rtx),
11247 t, const0_rtx, const0_rtx));
11252 if (s390_warn_framesize > 0
11253 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11254 warning (0, "frame size of %qs is %wd bytes",
11255 current_function_name (), cfun_frame_layout.frame_size);
11257 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11258 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11260 /* Save the location where we could back up the incoming stack
11261 pointer. */
11262 stack_pointer_backup_loc = get_last_insn ();
11264 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11265 temp_reg);
11267 if (TARGET_BACKCHAIN || next_fpr)
11269 if (temp_reg_clobbered_p)
11271 /* allocate_stack_space had to make use of temp_reg and
11272 we need it to hold a backup of the incoming stack
11273 pointer. Calculate back that value from the current
11274 stack pointer. */
11275 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11276 GEN_INT (cfun_frame_layout.frame_size),
11277 false);
11279 else
11281 /* allocate_stack_space didn't actually require
11282 temp_reg. Insert the stack pointer backup insn
11283 before the stack pointer decrement code - knowing now
11284 that the value will survive. */
11285 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11286 stack_pointer_backup_loc);
11290 /* Set backchain. */
11292 if (TARGET_BACKCHAIN)
11294 if (cfun_frame_layout.backchain_offset)
11295 addr = gen_rtx_MEM (Pmode,
11296 plus_constant (Pmode, stack_pointer_rtx,
11297 cfun_frame_layout.backchain_offset));
11298 else
11299 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11300 set_mem_alias_set (addr, get_frame_alias_set ());
11301 insn = emit_insn (gen_move_insn (addr, temp_reg));
11304 /* If we support non-call exceptions (e.g. for Java),
11305 we need to make sure the backchain pointer is set up
11306 before any possibly trapping memory access. */
11307 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11309 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11310 emit_clobber (addr);
11313 else if (flag_stack_clash_protection)
11314 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11316 /* Save fprs 8 - 15 (64 bit ABI). */
11318 if (cfun_save_high_fprs_p && next_fpr)
11320 /* If the stack might be accessed through a different register
11321 we have to make sure that the stack pointer decrement is not
11322 moved below the use of the stack slots. */
11323 s390_emit_stack_tie ();
11325 insn = emit_insn (gen_add2_insn (temp_reg,
11326 GEN_INT (cfun_frame_layout.f8_offset)));
11328 offset = 0;
11330 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11331 if (cfun_fpr_save_p (i))
11333 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11334 cfun_frame_layout.frame_size
11335 + cfun_frame_layout.f8_offset
11336 + offset);
11338 insn = save_fpr (temp_reg, offset, i);
11339 offset += 8;
11340 RTX_FRAME_RELATED_P (insn) = 1;
11341 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11342 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11343 gen_rtx_REG (DFmode, i)));
11347 /* Set frame pointer, if needed. */
11349 if (frame_pointer_needed)
11351 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11352 RTX_FRAME_RELATED_P (insn) = 1;
11355 /* Set up got pointer, if needed. */
11357 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11359 rtx_insn *insns = s390_load_got ();
11361 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11362 annotate_constant_pool_refs (insn);
11364 emit_insn (insns);
11367 #if TARGET_TPF != 0
11368 if (TARGET_TPF_PROFILING)
11370 /* Generate a BAS instruction to serve as a function entry
11371 intercept to facilitate the use of tracing algorithms located
11372 at the branch target. */
11373 emit_insn (gen_prologue_tpf (
11374 GEN_INT (s390_tpf_trace_hook_prologue_check),
11375 GEN_INT (s390_tpf_trace_hook_prologue_target)));
11377 /* Emit a blockage here so that all code lies between the
11378 profiling mechanisms. */
11379 emit_insn (gen_blockage ());
11381 #endif
11384 /* Expand the epilogue into a bunch of separate insns. */
11386 void
11387 s390_emit_epilogue (bool sibcall)
11389 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11390 int area_bottom, area_top, offset = 0;
11391 int next_offset;
11392 int i;
11394 #if TARGET_TPF != 0
11395 if (TARGET_TPF_PROFILING)
11397 /* Generate a BAS instruction to serve as a function entry
11398 intercept to facilitate the use of tracing algorithms located
11399 at the branch target. */
11401 /* Emit a blockage here so that all code lies between the
11402 profiling mechanisms. */
11403 emit_insn (gen_blockage ());
11405 emit_insn (gen_epilogue_tpf (
11406 GEN_INT (s390_tpf_trace_hook_epilogue_check),
11407 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11409 #endif
11411 /* Check whether to use frame or stack pointer for restore. */
11413 frame_pointer = (frame_pointer_needed
11414 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11416 s390_frame_area (&area_bottom, &area_top);
11418 /* Check whether we can access the register save area.
11419 If not, increment the frame pointer as required. */
11421 if (area_top <= area_bottom)
11423 /* Nothing to restore. */
11425 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11426 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11428 /* Area is in range. */
11429 offset = cfun_frame_layout.frame_size;
11431 else
11433 rtx_insn *insn;
11434 rtx frame_off, cfa;
11436 offset = area_bottom < 0 ? -area_bottom : 0;
11437 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11439 cfa = gen_rtx_SET (frame_pointer,
11440 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11441 if (DISP_IN_RANGE (INTVAL (frame_off)))
11443 rtx set;
11445 set = gen_rtx_SET (frame_pointer,
11446 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11447 insn = emit_insn (set);
11449 else
11451 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11452 frame_off = force_const_mem (Pmode, frame_off);
11454 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11455 annotate_constant_pool_refs (insn);
11457 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11458 RTX_FRAME_RELATED_P (insn) = 1;
11461 /* Restore call saved fprs. */
11463 if (TARGET_64BIT)
11465 if (cfun_save_high_fprs_p)
11467 next_offset = cfun_frame_layout.f8_offset;
11468 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11470 if (cfun_fpr_save_p (i))
11472 restore_fpr (frame_pointer,
11473 offset + next_offset, i);
11474 cfa_restores
11475 = alloc_reg_note (REG_CFA_RESTORE,
11476 gen_rtx_REG (DFmode, i), cfa_restores);
11477 next_offset += 8;
11483 else
11485 next_offset = cfun_frame_layout.f4_offset;
11486 /* f4, f6 */
11487 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11489 if (cfun_fpr_save_p (i))
11491 restore_fpr (frame_pointer,
11492 offset + next_offset, i);
11493 cfa_restores
11494 = alloc_reg_note (REG_CFA_RESTORE,
11495 gen_rtx_REG (DFmode, i), cfa_restores);
11496 next_offset += 8;
11498 else if (!TARGET_PACKED_STACK)
11499 next_offset += 8;
11504 /* Restore call saved gprs. */
11506 if (cfun_frame_layout.first_restore_gpr != -1)
11508 rtx insn, addr;
11509 int i;
11511 /* Check for global register and save them
11512 to stack location from where they get restored. */
11514 for (i = cfun_frame_layout.first_restore_gpr;
11515 i <= cfun_frame_layout.last_restore_gpr;
11516 i++)
11518 if (global_not_special_regno_p (i))
11520 addr = plus_constant (Pmode, frame_pointer,
11521 offset + cfun_frame_layout.gprs_offset
11522 + (i - cfun_frame_layout.first_save_gpr_slot)
11523 * UNITS_PER_LONG);
11524 addr = gen_rtx_MEM (Pmode, addr);
11525 set_mem_alias_set (addr, get_frame_alias_set ());
11526 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11528 else
11529 cfa_restores
11530 = alloc_reg_note (REG_CFA_RESTORE,
11531 gen_rtx_REG (Pmode, i), cfa_restores);
11534 /* Fetch return address from stack before load multiple,
11535 this is good for scheduling.
11537 Only do this if we already decided that r14 needs to be
11538 saved to a stack slot. (And not just because r14 happens to
11539 be in between two GPRs which need saving.) Otherwise it
11540 would be difficult to take that decision back in
11541 s390_optimize_prologue.
11543 This optimization is only helpful on in-order machines. */
11544 if (! sibcall
11545 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11546 && s390_tune <= PROCESSOR_2097_Z10)
11548 int return_regnum = find_unused_clobbered_reg();
11549 if (!return_regnum
11550 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11551 && !TARGET_CPU_Z10
11552 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11554 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11555 return_regnum = 4;
11557 return_reg = gen_rtx_REG (Pmode, return_regnum);
11559 addr = plus_constant (Pmode, frame_pointer,
11560 offset + cfun_frame_layout.gprs_offset
11561 + (RETURN_REGNUM
11562 - cfun_frame_layout.first_save_gpr_slot)
11563 * UNITS_PER_LONG);
11564 addr = gen_rtx_MEM (Pmode, addr);
11565 set_mem_alias_set (addr, get_frame_alias_set ());
11566 emit_move_insn (return_reg, addr);
11568 /* Once we did that optimization we have to make sure
11569 s390_optimize_prologue does not try to remove the store
11570 of r14 since we will not be able to find the load issued
11571 here. */
11572 cfun_frame_layout.save_return_addr_p = true;
11575 insn = restore_gprs (frame_pointer,
11576 offset + cfun_frame_layout.gprs_offset
11577 + (cfun_frame_layout.first_restore_gpr
11578 - cfun_frame_layout.first_save_gpr_slot)
11579 * UNITS_PER_LONG,
11580 cfun_frame_layout.first_restore_gpr,
11581 cfun_frame_layout.last_restore_gpr);
11582 insn = emit_insn (insn);
11583 REG_NOTES (insn) = cfa_restores;
11584 add_reg_note (insn, REG_CFA_DEF_CFA,
11585 plus_constant (Pmode, stack_pointer_rtx,
11586 STACK_POINTER_OFFSET));
11587 RTX_FRAME_RELATED_P (insn) = 1;
11590 s390_restore_gprs_from_fprs ();
11592 if (! sibcall)
11594 if (!return_reg && !s390_can_use_return_insn ())
11595 /* We planned to emit (return), but we are not allowed to. */
11596 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11598 if (return_reg)
11599 /* Emit (return) and (use). */
11600 emit_jump_insn (gen_return_use (return_reg));
11601 else
11602 /* The fact that RETURN_REGNUM is used is already reflected by
11603 EPILOGUE_USES. Emit plain (return). */
11604 emit_jump_insn (gen_return ());
11608 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11610 static void
11611 s300_set_up_by_prologue (hard_reg_set_container *regs)
11613 if (cfun->machine->base_reg
11614 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11615 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11618 /* -fsplit-stack support. */
11620 /* A SYMBOL_REF for __morestack. */
11621 static GTY(()) rtx morestack_ref;
11623 /* When using -fsplit-stack, the allocation routines set a field in
11624 the TCB to the bottom of the stack plus this much space, measured
11625 in bytes. */
11627 #define SPLIT_STACK_AVAILABLE 1024
11629 /* Emit the parmblock for __morestack into .rodata section. It
11630 consists of 3 pointer size entries:
11631 - frame size
11632 - size of stack arguments
11633 - offset between parm block and __morestack return label */
11635 void
11636 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11637 rtx frame_size, rtx args_size)
11639 rtx ops[] = { parm_block, call_done };
11641 switch_to_section (targetm.asm_out.function_rodata_section
11642 (current_function_decl));
11644 if (TARGET_64BIT)
11645 output_asm_insn (".align\t8", NULL);
11646 else
11647 output_asm_insn (".align\t4", NULL);
11649 (*targetm.asm_out.internal_label) (asm_out_file, "L",
11650 CODE_LABEL_NUMBER (parm_block));
11651 if (TARGET_64BIT)
11653 output_asm_insn (".quad\t%0", &frame_size);
11654 output_asm_insn (".quad\t%0", &args_size);
11655 output_asm_insn (".quad\t%1-%0", ops);
11657 else
11659 output_asm_insn (".long\t%0", &frame_size);
11660 output_asm_insn (".long\t%0", &args_size);
11661 output_asm_insn (".long\t%1-%0", ops);
11664 switch_to_section (current_function_section ());
11667 /* Emit -fsplit-stack prologue, which goes before the regular function
11668 prologue. */
11670 void
11671 s390_expand_split_stack_prologue (void)
11673 rtx r1, guard, cc = NULL;
11674 rtx_insn *insn;
11675 /* Offset from thread pointer to __private_ss. */
11676 int psso = TARGET_64BIT ? 0x38 : 0x20;
11677 /* Pointer size in bytes. */
11678 /* Frame size and argument size - the two parameters to __morestack. */
11679 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11680 /* Align argument size to 8 bytes - simplifies __morestack code. */
11681 HOST_WIDE_INT args_size = crtl->args.size >= 0
11682 ? ((crtl->args.size + 7) & ~7)
11683 : 0;
11684 /* Label to be called by __morestack. */
11685 rtx_code_label *call_done = NULL;
11686 rtx_code_label *parm_base = NULL;
11687 rtx tmp;
11689 gcc_assert (flag_split_stack && reload_completed);
11691 r1 = gen_rtx_REG (Pmode, 1);
11693 /* If no stack frame will be allocated, don't do anything. */
11694 if (!frame_size)
11696 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11698 /* If va_start is used, just use r15. */
11699 emit_move_insn (r1,
11700 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11701 GEN_INT (STACK_POINTER_OFFSET)));
11704 return;
11707 if (morestack_ref == NULL_RTX)
11709 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11710 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11711 | SYMBOL_FLAG_FUNCTION);
11714 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11716 /* If frame_size will fit in an add instruction, do a stack space
11717 check, and only call __morestack if there's not enough space. */
11719 /* Get thread pointer. r1 is the only register we can always destroy - r0
11720 could contain a static chain (and cannot be used to address memory
11721 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11722 emit_insn (gen_get_thread_pointer (Pmode, r1));
11723 /* Aim at __private_ss. */
11724 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11726 /* If less than 1 KiB is used, skip the addition and compare directly with
11727 __private_ss. */
11728 if (frame_size > SPLIT_STACK_AVAILABLE)
11730 emit_move_insn (r1, guard);
11731 if (TARGET_64BIT)
11732 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11733 else
11734 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11735 guard = r1;
11738 /* Compare the (maybe adjusted) guard with the stack pointer. */
11739 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11742 call_done = gen_label_rtx ();
11743 parm_base = gen_label_rtx ();
11744 LABEL_NUSES (parm_base)++;
11745 LABEL_NUSES (call_done)++;
11747 /* %r1 = litbase. */
11748 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11749 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11750 LABEL_NUSES (parm_base)++;
11752 /* Now, we need to call __morestack. It has very special calling
11753 conventions: it preserves param/return/static chain registers for
11754 calling main function body, and looks for its own parameters at %r1. */
11755 if (cc != NULL)
11756 tmp = gen_split_stack_cond_call (Pmode,
11757 morestack_ref,
11758 parm_base,
11759 call_done,
11760 GEN_INT (frame_size),
11761 GEN_INT (args_size),
11762 cc);
11763 else
11764 tmp = gen_split_stack_call (Pmode,
11765 morestack_ref,
11766 parm_base,
11767 call_done,
11768 GEN_INT (frame_size),
11769 GEN_INT (args_size));
11771 insn = emit_jump_insn (tmp);
11772 JUMP_LABEL (insn) = call_done;
11773 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11774 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11776 if (cc != NULL)
11778 /* Mark the jump as very unlikely to be taken. */
11779 add_reg_br_prob_note (insn,
11780 profile_probability::very_unlikely ());
11782 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11784 /* If va_start is used, and __morestack was not called, just use
11785 r15. */
11786 emit_move_insn (r1,
11787 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11788 GEN_INT (STACK_POINTER_OFFSET)));
11791 else
11793 emit_barrier ();
11796 /* __morestack will call us here. */
11798 emit_label (call_done);
11801 /* We may have to tell the dataflow pass that the split stack prologue
11802 is initializing a register. */
11804 static void
11805 s390_live_on_entry (bitmap regs)
11807 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11809 gcc_assert (flag_split_stack);
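/* Register 1 is what the split-stack prologue sets up as the varargs
   pointer, so report it to the dataflow pass here. */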
11810 bitmap_set_bit (regs, 1);
11814 /* Return true if the function can use simple_return to return outside
11815 of a shrink-wrapped region. At present shrink-wrapping is supported
11816 in all cases. */
11818 bool
11819 s390_can_use_simple_return_insn (void)
11821 return true;
11824 /* Return true if the epilogue is guaranteed to contain only a return
11825 instruction and if a direct return can therefore be used instead.
11826 One of the main advantages of using direct return instructions
11827 is that we can then use conditional returns. */
11829 bool
11830 s390_can_use_return_insn (void)
11832 int i;
11834 if (!reload_completed)
11835 return false;
11837 if (crtl->profile)
11838 return false;
11840 if (TARGET_TPF_PROFILING)
11841 return false;
11843 for (i = 0; i < 16; i++)
11844 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11845 return false;
11847 /* For 31 bit this is not covered by the frame_size check below
11848 since f4, f6 are saved in the register save area without needing
11849 additional stack space. */
11850 if (!TARGET_64BIT
11851 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11852 return false;
11854 if (cfun->machine->base_reg
11855 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11856 return false;
11858 return cfun_frame_layout.frame_size == 0;
11861 /* The VX ABI differs for vararg functions. Therefore we need the
11862 prototype of the callee to be available when passing vector type
11863 values. */
11864 static const char *
11865 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11867 return ((TARGET_VX_ABI
11868 && typelist == 0
11869 && VECTOR_TYPE_P (TREE_TYPE (val))
11870 && (funcdecl == NULL_TREE
11871 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11872 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11873 ? N_("vector argument passed to unprototyped function")
11874 : NULL);
11878 /* Return the size in bytes of a function argument of
11879 type TYPE and/or mode MODE. At least one of TYPE or
11880 MODE must be specified. */
11882 static int
11883 s390_function_arg_size (machine_mode mode, const_tree type)
11885 if (type)
11886 return int_size_in_bytes (type);
11888 /* No type info available for some library calls ... */
11889 if (mode != BLKmode)
11890 return GET_MODE_SIZE (mode);
11892 /* If we have neither type nor mode, abort. */
11893 gcc_unreachable ();
11896 /* Return true if a function argument of type TYPE and mode MODE
11897 is to be passed in a vector register, if available. */
11899 bool
11900 s390_function_arg_vector (machine_mode mode, const_tree type)
11902 if (!TARGET_VX_ABI)
11903 return false;
11905 if (s390_function_arg_size (mode, type) > 16)
11906 return false;
11908 /* No type info available for some library calls ... */
11909 if (!type)
11910 return VECTOR_MODE_P (mode);
11912 /* The ABI says that record types with a single member are treated
11913 just like that member would be. */
11914 int empty_base_seen = 0;
11915 const_tree orig_type = type;
11916 while (TREE_CODE (type) == RECORD_TYPE)
11918 tree field, single = NULL_TREE;
11920 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11922 if (TREE_CODE (field) != FIELD_DECL)
11923 continue;
11925 if (DECL_FIELD_ABI_IGNORED (field))
11927 if (lookup_attribute ("no_unique_address",
11928 DECL_ATTRIBUTES (field)))
11929 empty_base_seen |= 2;
11930 else
11931 empty_base_seen |= 1;
11932 continue;
11935 if (single == NULL_TREE)
11936 single = TREE_TYPE (field);
11937 else
11938 return false;
11941 if (single == NULL_TREE)
11942 return false;
11943 else
11945 /* If the field declaration adds an extra byte due to
11946 e.g. padding, this is not accepted as a vector type. */
11947 if (int_size_in_bytes (single) <= 0
11948 || int_size_in_bytes (single) != int_size_in_bytes (type))
11949 return false;
11950 type = single;
11954 if (!VECTOR_TYPE_P (type))
11955 return false;
11957 if (warn_psabi && empty_base_seen)
11959 static unsigned last_reported_type_uid;
11960 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
11961 if (uid != last_reported_type_uid)
11963 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
11964 last_reported_type_uid = uid;
11965 if (empty_base_seen & 1)
11966 inform (input_location,
11967 "parameter passing for argument of type %qT when C++17 "
11968 "is enabled changed to match C++14 %{in GCC 10.1%}",
11969 orig_type, url);
11970 else
11971 inform (input_location,
11972 "parameter passing for argument of type %qT with "
11973 "%<[[no_unique_address]]%> members changed "
11974 "%{in GCC 10.1%}", orig_type, url);
11977 return true;
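/* Editorial sketch, not part of the original source: under the
   single-member-record rule above a wrapper struct is passed exactly like
   its only member, assuming the vector ABI is in effect.  The type names
   are made up for illustration.

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrapped { v4si v; };       passed like a plain v4si, i.e. in a VR
     struct two { v4si v; v4si w; };   two members, so not passed as a vector  */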
11980 /* Return true if a function argument of type TYPE and mode MODE
11981 is to be passed in a floating-point register, if available. */
11983 static bool
11984 s390_function_arg_float (machine_mode mode, const_tree type)
11986 if (s390_function_arg_size (mode, type) > 8)
11987 return false;
11989 /* Soft-float changes the ABI: no floating-point registers are used. */
11990 if (TARGET_SOFT_FLOAT)
11991 return false;
11993 /* No type info available for some library calls ... */
11994 if (!type)
11995 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11997 /* The ABI says that record types with a single member are treated
11998 just like that member would be. */
11999 int empty_base_seen = 0;
12000 const_tree orig_type = type;
12001 while (TREE_CODE (type) == RECORD_TYPE)
12003 tree field, single = NULL_TREE;
12005 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12007 if (TREE_CODE (field) != FIELD_DECL)
12008 continue;
12009 if (DECL_FIELD_ABI_IGNORED (field))
12011 if (lookup_attribute ("no_unique_address",
12012 DECL_ATTRIBUTES (field)))
12013 empty_base_seen |= 2;
12014 else
12015 empty_base_seen |= 1;
12016 continue;
12019 if (single == NULL_TREE)
12020 single = TREE_TYPE (field);
12021 else
12022 return false;
12025 if (single == NULL_TREE)
12026 return false;
12027 else
12028 type = single;
12031 if (TREE_CODE (type) != REAL_TYPE)
12032 return false;
12034 if (warn_psabi && empty_base_seen)
12036 static unsigned last_reported_type_uid;
12037 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12038 if (uid != last_reported_type_uid)
12040 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12041 last_reported_type_uid = uid;
12042 if (empty_base_seen & 1)
12043 inform (input_location,
12044 "parameter passing for argument of type %qT when C++17 "
12045 "is enabled changed to match C++14 %{in GCC 10.1%}",
12046 orig_type, url);
12047 else
12048 inform (input_location,
12049 "parameter passing for argument of type %qT with "
12050 "%<[[no_unique_address]]%> members changed "
12051 "%{in GCC 10.1%}", orig_type, url);
12055 return true;
12058 /* Return true if a function argument of type TYPE and mode MODE
12059 is to be passed in an integer register, or a pair of integer
12060 registers, if available. */
12062 static bool
12063 s390_function_arg_integer (machine_mode mode, const_tree type)
12065 int size = s390_function_arg_size (mode, type);
12066 if (size > 8)
12067 return false;
12069 /* No type info available for some library calls ... */
12070 if (!type)
12071 return GET_MODE_CLASS (mode) == MODE_INT
12072 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12074 /* We accept small integral (and similar) types. */
12075 if (INTEGRAL_TYPE_P (type)
12076 || POINTER_TYPE_P (type)
12077 || TREE_CODE (type) == NULLPTR_TYPE
12078 || TREE_CODE (type) == OFFSET_TYPE
12079 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12080 return true;
12082 /* We also accept structs of size 1, 2, 4, 8 that are not
12083 passed in floating-point registers. */
12084 if (AGGREGATE_TYPE_P (type)
12085 && exact_log2 (size) >= 0
12086 && !s390_function_arg_float (mode, type))
12087 return true;
12089 return false;
12092 /* Return 1 if a function argument ARG is to be passed by reference.
12093 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12094 are passed by value, all other structures (and complex numbers) are
12095 passed by reference. */
12097 static bool
12098 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12100 int size = s390_function_arg_size (arg.mode, arg.type);
12102 if (s390_function_arg_vector (arg.mode, arg.type))
12103 return false;
12105 if (size > 8)
12106 return true;
12108 if (tree type = arg.type)
12110 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12111 return true;
12113 if (TREE_CODE (type) == COMPLEX_TYPE
12114 || TREE_CODE (type) == VECTOR_TYPE)
12115 return true;
12118 return false;
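/* Editorial sketch of the rule above (illustrative declarations, not part
   of the original source):

     struct s4  { int a; };                 4 bytes, power of two    -> by value
     struct s8  { int a; int b; };          8 bytes, power of two    -> by value
     struct s3  { char c[3]; };             size not a power of two  -> by reference
     struct s12 { int a; int b; int c; };   larger than 8 bytes      -> by reference
     _Complex double z;                     complex                  -> by reference  */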
12121 /* Update the data in CUM to advance over argument ARG. */
12123 static void
12124 s390_function_arg_advance (cumulative_args_t cum_v,
12125 const function_arg_info &arg)
12127 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12129 if (s390_function_arg_vector (arg.mode, arg.type))
12131 /* We are called for unnamed vector stdarg arguments which are
12132 passed on the stack. In this case this hook does not have to
12133 do anything since stack arguments are tracked by common
12134 code. */
12135 if (!arg.named)
12136 return;
12137 cum->vrs += 1;
12139 else if (s390_function_arg_float (arg.mode, arg.type))
12141 cum->fprs += 1;
12143 else if (s390_function_arg_integer (arg.mode, arg.type))
12145 int size = s390_function_arg_size (arg.mode, arg.type);
12146 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12148 else
12149 gcc_unreachable ();
12152 /* Define where to put the arguments to a function.
12153 Value is zero to push the argument on the stack,
12154 or a hard register in which to store the argument.
12156 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12157 the preceding args and about the function being called.
12158 ARG is a description of the argument.
12160 On S/390, we use general purpose registers 2 through 6 to
12161 pass integer, pointer, and certain structure arguments, and
12162 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12163 to pass floating point arguments. All remaining arguments
12164 are pushed to the stack. */
12166 static rtx
12167 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12169 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12171 if (!arg.named)
12172 s390_check_type_for_vector_abi (arg.type, true, false);
12174 if (s390_function_arg_vector (arg.mode, arg.type))
12176 /* Vector arguments being part of the ellipsis are passed on the
12177 stack. */
12178 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12179 return NULL_RTX;
12181 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12183 else if (s390_function_arg_float (arg.mode, arg.type))
12185 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12186 return NULL_RTX;
12187 else
12188 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12190 else if (s390_function_arg_integer (arg.mode, arg.type))
12192 int size = s390_function_arg_size (arg.mode, arg.type);
12193 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12195 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12196 return NULL_RTX;
12197 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12198 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12199 else if (n_gprs == 2)
12201 rtvec p = rtvec_alloc (2);
12203 RTVEC_ELT (p, 0)
12204 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12205 const0_rtx);
12206 RTVEC_ELT (p, 1)
12207 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12208 GEN_INT (4));
12210 return gen_rtx_PARALLEL (arg.mode, p);
12214 /* After the real arguments, expand_call calls us once again with an
12215 end marker. Whatever we return here is passed as operand 2 to the
12216 call expanders.
12218 We don't need this feature ... */
12219 else if (arg.end_marker_p ())
12220 return const0_rtx;
12222 gcc_unreachable ();
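/* Editorial example of the assignment performed above for a hypothetical
   prototype (not part of the original source):

     void f (int a, double d, long b, int c, long e);
       a -> %r2, d -> %f0, b -> %r3, c -> %r4, e -> %r5

   A further integer argument would go to %r6; arguments beyond the GPR/FPR
   argument registers are passed on the stack.  */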
12225 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
12226 left-justified when placed on the stack during parameter passing. */
12228 static pad_direction
12229 s390_function_arg_padding (machine_mode mode, const_tree type)
12231 if (s390_function_arg_vector (mode, type))
12232 return PAD_UPWARD;
12234 return default_function_arg_padding (mode, type);
12237 /* Return true if return values of type TYPE should be returned
12238 in a memory buffer whose address is passed by the caller as
12239 hidden first argument. */
12241 static bool
12242 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12244 /* We accept small integral (and similar) types. */
12245 if (INTEGRAL_TYPE_P (type)
12246 || POINTER_TYPE_P (type)
12247 || TREE_CODE (type) == OFFSET_TYPE
12248 || TREE_CODE (type) == REAL_TYPE)
12249 return int_size_in_bytes (type) > 8;
12251 /* vector types which fit into a VR. */
12252 if (TARGET_VX_ABI
12253 && VECTOR_TYPE_P (type)
12254 && int_size_in_bytes (type) <= 16)
12255 return false;
12257 /* Aggregates and similar constructs are always returned
12258 in memory. */
12259 if (AGGREGATE_TYPE_P (type)
12260 || TREE_CODE (type) == COMPLEX_TYPE
12261 || VECTOR_TYPE_P (type))
12262 return true;
12264 /* ??? We get called on all sorts of random stuff from
12265 aggregate_value_p. We can't abort, but it's not clear
12266 what's safe to return. Pretend it's a struct I guess. */
12267 return true;
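/* Editorial sketch of the rules above (illustrative prototypes, not part
   of the original source):

     long f1 (void);                          8-byte scalar          -> register
     __int128 f2 (void);                      16-byte integral       -> memory
     struct p { int a; int b; } f3 (void);    aggregate              -> memory
     typedef int v4si __attribute__ ((vector_size (16)));
     v4si f4 (void);                          16-byte vector, VX ABI -> vector reg  */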
12270 /* Function arguments and return values are promoted to word size. */
12272 static machine_mode
12273 s390_promote_function_mode (const_tree type, machine_mode mode,
12274 int *punsignedp,
12275 const_tree fntype ATTRIBUTE_UNUSED,
12276 int for_return ATTRIBUTE_UNUSED)
12278 if (INTEGRAL_MODE_P (mode)
12279 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12281 if (type != NULL_TREE && POINTER_TYPE_P (type))
12282 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12283 return Pmode;
12286 return mode;
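/* For example (editorial sketch, assuming a 64-bit target where
   UNITS_PER_LONG is 8 and Pmode is DImode):

     void g (short s, int i);
       both s and i are widened to a full 64-bit register when passed,
       and an int return value is widened the same way.  */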
12289 /* Define where to return a (scalar) value of type RET_TYPE.
12290 If RET_TYPE is null, define where to return a (scalar)
12291 value of mode MODE from a libcall. */
12293 static rtx
12294 s390_function_and_libcall_value (machine_mode mode,
12295 const_tree ret_type,
12296 const_tree fntype_or_decl,
12297 bool outgoing ATTRIBUTE_UNUSED)
12299 /* For vector return types it is important to use the RET_TYPE
12300 argument whenever available since the middle-end might have
12301 changed the mode to a scalar mode. */
12302 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12303 || (!ret_type && VECTOR_MODE_P (mode)));
12305 /* For normal functions perform the promotion as
12306 promote_function_mode would do. */
12307 if (ret_type)
12309 int unsignedp = TYPE_UNSIGNED (ret_type);
12310 mode = promote_function_mode (ret_type, mode, &unsignedp,
12311 fntype_or_decl, 1);
12314 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12315 || SCALAR_FLOAT_MODE_P (mode)
12316 || (TARGET_VX_ABI && vector_ret_type_p));
12317 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12319 if (TARGET_VX_ABI && vector_ret_type_p)
12320 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12321 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12322 return gen_rtx_REG (mode, 16);
12323 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12324 || UNITS_PER_LONG == UNITS_PER_WORD)
12325 return gen_rtx_REG (mode, 2);
12326 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12328 /* This case is triggered when returning a 64 bit value with
12329 -m31 -mzarch. Although the value would fit into a single
12330 register it has to be forced into a 32 bit register pair in
12331 order to match the ABI. */
12332 rtvec p = rtvec_alloc (2);
12334 RTVEC_ELT (p, 0)
12335 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12336 RTVEC_ELT (p, 1)
12337 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12339 return gen_rtx_PARALLEL (mode, p);
12342 gcc_unreachable ();
12345 /* Define where to return a scalar return value of type RET_TYPE. */
12347 static rtx
12348 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12349 bool outgoing)
12351 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12352 fn_decl_or_type, outgoing);
12355 /* Define where to return a scalar libcall return value of mode
12356 MODE. */
12358 static rtx
12359 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12361 return s390_function_and_libcall_value (mode, NULL_TREE,
12362 NULL_TREE, true);
12366 /* Create and return the va_list datatype.
12368 On S/390, va_list is an array type equivalent to
12370 typedef struct __va_list_tag
12372 long __gpr;
12373 long __fpr;
12374 void *__overflow_arg_area;
12375 void *__reg_save_area;
12376 } va_list[1];
12378 where __gpr and __fpr hold the number of general purpose
12379 or floating point arguments used up to now, respectively,
12380 __overflow_arg_area points to the stack location of the
12381 next argument passed on the stack, and __reg_save_area
12382 always points to the start of the register area in the
12383 call frame of the current function. The function prologue
12384 saves all registers used for argument passing into this
12385 area if the function uses variable arguments. */
12387 static tree
12388 s390_build_builtin_va_list (void)
12390 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12392 record = lang_hooks.types.make_type (RECORD_TYPE);
12394 type_decl =
12395 build_decl (BUILTINS_LOCATION,
12396 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12398 f_gpr = build_decl (BUILTINS_LOCATION,
12399 FIELD_DECL, get_identifier ("__gpr"),
12400 long_integer_type_node);
12401 f_fpr = build_decl (BUILTINS_LOCATION,
12402 FIELD_DECL, get_identifier ("__fpr"),
12403 long_integer_type_node);
12404 f_ovf = build_decl (BUILTINS_LOCATION,
12405 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12406 ptr_type_node);
12407 f_sav = build_decl (BUILTINS_LOCATION,
12408 FIELD_DECL, get_identifier ("__reg_save_area"),
12409 ptr_type_node);
12411 va_list_gpr_counter_field = f_gpr;
12412 va_list_fpr_counter_field = f_fpr;
12414 DECL_FIELD_CONTEXT (f_gpr) = record;
12415 DECL_FIELD_CONTEXT (f_fpr) = record;
12416 DECL_FIELD_CONTEXT (f_ovf) = record;
12417 DECL_FIELD_CONTEXT (f_sav) = record;
12419 TYPE_STUB_DECL (record) = type_decl;
12420 TYPE_NAME (record) = type_decl;
12421 TYPE_FIELDS (record) = f_gpr;
12422 DECL_CHAIN (f_gpr) = f_fpr;
12423 DECL_CHAIN (f_fpr) = f_ovf;
12424 DECL_CHAIN (f_ovf) = f_sav;
12426 layout_type (record);
12428 /* The correct type is an array type of one element. */
12429 return build_array_type (record, build_index_type (size_zero_node));
12432 /* Implement va_start by filling the va_list structure VALIST.
12433 STDARG_P is always true, and ignored.
12434 NEXTARG points to the first anonymous stack argument.
12436 The following global variables are used to initialize
12437 the va_list structure:
12439 crtl->args.info:
12440 holds number of gprs and fprs used for named arguments.
12441 crtl->args.arg_offset_rtx:
12442 holds the offset of the first anonymous stack argument
12443 (relative to the virtual arg pointer). */
12445 static void
12446 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12448 HOST_WIDE_INT n_gpr, n_fpr;
12449 int off;
12450 tree f_gpr, f_fpr, f_ovf, f_sav;
12451 tree gpr, fpr, ovf, sav, t;
12453 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12454 f_fpr = DECL_CHAIN (f_gpr);
12455 f_ovf = DECL_CHAIN (f_fpr);
12456 f_sav = DECL_CHAIN (f_ovf);
12458 valist = build_simple_mem_ref (valist);
12459 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12460 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12461 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12462 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12464 /* Count number of gp and fp argument registers used. */
12466 n_gpr = crtl->args.info.gprs;
12467 n_fpr = crtl->args.info.fprs;
12469 if (cfun->va_list_gpr_size)
12471 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12472 build_int_cst (NULL_TREE, n_gpr));
12473 TREE_SIDE_EFFECTS (t) = 1;
12474 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12477 if (cfun->va_list_fpr_size)
12479 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12480 build_int_cst (NULL_TREE, n_fpr));
12481 TREE_SIDE_EFFECTS (t) = 1;
12482 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12485 if (flag_split_stack
12486 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12487 == NULL)
12488 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12490 rtx reg;
12491 rtx_insn *seq;
12493 reg = gen_reg_rtx (Pmode);
12494 cfun->machine->split_stack_varargs_pointer = reg;
12496 start_sequence ();
12497 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12498 seq = get_insns ();
12499 end_sequence ();
12501 push_topmost_sequence ();
12502 emit_insn_after (seq, entry_of_function ());
12503 pop_topmost_sequence ();
12506 /* Find the overflow area.
12507 FIXME: This currently is too pessimistic when the vector ABI is
12508 enabled. In that case we *always* set up the overflow area
12509 pointer. */
12510 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12511 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12512 || TARGET_VX_ABI)
12514 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12515 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12516 else
12517 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12519 off = INTVAL (crtl->args.arg_offset_rtx);
12520 off = off < 0 ? 0 : off;
12521 if (TARGET_DEBUG_ARG)
12522 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12523 (int)n_gpr, (int)n_fpr, off);
12525 t = fold_build_pointer_plus_hwi (t, off);
12527 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12528 TREE_SIDE_EFFECTS (t) = 1;
12529 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12532 /* Find the register save area. */
12533 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12534 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12536 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12537 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12539 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12540 TREE_SIDE_EFFECTS (t) = 1;
12541 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12545 /* Implement va_arg by updating the va_list structure
12546 VALIST as required to retrieve an argument of type
12547 TYPE, and returning that argument.
12549 Generates code equivalent to:
12551 if (integral value) {
12552 if (size <= 4 && args.gpr < 5 ||
12553 size > 4 && args.gpr < 4 )
12554 ret = args.reg_save_area[args.gpr+8]
12555 else
12556 ret = *args.overflow_arg_area++;
12557 } else if (vector value) {
12558 ret = *args.overflow_arg_area;
12559 args.overflow_arg_area += size / 8;
12560 } else if (float value) {
12561 if (args.fpr < 2)
12562 ret = args.reg_save_area[args.fpr+64]
12563 else
12564 ret = *args.overflow_arg_area++;
12565 } else if (aggregate value) {
12566 if (args.gpr < 5)
12567 ret = *args.reg_save_area[args.gpr]
12568 else
12569 ret = **args.overflow_arg_area++;
12570 } */
12572 static tree
12573 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12574 gimple_seq *post_p ATTRIBUTE_UNUSED)
12576 tree f_gpr, f_fpr, f_ovf, f_sav;
12577 tree gpr, fpr, ovf, sav, reg, t, u;
12578 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12579 tree lab_false, lab_over = NULL_TREE;
12580 tree addr = create_tmp_var (ptr_type_node, "addr");
12581 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12582 a stack slot. */
12584 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12585 f_fpr = DECL_CHAIN (f_gpr);
12586 f_ovf = DECL_CHAIN (f_fpr);
12587 f_sav = DECL_CHAIN (f_ovf);
12589 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12590 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12591 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12593 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12594 both appear on a lhs. */
12595 valist = unshare_expr (valist);
12596 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12598 size = int_size_in_bytes (type);
12600 s390_check_type_for_vector_abi (type, true, false);
12602 if (pass_va_arg_by_reference (type))
12604 if (TARGET_DEBUG_ARG)
12606 fprintf (stderr, "va_arg: aggregate type");
12607 debug_tree (type);
12610 /* Aggregates are passed by reference. */
12611 indirect_p = 1;
12612 reg = gpr;
12613 n_reg = 1;
12615 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12616 will be added by s390_frame_info because for va_args an even number
12617 of gprs always has to be saved (r15-r2 = 14 regs). */
12618 sav_ofs = 2 * UNITS_PER_LONG;
12619 sav_scale = UNITS_PER_LONG;
12620 size = UNITS_PER_LONG;
12621 max_reg = GP_ARG_NUM_REG - n_reg;
12622 left_align_p = false;
12624 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12626 if (TARGET_DEBUG_ARG)
12628 fprintf (stderr, "va_arg: vector type");
12629 debug_tree (type);
12632 indirect_p = 0;
12633 reg = NULL_TREE;
12634 n_reg = 0;
12635 sav_ofs = 0;
12636 sav_scale = 8;
12637 max_reg = 0;
12638 left_align_p = true;
12640 else if (s390_function_arg_float (TYPE_MODE (type), type))
12642 if (TARGET_DEBUG_ARG)
12644 fprintf (stderr, "va_arg: float type");
12645 debug_tree (type);
12648 /* FP args go in FP registers, if present. */
12649 indirect_p = 0;
12650 reg = fpr;
12651 n_reg = 1;
12652 sav_ofs = 16 * UNITS_PER_LONG;
12653 sav_scale = 8;
12654 max_reg = FP_ARG_NUM_REG - n_reg;
12655 left_align_p = false;
12657 else
12659 if (TARGET_DEBUG_ARG)
12661 fprintf (stderr, "va_arg: other type");
12662 debug_tree (type);
12665 /* Otherwise into GP registers. */
12666 indirect_p = 0;
12667 reg = gpr;
12668 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12670 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12671 will be added by s390_frame_info because for va_args an even number
12672 of gprs always has to be saved (r15-r2 = 14 regs). */
12673 sav_ofs = 2 * UNITS_PER_LONG;
12675 if (size < UNITS_PER_LONG)
12676 sav_ofs += UNITS_PER_LONG - size;
12678 sav_scale = UNITS_PER_LONG;
12679 max_reg = GP_ARG_NUM_REG - n_reg;
12680 left_align_p = false;
12683 /* Pull the value out of the saved registers ... */
12685 if (reg != NULL_TREE)
12688 /* if (reg > ((typeof (reg))max_reg))
12689 goto lab_false;
12691 addr = sav + sav_ofs + reg * sav_scale;
12693 goto lab_over;
12695 lab_false: */
12698 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12699 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12701 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12702 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12703 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12704 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12705 gimplify_and_add (t, pre_p);
12707 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12708 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12709 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12710 t = fold_build_pointer_plus (t, u);
12712 gimplify_assign (addr, t, pre_p);
12714 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12716 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12719 /* ... Otherwise out of the overflow area. */
12721 t = ovf;
12722 if (size < UNITS_PER_LONG && !left_align_p)
12723 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12725 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12727 gimplify_assign (addr, t, pre_p);
12729 if (size < UNITS_PER_LONG && left_align_p)
12730 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12731 else
12732 t = fold_build_pointer_plus_hwi (t, size);
12734 gimplify_assign (ovf, t, pre_p);
12736 if (reg != NULL_TREE)
12737 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12740 /* Increment register save count. */
12742 if (n_reg > 0)
12744 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12745 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12746 gimplify_and_add (u, pre_p);
12749 if (indirect_p)
12751 t = build_pointer_type_for_mode (build_pointer_type (type),
12752 ptr_mode, true);
12753 addr = fold_convert (t, addr);
12754 addr = build_va_arg_indirect_ref (addr);
12756 else
12758 t = build_pointer_type_for_mode (type, ptr_mode, true);
12759 addr = fold_convert (t, addr);
12762 return build_va_arg_indirect_ref (addr);
12765 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12766 expanders.
12767 DEST - Register location where CC will be stored.
12768 TDB - Pointer to a 256 byte area where to store the transaction
12769 diagnostic block. NULL if TDB is not needed.
12770 RETRY - Retry count value. If non-NULL, a retry loop for CC2
12771 is emitted.
12772 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
12773 of the tbegin instruction pattern. */
12775 void
12776 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12778 rtx retry_plus_two = gen_reg_rtx (SImode);
12779 rtx retry_reg = gen_reg_rtx (SImode);
12780 rtx_code_label *retry_label = NULL;
12782 if (retry != NULL_RTX)
12784 emit_move_insn (retry_reg, retry);
12785 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12786 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12787 retry_label = gen_label_rtx ();
12788 emit_label (retry_label);
12791 if (clobber_fprs_p)
12793 if (TARGET_VX)
12794 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12795 tdb));
12796 else
12797 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12798 tdb));
12800 else
12801 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12802 tdb));
12804 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12805 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12806 CC_REGNUM)),
12807 UNSPEC_CC_TO_INT));
12808 if (retry != NULL_RTX)
12810 const int CC0 = 1 << 3;
12811 const int CC1 = 1 << 2;
12812 const int CC3 = 1 << 0;
12813 rtx jump;
12814 rtx count = gen_reg_rtx (SImode);
12815 rtx_code_label *leave_label = gen_label_rtx ();
12817 /* Exit for success and permanent failures. */
12818 jump = s390_emit_jump (leave_label,
12819 gen_rtx_EQ (VOIDmode,
12820 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12821 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12822 LABEL_NUSES (leave_label) = 1;
12824 /* CC2 - transient failure. Perform retry with ppa. */
12825 emit_move_insn (count, retry_plus_two);
12826 emit_insn (gen_subsi3 (count, count, retry_reg));
12827 emit_insn (gen_tx_assist (count));
12828 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12829 retry_reg,
12830 retry_reg));
12831 JUMP_LABEL (jump) = retry_label;
12832 LABEL_NUSES (retry_label) = 1;
12833 emit_label (leave_label);
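/* Editorial usage sketch for the expanders above.  This is user code, not
   part of the backend; it assumes the documented s390 HTM built-ins and
   <htmintrin.h> (compile with -mhtm on zEC12 or later):

     #include <htmintrin.h>

     if (__builtin_tbegin_retry ((void *) 0, 5) == _HTM_TBEGIN_STARTED)
       {
         ... transactional path ...
         __builtin_tend ();
       }
     else
       {
         ... fallback path after a persistent abort or exhausted retries ...
       }
*/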
12838 /* Return the decl for the target specific builtin with the function
12839 code FCODE. */
12841 static tree
12842 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12844 if (fcode >= S390_BUILTIN_MAX)
12845 return error_mark_node;
12847 return s390_builtin_decls[fcode];
12850 /* We call mcount before the function prologue. So a profiled leaf
12851 function should stay a leaf function. */
12853 static bool
12854 s390_keep_leaf_when_profiled ()
12856 return true;
12859 /* Output assembly code for the trampoline template to
12860 stdio stream FILE.
12862 On S/390, we use gpr 1 internally in the trampoline code;
12863 gpr 0 is used to hold the static chain. */
12865 static void
12866 s390_asm_trampoline_template (FILE *file)
12868 rtx op[2];
12869 op[0] = gen_rtx_REG (Pmode, 0);
12870 op[1] = gen_rtx_REG (Pmode, 1);
12872 if (TARGET_64BIT)
12874 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12875 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12876 output_asm_insn ("br\t%1", op); /* 2 byte */
12877 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12879 else
12881 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12882 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12883 output_asm_insn ("br\t%1", op); /* 2 byte */
12884 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12888 /* Emit RTL insns to initialize the variable parts of a trampoline.
12889 FNADDR is an RTX for the address of the function's pure code.
12890 CXT is an RTX for the static chain value for the function. */
12892 static void
12893 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12895 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12896 rtx mem;
12898 emit_block_move (m_tramp, assemble_trampoline_template (),
12899 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12901 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12902 emit_move_insn (mem, cxt);
12903 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12904 emit_move_insn (mem, fnaddr);
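/* Editorial sketch of the resulting 64-bit trampoline (derived from the
   template and the offsets above; the 31-bit case is analogous with 4-byte
   slots at offsets 8 and 12):

     offset  0:  basr %r1,0              %r1 := address of the lmg
     offset  2:  lmg  %r0,%r1,14(%r1)    load the two values below
     offset  8:  br   %r1
     offset 16:  static chain value      (2 * UNITS_PER_LONG) -> ends up in %r0
     offset 24:  target function address (3 * UNITS_PER_LONG) -> ends up in %r1  */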
12907 static void
12908 output_asm_nops (const char *user, int hw)
12910 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12911 while (hw > 0)
12913 if (hw >= 3)
12915 output_asm_insn ("brcl\t0,0", NULL);
12916 hw -= 3;
12918 else if (hw >= 2)
12920 output_asm_insn ("bc\t0,0", NULL);
12921 hw -= 2;
12923 else
12925 output_asm_insn ("bcr\t0,0", NULL);
12926 hw -= 1;
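/* Editorial trace of the loop above: a request for 7 halfwords is emitted
   as brcl 0,0 (3 hw) + brcl 0,0 (3 hw) + bcr 0,0 (1 hw); a request for
   2 halfwords uses a single bc 0,0.  */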
12931 /* Output assembler code to FILE to increment profiler label # LABELNO
12932 for profiling a function entry. */
12934 void
12935 s390_function_profiler (FILE *file, int labelno)
12937 rtx op[8];
12939 char label[128];
12940 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12942 fprintf (file, "# function profiler \n");
12944 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12945 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12946 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12947 op[7] = GEN_INT (UNITS_PER_LONG);
12949 op[2] = gen_rtx_REG (Pmode, 1);
12950 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12951 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12953 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12954 if (flag_pic)
12956 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12957 op[4] = gen_rtx_CONST (Pmode, op[4]);
12960 if (flag_record_mcount)
12961 fprintf (file, "1:\n");
12963 if (flag_fentry)
12965 if (flag_nop_mcount)
12966 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12967 else if (cfun->static_chain_decl)
12968 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12969 "with %<-mfentry%> on s390");
12970 else
12971 output_asm_insn ("brasl\t0,%4", op);
12973 else if (TARGET_64BIT)
12975 if (flag_nop_mcount)
12976 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12977 /* brasl */ 3 + /* lg */ 3);
12978 else
12980 output_asm_insn ("stg\t%0,%1", op);
12981 if (flag_dwarf2_cfi_asm)
12982 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12983 output_asm_insn ("larl\t%2,%3", op);
12984 output_asm_insn ("brasl\t%0,%4", op);
12985 output_asm_insn ("lg\t%0,%1", op);
12986 if (flag_dwarf2_cfi_asm)
12987 output_asm_insn (".cfi_restore\t%0", op);
12990 else
12992 if (flag_nop_mcount)
12993 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12994 /* brasl */ 3 + /* l */ 2);
12995 else
12997 output_asm_insn ("st\t%0,%1", op);
12998 if (flag_dwarf2_cfi_asm)
12999 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
13000 output_asm_insn ("larl\t%2,%3", op);
13001 output_asm_insn ("brasl\t%0,%4", op);
13002 output_asm_insn ("l\t%0,%1", op);
13003 if (flag_dwarf2_cfi_asm)
13004 output_asm_insn (".cfi_restore\t%0", op);
13008 if (flag_record_mcount)
13010 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13011 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13012 fprintf (file, "\t.previous\n");
13016 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13017 into its SYMBOL_REF_FLAGS. */
13019 static void
13020 s390_encode_section_info (tree decl, rtx rtl, int first)
13022 default_encode_section_info (decl, rtl, first);
13024 if (TREE_CODE (decl) == VAR_DECL)
13026 /* Store the alignment to be able to check if we can use
13027 a larl/load-relative instruction. We only handle the cases
13028 that can go wrong (i.e. no FUNC_DECLs). */
13029 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13030 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13031 else if (DECL_ALIGN (decl) % 32)
13032 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13033 else if (DECL_ALIGN (decl) % 64)
13034 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13037 /* Literal pool references don't have a decl so they are handled
13038 differently here. We rely on the information in the MEM_ALIGN
13039 entry to decide upon the alignment. */
13040 if (MEM_P (rtl)
13041 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13042 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13044 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13045 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13046 else if (MEM_ALIGN (rtl) % 32)
13047 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13048 else if (MEM_ALIGN (rtl) % 64)
13049 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13053 /* Output thunk to FILE that implements a C++ virtual function call (with
13054 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13055 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13056 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13057 relative to the resulting this pointer. */
13059 static void
13060 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13061 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13062 tree function)
13064 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13065 rtx op[10];
13066 int nonlocal = 0;
13068 assemble_start_function (thunk, fnname);
13069 /* Make sure unwind info is emitted for the thunk if needed. */
13070 final_start_function (emit_barrier (), file, 1);
13072 /* Operand 0 is the target function. */
13073 op[0] = XEXP (DECL_RTL (function), 0);
13074 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13076 nonlocal = 1;
13077 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13078 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13079 op[0] = gen_rtx_CONST (Pmode, op[0]);
13082 /* Operand 1 is the 'this' pointer. */
13083 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13084 op[1] = gen_rtx_REG (Pmode, 3);
13085 else
13086 op[1] = gen_rtx_REG (Pmode, 2);
13088 /* Operand 2 is the delta. */
13089 op[2] = GEN_INT (delta);
13091 /* Operand 3 is the vcall_offset. */
13092 op[3] = GEN_INT (vcall_offset);
13094 /* Operand 4 is the temporary register. */
13095 op[4] = gen_rtx_REG (Pmode, 1);
13097 /* Operands 5 to 8 can be used as labels. */
13098 op[5] = NULL_RTX;
13099 op[6] = NULL_RTX;
13100 op[7] = NULL_RTX;
13101 op[8] = NULL_RTX;
13103 /* Operand 9 can be used for temporary register. */
13104 op[9] = NULL_RTX;
13106 /* Generate code. */
13107 if (TARGET_64BIT)
13109 /* Setup literal pool pointer if required. */
13110 if ((!DISP_IN_RANGE (delta)
13111 && !CONST_OK_FOR_K (delta)
13112 && !CONST_OK_FOR_Os (delta))
13113 || (!DISP_IN_RANGE (vcall_offset)
13114 && !CONST_OK_FOR_K (vcall_offset)
13115 && !CONST_OK_FOR_Os (vcall_offset)))
13117 op[5] = gen_label_rtx ();
13118 output_asm_insn ("larl\t%4,%5", op);
13121 /* Add DELTA to this pointer. */
13122 if (delta)
13124 if (CONST_OK_FOR_J (delta))
13125 output_asm_insn ("la\t%1,%2(%1)", op);
13126 else if (DISP_IN_RANGE (delta))
13127 output_asm_insn ("lay\t%1,%2(%1)", op);
13128 else if (CONST_OK_FOR_K (delta))
13129 output_asm_insn ("aghi\t%1,%2", op);
13130 else if (CONST_OK_FOR_Os (delta))
13131 output_asm_insn ("agfi\t%1,%2", op);
13132 else
13134 op[6] = gen_label_rtx ();
13135 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13139 /* Perform vcall adjustment. */
13140 if (vcall_offset)
13142 if (DISP_IN_RANGE (vcall_offset))
13144 output_asm_insn ("lg\t%4,0(%1)", op);
13145 output_asm_insn ("ag\t%1,%3(%4)", op);
13147 else if (CONST_OK_FOR_K (vcall_offset))
13149 output_asm_insn ("lghi\t%4,%3", op);
13150 output_asm_insn ("ag\t%4,0(%1)", op);
13151 output_asm_insn ("ag\t%1,0(%4)", op);
13153 else if (CONST_OK_FOR_Os (vcall_offset))
13155 output_asm_insn ("lgfi\t%4,%3", op);
13156 output_asm_insn ("ag\t%4,0(%1)", op);
13157 output_asm_insn ("ag\t%1,0(%4)", op);
13159 else
13161 op[7] = gen_label_rtx ();
13162 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13163 output_asm_insn ("ag\t%4,0(%1)", op);
13164 output_asm_insn ("ag\t%1,0(%4)", op);
13168 /* Jump to target. */
13169 output_asm_insn ("jg\t%0", op);
13171 /* Output literal pool if required. */
13172 if (op[5])
13174 output_asm_insn (".align\t4", op);
13175 targetm.asm_out.internal_label (file, "L",
13176 CODE_LABEL_NUMBER (op[5]));
13178 if (op[6])
13180 targetm.asm_out.internal_label (file, "L",
13181 CODE_LABEL_NUMBER (op[6]));
13182 output_asm_insn (".long\t%2", op);
13184 if (op[7])
13186 targetm.asm_out.internal_label (file, "L",
13187 CODE_LABEL_NUMBER (op[7]));
13188 output_asm_insn (".long\t%3", op);
13191 else
13193 /* Setup base pointer if required. */
13194 if (!vcall_offset
13195 || (!DISP_IN_RANGE (delta)
13196 && !CONST_OK_FOR_K (delta)
13197 && !CONST_OK_FOR_Os (delta))
13198 || (!DISP_IN_RANGE (delta)
13199 && !CONST_OK_FOR_K (vcall_offset)
13200 && !CONST_OK_FOR_Os (vcall_offset)))
13202 op[5] = gen_label_rtx ();
13203 output_asm_insn ("basr\t%4,0", op);
13204 targetm.asm_out.internal_label (file, "L",
13205 CODE_LABEL_NUMBER (op[5]));
13208 /* Add DELTA to this pointer. */
13209 if (delta)
13211 if (CONST_OK_FOR_J (delta))
13212 output_asm_insn ("la\t%1,%2(%1)", op);
13213 else if (DISP_IN_RANGE (delta))
13214 output_asm_insn ("lay\t%1,%2(%1)", op);
13215 else if (CONST_OK_FOR_K (delta))
13216 output_asm_insn ("ahi\t%1,%2", op);
13217 else if (CONST_OK_FOR_Os (delta))
13218 output_asm_insn ("afi\t%1,%2", op);
13219 else
13221 op[6] = gen_label_rtx ();
13222 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13226 /* Perform vcall adjustment. */
13227 if (vcall_offset)
13229 if (CONST_OK_FOR_J (vcall_offset))
13231 output_asm_insn ("l\t%4,0(%1)", op);
13232 output_asm_insn ("a\t%1,%3(%4)", op);
13234 else if (DISP_IN_RANGE (vcall_offset))
13236 output_asm_insn ("l\t%4,0(%1)", op);
13237 output_asm_insn ("ay\t%1,%3(%4)", op);
13239 else if (CONST_OK_FOR_K (vcall_offset))
13241 output_asm_insn ("lhi\t%4,%3", op);
13242 output_asm_insn ("a\t%4,0(%1)", op);
13243 output_asm_insn ("a\t%1,0(%4)", op);
13245 else if (CONST_OK_FOR_Os (vcall_offset))
13247 output_asm_insn ("iilf\t%4,%3", op);
13248 output_asm_insn ("a\t%4,0(%1)", op);
13249 output_asm_insn ("a\t%1,0(%4)", op);
13251 else
13253 op[7] = gen_label_rtx ();
13254 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13255 output_asm_insn ("a\t%4,0(%1)", op);
13256 output_asm_insn ("a\t%1,0(%4)", op);
13259 /* We had to clobber the base pointer register.
13260 Re-setup the base pointer (with a different base). */
13261 op[5] = gen_label_rtx ();
13262 output_asm_insn ("basr\t%4,0", op);
13263 targetm.asm_out.internal_label (file, "L",
13264 CODE_LABEL_NUMBER (op[5]));
13267 /* Jump to target. */
13268 op[8] = gen_label_rtx ();
13270 if (!flag_pic)
13271 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13272 else if (!nonlocal)
13273 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13274 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13275 else if (flag_pic == 1)
13277 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13278 output_asm_insn ("l\t%4,%0(%4)", op);
13280 else if (flag_pic == 2)
13282 op[9] = gen_rtx_REG (Pmode, 0);
13283 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13284 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13285 output_asm_insn ("ar\t%4,%9", op);
13286 output_asm_insn ("l\t%4,0(%4)", op);
13289 output_asm_insn ("br\t%4", op);
13291 /* Output literal pool. */
13292 output_asm_insn (".align\t4", op);
13294 if (nonlocal && flag_pic == 2)
13295 output_asm_insn (".long\t%0", op);
13296 if (nonlocal)
13298 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13299 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13302 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13303 if (!flag_pic)
13304 output_asm_insn (".long\t%0", op);
13305 else
13306 output_asm_insn (".long\t%0-%5", op);
13308 if (op[6])
13310 targetm.asm_out.internal_label (file, "L",
13311 CODE_LABEL_NUMBER (op[6]));
13312 output_asm_insn (".long\t%2", op);
13314 if (op[7])
13316 targetm.asm_out.internal_label (file, "L",
13317 CODE_LABEL_NUMBER (op[7]));
13318 output_asm_insn (".long\t%3", op);
13321 final_end_function ();
13322 assemble_end_function (thunk, fnname);
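/* Conceptually the emitted thunk performs the following adjustment before
   jumping to FUNCTION (editorial C-like sketch, not part of the original
   source):

     this = (char *) this + delta;
     if (vcall_offset)
       this = (char *) this + *(long *) (*(char **) this + vcall_offset);
     goto *function;
*/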
13325 /* Output either an indirect jump or an indirect call
13326 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13327 using a branch trampoline disabling branch target prediction. */
13329 void
13330 s390_indirect_branch_via_thunk (unsigned int regno,
13331 unsigned int return_addr_regno,
13332 rtx comparison_operator,
13333 enum s390_indirect_branch_type type)
13335 enum s390_indirect_branch_option option;
13337 if (type == s390_indirect_branch_type_return)
13339 if (s390_return_addr_from_memory ())
13340 option = s390_opt_function_return_mem;
13341 else
13342 option = s390_opt_function_return_reg;
13344 else if (type == s390_indirect_branch_type_jump)
13345 option = s390_opt_indirect_branch_jump;
13346 else if (type == s390_indirect_branch_type_call)
13347 option = s390_opt_indirect_branch_call;
13348 else
13349 gcc_unreachable ();
13351 if (TARGET_INDIRECT_BRANCH_TABLE)
13353 char label[32];
13355 ASM_GENERATE_INTERNAL_LABEL (label,
13356 indirect_branch_table_label[option],
13357 indirect_branch_table_label_no[option]++);
13358 ASM_OUTPUT_LABEL (asm_out_file, label);
13361 if (return_addr_regno != INVALID_REGNUM)
13363 gcc_assert (comparison_operator == NULL_RTX);
13364 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13366 else
13368 fputs (" \tjg", asm_out_file);
13369 if (comparison_operator != NULL_RTX)
13370 print_operand (asm_out_file, comparison_operator, 'C');
13372 fputs ("\t", asm_out_file);
13375 if (TARGET_CPU_Z10)
13376 fprintf (asm_out_file,
13377 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13378 regno);
13379 else
13380 fprintf (asm_out_file,
13381 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13382 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13384 if ((option == s390_opt_indirect_branch_jump
13385 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13386 || (option == s390_opt_indirect_branch_call
13387 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13388 || (option == s390_opt_function_return_reg
13389 && cfun->machine->function_return_reg == indirect_branch_thunk)
13390 || (option == s390_opt_function_return_mem
13391 && cfun->machine->function_return_mem == indirect_branch_thunk))
13393 if (TARGET_CPU_Z10)
13394 indirect_branch_z10thunk_mask |= (1 << regno);
13395 else
13396 indirect_branch_prez10thunk_mask |= (1 << regno);
13400 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13401 either be an address register or a label pointing to the location
13402 of the jump instruction. */
13404 void
13405 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13407 if (TARGET_INDIRECT_BRANCH_TABLE)
13409 char label[32];
13411 ASM_GENERATE_INTERNAL_LABEL (label,
13412 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13413 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13414 ASM_OUTPUT_LABEL (asm_out_file, label);
13417 if (!TARGET_ZARCH)
13418 fputs ("\t.machinemode zarch\n", asm_out_file);
13420 if (REG_P (execute_target))
13421 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13422 else
13423 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13425 if (!TARGET_ZARCH)
13426 fputs ("\t.machinemode esa\n", asm_out_file);
13428 fputs ("0:\tj\t0b\n", asm_out_file);
13431 static bool
13432 s390_valid_pointer_mode (scalar_int_mode mode)
13434 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13437 /* Checks whether the given CALL_EXPR would use a caller
13438 saved register. This is used to decide whether sibling call
13439 optimization could be performed on the respective function
13440 call. */
13442 static bool
13443 s390_call_saved_register_used (tree call_expr)
13445 CUMULATIVE_ARGS cum_v;
13446 cumulative_args_t cum;
13447 tree parameter;
13448 rtx parm_rtx;
13449 int reg, i;
13451 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13452 cum = pack_cumulative_args (&cum_v);
13454 for (i = 0; i < call_expr_nargs (call_expr); i++)
13456 parameter = CALL_EXPR_ARG (call_expr, i);
13457 gcc_assert (parameter);
13459 /* For an undeclared variable passed as parameter we will get
13460 an ERROR_MARK node here. */
13461 if (TREE_CODE (parameter) == ERROR_MARK)
13462 return true;
13464 /* We assume that in the target function all parameters are
13465 named. This only has an impact on vector argument register
13466 usage, none of which is call-saved. */
13467 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13468 apply_pass_by_reference_rules (&cum_v, arg);
13470 parm_rtx = s390_function_arg (cum, arg);
13472 s390_function_arg_advance (cum, arg);
13474 if (!parm_rtx)
13475 continue;
13477 if (REG_P (parm_rtx))
13479 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13480 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13481 return true;
13484 if (GET_CODE (parm_rtx) == PARALLEL)
13486 int i;
13488 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13490 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13492 gcc_assert (REG_P (r));
13494 for (reg = 0; reg < REG_NREGS (r); reg++)
13495 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13496 return true;
13501 return false;
13504 /* Return true if the given call expression can be
13505 turned into a sibling call.
13506 DECL holds the declaration of the function to be called whereas
13507 EXP is the call expression itself. */
13509 static bool
13510 s390_function_ok_for_sibcall (tree decl, tree exp)
13512 /* The TPF epilogue uses register 1. */
13513 if (TARGET_TPF_PROFILING)
13514 return false;
13516 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13517 which would have to be restored before the sibcall. */
13518 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13519 return false;
13521 /* The thunks for indirect branches require r1 if no exrl is
13522 available. r1 might not be available when doing a sibling
13523 call. */
13524 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13525 && !TARGET_CPU_Z10
13526 && !decl)
13527 return false;
13529 /* Register 6 on s390 is available as an argument register but unfortunately
13530 "caller saved". This makes functions needing this register for arguments
13531 not suitable for sibcalls. */
13532 return !s390_call_saved_register_used (exp);
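/* Editorial illustration of the register 6 restriction above (hypothetical
   declarations):

     extern void callee (long, long, long, long, long);
     void caller (void) { callee (1, 2, 3, 4, 5); }

   The fifth argument lands in %r6, which is call-saved, so
   s390_call_saved_register_used returns true and the tail call is not
   performed.  */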
13535 /* Return the fixed registers used for condition codes. */
13537 static bool
13538 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13540 *p1 = CC_REGNUM;
13541 *p2 = INVALID_REGNUM;
13543 return true;
13546 /* This function is used by the call expanders of the machine description.
13547 It emits the call insn itself together with the necessary operations
13548 to adjust the target address and returns the emitted insn.
13549 ADDR_LOCATION is the target address rtx
13550 TLS_CALL the location of the thread-local symbol
13551 RESULT_REG the register where the result of the call should be stored
13552 RETADDR_REG the register where the return address should be stored
13553 If this parameter is NULL_RTX the call is considered
13554 to be a sibling call. */
13556 rtx_insn *
13557 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13558 rtx retaddr_reg)
13560 bool plt_call = false;
13561 rtx_insn *insn;
13562 rtx vec[4] = { NULL_RTX };
13563 int elts = 0;
13564 rtx *call = &vec[0];
13565 rtx *clobber_ret_reg = &vec[1];
13566 rtx *use = &vec[2];
13567 rtx *clobber_thunk_reg = &vec[3];
13568 int i;
13570 /* Direct function calls need special treatment. */
13571 if (GET_CODE (addr_location) == SYMBOL_REF)
13573 /* When calling a global routine in PIC mode, we must
13574 replace the symbol itself with the PLT stub. */
13575 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13577 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13579 addr_location = gen_rtx_UNSPEC (Pmode,
13580 gen_rtvec (1, addr_location),
13581 UNSPEC_PLT);
13582 addr_location = gen_rtx_CONST (Pmode, addr_location);
13583 plt_call = true;
13585 else
13586 /* For -fpic code the PLT entries might use r12 which is
13587 call-saved. Therefore we cannot do a sibcall when
13588 calling directly using a symbol ref. When reaching
13589 this point we decided (in s390_function_ok_for_sibcall)
13590 to do a sibcall for a function pointer but one of the
13591 optimizers was able to get rid of the function pointer
13592 by propagating the symbol ref into the call. This
13593 optimization is illegal for S/390, so we turn the direct
13594 call into an indirect call again. */
13595 addr_location = force_reg (Pmode, addr_location);
13599 /* If it is already an indirect call or the code above moved the
13600 SYMBOL_REF to somewhere else make sure the address can be found in
13601 register 1. */
13602 if (retaddr_reg == NULL_RTX
13603 && GET_CODE (addr_location) != SYMBOL_REF
13604 && !plt_call)
13606 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13607 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13610 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13611 && GET_CODE (addr_location) != SYMBOL_REF
13612 && !plt_call)
13614 /* Indirect branch thunks require the target to be a single GPR. */
13615 addr_location = force_reg (Pmode, addr_location);
13617 /* Without exrl the indirect branch thunks need an additional
13618 register for larl;ex. */
13619 if (!TARGET_CPU_Z10)
13621 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13622 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13626 addr_location = gen_rtx_MEM (QImode, addr_location);
13627 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13629 if (result_reg != NULL_RTX)
13630 *call = gen_rtx_SET (result_reg, *call);
13632 if (retaddr_reg != NULL_RTX)
13634 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13636 if (tls_call != NULL_RTX)
13637 *use = gen_rtx_USE (VOIDmode, tls_call);
13641 for (i = 0; i < 4; i++)
13642 if (vec[i] != NULL_RTX)
13643 elts++;
13645 if (elts > 1)
13647 rtvec v;
13648 int e = 0;
13650 v = rtvec_alloc (elts);
13651 for (i = 0; i < 4; i++)
13652 if (vec[i] != NULL_RTX)
13654 RTVEC_ELT (v, e) = vec[i];
13655 e++;
13658 *call = gen_rtx_PARALLEL (VOIDmode, v);
13661 insn = emit_call_insn (*call);
13663 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13664 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13666 /* s390_function_ok_for_sibcall should
13667 have denied sibcalls in this case. */
13668 gcc_assert (retaddr_reg != NULL_RTX);
13669 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13671 return insn;
13674 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13676 static void
13677 s390_conditional_register_usage (void)
13679 int i;
13681 if (flag_pic)
13682 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13683 fixed_regs[BASE_REGNUM] = 0;
13684 fixed_regs[RETURN_REGNUM] = 0;
13685 if (TARGET_64BIT)
13687 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13688 call_used_regs[i] = 0;
13690 else
13692 call_used_regs[FPR4_REGNUM] = 0;
13693 call_used_regs[FPR6_REGNUM] = 0;
13696 if (TARGET_SOFT_FLOAT)
13698 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13699 fixed_regs[i] = 1;
13702 /* Disable v16 - v31 for non-vector target. */
13703 if (!TARGET_VX)
13705 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13706 fixed_regs[i] = call_used_regs[i] = 1;
13710 /* Corresponding function to eh_return expander. */
13712 static GTY(()) rtx s390_tpf_eh_return_symbol;
13713 void
13714 s390_emit_tpf_eh_return (rtx target)
13716 rtx_insn *insn;
13717 rtx reg, orig_ra;
13719 if (!s390_tpf_eh_return_symbol)
13720 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13722 reg = gen_rtx_REG (Pmode, 2);
13723 orig_ra = gen_rtx_REG (Pmode, 3);
13725 emit_move_insn (reg, target);
13726 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13727 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13728 gen_rtx_REG (Pmode, RETURN_REGNUM));
13729 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13730 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13732 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13735 /* Rework the prologue/epilogue to avoid saving/restoring
13736 registers unnecessarily. */
13738 static void
13739 s390_optimize_prologue (void)
13741 rtx_insn *insn, *new_insn, *next_insn;
13743 /* Do a final recompute of the frame-related data. */
13744 s390_optimize_register_info ();
13746 /* If all special registers are in fact used, there's nothing we
13747 can do, so no point in walking the insn list. */
13749 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13750 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13751 return;
13753 /* Search for prologue/epilogue insns and replace them. */
13754 for (insn = get_insns (); insn; insn = next_insn)
13756 int first, last, off;
13757 rtx set, base, offset;
13758 rtx pat;
13760 next_insn = NEXT_INSN (insn);
13762 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13763 continue;
13765 pat = PATTERN (insn);
13767 /* Remove ldgr/lgdr instructions used for saving and restoring
13768 GPRs if possible. */
13769 if (TARGET_Z10)
13771 rtx tmp_pat = pat;
13773 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13774 tmp_pat = XVECEXP (pat, 0, 0);
13776 if (GET_CODE (tmp_pat) == SET
13777 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13778 && REG_P (SET_SRC (tmp_pat))
13779 && REG_P (SET_DEST (tmp_pat)))
13781 int src_regno = REGNO (SET_SRC (tmp_pat));
13782 int dest_regno = REGNO (SET_DEST (tmp_pat));
13783 int gpr_regno;
13784 int fpr_regno;
13786 if (!((GENERAL_REGNO_P (src_regno)
13787 && FP_REGNO_P (dest_regno))
13788 || (FP_REGNO_P (src_regno)
13789 && GENERAL_REGNO_P (dest_regno))))
13790 continue;
13792 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13793 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13795 /* GPR must be call-saved, FPR must be call-clobbered. */
13796 if (!call_used_regs[fpr_regno]
13797 || call_used_regs[gpr_regno])
13798 continue;
13800 /* It must not happen that what we once saved in an FPR now
13801 needs a stack slot. */
13802 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13804 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13806 remove_insn (insn);
13807 continue;
13812 if (GET_CODE (pat) == PARALLEL
13813 && store_multiple_operation (pat, VOIDmode))
13815 set = XVECEXP (pat, 0, 0);
13816 first = REGNO (SET_SRC (set));
13817 last = first + XVECLEN (pat, 0) - 1;
13818 offset = const0_rtx;
13819 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13820 off = INTVAL (offset);
13822 if (GET_CODE (base) != REG || off < 0)
13823 continue;
13824 if (cfun_frame_layout.first_save_gpr != -1
13825 && (cfun_frame_layout.first_save_gpr < first
13826 || cfun_frame_layout.last_save_gpr > last))
13827 continue;
13828 if (REGNO (base) != STACK_POINTER_REGNUM
13829 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13830 continue;
13831 if (first > BASE_REGNUM || last < BASE_REGNUM)
13832 continue;
13834 if (cfun_frame_layout.first_save_gpr != -1)
13836 rtx s_pat = save_gprs (base,
13837 off + (cfun_frame_layout.first_save_gpr
13838 - first) * UNITS_PER_LONG,
13839 cfun_frame_layout.first_save_gpr,
13840 cfun_frame_layout.last_save_gpr);
13841 new_insn = emit_insn_before (s_pat, insn);
13842 INSN_ADDRESSES_NEW (new_insn, -1);
13845 remove_insn (insn);
13846 continue;
13849 if (cfun_frame_layout.first_save_gpr == -1
13850 && GET_CODE (pat) == SET
13851 && GENERAL_REG_P (SET_SRC (pat))
13852 && GET_CODE (SET_DEST (pat)) == MEM)
13854 set = pat;
13855 first = REGNO (SET_SRC (set));
13856 offset = const0_rtx;
13857 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13858 off = INTVAL (offset);
13860 if (GET_CODE (base) != REG || off < 0)
13861 continue;
13862 if (REGNO (base) != STACK_POINTER_REGNUM
13863 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13864 continue;
13866 remove_insn (insn);
13867 continue;
13870 if (GET_CODE (pat) == PARALLEL
13871 && load_multiple_operation (pat, VOIDmode))
13873 set = XVECEXP (pat, 0, 0);
13874 first = REGNO (SET_DEST (set));
13875 last = first + XVECLEN (pat, 0) - 1;
13876 offset = const0_rtx;
13877 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13878 off = INTVAL (offset);
13880 if (GET_CODE (base) != REG || off < 0)
13881 continue;
13883 if (cfun_frame_layout.first_restore_gpr != -1
13884 && (cfun_frame_layout.first_restore_gpr < first
13885 || cfun_frame_layout.last_restore_gpr > last))
13886 continue;
13887 if (REGNO (base) != STACK_POINTER_REGNUM
13888 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13889 continue;
13890 if (first > BASE_REGNUM || last < BASE_REGNUM)
13891 continue;
13893 if (cfun_frame_layout.first_restore_gpr != -1)
13895 rtx rpat = restore_gprs (base,
13896 off + (cfun_frame_layout.first_restore_gpr
13897 - first) * UNITS_PER_LONG,
13898 cfun_frame_layout.first_restore_gpr,
13899 cfun_frame_layout.last_restore_gpr);
13901 /* Remove REG_CFA_RESTOREs for registers that we no
13902 longer need to save. */
13903 REG_NOTES (rpat) = REG_NOTES (insn);
13904 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13905 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13906 && ((int) REGNO (XEXP (*ptr, 0))
13907 < cfun_frame_layout.first_restore_gpr))
13908 *ptr = XEXP (*ptr, 1);
13909 else
13910 ptr = &XEXP (*ptr, 1);
13911 new_insn = emit_insn_before (rpat, insn);
13912 RTX_FRAME_RELATED_P (new_insn) = 1;
13913 INSN_ADDRESSES_NEW (new_insn, -1);
13916 remove_insn (insn);
13917 continue;
13920 if (cfun_frame_layout.first_restore_gpr == -1
13921 && GET_CODE (pat) == SET
13922 && GENERAL_REG_P (SET_DEST (pat))
13923 && GET_CODE (SET_SRC (pat)) == MEM)
13925 set = pat;
13926 first = REGNO (SET_DEST (set));
13927 offset = const0_rtx;
13928 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13929 off = INTVAL (offset);
13931 if (GET_CODE (base) != REG || off < 0)
13932 continue;
13934 if (REGNO (base) != STACK_POINTER_REGNUM
13935 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13936 continue;
13938 remove_insn (insn);
13939 continue;
13944 /* On z10 and later the dynamic branch prediction must see the
13945 backward jump within a certain window. If not, it falls back to
13946 the static prediction. This function rearranges the loop backward
13947 branch in a way which makes the static prediction always correct.
13948 The function returns true if it added an instruction. */
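/* For illustration (schematic only, registers and labels are made up):
   a far backward conditional branch

	loop:	...
		jump_if (cond) -> loop

   is rewritten below into a short forward conditional branch around an
   unconditional backward jump, which the static predictor handles
   correctly:

		jump_if (!cond) -> skip
		jump -> loop
	skip:	...  */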
13949 static bool
13950 s390_fix_long_loop_prediction (rtx_insn *insn)
13952 rtx set = single_set (insn);
13953 rtx code_label, label_ref;
13954 rtx_insn *uncond_jump;
13955 rtx_insn *cur_insn;
13956 rtx tmp;
13957 int distance;
13959 /* This will exclude branch on count and branch on index patterns
13960 since these are correctly statically predicted.
13962 The additional check for a PARALLEL is required here since
13963 single_set might be != NULL for PARALLELs where the set of the
13964 iteration variable is dead. */
13965 if (GET_CODE (PATTERN (insn)) == PARALLEL
13966 || !set
13967 || SET_DEST (set) != pc_rtx
13968 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13969 return false;
13971 /* Skip conditional returns. */
13972 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13973 && XEXP (SET_SRC (set), 2) == pc_rtx)
13974 return false;
13976 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13977 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13979 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13981 code_label = XEXP (label_ref, 0);
13983 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13984 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13985 || (INSN_ADDRESSES (INSN_UID (insn))
13986 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13987 return false;
13989 for (distance = 0, cur_insn = PREV_INSN (insn);
13990 distance < PREDICT_DISTANCE - 6;
13991 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13992 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13993 return false;
13995 rtx_code_label *new_label = gen_label_rtx ();
13996 uncond_jump = emit_jump_insn_after (
13997 gen_rtx_SET (pc_rtx,
13998 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13999 insn);
14000 emit_label_after (new_label, uncond_jump);
14002 tmp = XEXP (SET_SRC (set), 1);
14003 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14004 XEXP (SET_SRC (set), 2) = tmp;
14005 INSN_CODE (insn) = -1;
14007 XEXP (label_ref, 0) = new_label;
14008 JUMP_LABEL (insn) = new_label;
14009 JUMP_LABEL (uncond_jump) = code_label;
14011 return true;
14014 /* Returns 1 if INSN reads the value of REG for purposes not related
14015 to addressing of memory, and 0 otherwise. */
14016 static int
14017 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14019 return reg_referenced_p (reg, PATTERN (insn))
14020 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14023 /* Starting from INSN find_cond_jump looks downwards in the insn
14024 stream for a single jump insn which is the last user of the
14025 condition code set in INSN. */
14026 static rtx_insn *
14027 find_cond_jump (rtx_insn *insn)
14029 for (; insn; insn = NEXT_INSN (insn))
14031 rtx ite, cc;
14033 if (LABEL_P (insn))
14034 break;
14036 if (!JUMP_P (insn))
14038 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14039 break;
14040 continue;
14043 /* This will be triggered by a return. */
14044 if (GET_CODE (PATTERN (insn)) != SET)
14045 break;
14047 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14048 ite = SET_SRC (PATTERN (insn));
14050 if (GET_CODE (ite) != IF_THEN_ELSE)
14051 break;
14053 cc = XEXP (XEXP (ite, 0), 0);
14054 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14055 break;
14057 if (find_reg_note (insn, REG_DEAD, cc))
14058 return insn;
14059 break;
14062 return NULL;
14065 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14066 the semantics does not change. If NULL_RTX is passed as COND the
14067 function tries to find the conditional jump starting with INSN. */
14068 static void
14069 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14071 rtx tmp = *op0;
14073 if (cond == NULL_RTX)
14075 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14076 rtx set = jump ? single_set (jump) : NULL_RTX;
14078 if (set == NULL_RTX)
14079 return;
14081 cond = XEXP (SET_SRC (set), 0);
14084 *op0 = *op1;
14085 *op1 = tmp;
14086 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14089 /* On z10, instructions of the compare-and-branch family have the
14090 property of accessing the register occurring as second operand with
14091 its bits complemented. If such a compare is grouped with a second
14092 instruction that accesses the same register non-complemented, and
14093 if that register's value is delivered via a bypass, then the
14094 pipeline recycles, thereby causing significant performance decline.
14095 This function locates such situations and exchanges the two
14096 operands of the compare. The function returns true whenever it
14097 added an insn. */
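/* Sketch of the two remedies applied below (mnemonics and registers are
   only illustrative):

	cr<branch>  %r1,%r2	; %r2 accessed complemented
	ar          %r3,%r2	; %r2 accessed normally -> pipeline recycle

   Either the compare operands (and the comparison code) are swapped so
   that the conflicting register moves into the first operand slot, or,
   if both operand orders would conflict, a register-to-register NOP is
   emitted right after the compare to keep the two insns apart.  */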
14098 static bool
14099 s390_z10_optimize_cmp (rtx_insn *insn)
14101 rtx_insn *prev_insn, *next_insn;
14102 bool insn_added_p = false;
14103 rtx cond, *op0, *op1;
14105 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14107 /* Handle compare and branch and branch on count
14108 instructions. */
14109 rtx pattern = single_set (insn);
14111 if (!pattern
14112 || SET_DEST (pattern) != pc_rtx
14113 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14114 return false;
14116 cond = XEXP (SET_SRC (pattern), 0);
14117 op0 = &XEXP (cond, 0);
14118 op1 = &XEXP (cond, 1);
14120 else if (GET_CODE (PATTERN (insn)) == SET)
14122 rtx src, dest;
14124 /* Handle normal compare instructions. */
14125 src = SET_SRC (PATTERN (insn));
14126 dest = SET_DEST (PATTERN (insn));
14128 if (!REG_P (dest)
14129 || !CC_REGNO_P (REGNO (dest))
14130 || GET_CODE (src) != COMPARE)
14131 return false;
14133 /* s390_swap_cmp will try to find the conditional
14134 jump when passing NULL_RTX as condition. */
14135 cond = NULL_RTX;
14136 op0 = &XEXP (src, 0);
14137 op1 = &XEXP (src, 1);
14139 else
14140 return false;
14142 if (!REG_P (*op0) || !REG_P (*op1))
14143 return false;
14145 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14146 return false;
14148 /* Swap the COMPARE arguments and its mask if there is a
14149 conflicting access in the previous insn. */
14150 prev_insn = prev_active_insn (insn);
14151 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14152 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14153 s390_swap_cmp (cond, op0, op1, insn);
14155 /* Check if there is a conflict with the next insn. If there
14156 was no conflict with the previous insn, then swap the
14157 COMPARE arguments and its mask. If we already swapped
14158 the operands, or if swapping them would cause a conflict
14159 with the previous insn, issue a NOP after the COMPARE in
14160 order to separate the two instructions. */
14161 next_insn = next_active_insn (insn);
14162 if (next_insn != NULL_RTX && INSN_P (next_insn)
14163 && s390_non_addr_reg_read_p (*op1, next_insn))
14165 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14166 && s390_non_addr_reg_read_p (*op0, prev_insn))
14168 if (REGNO (*op1) == 0)
14169 emit_insn_after (gen_nop_lr1 (), insn);
14170 else
14171 emit_insn_after (gen_nop_lr0 (), insn);
14172 insn_added_p = true;
14174 else
14175 s390_swap_cmp (cond, op0, op1, insn);
14177 return insn_added_p;
14180 /* Number of INSNs to be scanned backward in the last BB of the loop
14181 and forward in the first BB of the loop. This usually should be a
14182 bit more than the number of INSNs which could go into one
14183 group. */
14184 #define S390_OSC_SCAN_INSN_NUM 5
14186 /* Scan LOOP for static OSC collisions and return true if an osc_break
14187 should be issued for this loop. */
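/* For illustration (pseudo insns, hypothetical registers): a latch that
   ends with

	st   %r2,0(%r5)		; store via base %r5

   and a header that starts with

	ahi  %r5,8		; %r5 modified
	l    %r3,0(%r5)		; load from the same base/index/disp

   is flagged, because an address register changed between the store and
   the load, so the hardware cannot rule out an overlap early.  */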
14188 static bool
14189 s390_adjust_loop_scan_osc (struct loop* loop)
14192 HARD_REG_SET modregs, newregs;
14193 rtx_insn *insn, *store_insn = NULL;
14194 rtx set;
14195 struct s390_address addr_store, addr_load;
14196 subrtx_iterator::array_type array;
14197 int insn_count;
14199 CLEAR_HARD_REG_SET (modregs);
14201 insn_count = 0;
14202 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14204 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14205 continue;
14207 insn_count++;
14208 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14209 return false;
14211 find_all_hard_reg_sets (insn, &newregs, true);
14212 modregs |= newregs;
14214 set = single_set (insn);
14215 if (!set)
14216 continue;
14218 if (MEM_P (SET_DEST (set))
14219 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14221 store_insn = insn;
14222 break;
14226 if (store_insn == NULL_RTX)
14227 return false;
14229 insn_count = 0;
14230 FOR_BB_INSNS (loop->header, insn)
14232 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14233 continue;
14235 if (insn == store_insn)
14236 return false;
14238 insn_count++;
14239 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14240 return false;
14242 find_all_hard_reg_sets (insn, &newregs, true);
14243 modregs |= newregs;
14245 set = single_set (insn);
14246 if (!set)
14247 continue;
14249 /* An intermediate store disrupts static OSC checking
14250 anyway. */
14251 if (MEM_P (SET_DEST (set))
14252 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14253 return false;
14255 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14256 if (MEM_P (*iter)
14257 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14258 && rtx_equal_p (addr_load.base, addr_store.base)
14259 && rtx_equal_p (addr_load.indx, addr_store.indx)
14260 && rtx_equal_p (addr_load.disp, addr_store.disp))
14262 if ((addr_load.base != NULL_RTX
14263 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14264 || (addr_load.indx != NULL_RTX
14265 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14266 return true;
14269 return false;
14272 /* Look for adjustments which can be done on simple innermost
14273 loops. */
14274 static void
14275 s390_adjust_loops ()
14277 struct loop *loop = NULL;
14279 df_analyze ();
14280 compute_bb_for_insn ();
14282 /* Find the loops. */
14283 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14285 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14287 if (dump_file)
14289 flow_loop_dump (loop, dump_file, NULL, 0);
14290 fprintf (dump_file, ";; OSC loop scan Loop: ");
14292 if (loop->latch == NULL
14293 || pc_set (BB_END (loop->latch)) == NULL_RTX
14294 || !s390_adjust_loop_scan_osc (loop))
14296 if (dump_file)
14298 if (loop->latch == NULL)
14299 fprintf (dump_file, " multiple backward jumps\n");
14300 else
14302 fprintf (dump_file, " header insn: %d latch insn: %d ",
14303 INSN_UID (BB_HEAD (loop->header)),
14304 INSN_UID (BB_END (loop->latch)));
14305 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14306 fprintf (dump_file, " loop does not end with jump\n");
14307 else
14308 fprintf (dump_file, " not instrumented\n");
14312 else
14314 rtx_insn *new_insn;
14316 if (dump_file)
14317 fprintf (dump_file, " adding OSC break insn: ");
14318 new_insn = emit_insn_before (gen_osc_break (),
14319 BB_END (loop->latch));
14320 INSN_ADDRESSES_NEW (new_insn, -1);
14324 loop_optimizer_finalize ();
14326 df_finish_pass (false);
14329 /* Perform machine-dependent processing. */
14331 static void
14332 s390_reorg (void)
14334 struct constant_pool *pool;
14335 rtx_insn *insn;
14336 int hw_before, hw_after;
14338 if (s390_tune == PROCESSOR_2964_Z13)
14339 s390_adjust_loops ();
14341 /* Make sure all splits have been performed; splits after
14342 machine_dependent_reorg might confuse insn length counts. */
14343 split_all_insns_noflow ();
14345 /* Install the main literal pool and the associated base
14346 register load insns. The literal pool might be > 4096 bytes in
14347 size, so that some of its elements cannot be directly accessed.
14349 To fix this, we split the single literal pool into multiple
14350 pool chunks, reloading the pool base register at various
14351 points throughout the function to ensure it always points to
14352 the pool chunk the following code expects. */
14354 /* Collect the literal pool. */
14355 pool = s390_mainpool_start ();
14356 if (pool)
14358 /* Finish up literal pool related changes. */
14359 s390_mainpool_finish (pool);
14361 else
14363 /* If literal pool overflowed, chunkify it. */
14364 pool = s390_chunkify_start ();
14365 s390_chunkify_finish (pool);
14368 /* Generate out-of-pool execute target insns. */
14369 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14371 rtx label;
14372 rtx_insn *target;
14374 label = s390_execute_label (insn);
14375 if (!label)
14376 continue;
14378 gcc_assert (label != const0_rtx);
14380 target = emit_label (XEXP (label, 0));
14381 INSN_ADDRESSES_NEW (target, -1);
14383 if (JUMP_P (insn))
14385 target = emit_jump_insn (s390_execute_target (insn));
14386 /* This is important in order to keep a table jump
14387 pointing at the jump table label. Only then is it
14388 recognized as a table jump. */
14389 JUMP_LABEL (target) = JUMP_LABEL (insn);
14391 else
14392 target = emit_insn (s390_execute_target (insn));
14393 INSN_ADDRESSES_NEW (target, -1);
14396 /* Try to optimize prologue and epilogue further. */
14397 s390_optimize_prologue ();
14399 /* Walk over the insns and do some >=z10 specific changes. */
14400 if (s390_tune >= PROCESSOR_2097_Z10)
14402 rtx_insn *insn;
14403 bool insn_added_p = false;
14405 /* The insn lengths and addresses have to be up to date for the
14406 following manipulations. */
14407 shorten_branches (get_insns ());
14409 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14411 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14412 continue;
14414 if (JUMP_P (insn))
14415 insn_added_p |= s390_fix_long_loop_prediction (insn);
14417 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14418 || GET_CODE (PATTERN (insn)) == SET)
14419 && s390_tune == PROCESSOR_2097_Z10)
14420 insn_added_p |= s390_z10_optimize_cmp (insn);
14423 /* Adjust branches if we added new instructions. */
14424 if (insn_added_p)
14425 shorten_branches (get_insns ());
14428 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14429 if (hw_after > 0)
14431 rtx_insn *insn;
14433 /* Insert NOPs for hotpatching. */
14434 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14435 /* Emit NOPs
14436 1. inside the area covered by debug information to allow setting
14437 breakpoints at the NOPs,
14438 2. before any insn which results in an asm instruction,
14439 3. before in-function labels to avoid jumping to the NOPs, for
14440 example as part of a loop,
14441 4. before any barrier in case the function is completely empty
14442 (__builtin_unreachable ()) and has neither internal labels nor
14443 active insns.  */
14445 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14446 break;
14447 /* Output a series of NOPs before the first active insn. */
14448 while (insn && hw_after > 0)
14450 if (hw_after >= 3)
14452 emit_insn_before (gen_nop_6_byte (), insn);
14453 hw_after -= 3;
14455 else if (hw_after >= 2)
14457 emit_insn_before (gen_nop_4_byte (), insn);
14458 hw_after -= 2;
14460 else
14462 emit_insn_before (gen_nop_2_byte (), insn);
14463 hw_after -= 1;
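/* HW_AFTER counts 2-byte halfwords: a 6-byte NOP covers 3 of them, a
   4-byte NOP 2, and a 2-byte NOP 1.  For example (hypothetical value),
   hw_after == 5 emits one 6-byte NOP followed by one 4-byte NOP.  */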
14469 /* Return true if INSN is a fp load insn writing register REGNO. */
14470 static inline bool
14471 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14473 rtx set;
14474 enum attr_type flag = s390_safe_attr_type (insn);
14476 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14477 return false;
14479 set = single_set (insn);
14481 if (set == NULL_RTX)
14482 return false;
14484 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14485 return false;
14487 if (REGNO (SET_DEST (set)) != regno)
14488 return false;
14490 return true;
14493 /* This value describes the distance to be avoided between an
14494 arithmetic fp instruction and an fp load writing the same register.
14495 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14496 fine but the exact value has to be avoided. Otherwise the FP
14497 pipeline will throw an exception causing a major penalty. */
14498 #define Z10_EARLYLOAD_DISTANCE 7
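/* Example: with the value 7 above, an fp load that writes a register
   exactly seven insns after the arithmetic fp instruction which set it
   hits the critical distance; six or eight insns apart would be
   harmless.  */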
14500 /* Rearrange the ready list in order to avoid the situation described
14501 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14502 moved to the very end of the ready list. */
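/* Note: the scheduler issues the element at the highest index of READY
   first, so slot 0 is the latest possible issue slot (see also the
   comment in s390_sched_reorder below).  */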
14503 static void
14504 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14506 unsigned int regno;
14507 int nready = *nready_p;
14508 rtx_insn *tmp;
14509 int i;
14510 rtx_insn *insn;
14511 rtx set;
14512 enum attr_type flag;
14513 int distance;
14515 /* Skip DISTANCE - 1 active insns. */
14516 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14517 distance > 0 && insn != NULL_RTX;
14518 distance--, insn = prev_active_insn (insn))
14519 if (CALL_P (insn) || JUMP_P (insn))
14520 return;
14522 if (insn == NULL_RTX)
14523 return;
14525 set = single_set (insn);
14527 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14528 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14529 return;
14531 flag = s390_safe_attr_type (insn);
14533 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14534 return;
14536 regno = REGNO (SET_DEST (set));
14537 i = nready - 1;
14539 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14540 i--;
14542 if (!i)
14543 return;
14545 tmp = ready[i];
14546 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14547 ready[0] = tmp;
14550 /* Returns TRUE if BB is entered via a fallthru edge and all other
14551 incoming edges are less than likely. */
14552 static bool
14553 s390_bb_fallthru_entry_likely (basic_block bb)
14555 edge e, fallthru_edge;
14556 edge_iterator ei;
14558 if (!bb)
14559 return false;
14561 fallthru_edge = find_fallthru_edge (bb->preds);
14562 if (!fallthru_edge)
14563 return false;
14565 FOR_EACH_EDGE (e, ei, bb->preds)
14566 if (e != fallthru_edge
14567 && e->probability >= profile_probability::likely ())
14568 return false;
14570 return true;
14573 struct s390_sched_state
14575 /* Number of insns in the group. */
14576 int group_state;
14577 /* Execution side of the group. */
14578 int side;
14579 /* Group can only hold two insns. */
14580 bool group_of_two;
14581 } s390_sched_state;
14583 static struct s390_sched_state sched_state = {0, 1, false};
14585 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14586 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14587 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14588 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14589 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14591 static unsigned int
14592 s390_get_sched_attrmask (rtx_insn *insn)
14594 unsigned int mask = 0;
14596 switch (s390_tune)
14598 case PROCESSOR_2827_ZEC12:
14599 if (get_attr_zEC12_cracked (insn))
14600 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14601 if (get_attr_zEC12_expanded (insn))
14602 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14603 if (get_attr_zEC12_endgroup (insn))
14604 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14605 if (get_attr_zEC12_groupalone (insn))
14606 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14607 break;
14608 case PROCESSOR_2964_Z13:
14609 if (get_attr_z13_cracked (insn))
14610 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14611 if (get_attr_z13_expanded (insn))
14612 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14613 if (get_attr_z13_endgroup (insn))
14614 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14615 if (get_attr_z13_groupalone (insn))
14616 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14617 if (get_attr_z13_groupoftwo (insn))
14618 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14619 break;
14620 case PROCESSOR_3906_Z14:
14621 if (get_attr_z14_cracked (insn))
14622 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14623 if (get_attr_z14_expanded (insn))
14624 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14625 if (get_attr_z14_endgroup (insn))
14626 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14627 if (get_attr_z14_groupalone (insn))
14628 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14629 if (get_attr_z14_groupoftwo (insn))
14630 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14631 break;
14632 case PROCESSOR_8561_Z15:
14633 if (get_attr_z15_cracked (insn))
14634 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14635 if (get_attr_z15_expanded (insn))
14636 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14637 if (get_attr_z15_endgroup (insn))
14638 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14639 if (get_attr_z15_groupalone (insn))
14640 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14641 if (get_attr_z15_groupoftwo (insn))
14642 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14643 break;
14644 default:
14645 gcc_unreachable ();
14647 return mask;
14650 static unsigned int
14651 s390_get_unit_mask (rtx_insn *insn, int *units)
14653 unsigned int mask = 0;
14655 switch (s390_tune)
14657 case PROCESSOR_2964_Z13:
14658 *units = 4;
14659 if (get_attr_z13_unit_lsu (insn))
14660 mask |= 1 << 0;
14661 if (get_attr_z13_unit_fxa (insn))
14662 mask |= 1 << 1;
14663 if (get_attr_z13_unit_fxb (insn))
14664 mask |= 1 << 2;
14665 if (get_attr_z13_unit_vfu (insn))
14666 mask |= 1 << 3;
14667 break;
14668 case PROCESSOR_3906_Z14:
14669 *units = 4;
14670 if (get_attr_z14_unit_lsu (insn))
14671 mask |= 1 << 0;
14672 if (get_attr_z14_unit_fxa (insn))
14673 mask |= 1 << 1;
14674 if (get_attr_z14_unit_fxb (insn))
14675 mask |= 1 << 2;
14676 if (get_attr_z14_unit_vfu (insn))
14677 mask |= 1 << 3;
14678 break;
14679 case PROCESSOR_8561_Z15:
14680 *units = 4;
14681 if (get_attr_z15_unit_lsu (insn))
14682 mask |= 1 << 0;
14683 if (get_attr_z15_unit_fxa (insn))
14684 mask |= 1 << 1;
14685 if (get_attr_z15_unit_fxb (insn))
14686 mask |= 1 << 2;
14687 if (get_attr_z15_unit_vfu (insn))
14688 mask |= 1 << 3;
14689 break;
14690 default:
14691 gcc_unreachable ();
14693 return mask;
14696 static bool
14697 s390_is_fpd (rtx_insn *insn)
14699 if (insn == NULL_RTX)
14700 return false;
14702 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14703 || get_attr_z15_unit_fpd (insn);
14706 static bool
14707 s390_is_fxd (rtx_insn *insn)
14709 if (insn == NULL_RTX)
14710 return false;
14712 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14713 || get_attr_z15_unit_fxd (insn);
14716 /* Returns TRUE if INSN is a long-running instruction. */
14717 static bool
14718 s390_is_longrunning (rtx_insn *insn)
14720 if (insn == NULL_RTX)
14721 return false;
14723 return s390_is_fxd (insn) || s390_is_fpd (insn);
14727 /* Return the scheduling score for INSN. The higher the score the
14728 better. The score is calculated from the OOO scheduling attributes
14729 of INSN and the scheduling state sched_state. */
14730 static int
14731 s390_sched_score (rtx_insn *insn)
14733 unsigned int mask = s390_get_sched_attrmask (insn);
14734 int score = 0;
14736 switch (sched_state.group_state)
14738 case 0:
14739 /* Try to put insns into the first slot which would otherwise
14740 break a group. */
14741 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14742 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14743 score += 5;
14744 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14745 score += 10;
14746 break;
14747 case 1:
14748 /* Prefer not cracked insns while trying to put together a
14749 group. */
14750 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14751 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14752 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14753 score += 10;
14754 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14755 score += 5;
14756 /* If we are in a group of two already, try to schedule another
14757 group-of-two insn to avoid shortening another group. */
14758 if (sched_state.group_of_two
14759 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14760 score += 15;
14761 break;
14762 case 2:
14763 /* Prefer not cracked insns while trying to put together a
14764 group. */
14765 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14766 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14767 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14768 score += 10;
14769 /* Prefer endgroup insns in the last slot. */
14770 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14771 score += 10;
14772 /* Try to avoid group-of-two insns in the last slot as they will
14773 shorten this group as well as the next one. */
14774 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14775 score = MAX (0, score - 15);
14776 break;
14779 if (s390_tune >= PROCESSOR_2964_Z13)
14781 int units, i;
14782 unsigned unit_mask, m = 1;
14784 unit_mask = s390_get_unit_mask (insn, &units);
14785 gcc_assert (units <= MAX_SCHED_UNITS);
14787 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14788 ago the last insn of this unit type got scheduled. This is
14789 supposed to help provide a proper instruction mix to the
14790 CPU. */
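/* In other words, the bonus per unit grows linearly with the number of
   insns since that unit was last used on this side; the distance is
   capped at MAX_SCHED_MIX_DISTANCE by s390_sched_variable_issue, so a
   single unit contributes at most MAX_SCHED_MIX_SCORE.  */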
14791 for (i = 0; i < units; i++, m <<= 1)
14792 if (m & unit_mask)
14793 score += (last_scheduled_unit_distance[i][sched_state.side]
14794 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14796 int other_side = 1 - sched_state.side;
14798 /* Try to delay long-running insns when side is busy. */
14799 if (s390_is_longrunning (insn))
14801 if (s390_is_fxd (insn))
14803 if (fxd_longrunning[sched_state.side]
14804 && fxd_longrunning[other_side]
14805 <= fxd_longrunning[sched_state.side])
14806 score = MAX (0, score - 10);
14808 else if (fxd_longrunning[other_side]
14809 >= fxd_longrunning[sched_state.side])
14810 score += 10;
14813 if (s390_is_fpd (insn))
14815 if (fpd_longrunning[sched_state.side]
14816 && fpd_longrunning[other_side]
14817 <= fpd_longrunning[sched_state.side])
14818 score = MAX (0, score - 10);
14820 else if (fpd_longrunning[other_side]
14821 >= fpd_longrunning[sched_state.side])
14822 score += 10;
14827 return score;
14830 /* This function is called via hook TARGET_SCHED_REORDER before
14831 issuing one insn from list READY which contains *NREADYP entries.
14832 For target z10 it reorders load instructions to avoid early load
14833 conflicts in the floating point pipeline. */
14834 static int
14835 s390_sched_reorder (FILE *file, int verbose,
14836 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14838 if (s390_tune == PROCESSOR_2097_Z10
14839 && reload_completed
14840 && *nreadyp > 1)
14841 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14843 if (s390_tune >= PROCESSOR_2827_ZEC12
14844 && reload_completed
14845 && *nreadyp > 1)
14847 int i;
14848 int last_index = *nreadyp - 1;
14849 int max_index = -1;
14850 int max_score = -1;
14851 rtx_insn *tmp;
14853 /* Just move the insn with the highest score to the top (the
14854 end) of the list. A full sort is not needed since a conflict
14855 in the hazard recognition cannot happen. So the top insn in
14856 the ready list will always be taken. */
14857 for (i = last_index; i >= 0; i--)
14859 int score;
14861 if (recog_memoized (ready[i]) < 0)
14862 continue;
14864 score = s390_sched_score (ready[i]);
14865 if (score > max_score)
14867 max_score = score;
14868 max_index = i;
14872 if (max_index != -1)
14874 if (max_index != last_index)
14876 tmp = ready[max_index];
14877 ready[max_index] = ready[last_index];
14878 ready[last_index] = tmp;
14880 if (verbose > 5)
14881 fprintf (file,
14882 ";;\t\tBACKEND: move insn %d to the top of list\n",
14883 INSN_UID (ready[last_index]));
14885 else if (verbose > 5)
14886 fprintf (file,
14887 ";;\t\tBACKEND: best insn %d already on top\n",
14888 INSN_UID (ready[last_index]));
14891 if (verbose > 5)
14893 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14894 sched_state.group_state);
14896 for (i = last_index; i >= 0; i--)
14898 unsigned int sched_mask;
14899 rtx_insn *insn = ready[i];
14901 if (recog_memoized (insn) < 0)
14902 continue;
14904 sched_mask = s390_get_sched_attrmask (insn);
14905 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14906 INSN_UID (insn),
14907 s390_sched_score (insn));
14908 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14909 ((M) & sched_mask) ? #ATTR : "");
14910 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14911 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14912 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14913 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14914 #undef PRINT_SCHED_ATTR
14915 if (s390_tune >= PROCESSOR_2964_Z13)
14917 unsigned int unit_mask, m = 1;
14918 int units, j;
14920 unit_mask = s390_get_unit_mask (insn, &units);
14921 fprintf (file, "(units:");
14922 for (j = 0; j < units; j++, m <<= 1)
14923 if (m & unit_mask)
14924 fprintf (file, " u%d", j);
14925 fprintf (file, ")");
14927 fprintf (file, "\n");
14932 return s390_issue_rate ();
14936 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14937 the scheduler has issued INSN. It stores the last issued insn into
14938 last_scheduled_insn in order to make it available for
14939 s390_sched_reorder. */
14940 static int
14941 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14943 last_scheduled_insn = insn;
14945 bool ends_group = false;
14947 if (s390_tune >= PROCESSOR_2827_ZEC12
14948 && reload_completed
14949 && recog_memoized (insn) >= 0)
14951 unsigned int mask = s390_get_sched_attrmask (insn);
14953 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14954 sched_state.group_of_two = true;
14956 /* If this is a group-of-two insn, we actually ended the last group
14957 and this insn is the first one of the new group. */
14958 if (sched_state.group_state == 2 && sched_state.group_of_two)
14960 sched_state.side = sched_state.side ? 0 : 1;
14961 sched_state.group_state = 0;
14964 /* Longrunning and side bookkeeping. */
14965 for (int i = 0; i < 2; i++)
14967 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14968 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14971 unsigned latency = insn_default_latency (insn);
14972 if (s390_is_longrunning (insn))
14974 if (s390_is_fxd (insn))
14975 fxd_longrunning[sched_state.side] = latency;
14976 else
14977 fpd_longrunning[sched_state.side] = latency;
14980 if (s390_tune >= PROCESSOR_2964_Z13)
14982 int units, i;
14983 unsigned unit_mask, m = 1;
14985 unit_mask = s390_get_unit_mask (insn, &units);
14986 gcc_assert (units <= MAX_SCHED_UNITS);
14988 for (i = 0; i < units; i++, m <<= 1)
14989 if (m & unit_mask)
14990 last_scheduled_unit_distance[i][sched_state.side] = 0;
14991 else if (last_scheduled_unit_distance[i][sched_state.side]
14992 < MAX_SCHED_MIX_DISTANCE)
14993 last_scheduled_unit_distance[i][sched_state.side]++;
14996 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14997 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14998 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14999 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15001 sched_state.group_state = 0;
15002 ends_group = true;
15004 else
15006 switch (sched_state.group_state)
15008 case 0:
15009 sched_state.group_state++;
15010 break;
15011 case 1:
15012 sched_state.group_state++;
15013 if (sched_state.group_of_two)
15015 sched_state.group_state = 0;
15016 ends_group = true;
15018 break;
15019 case 2:
15020 sched_state.group_state++;
15021 ends_group = true;
15022 break;
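/* So, for example, three ordinary insns fill group states 0, 1 and 2 and
   then close the group; once a group-of-two insn has been seen, the
   group already closes after its second member; cracked, expanded,
   group-alone and end-group insns close it immediately.  Closing a
   group flips the dispatch side below.  */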
15026 if (verbose > 5)
15028 unsigned int sched_mask;
15030 sched_mask = s390_get_sched_attrmask (insn);
15032 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15033 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15034 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15035 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15036 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15037 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15038 #undef PRINT_SCHED_ATTR
15040 if (s390_tune >= PROCESSOR_2964_Z13)
15042 unsigned int unit_mask, m = 1;
15043 int units, j;
15045 unit_mask = s390_get_unit_mask (insn, &units);
15046 fprintf (file, "(units:");
15047 for (j = 0; j < units; j++, m <<= 1)
15048 if (m & unit_mask)
15049 fprintf (file, " %d", j);
15050 fprintf (file, ")");
15052 fprintf (file, " sched state: %d\n", sched_state.group_state);
15054 if (s390_tune >= PROCESSOR_2964_Z13)
15056 int units, j;
15058 s390_get_unit_mask (insn, &units);
15060 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15061 for (j = 0; j < units; j++)
15062 fprintf (file, "%d:%d ", j,
15063 last_scheduled_unit_distance[j][sched_state.side]);
15064 fprintf (file, "\n");
15068 /* If this insn ended a group, the next will be on the other side. */
15069 if (ends_group)
15071 sched_state.group_state = 0;
15072 sched_state.side = sched_state.side ? 0 : 1;
15073 sched_state.group_of_two = false;
15077 if (GET_CODE (PATTERN (insn)) != USE
15078 && GET_CODE (PATTERN (insn)) != CLOBBER)
15079 return more - 1;
15080 else
15081 return more;
15084 static void
15085 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15086 int verbose ATTRIBUTE_UNUSED,
15087 int max_ready ATTRIBUTE_UNUSED)
15089 /* If the next basic block is most likely entered via a fallthru edge
15090 we keep the last sched state. Otherwise we start a new group.
15091 The scheduler traverses basic blocks in "instruction stream" ordering
15092 so if we see a fallthru edge here, sched_state will be of its
15093 source block.
15095 current_sched_info->prev_head is the insn before the first insn of the
15096 block of insns to be scheduled.  */
15098 rtx_insn *insn = current_sched_info->prev_head
15099 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15100 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15101 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15103 last_scheduled_insn = NULL;
15104 memset (last_scheduled_unit_distance, 0,
15105 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15106 sched_state.group_state = 0;
15107 sched_state.group_of_two = false;
15111 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15112 a new unroll factor for struct loop *loop when tuning for CPUs with
15113 a built-in stride prefetcher.
15114 The loop is analyzed for memory accesses by inspecting each rtx of
15115 the loop body. Depending on the loop_depth and the number of memory
15116 accesses a new factor <= nunroll is returned to improve the
15117 behavior of the hardware prefetch unit. */
15118 static unsigned
15119 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15121 basic_block *bbs;
15122 rtx_insn *insn;
15123 unsigned i;
15124 unsigned mem_count = 0;
15126 if (s390_tune < PROCESSOR_2097_Z10)
15127 return nunroll;
15129 /* Count the number of memory references within the loop body. */
15130 bbs = get_loop_body (loop);
15131 subrtx_iterator::array_type array;
15132 for (i = 0; i < loop->num_nodes; i++)
15133 FOR_BB_INSNS (bbs[i], insn)
15134 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15136 rtx set;
15138 /* The runtime of small loops with memory block operations
15139 will be determined by the memory operation. Doing
15140 unrolling doesn't help here. Measurements to confirm
15141 this were only done on recent CPU levels. So better do
15142 not change anything for older CPUs. */
15143 if (s390_tune >= PROCESSOR_2964_Z13
15144 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15145 && ((set = single_set (insn)) != NULL_RTX)
15146 && ((GET_MODE (SET_DEST (set)) == BLKmode
15147 && (GET_MODE (SET_SRC (set)) == BLKmode
15148 || SET_SRC (set) == const0_rtx))
15149 || (GET_CODE (SET_SRC (set)) == COMPARE
15150 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15151 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15152 return 1;
15154 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15155 if (MEM_P (*iter))
15156 mem_count += 1;
15158 free (bbs);
15160 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
15161 if (mem_count == 0)
15162 return nunroll;
15164 switch (loop_depth(loop))
15166 case 1:
15167 return MIN (nunroll, 28 / mem_count);
15168 case 2:
15169 return MIN (nunroll, 22 / mem_count);
15170 default:
15171 return MIN (nunroll, 16 / mem_count);
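/* Worked example (hypothetical loop): an innermost loop at depth 1
   containing 7 memory references gets its unroll factor capped at
   MIN (nunroll, 28 / 7) == MIN (nunroll, 4).  */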
15175 /* Restore the current options. This is a hook function and also called
15176 internally. */
15178 static void
15179 s390_function_specific_restore (struct gcc_options *opts,
15180 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15182 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15185 static void
15186 s390_default_align (struct gcc_options *opts)
15188 /* Set the default function alignment to 16 in order to get rid of
15189 some unwanted performance effects. */
15190 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15191 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15192 opts->x_str_align_functions = "16";
15195 static void
15196 s390_override_options_after_change (void)
15198 s390_default_align (&global_options);
15201 static void
15202 s390_option_override_internal (struct gcc_options *opts,
15203 const struct gcc_options *opts_set)
15205 /* Architecture mode defaults according to ABI. */
15206 if (!(opts_set->x_target_flags & MASK_ZARCH))
15208 if (TARGET_64BIT)
15209 opts->x_target_flags |= MASK_ZARCH;
15210 else
15211 opts->x_target_flags &= ~MASK_ZARCH;
15214 /* Set the march default in case it hasn't been specified on cmdline. */
15215 if (!opts_set->x_s390_arch)
15216 opts->x_s390_arch = PROCESSOR_2064_Z900;
15218 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15220 /* Determine processor to tune for. */
15221 if (!opts_set->x_s390_tune)
15222 opts->x_s390_tune = opts->x_s390_arch;
15224 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15226 /* Sanity checks. */
15227 if (opts->x_s390_arch == PROCESSOR_NATIVE
15228 || opts->x_s390_tune == PROCESSOR_NATIVE)
15229 gcc_unreachable ();
15230 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15231 error ("64-bit ABI not supported in ESA/390 mode");
15233 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15234 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15235 || opts->x_s390_function_return == indirect_branch_thunk_inline
15236 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15237 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15238 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15240 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15242 if (!opts_set->x_s390_indirect_branch_call)
15243 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15245 if (!opts_set->x_s390_indirect_branch_jump)
15246 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15249 if (opts->x_s390_function_return != indirect_branch_keep)
15251 if (!opts_set->x_s390_function_return_reg)
15252 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15254 if (!opts_set->x_s390_function_return_mem)
15255 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15258 /* Enable hardware transactions if available and not explicitly
15259 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
15260 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15262 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15263 opts->x_target_flags |= MASK_OPT_HTM;
15264 else
15265 opts->x_target_flags &= ~MASK_OPT_HTM;
15268 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15270 if (TARGET_OPT_VX_P (opts->x_target_flags))
15272 if (!TARGET_CPU_VX_P (opts))
15273 error ("hardware vector support not available on %s",
15274 processor_table[(int)opts->x_s390_arch].name);
15275 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15276 error ("hardware vector support not available with "
15277 "%<-msoft-float%>");
15280 else
15282 if (TARGET_CPU_VX_P (opts))
15283 /* Enable vector support if available and not explicitly disabled
15284 by user. E.g. with -m31 -march=z13 -mzarch */
15285 opts->x_target_flags |= MASK_OPT_VX;
15286 else
15287 opts->x_target_flags &= ~MASK_OPT_VX;
15290 /* Use hardware DFP if available and not explicitly disabled by
15291 user. E.g. with -m31 -march=z10 -mzarch */
15292 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15294 if (TARGET_DFP_P (opts))
15295 opts->x_target_flags |= MASK_HARD_DFP;
15296 else
15297 opts->x_target_flags &= ~MASK_HARD_DFP;
15300 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15302 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15304 if (!TARGET_CPU_DFP_P (opts))
15305 error ("hardware decimal floating point instructions"
15306 " not available on %s",
15307 processor_table[(int)opts->x_s390_arch].name);
15308 if (!TARGET_ZARCH_P (opts->x_target_flags))
15309 error ("hardware decimal floating point instructions"
15310 " not available in ESA/390 mode");
15312 else
15313 opts->x_target_flags &= ~MASK_HARD_DFP;
15316 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15317 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15319 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15320 && TARGET_HARD_DFP_P (opts->x_target_flags))
15321 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15322 "%<-msoft-float%>");
15324 opts->x_target_flags &= ~MASK_HARD_DFP;
15327 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15328 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15329 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15330 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15331 "supported in combination");
15333 if (opts->x_s390_stack_size)
15335 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15336 error ("stack size must be greater than the stack guard value");
15337 else if (opts->x_s390_stack_size > 1 << 16)
15338 error ("stack size must not be greater than 64k");
15340 else if (opts->x_s390_stack_guard)
15341 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15343 /* Our implementation of the stack probe requires the probe interval
15344 to be used as displacement in an address operand. The maximum
15345 probe interval currently is 64k. This would exceed short
15346 displacements. Trim that value down to 4k if that happens. This
15347 might result in too many probes being generated only on the
15348 oldest supported machine level z900. */
15349 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15350 param_stack_clash_protection_probe_interval = 12;
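/* The parameter holds the log2 of the probe interval, so e.g. a request
   of 16 (64k) fails the short-displacement check above and is lowered
   to 12, i.e. 4k probes.  */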
15352 #if TARGET_TPF != 0
15353 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15354 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15356 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15357 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15359 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15360 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15362 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15363 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15365 if (s390_tpf_trace_skip)
15367 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15368 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15370 #endif
15372 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15373 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15374 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15375 #endif
15377 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15379 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15380 100);
15381 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15382 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15383 2000);
15384 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15385 64);
15388 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15389 256);
15390 /* values for loop prefetching */
15391 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15392 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15393 /* s390 has more than 2 levels and the size is much larger. Since
15394 we are always running virtualized, assume that we only get a small
15395 part of the caches above l1. */
15396 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15397 SET_OPTION_IF_UNSET (opts, opts_set,
15398 param_prefetch_min_insn_to_mem_ratio, 2);
15399 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15401 /* Use the alternative scheduling-pressure algorithm by default. */
15402 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15403 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15405 /* Use aggressive inlining parameters. */
15406 if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15408 SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15409 SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15412 /* Set the default alignment. */
15413 s390_default_align (opts);
15415 /* Call target specific restore function to do post-init work. At the moment,
15416 this just sets opts->x_s390_cost_pointer. */
15417 s390_function_specific_restore (opts, NULL);
15419 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15420 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
15421 not the case when the code runs before the prolog. */
15422 if (opts->x_flag_fentry && !TARGET_64BIT)
15423 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15426 static void
15427 s390_option_override (void)
15429 unsigned int i;
15430 cl_deferred_option *opt;
15431 vec<cl_deferred_option> *v =
15432 (vec<cl_deferred_option> *) s390_deferred_options;
15434 if (v)
15435 FOR_EACH_VEC_ELT (*v, i, opt)
15437 switch (opt->opt_index)
15439 case OPT_mhotpatch_:
15441 int val1;
15442 int val2;
15443 char *s = strtok (ASTRDUP (opt->arg), ",");
15444 char *t = strtok (NULL, "\0");
15446 if (t != NULL)
15448 val1 = integral_argument (s);
15449 val2 = integral_argument (t);
15451 else
15453 val1 = -1;
15454 val2 = -1;
15456 if (val1 == -1 || val2 == -1)
15458 /* argument is not a plain number */
15459 error ("arguments to %qs should be non-negative integers",
15460 "-mhotpatch=n,m");
15461 break;
15463 else if (val1 > s390_hotpatch_hw_max
15464 || val2 > s390_hotpatch_hw_max)
15466 error ("argument to %qs is too large (max. %d)",
15467 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15468 break;
15470 s390_hotpatch_hw_before_label = val1;
15471 s390_hotpatch_hw_after_label = val2;
15472 break;
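/* For example, -mhotpatch=1,2 sets s390_hotpatch_hw_before_label to 1
   and s390_hotpatch_hw_after_label to 2 (both counted in halfwords);
   values above s390_hotpatch_hw_max are rejected above.  */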
15474 default:
15475 gcc_unreachable ();
15479 /* Set up function hooks. */
15480 init_machine_status = s390_init_machine_status;
15482 s390_option_override_internal (&global_options, &global_options_set);
15484 /* Save the initial options in case the user does function specific
15485 options. */
15486 target_option_default_node = build_target_option_node (&global_options);
15487 target_option_current_node = target_option_default_node;
15489 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15490 requires the arch flags to be evaluated already. Since prefetching
15491 is beneficial on s390, we enable it if available. */
15492 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15493 flag_prefetch_loop_arrays = 1;
15495 if (!s390_pic_data_is_text_relative && !flag_pic)
15496 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15497 "%<-fpic%>/%<-fPIC%>");
15499 if (TARGET_TPF)
15501 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15502 debuggers do not yet support DWARF 3/4. */
15503 if (!global_options_set.x_dwarf_strict)
15504 dwarf_strict = 1;
15505 if (!global_options_set.x_dwarf_version)
15506 dwarf_version = 2;
15510 #if S390_USE_TARGET_ATTRIBUTE
15511 /* Inner function to process the attribute((target(...))), take an argument and
15512 set the current options from the argument. If we have a list, recursively go
15513 over the list. */
15515 static bool
15516 s390_valid_target_attribute_inner_p (tree args,
15517 struct gcc_options *opts,
15518 struct gcc_options *new_opts_set,
15519 bool force_pragma)
15521 char *next_optstr;
15522 bool ret = true;
15524 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15525 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15526 static const struct
15528 const char *string;
15529 size_t len;
15530 int opt;
15531 int has_arg;
15532 int only_as_pragma;
15533 } attrs[] = {
15534 /* enum options */
15535 S390_ATTRIB ("arch=", OPT_march_, 1),
15536 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15537 /* uinteger options */
15538 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15539 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15540 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15541 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15542 /* flag options */
15543 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15544 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15545 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15546 S390_ATTRIB ("htm", OPT_mhtm, 0),
15547 S390_ATTRIB ("vx", OPT_mvx, 0),
15548 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15549 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15550 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15551 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15552 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15553 /* boolean options */
15554 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15556 #undef S390_ATTRIB
15557 #undef S390_PRAGMA
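/* So a declaration like

	void foo (void) __attribute__ ((target ("arch=z13,no-vx")));

   (illustrative only) is split at the commas below and parsed into the
   enum option "arch=" with its argument and the negated flag option
   "vx".  */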
15559 /* If this is a list, recurse to get the options. */
15560 if (TREE_CODE (args) == TREE_LIST)
15562 bool ret = true;
15563 int num_pragma_values;
15564 int i;
15566 /* Note: attribs.c:decl_attributes prepends the values from
15567 current_target_pragma to the list of target attributes. To determine
15568 whether we're looking at a value of the attribute or the pragma we
15569 assume that the first [list_length (current_target_pragma)] values in
15570 the list are the values from the pragma. */
15571 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15572 ? list_length (current_target_pragma) : 0;
15573 for (i = 0; args; args = TREE_CHAIN (args), i++)
15575 bool is_pragma;
15577 is_pragma = (force_pragma || i < num_pragma_values);
15578 if (TREE_VALUE (args)
15579 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15580 opts, new_opts_set,
15581 is_pragma))
15583 ret = false;
15586 return ret;
15589 else if (TREE_CODE (args) != STRING_CST)
15591 error ("attribute %<target%> argument not a string");
15592 return false;
15595 /* Handle multiple arguments separated by commas. */
15596 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15598 while (next_optstr && *next_optstr != '\0')
15600 char *p = next_optstr;
15601 char *orig_p = p;
15602 char *comma = strchr (next_optstr, ',');
15603 size_t len, opt_len;
15604 int opt;
15605 bool opt_set_p;
15606 char ch;
15607 unsigned i;
15608 int mask = 0;
15609 enum cl_var_type var_type;
15610 bool found;
15612 if (comma)
15614 *comma = '\0';
15615 len = comma - next_optstr;
15616 next_optstr = comma + 1;
15618 else
15620 len = strlen (p);
15621 next_optstr = NULL;
15624 /* Recognize no-xxx. */
15625 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15627 opt_set_p = false;
15628 p += 3;
15629 len -= 3;
15631 else
15632 opt_set_p = true;
15634 /* Find the option. */
15635 ch = *p;
15636 found = false;
15637 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15639 opt_len = attrs[i].len;
15640 if (ch == attrs[i].string[0]
15641 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15642 && memcmp (p, attrs[i].string, opt_len) == 0)
15644 opt = attrs[i].opt;
15645 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15646 continue;
15647 mask = cl_options[opt].var_value;
15648 var_type = cl_options[opt].var_type;
15649 found = true;
15650 break;
15654 /* Process the option. */
15655 if (!found)
15657 error ("attribute(target(\"%s\")) is unknown", orig_p);
15658 return false;
15660 else if (attrs[i].only_as_pragma && !force_pragma)
15662 /* Value is not allowed for the target attribute. */
15663 error ("value %qs is not supported by attribute %<target%>",
15664 attrs[i].string);
15665 return false;
15668 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15670 if (var_type == CLVC_BIT_CLEAR)
15671 opt_set_p = !opt_set_p;
15673 if (opt_set_p)
15674 opts->x_target_flags |= mask;
15675 else
15676 opts->x_target_flags &= ~mask;
15677 new_opts_set->x_target_flags |= mask;
15680 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15682 int value;
15684 if (cl_options[opt].cl_uinteger)
15686 /* Unsigned integer argument. Code based on the function
15687 decode_cmdline_option () in opts-common.c. */
15688 value = integral_argument (p + opt_len);
15690 else
15691 value = (opt_set_p) ? 1 : 0;
15693 if (value != -1)
15695 struct cl_decoded_option decoded;
15697 /* Value range check; only implemented for numeric and boolean
15698 options at the moment. */
15699 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15700 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15701 set_option (opts, new_opts_set, opt, value,
15702 p + opt_len, DK_UNSPECIFIED, input_location,
15703 global_dc);
15705 else
15707 error ("attribute(target(\"%s\")) is unknown", orig_p);
15708 ret = false;
15712 else if (cl_options[opt].var_type == CLVC_ENUM)
15714 bool arg_ok;
15715 int value;
15717 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15718 if (arg_ok)
15719 set_option (opts, new_opts_set, opt, value,
15720 p + opt_len, DK_UNSPECIFIED, input_location,
15721 global_dc);
15722 else
15724 error ("attribute(target(\"%s\")) is unknown", orig_p);
15725 ret = false;
15729 else
15730 gcc_unreachable ();
15732 return ret;
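/* For illustration only (a hedged sketch; the option strings shown are
   assumptions based on the s390 option table, not taken from this function):
   the parser above accepts a comma separated list in which each entry is
   either "opt", "no-opt" or "opt=value", e.g.

     void __attribute__ ((target ("arch=z13,no-vx")))
     new_code (void);

     #pragma GCC target ("zvector")

   A "no-" prefix clears the corresponding target flag, while "opt=value"
   entries go through the normal option decoding for integer, boolean and
   enum options.  */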
15735 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15737 tree
15738 s390_valid_target_attribute_tree (tree args,
15739 struct gcc_options *opts,
15740 const struct gcc_options *opts_set,
15741 bool force_pragma)
15743 tree t = NULL_TREE;
15744 struct gcc_options new_opts_set;
15746 memset (&new_opts_set, 0, sizeof (new_opts_set));
15748 /* Process each of the options on the chain. */
15749 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15750 force_pragma))
15751 return error_mark_node;
15753 /* If some option was set (even if it has not changed), rerun
15754 s390_option_override_internal, and then save the options away. */
15755 if (new_opts_set.x_target_flags
15756 || new_opts_set.x_s390_arch
15757 || new_opts_set.x_s390_tune
15758 || new_opts_set.x_s390_stack_guard
15759 || new_opts_set.x_s390_stack_size
15760 || new_opts_set.x_s390_branch_cost
15761 || new_opts_set.x_s390_warn_framesize
15762 || new_opts_set.x_s390_warn_dynamicstack_p)
15764 const unsigned char *src = (const unsigned char *)opts_set;
15765 unsigned char *dest = (unsigned char *)&new_opts_set;
15766 unsigned int i;
15768 /* Merge the original option flags into the new ones. */
15769 for (i = 0; i < sizeof(*opts_set); i++)
15770 dest[i] |= src[i];
15772 /* Apply any overrides, such as arch=xxx or tune=xxx. */
15773 s390_option_override_internal (opts, &new_opts_set);
15774 /* Save the current options unless we are validating options for
15775 #pragma. */
15776 t = build_target_option_node (opts);
15778 return t;
15781 /* Hook to validate attribute((target("string"))). */
15783 static bool
15784 s390_valid_target_attribute_p (tree fndecl,
15785 tree ARG_UNUSED (name),
15786 tree args,
15787 int ARG_UNUSED (flags))
15789 struct gcc_options func_options;
15790 tree new_target, new_optimize;
15791 bool ret = true;
15793 /* attribute((target("default"))) does nothing, beyond
15794 affecting multi-versioning. */
15795 if (TREE_VALUE (args)
15796 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15797 && TREE_CHAIN (args) == NULL_TREE
15798 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15799 return true;
15801 tree old_optimize = build_optimization_node (&global_options);
15803 /* Get the optimization options of the current function. */
15804 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15806 if (!func_optimize)
15807 func_optimize = old_optimize;
15809 /* Init func_options. */
15810 memset (&func_options, 0, sizeof (func_options));
15811 init_options_struct (&func_options, NULL);
15812 lang_hooks.init_options_struct (&func_options);
15814 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15816 /* Initialize func_options to the default before its target options can
15817 be set. */
15818 cl_target_option_restore (&func_options,
15819 TREE_TARGET_OPTION (target_option_default_node));
15821 new_target = s390_valid_target_attribute_tree (args, &func_options,
15822 &global_options_set,
15823 (args ==
15824 current_target_pragma));
15825 new_optimize = build_optimization_node (&func_options);
15826 if (new_target == error_mark_node)
15827 ret = false;
15828 else if (fndecl && new_target)
15830 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15831 if (old_optimize != new_optimize)
15832 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15834 return ret;
15837 /* Hook to determine if one function can safely inline another. */
15839 static bool
15840 s390_can_inline_p (tree caller, tree callee)
15842 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15843 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15845 if (!callee_tree)
15846 callee_tree = target_option_default_node;
15847 if (!caller_tree)
15848 caller_tree = target_option_default_node;
15849 if (callee_tree == caller_tree)
15850 return true;
15852 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15853 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15854 bool ret = true;
15856 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15857 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15858 ret = false;
15860 /* Don't inline functions to be compiled for a more recent arch into a
15861 function for an older arch. */
15862 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15863 ret = false;
15865 /* Inlining a hard float function into a soft float function is only
15866 allowed if the hard float function doesn't actually make use of
15867 floating point.
15869 We are called from FEs for multi-versioning call optimization, so
15870 ipa_fn_summaries may not be available. */
15871 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15872 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15873 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15874 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15875 && (! ipa_fn_summaries
15876 || ipa_fn_summaries->get
15877 (cgraph_node::get (callee))->fp_expressions))
15878 ret = false;
15880 return ret;
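/* A hedged illustration of the rules above (the function names and arch
   levels are made up for the example): the callee below is not allowed to
   be inlined into the caller because its architecture level is newer:

     static void __attribute__ ((target ("arch=z14"))) callee (void) { }
     void __attribute__ ((target ("arch=z10"))) caller (void) { callee (); }

   Likewise a callee compiled with hard float is only inlined into a
   soft-float caller if ipa_fn_summaries can prove that it contains no
   floating point expressions.  */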
15882 #endif
15884 /* Set VAL to the correct enum value according to the indirect-branch or
15885 function-return attribute in ATTR. */
15887 static inline void
15888 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15890 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15891 if (strcmp (str, "keep") == 0)
15892 *val = indirect_branch_keep;
15893 else if (strcmp (str, "thunk") == 0)
15894 *val = indirect_branch_thunk;
15895 else if (strcmp (str, "thunk-inline") == 0)
15896 *val = indirect_branch_thunk_inline;
15897 else if (strcmp (str, "thunk-extern") == 0)
15898 *val = indirect_branch_thunk_extern;
15901 /* Record in cfun->machine the settings for -mindirect-branch* and
15902 -mfunction-return*, taken from either the command line or the
15903 function attributes. */
15905 static void
15906 s390_indirect_branch_settings (tree fndecl)
15908 tree attr;
15910 if (!fndecl)
15911 return;
15913 /* Initialize with the command line options and let the attributes
15914 override them. */
15915 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15916 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15918 cfun->machine->function_return_reg = s390_function_return_reg;
15919 cfun->machine->function_return_mem = s390_function_return_mem;
15921 if ((attr = lookup_attribute ("indirect_branch",
15922 DECL_ATTRIBUTES (fndecl))))
15924 s390_indirect_branch_attrvalue (attr,
15925 &cfun->machine->indirect_branch_jump);
15926 s390_indirect_branch_attrvalue (attr,
15927 &cfun->machine->indirect_branch_call);
15930 if ((attr = lookup_attribute ("indirect_branch_jump",
15931 DECL_ATTRIBUTES (fndecl))))
15932 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15934 if ((attr = lookup_attribute ("indirect_branch_call",
15935 DECL_ATTRIBUTES (fndecl))))
15936 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15938 if ((attr = lookup_attribute ("function_return",
15939 DECL_ATTRIBUTES (fndecl))))
15941 s390_indirect_branch_attrvalue (attr,
15942 &cfun->machine->function_return_reg);
15943 s390_indirect_branch_attrvalue (attr,
15944 &cfun->machine->function_return_mem);
15947 if ((attr = lookup_attribute ("function_return_reg",
15948 DECL_ATTRIBUTES (fndecl))))
15949 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15951 if ((attr = lookup_attribute ("function_return_mem",
15952 DECL_ATTRIBUTES (fndecl))))
15953 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
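/* Hedged illustration (the attribute names mirror the lookups above, the
   function bodies are made up): the per-function attributes override the
   command line -mindirect-branch= and -mfunction-return= defaults, e.g.

     void __attribute__ ((indirect_branch ("thunk-extern")))
     call_hardened (void (*fp) (void)) { fp (); }

     int __attribute__ ((function_return ("thunk")))
     return_hardened (void) { return 0; }
*/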
15956 #if S390_USE_TARGET_ATTRIBUTE
15957 /* Restore target globals from NEW_TREE and invalidate the
15958 s390_previous_fndecl cache. */
15960 void
15961 s390_activate_target_options (tree new_tree)
15963 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15964 if (TREE_TARGET_GLOBALS (new_tree))
15965 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15966 else if (new_tree == target_option_default_node)
15967 restore_target_globals (&default_target_globals);
15968 else
15969 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15970 s390_previous_fndecl = NULL_TREE;
15972 #endif
15974 /* Establish appropriate back-end context for processing the function
15975 FNDECL. The argument might be NULL to indicate processing at top
15976 level, outside of any function scope. */
15977 static void
15978 s390_set_current_function (tree fndecl)
15980 #if S390_USE_TARGET_ATTRIBUTE
15981 /* Only change the context if the function changes. This hook is called
15982 several times in the course of compiling a function, and we don't want to
15983 slow things down too much or call target_reinit when it isn't safe. */
15984 if (fndecl == s390_previous_fndecl)
15986 s390_indirect_branch_settings (fndecl);
15987 return;
15990 tree old_tree;
15991 if (s390_previous_fndecl == NULL_TREE)
15992 old_tree = target_option_current_node;
15993 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15994 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15995 else
15996 old_tree = target_option_default_node;
15998 if (fndecl == NULL_TREE)
16000 if (old_tree != target_option_current_node)
16001 s390_activate_target_options (target_option_current_node);
16002 return;
16005 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16006 if (new_tree == NULL_TREE)
16007 new_tree = target_option_default_node;
16009 if (old_tree != new_tree)
16010 s390_activate_target_options (new_tree);
16011 s390_previous_fndecl = fndecl;
16012 #endif
16013 s390_indirect_branch_settings (fndecl);
16016 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16018 static bool
16019 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16020 unsigned int align ATTRIBUTE_UNUSED,
16021 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16022 bool speed_p ATTRIBUTE_UNUSED)
16024 return (size == 1 || size == 2
16025 || size == 4 || (TARGET_ZARCH && size == 8));
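/* Illustration (hedged; the exact expansion is decided by the generic
   by-pieces code, this hook only gates it): with the sizes accepted above an
   8 byte copy on TARGET_ZARCH can be expanded inline as a single load/store
   pair instead of going through the block-move path:

     struct eight { char b[8]; };
     void copy8 (struct eight *d, const struct eight *s)
     {
       __builtin_memcpy (d, s, sizeof *d);
     }
*/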
16028 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16030 static void
16031 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16033 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16034 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16035 tree call_efpc = build_call_expr (efpc, 0);
16036 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16038 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16039 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16040 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16041 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16042 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16043 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16045 /* Generates the equivalent of feholdexcept (&fenv_var)
16047 fenv_var = __builtin_s390_efpc ();
16048 __builtin_s390_sfpc (fenv_var & mask) */
16049 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
16050 tree new_fpc =
16051 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16052 build_int_cst (unsigned_type_node,
16053 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
16054 FPC_EXCEPTION_MASK)));
16055 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16056 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16058 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16060 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16061 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16062 build_int_cst (unsigned_type_node,
16063 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16064 *clear = build_call_expr (sfpc, 1, new_fpc);
16066 /* Generates the equivalent of feupdateenv (fenv_var)
16068 old_fpc = __builtin_s390_efpc ();
16069 __builtin_s390_sfpc (fenv_var);
16070 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16072 old_fpc = create_tmp_var_raw (unsigned_type_node);
16073 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
16074 old_fpc, call_efpc);
16076 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16078 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16079 build_int_cst (unsigned_type_node,
16080 FPC_FLAGS_MASK));
16081 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16082 build_int_cst (unsigned_type_node,
16083 FPC_FLAGS_SHIFT));
16084 tree atomic_feraiseexcept
16085 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16086 raise_old_except = build_call_expr (atomic_feraiseexcept,
16087 1, raise_old_except);
16089 *update = build2 (COMPOUND_EXPR, void_type_node,
16090 build2 (COMPOUND_EXPR, void_type_node,
16091 store_old_fpc, set_new_fpc),
16092 raise_old_except);
16094 #undef FPC_EXCEPTION_MASK
16095 #undef FPC_FLAGS_MASK
16096 #undef FPC_DXC_MASK
16097 #undef FPC_EXCEPTION_MASK_SHIFT
16098 #undef FPC_FLAGS_SHIFT
16099 #undef FPC_DXC_SHIFT
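/* Taken together, the three sequences built above correspond roughly to the
   following source level sketch (hedged; variable names are illustrative,
   the builtins and masks are the ones used above):

     unsigned fenv_var, old_fpc;

     fenv_var = __builtin_s390_efpc ();                     (the *hold part)
     __builtin_s390_sfpc (fenv_var & ~(FPC_DXC_MASK | FPC_FLAGS_MASK
                                       | FPC_EXCEPTION_MASK));

     __builtin_s390_sfpc (__builtin_s390_efpc ()            (the *clear part)
                          & ~(FPC_DXC_MASK | FPC_FLAGS_MASK));

     old_fpc = __builtin_s390_efpc ();                      (the *update part)
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);
*/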
16102 /* Return the vector mode to be used for inner mode MODE when doing
16103 vectorization. */
16104 static machine_mode
16105 s390_preferred_simd_mode (scalar_mode mode)
16107 if (TARGET_VXE)
16108 switch (mode)
16110 case E_SFmode:
16111 return V4SFmode;
16112 default:;
16115 if (TARGET_VX)
16116 switch (mode)
16118 case E_DFmode:
16119 return V2DFmode;
16120 case E_DImode:
16121 return V2DImode;
16122 case E_SImode:
16123 return V4SImode;
16124 case E_HImode:
16125 return V8HImode;
16126 case E_QImode:
16127 return V16QImode;
16128 default:;
16130 return word_mode;
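/* Illustration of the mapping above (hedged; whether vectorization actually
   happens is up to the vectorizer): with the vector facility (-march=z13,
   TARGET_VX) a loop such as

     void add (int *restrict a, const int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] += b[i];
     }

   is offered V4SImode, while single precision float loops only get V4SFmode
   once TARGET_VXE (z14 and later) is available and otherwise fall back to
   word_mode.  */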
16133 /* Our hardware does not require vectors to be strictly aligned. */
16134 static bool
16135 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16136 const_tree type ATTRIBUTE_UNUSED,
16137 int misalignment ATTRIBUTE_UNUSED,
16138 bool is_packed ATTRIBUTE_UNUSED)
16140 if (TARGET_VX)
16141 return true;
16143 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16144 is_packed);
16147 /* The vector ABI requires vector types to be aligned on an 8 byte
16148 boundary (our stack alignment). However, we allow this to be
16149 overridden by the user, although doing so breaks the ABI. */
16150 static HOST_WIDE_INT
16151 s390_vector_alignment (const_tree type)
16153 tree size = TYPE_SIZE (type);
16155 if (!TARGET_VX_ABI)
16156 return default_vector_alignment (type);
16158 if (TYPE_USER_ALIGN (type))
16159 return TYPE_ALIGN (type);
16161 if (tree_fits_uhwi_p (size)
16162 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16163 return tree_to_uhwi (size);
16165 return BIGGEST_ALIGNMENT;
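/* Example of the rule above (hedged; assumes the 8 byte BIGGEST_ALIGNMENT
   mentioned in the comment): under the vector ABI a 16 byte "vector int"
   gets 8 byte alignment, whereas a smaller vector such as

     typedef char v4qi __attribute__ ((vector_size (4)));

   keeps its natural 4 byte alignment, and an explicit aligned attribute from
   the user always wins.  */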
16168 /* Implement TARGET_CONSTANT_ALIGNMENT. Align constants on even addresses
16169 so they can be addressed with the LARL instruction. */
16171 static HOST_WIDE_INT
16172 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16174 return MAX (align, 16);
16177 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16178 /* Implement TARGET_ASM_FILE_START. */
16179 static void
16180 s390_asm_file_start (void)
16182 default_file_start ();
16183 s390_asm_output_machine_for_arch (asm_out_file);
16185 #endif
16187 /* Implement TARGET_ASM_FILE_END. */
16188 static void
16189 s390_asm_file_end (void)
16191 #ifdef HAVE_AS_GNU_ATTRIBUTE
16192 varpool_node *vnode;
16193 cgraph_node *cnode;
16195 FOR_EACH_VARIABLE (vnode)
16196 if (TREE_PUBLIC (vnode->decl))
16197 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16199 FOR_EACH_FUNCTION (cnode)
16200 if (TREE_PUBLIC (cnode->decl))
16201 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16204 if (s390_vector_abi != 0)
16205 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16206 s390_vector_abi);
16207 #endif
16208 file_end_indicate_exec_stack ();
16210 if (flag_split_stack)
16211 file_end_indicate_split_stack ();
16214 /* Return true if TYPE is a vector bool type. */
16215 static inline bool
16216 s390_vector_bool_type_p (const_tree type)
16218 return TYPE_VECTOR_OPAQUE (type);
16221 /* Return the diagnostic message string if the binary operation OP is
16222 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16223 static const char*
16224 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16226 bool bool1_p, bool2_p;
16227 bool plusminus_p;
16228 bool muldiv_p;
16229 bool compare_p;
16230 machine_mode mode1, mode2;
16232 if (!TARGET_ZVECTOR)
16233 return NULL;
16235 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16236 return NULL;
16238 bool1_p = s390_vector_bool_type_p (type1);
16239 bool2_p = s390_vector_bool_type_p (type2);
16241 /* Mixing signed and unsigned types is forbidden for all
16242 operators. */
16243 if (!bool1_p && !bool2_p
16244 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16245 return N_("types differ in signedness");
16247 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16248 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16249 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16250 || op == ROUND_DIV_EXPR);
16251 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16252 || op == EQ_EXPR || op == NE_EXPR);
16254 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16255 return N_("binary operator does not support two vector bool operands");
16257 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16258 return N_("binary operator does not support vector bool operand");
16260 mode1 = TYPE_MODE (type1);
16261 mode2 = TYPE_MODE (type2);
16263 if (bool1_p != bool2_p && plusminus_p
16264 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16265 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16266 return N_("binary operator does not support mixing vector "
16267 "bool with floating point vector operands");
16269 return NULL;
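/* Hedged illustration of the diagnostics above (requires -mzvector so that
   the vector bool types exist):

     vector signed int   a;
     vector unsigned int b;
     vector bool int     m, n;

     a + b;    rejected: types differ in signedness
     m * n;    rejected: two vector bool operands
     a < m;    rejected: vector bool operand on a comparison
*/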
16272 /* Implement TARGET_C_EXCESS_PRECISION.
16274 FIXME: For historical reasons, float_t and double_t are typedef'ed to
16275 double on s390, causing operations on float_t to operate in a higher
16276 precision than is necessary. However, it is not the case that SFmode
16277 operations have implicit excess precision, and we generate more optimal
16278 code if we let the compiler know no implicit extra precision is added.
16280 That means when we are compiling with -fexcess-precision=fast, the value
16281 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16282 float_t (though they would be correct for -fexcess-precision=standard).
16284 A complete fix would modify glibc to remove the unnecessary typedef
16285 of float_t to double. */
16287 static enum flt_eval_method
16288 s390_excess_precision (enum excess_precision_type type)
16290 switch (type)
16292 case EXCESS_PRECISION_TYPE_IMPLICIT:
16293 case EXCESS_PRECISION_TYPE_FAST:
16294 /* The fastest type to promote to will always be the native type,
16295 whether that occurs with implicit excess precision or
16296 otherwise. */
16297 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16298 case EXCESS_PRECISION_TYPE_STANDARD:
16299 /* Otherwise, when we are in a standards compliant mode, to
16300 ensure consistency with the implementation in glibc, report that
16301 float is evaluated to the range and precision of double. */
16302 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16303 default:
16304 gcc_unreachable ();
16306 return FLT_EVAL_METHOD_UNPREDICTABLE;
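/* Illustration (hedged): with -fexcess-precision=standard the hook reports
   promotion to double (FLT_EVAL_METHOD == 1), so in

     float madd (float a, float b, float c) { return a * b + c; }

   the intermediate result is kept in double precision before the final
   truncation, matching the glibc float_t == double typedef; with
   -fexcess-precision=fast the native single precision type is used.  */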
16309 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16311 static unsigned HOST_WIDE_INT
16312 s390_asan_shadow_offset (void)
16314 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
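/* Illustration (hedged; the shift by 3 is the generic ASan shadow
   granularity and is not defined in this file): with the offset returned
   above, the shadow byte for an address is found at

     shadow = (addr >> 3) + (TARGET_64BIT ? 1UL << 52 : 0x20000000);
*/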
16317 #ifdef HAVE_GAS_HIDDEN
16318 # define USE_HIDDEN_LINKONCE 1
16319 #else
16320 # define USE_HIDDEN_LINKONCE 0
16321 #endif
16323 /* Output an indirect branch trampoline for target register REGNO. */
16325 static void
16326 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16328 tree decl;
16329 char thunk_label[32];
16330 int i;
16332 if (z10_p)
16333 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16334 else
16335 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16336 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16338 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16339 get_identifier (thunk_label),
16340 build_function_type_list (void_type_node, NULL_TREE));
16341 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16342 NULL_TREE, void_type_node);
16343 TREE_PUBLIC (decl) = 1;
16344 TREE_STATIC (decl) = 1;
16345 DECL_IGNORED_P (decl) = 1;
16347 if (USE_HIDDEN_LINKONCE)
16349 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16351 targetm.asm_out.unique_section (decl, 0);
16352 switch_to_section (get_named_section (decl, NULL, 0));
16354 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16355 fputs ("\t.hidden\t", asm_out_file);
16356 assemble_name (asm_out_file, thunk_label);
16357 putc ('\n', asm_out_file);
16358 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16360 else
16362 switch_to_section (text_section);
16363 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16366 DECL_INITIAL (decl) = make_node (BLOCK);
16367 current_function_decl = decl;
16368 allocate_struct_function (decl, false);
16369 init_function_start (decl);
16370 cfun->is_thunk = true;
16371 first_function_block_is_cold = false;
16372 final_start_function (emit_barrier (), asm_out_file, 1);
16374 /* This makes CFI at least usable for indirect jumps.
16376 Stopping in the thunk: backtrace will point to the thunk target
16377 as if it was interrupted by a signal. For a call this means that
16378 the call chain will be: caller->callee->thunk */
16379 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16381 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16382 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16383 for (i = 0; i < FPR15_REGNUM; i++)
16384 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16387 if (z10_p)
16389 /* exrl 0,1f */
16391 /* We generate the z10 style thunk (using exrl) even if z10 is not
16392 the currently enabled CPU level, so tell the assembler to accept
16393 the instruction. */
16394 if (!TARGET_CPU_Z10)
16396 fputs ("\t.machine push\n", asm_out_file);
16397 fputs ("\t.machine z10\n", asm_out_file);
16399 /* We use exrl even if -mzarch hasn't been specified on the
16400 command line so we have to tell the assembler to accept
16401 it. */
16402 if (!TARGET_ZARCH)
16403 fputs ("\t.machinemode zarch\n", asm_out_file);
16405 fputs ("\texrl\t0,1f\n", asm_out_file);
16407 if (!TARGET_ZARCH)
16408 fputs ("\t.machinemode esa\n", asm_out_file);
16410 if (!TARGET_CPU_Z10)
16411 fputs ("\t.machine pop\n", asm_out_file);
16413 else
16415 /* larl %r1,1f */
16416 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16417 INDIRECT_BRANCH_THUNK_REGNUM);
16419 /* ex 0,0(%r1) */
16420 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16421 INDIRECT_BRANCH_THUNK_REGNUM);
16424 /* 0: j 0b */
16425 fputs ("0:\tj\t0b\n", asm_out_file);
16427 /* 1: br <regno> */
16428 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16430 final_end_function ();
16431 init_insn_lengths ();
16432 free_after_compilation (cfun);
16433 set_cfun (NULL);
16434 current_function_decl = NULL;
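/* For a z10 style thunk targeting %r1 the body emitted above looks roughly
   like this (the label name comes from
   TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL and is omitted here):

       exrl    0,1f
     0:  j     0b
     1:  br    %r1

   The br is only ever executed through the exrl; anything that falls through
   or speculates past it ends up in the 0b self loop.  */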
16437 /* Implement the asm.code_end target hook. */
16439 static void
16440 s390_code_end (void)
16442 int i;
16444 for (i = 1; i < 16; i++)
16446 if (indirect_branch_z10thunk_mask & (1 << i))
16447 s390_output_indirect_thunk_function (i, true);
16449 if (indirect_branch_prez10thunk_mask & (1 << i))
16450 s390_output_indirect_thunk_function (i, false);
16453 if (TARGET_INDIRECT_BRANCH_TABLE)
16455 int o;
16456 int i;
16458 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16460 if (indirect_branch_table_label_no[o] == 0)
16461 continue;
16463 switch_to_section (get_section (indirect_branch_table_name[o],
16465 NULL_TREE));
16466 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16468 char label_start[32];
16470 ASM_GENERATE_INTERNAL_LABEL (label_start,
16471 indirect_branch_table_label[o], i);
16473 fputs ("\t.long\t", asm_out_file);
16474 assemble_name_raw (asm_out_file, label_start);
16475 fputs ("-.\n", asm_out_file);
16477 switch_to_section (current_function_section ());
16482 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16484 unsigned int
16485 s390_case_values_threshold (void)
16487 /* Disabling branch prediction for indirect jumps makes jump tables
16488 much more expensive. */
16489 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16490 return 20;
16492 return default_case_values_threshold ();
16495 /* Evaluate the insns between HEAD and TAIL and add back-end
16496 specific dependencies.
16498 Establish an ANTI dependency between r11 and r15 restores from FPRs
16499 to prevent the instructions scheduler from reordering them since
16500 this would break CFI. No further handling in the sched_reorder
16501 hook is required since the r11 and r15 restore will never appear in
16502 the same ready list with that change. */
16503 void
16504 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16506 if (!frame_pointer_needed || !epilogue_completed)
16507 return;
16509 while (head != tail && DEBUG_INSN_P (head))
16510 head = NEXT_INSN (head);
16512 rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16514 for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16516 rtx set = single_set (insn);
16517 if (!INSN_P (insn)
16518 || !RTX_FRAME_RELATED_P (insn)
16519 || set == NULL_RTX
16520 || !REG_P (SET_DEST (set))
16521 || !FP_REG_P (SET_SRC (set)))
16522 continue;
16524 if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16525 r11_restore = insn;
16527 if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16528 r15_restore = insn;
16531 if (r11_restore == NULL || r15_restore == NULL)
16532 return;
16533 add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16536 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16538 static unsigned HOST_WIDE_INT
16539 s390_shift_truncation_mask (machine_mode mode)
16541 return mode == DImode || mode == SImode ? 63 : 0;
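/* Consequence of the mask above (hedged): since the hardware shift
   instructions only use the low 6 bits of the count, an explicit masking of
   the shift amount as in

     unsigned long shl (unsigned long x, unsigned int n)
     {
       return x << (n & 63);
     }

   can be dropped for SImode and DImode shifts.  */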
16544 /* Initialize GCC target structure. */
16546 #undef TARGET_ASM_ALIGNED_HI_OP
16547 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16548 #undef TARGET_ASM_ALIGNED_DI_OP
16549 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16550 #undef TARGET_ASM_INTEGER
16551 #define TARGET_ASM_INTEGER s390_assemble_integer
16553 #undef TARGET_ASM_OPEN_PAREN
16554 #define TARGET_ASM_OPEN_PAREN ""
16556 #undef TARGET_ASM_CLOSE_PAREN
16557 #define TARGET_ASM_CLOSE_PAREN ""
16559 #undef TARGET_OPTION_OVERRIDE
16560 #define TARGET_OPTION_OVERRIDE s390_option_override
16562 #ifdef TARGET_THREAD_SSP_OFFSET
16563 #undef TARGET_STACK_PROTECT_GUARD
16564 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16565 #endif
16567 #undef TARGET_ENCODE_SECTION_INFO
16568 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16570 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16571 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16573 #ifdef HAVE_AS_TLS
16574 #undef TARGET_HAVE_TLS
16575 #define TARGET_HAVE_TLS true
16576 #endif
16577 #undef TARGET_CANNOT_FORCE_CONST_MEM
16578 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16580 #undef TARGET_DELEGITIMIZE_ADDRESS
16581 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16583 #undef TARGET_LEGITIMIZE_ADDRESS
16584 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16586 #undef TARGET_RETURN_IN_MEMORY
16587 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16589 #undef TARGET_INIT_BUILTINS
16590 #define TARGET_INIT_BUILTINS s390_init_builtins
16591 #undef TARGET_EXPAND_BUILTIN
16592 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16593 #undef TARGET_BUILTIN_DECL
16594 #define TARGET_BUILTIN_DECL s390_builtin_decl
16596 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16597 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16599 #undef TARGET_ASM_OUTPUT_MI_THUNK
16600 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16601 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16602 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16604 #undef TARGET_C_EXCESS_PRECISION
16605 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16607 #undef TARGET_SCHED_ADJUST_PRIORITY
16608 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16609 #undef TARGET_SCHED_ISSUE_RATE
16610 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16611 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16612 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16614 #undef TARGET_SCHED_VARIABLE_ISSUE
16615 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16616 #undef TARGET_SCHED_REORDER
16617 #define TARGET_SCHED_REORDER s390_sched_reorder
16618 #undef TARGET_SCHED_INIT
16619 #define TARGET_SCHED_INIT s390_sched_init
16621 #undef TARGET_CANNOT_COPY_INSN_P
16622 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16623 #undef TARGET_RTX_COSTS
16624 #define TARGET_RTX_COSTS s390_rtx_costs
16625 #undef TARGET_ADDRESS_COST
16626 #define TARGET_ADDRESS_COST s390_address_cost
16627 #undef TARGET_REGISTER_MOVE_COST
16628 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16629 #undef TARGET_MEMORY_MOVE_COST
16630 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16631 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16632 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16633 s390_builtin_vectorization_cost
16635 #undef TARGET_MACHINE_DEPENDENT_REORG
16636 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16638 #undef TARGET_VALID_POINTER_MODE
16639 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16641 #undef TARGET_BUILD_BUILTIN_VA_LIST
16642 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16643 #undef TARGET_EXPAND_BUILTIN_VA_START
16644 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16645 #undef TARGET_ASAN_SHADOW_OFFSET
16646 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16648 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16650 #undef TARGET_PROMOTE_FUNCTION_MODE
16651 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16652 #undef TARGET_PASS_BY_REFERENCE
16653 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16655 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16656 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16658 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16659 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16660 #undef TARGET_FUNCTION_ARG
16661 #define TARGET_FUNCTION_ARG s390_function_arg
16662 #undef TARGET_FUNCTION_ARG_ADVANCE
16663 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16664 #undef TARGET_FUNCTION_ARG_PADDING
16665 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16666 #undef TARGET_FUNCTION_VALUE
16667 #define TARGET_FUNCTION_VALUE s390_function_value
16668 #undef TARGET_LIBCALL_VALUE
16669 #define TARGET_LIBCALL_VALUE s390_libcall_value
16670 #undef TARGET_STRICT_ARGUMENT_NAMING
16671 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16673 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16674 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16676 #undef TARGET_FIXED_CONDITION_CODE_REGS
16677 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16679 #undef TARGET_CC_MODES_COMPATIBLE
16680 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16682 #undef TARGET_INVALID_WITHIN_DOLOOP
16683 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16685 #ifdef HAVE_AS_TLS
16686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16688 #endif
16690 #undef TARGET_DWARF_FRAME_REG_MODE
16691 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16693 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16694 #undef TARGET_MANGLE_TYPE
16695 #define TARGET_MANGLE_TYPE s390_mangle_type
16696 #endif
16698 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16699 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16701 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16702 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16704 #undef TARGET_PREFERRED_RELOAD_CLASS
16705 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16707 #undef TARGET_SECONDARY_RELOAD
16708 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16709 #undef TARGET_SECONDARY_MEMORY_NEEDED
16710 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16711 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16712 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16714 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16715 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16717 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16718 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16720 #undef TARGET_LEGITIMATE_ADDRESS_P
16721 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16723 #undef TARGET_LEGITIMATE_CONSTANT_P
16724 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16726 #undef TARGET_LRA_P
16727 #define TARGET_LRA_P s390_lra_p
16729 #undef TARGET_CAN_ELIMINATE
16730 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16732 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16733 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16735 #undef TARGET_LOOP_UNROLL_ADJUST
16736 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16738 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16739 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16740 #undef TARGET_TRAMPOLINE_INIT
16741 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16743 /* PR 79421 */
16744 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16745 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16747 #undef TARGET_UNWIND_WORD_MODE
16748 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16750 #undef TARGET_CANONICALIZE_COMPARISON
16751 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16753 #undef TARGET_HARD_REGNO_SCRATCH_OK
16754 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16756 #undef TARGET_HARD_REGNO_NREGS
16757 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16758 #undef TARGET_HARD_REGNO_MODE_OK
16759 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16760 #undef TARGET_MODES_TIEABLE_P
16761 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16763 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16764 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16765 s390_hard_regno_call_part_clobbered
16767 #undef TARGET_ATTRIBUTE_TABLE
16768 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16770 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16771 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16773 #undef TARGET_SET_UP_BY_PROLOGUE
16774 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16776 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16777 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16779 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16780 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16781 s390_use_by_pieces_infrastructure_p
16783 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16784 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16786 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16787 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16789 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16790 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16792 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16793 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16795 #undef TARGET_VECTOR_ALIGNMENT
16796 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16798 #undef TARGET_INVALID_BINARY_OP
16799 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16801 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16802 #undef TARGET_ASM_FILE_START
16803 #define TARGET_ASM_FILE_START s390_asm_file_start
16804 #endif
16806 #undef TARGET_ASM_FILE_END
16807 #define TARGET_ASM_FILE_END s390_asm_file_end
16809 #undef TARGET_SET_CURRENT_FUNCTION
16810 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16812 #if S390_USE_TARGET_ATTRIBUTE
16813 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16814 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16816 #undef TARGET_CAN_INLINE_P
16817 #define TARGET_CAN_INLINE_P s390_can_inline_p
16818 #endif
16820 #undef TARGET_OPTION_RESTORE
16821 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16823 #undef TARGET_CAN_CHANGE_MODE_CLASS
16824 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16826 #undef TARGET_CONSTANT_ALIGNMENT
16827 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16829 #undef TARGET_ASM_CODE_END
16830 #define TARGET_ASM_CODE_END s390_code_end
16832 #undef TARGET_CASE_VALUES_THRESHOLD
16833 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16835 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16836 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16837 s390_sched_dependencies_evaluation
16839 #undef TARGET_SHIFT_TRUNCATION_MASK
16840 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16842 /* Use only short displacement, since long displacement is not available for
16843 the floating point instructions. */
16844 #undef TARGET_MAX_ANCHOR_OFFSET
16845 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16847 struct gcc_target targetm = TARGET_INITIALIZER;
16849 #include "gt-s390.h"