1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2014 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "calls.h"
30 #include "varasm.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "recog.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "reload.h"
42 #include "obstack.h"
43 #include "except.h"
44 #include "function.h"
45 #include "diagnostic-core.h"
46 #include "ggc.h"
47 #include "tm_p.h"
48 #include "target.h"
49 #include "target-def.h"
50 #include "common/common-target.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "splay-tree.h"
54 #include "hash-table.h"
55 #include "vec.h"
56 #include "basic-block.h"
57 #include "tree-ssa-alias.h"
58 #include "internal-fn.h"
59 #include "gimple-fold.h"
60 #include "tree-eh.h"
61 #include "gimple-expr.h"
62 #include "is-a.h"
63 #include "gimple.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "gimple-iterator.h"
68 #include "gimplify.h"
69 #include "gimple-ssa.h"
70 #include "stringpool.h"
71 #include "tree-ssanames.h"
72 #include "tree-stdarg.h"
73 #include "tm-constrs.h"
74 #include "df.h"
75 #include "libfuncs.h"
76 #include "opts.h"
77 #include "params.h"
78 #include "builtins.h"
80 /* Specify which cpu to schedule for. */
81 enum processor_type alpha_tune;
83 /* Which cpu we're generating code for. */
84 enum processor_type alpha_cpu;
86 static const char * const alpha_cpu_name[] =
88 "ev4", "ev5", "ev6"
91 /* Specify how accurate floating-point traps need to be. */
93 enum alpha_trap_precision alpha_tp;
95 /* Specify the floating-point rounding mode. */
97 enum alpha_fp_rounding_mode alpha_fprm;
99 /* Specify which things cause traps. */
101 enum alpha_fp_trap_mode alpha_fptm;
103 /* Nonzero if inside of a function, because the Alpha asm can't
104 handle .files inside of functions. */
106 static int inside_function = FALSE;
108 /* The number of cycles of latency we should assume on memory reads. */
110 int alpha_memory_latency = 3;
112 /* Whether the function needs the GP. */
114 static int alpha_function_needs_gp;
116 /* The assembler name of the current function. */
118 static const char *alpha_fnname;
120 /* The next explicit relocation sequence number. */
121 extern GTY(()) int alpha_next_sequence_number;
122 int alpha_next_sequence_number = 1;
124 /* The literal and gpdisp sequence numbers for this insn, as printed
125 by %# and %* respectively. */
126 extern GTY(()) int alpha_this_literal_sequence_number;
127 extern GTY(()) int alpha_this_gpdisp_sequence_number;
128 int alpha_this_literal_sequence_number;
129 int alpha_this_gpdisp_sequence_number;
131 /* Costs of various operations on the different architectures. */
133 struct alpha_rtx_cost_data
135 unsigned char fp_add;
136 unsigned char fp_mult;
137 unsigned char fp_div_sf;
138 unsigned char fp_div_df;
139 unsigned char int_mult_si;
140 unsigned char int_mult_di;
141 unsigned char int_shift;
142 unsigned char int_cmov;
143 unsigned short int_div;
146 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
148 { /* EV4 */
149 COSTS_N_INSNS (6), /* fp_add */
150 COSTS_N_INSNS (6), /* fp_mult */
151 COSTS_N_INSNS (34), /* fp_div_sf */
152 COSTS_N_INSNS (63), /* fp_div_df */
153 COSTS_N_INSNS (23), /* int_mult_si */
154 COSTS_N_INSNS (23), /* int_mult_di */
155 COSTS_N_INSNS (2), /* int_shift */
156 COSTS_N_INSNS (2), /* int_cmov */
157 COSTS_N_INSNS (97), /* int_div */
159 { /* EV5 */
160 COSTS_N_INSNS (4), /* fp_add */
161 COSTS_N_INSNS (4), /* fp_mult */
162 COSTS_N_INSNS (15), /* fp_div_sf */
163 COSTS_N_INSNS (22), /* fp_div_df */
164 COSTS_N_INSNS (8), /* int_mult_si */
165 COSTS_N_INSNS (12), /* int_mult_di */
166 COSTS_N_INSNS (1) + 1, /* int_shift */
167 COSTS_N_INSNS (1), /* int_cmov */
168 COSTS_N_INSNS (83), /* int_div */
170 { /* EV6 */
171 COSTS_N_INSNS (4), /* fp_add */
172 COSTS_N_INSNS (4), /* fp_mult */
173 COSTS_N_INSNS (12), /* fp_div_sf */
174 COSTS_N_INSNS (15), /* fp_div_df */
175 COSTS_N_INSNS (7), /* int_mult_si */
176 COSTS_N_INSNS (7), /* int_mult_di */
177 COSTS_N_INSNS (1), /* int_shift */
178 COSTS_N_INSNS (2), /* int_cmov */
179 COSTS_N_INSNS (86), /* int_div */
183 /* Similar but tuned for code size instead of execution latency. The
184 extra +N is fractional cost tuning based on latency. It's used to
185 encourage use of cheaper insns like shift, but only if there's just
186 one of them. */
188 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
190 COSTS_N_INSNS (1), /* fp_add */
191 COSTS_N_INSNS (1), /* fp_mult */
192 COSTS_N_INSNS (1), /* fp_div_sf */
193 COSTS_N_INSNS (1) + 1, /* fp_div_df */
194 COSTS_N_INSNS (1) + 1, /* int_mult_si */
195 COSTS_N_INSNS (1) + 2, /* int_mult_di */
196 COSTS_N_INSNS (1), /* int_shift */
197 COSTS_N_INSNS (1), /* int_cmov */
198 COSTS_N_INSNS (6), /* int_div */
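/* A worked example of the fractional "+N" tuning above, assuming the
   usual definition COSTS_N_INSNS (N) == (N) * 4 from rtl.h:

     COSTS_N_INSNS (1)           == 4     one full insn
     COSTS_N_INSNS (1) + 1       == 5     roughly "1.25 insns"
     2 * (COSTS_N_INSNS (1) + 1) == 10    a pair of them
     COSTS_N_INSNS (2)           == 8     a genuine two-insn sequence

   A single penalized insn is still cheaper than any two-insn alternative,
   but a pair of them is not, which is exactly the "only if there's just
   one of them" behavior described above.  */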
201 /* Get the number of args of a function in one of two ways. */
202 #if TARGET_ABI_OPEN_VMS
203 #define NUM_ARGS crtl->args.info.num_args
204 #else
205 #define NUM_ARGS crtl->args.info
206 #endif
208 #define REG_PV 27
209 #define REG_RA 26
211 /* Declarations of static functions. */
212 static struct machine_function *alpha_init_machine_status (void);
213 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
214 static void alpha_handle_trap_shadows (void);
215 static void alpha_align_insns (void);
217 #if TARGET_ABI_OPEN_VMS
218 static void alpha_write_linkage (FILE *, const char *);
219 static bool vms_valid_pointer_mode (enum machine_mode);
220 #else
221 #define vms_patch_builtins() gcc_unreachable()
222 #endif
224 static unsigned int
225 rest_of_handle_trap_shadows (void)
227 alpha_handle_trap_shadows ();
228 return 0;
231 namespace {
233 const pass_data pass_data_handle_trap_shadows =
235 RTL_PASS,
236 "trap_shadows", /* name */
237 OPTGROUP_NONE, /* optinfo_flags */
238 TV_NONE, /* tv_id */
239 0, /* properties_required */
240 0, /* properties_provided */
241 0, /* properties_destroyed */
242 0, /* todo_flags_start */
243 TODO_df_finish, /* todo_flags_finish */
246 class pass_handle_trap_shadows : public rtl_opt_pass
248 public:
249 pass_handle_trap_shadows(gcc::context *ctxt)
250 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
253 /* opt_pass methods: */
254 virtual bool gate (function *)
256 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
259 virtual unsigned int execute (function *)
261 return rest_of_handle_trap_shadows ();
264 }; // class pass_handle_trap_shadows
266 } // anon namespace
268 rtl_opt_pass *
269 make_pass_handle_trap_shadows (gcc::context *ctxt)
271 return new pass_handle_trap_shadows (ctxt);
274 static unsigned int
275 rest_of_align_insns (void)
277 alpha_align_insns ();
278 return 0;
281 namespace {
283 const pass_data pass_data_align_insns =
285 RTL_PASS,
286 "align_insns", /* name */
287 OPTGROUP_NONE, /* optinfo_flags */
288 TV_NONE, /* tv_id */
289 0, /* properties_required */
290 0, /* properties_provided */
291 0, /* properties_destroyed */
292 0, /* todo_flags_start */
293 TODO_df_finish, /* todo_flags_finish */
296 class pass_align_insns : public rtl_opt_pass
298 public:
299 pass_align_insns(gcc::context *ctxt)
300 : rtl_opt_pass(pass_data_align_insns, ctxt)
303 /* opt_pass methods: */
304 virtual bool gate (function *)
306 /* Due to the number of extra trapb insns, don't bother fixing up
307 alignment when trap precision is instruction. Moreover, we can
308 only do our job when sched2 is run. */
309 return ((alpha_tune == PROCESSOR_EV4
310 || alpha_tune == PROCESSOR_EV5)
311 && optimize && !optimize_size
312 && alpha_tp != ALPHA_TP_INSN
313 && flag_schedule_insns_after_reload);
316 virtual unsigned int execute (function *)
318 return rest_of_align_insns ();
321 }; // class pass_align_insns
323 } // anon namespace
325 rtl_opt_pass *
326 make_pass_align_insns (gcc::context *ctxt)
328 return new pass_align_insns (ctxt);
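/* Both passes above follow the same boilerplate.  A minimal sketch of the
   pattern, with illustrative names that are not part of alpha.c:

     static const pass_data pass_data_example =
       { RTL_PASS, "example", OPTGROUP_NONE, TV_NONE, 0, 0, 0, 0, 0 };

     class pass_example : public rtl_opt_pass
     {
     public:
       pass_example (gcc::context *ctxt)
         : rtl_opt_pass (pass_data_example, ctxt) {}
       virtual unsigned int execute (function *) { return 0; }
     };

     static struct register_pass_info example_info
       = { new pass_example (g), "shorten", 1, PASS_POS_INSERT_BEFORE };
     register_pass (&example_info);

   alpha_option_override below does exactly this for the two passes here,
   anchoring one after "eh_ranges" and the other before "shorten".  */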
331 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
332 /* Implement TARGET_MANGLE_TYPE. */
334 static const char *
335 alpha_mangle_type (const_tree type)
337 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
338 && TARGET_LONG_DOUBLE_128)
339 return "g";
341 /* For all other types, use normal C++ mangling. */
342 return NULL;
344 #endif
346 /* Parse target option strings. */
348 static void
349 alpha_option_override (void)
351 static const struct cpu_table {
352 const char *const name;
353 const enum processor_type processor;
354 const int flags;
355 const unsigned short line_size; /* in bytes */
356 const unsigned short l1_size; /* in kb. */
357 const unsigned short l2_size; /* in kb. */
358 } cpu_table[] = {
359 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
360 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
361 had 64k to 8M 8-byte direct Bcache. */
362 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
363 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
364 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
366 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
367 and 1M to 16M 64 byte L3 (not modeled).
368 PCA56 had a 16k 64-byte Icache; PCA57 had a 32k Icache.
369 PCA56 had an 8k 64-byte Dcache; PCA57 had a 16k Dcache. */
370 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
371 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
372 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
373 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
374 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
375 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
376 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
378 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
379 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
380 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
381 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
382 64, 64, 16*1024 },
383 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
384 64, 64, 16*1024 }
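/* For example, "-mcpu=ev56" selects PROCESSOR_EV5 for both code generation
   and scheduling, turns on MASK_BWX (byte/word extensions), and feeds a
   32-byte line, 8k L1 and 96k L2 into the --param defaults set up below;
   "-mtune=ev6" alone changes only the scheduling model and the cache
   numbers, leaving the instruction-set flags untouched.  */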
387 opt_pass *pass_handle_trap_shadows = make_pass_handle_trap_shadows (g);
388 static struct register_pass_info handle_trap_shadows_info
389 = { pass_handle_trap_shadows, "eh_ranges",
390 1, PASS_POS_INSERT_AFTER
393 opt_pass *pass_align_insns = make_pass_align_insns (g);
394 static struct register_pass_info align_insns_info
395 = { pass_align_insns, "shorten",
396 1, PASS_POS_INSERT_BEFORE
399 int const ct_size = ARRAY_SIZE (cpu_table);
400 int line_size = 0, l1_size = 0, l2_size = 0;
401 int i;
403 #ifdef SUBTARGET_OVERRIDE_OPTIONS
404 SUBTARGET_OVERRIDE_OPTIONS;
405 #endif
407 /* Default to full IEEE compliance mode for Go language. */
408 if (strcmp (lang_hooks.name, "GNU Go") == 0
409 && !(target_flags_explicit & MASK_IEEE))
410 target_flags |= MASK_IEEE;
412 alpha_fprm = ALPHA_FPRM_NORM;
413 alpha_tp = ALPHA_TP_PROG;
414 alpha_fptm = ALPHA_FPTM_N;
416 if (TARGET_IEEE)
418 alpha_tp = ALPHA_TP_INSN;
419 alpha_fptm = ALPHA_FPTM_SU;
421 if (TARGET_IEEE_WITH_INEXACT)
423 alpha_tp = ALPHA_TP_INSN;
424 alpha_fptm = ALPHA_FPTM_SUI;
427 if (alpha_tp_string)
429 if (! strcmp (alpha_tp_string, "p"))
430 alpha_tp = ALPHA_TP_PROG;
431 else if (! strcmp (alpha_tp_string, "f"))
432 alpha_tp = ALPHA_TP_FUNC;
433 else if (! strcmp (alpha_tp_string, "i"))
434 alpha_tp = ALPHA_TP_INSN;
435 else
436 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
439 if (alpha_fprm_string)
441 if (! strcmp (alpha_fprm_string, "n"))
442 alpha_fprm = ALPHA_FPRM_NORM;
443 else if (! strcmp (alpha_fprm_string, "m"))
444 alpha_fprm = ALPHA_FPRM_MINF;
445 else if (! strcmp (alpha_fprm_string, "c"))
446 alpha_fprm = ALPHA_FPRM_CHOP;
447 else if (! strcmp (alpha_fprm_string,"d"))
448 alpha_fprm = ALPHA_FPRM_DYN;
449 else
450 error ("bad value %qs for -mfp-rounding-mode switch",
451 alpha_fprm_string);
454 if (alpha_fptm_string)
456 if (strcmp (alpha_fptm_string, "n") == 0)
457 alpha_fptm = ALPHA_FPTM_N;
458 else if (strcmp (alpha_fptm_string, "u") == 0)
459 alpha_fptm = ALPHA_FPTM_U;
460 else if (strcmp (alpha_fptm_string, "su") == 0)
461 alpha_fptm = ALPHA_FPTM_SU;
462 else if (strcmp (alpha_fptm_string, "sui") == 0)
463 alpha_fptm = ALPHA_FPTM_SUI;
464 else
465 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
468 if (alpha_cpu_string)
470 for (i = 0; i < ct_size; i++)
471 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
473 alpha_tune = alpha_cpu = cpu_table[i].processor;
474 line_size = cpu_table[i].line_size;
475 l1_size = cpu_table[i].l1_size;
476 l2_size = cpu_table[i].l2_size;
477 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
478 target_flags |= cpu_table[i].flags;
479 break;
481 if (i == ct_size)
482 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
485 if (alpha_tune_string)
487 for (i = 0; i < ct_size; i++)
488 if (! strcmp (alpha_tune_string, cpu_table [i].name))
490 alpha_tune = cpu_table[i].processor;
491 line_size = cpu_table[i].line_size;
492 l1_size = cpu_table[i].l1_size;
493 l2_size = cpu_table[i].l2_size;
494 break;
496 if (i == ct_size)
497 error ("bad value %qs for -mtune switch", alpha_tune_string);
500 if (line_size)
501 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
502 global_options.x_param_values,
503 global_options_set.x_param_values);
504 if (l1_size)
505 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
506 global_options.x_param_values,
507 global_options_set.x_param_values);
508 if (l2_size)
509 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
510 global_options.x_param_values,
511 global_options_set.x_param_values);
513 /* Do some sanity checks on the above options. */
515 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
516 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
518 warning (0, "fp software completion requires -mtrap-precision=i");
519 alpha_tp = ALPHA_TP_INSN;
522 if (alpha_cpu == PROCESSOR_EV6)
524 /* Except for EV6 pass 1 (not released), we always have precise
525 arithmetic traps. Which means we can do software completion
526 without minding trap shadows. */
527 alpha_tp = ALPHA_TP_PROG;
530 if (TARGET_FLOAT_VAX)
532 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
534 warning (0, "rounding mode not supported for VAX floats");
535 alpha_fprm = ALPHA_FPRM_NORM;
537 if (alpha_fptm == ALPHA_FPTM_SUI)
539 warning (0, "trap mode not supported for VAX floats");
540 alpha_fptm = ALPHA_FPTM_SU;
542 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
543 warning (0, "128-bit long double not supported for VAX floats");
544 target_flags &= ~MASK_LONG_DOUBLE_128;
548 char *end;
549 int lat;
551 if (!alpha_mlat_string)
552 alpha_mlat_string = "L1";
554 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
555 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
557 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
558 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
559 && alpha_mlat_string[2] == '\0')
561 static int const cache_latency[][4] =
563 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
564 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
565 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
568 lat = alpha_mlat_string[1] - '0';
569 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
571 warning (0, "L%d cache latency unknown for %s",
572 lat, alpha_cpu_name[alpha_tune]);
573 lat = 3;
575 else
576 lat = cache_latency[alpha_tune][lat-1];
578 else if (! strcmp (alpha_mlat_string, "main"))
580 /* Most current memories have about 370ns latency. This is
581 a reasonable guess for a fast cpu. */
582 lat = 150;
584 else
586 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
587 lat = 3;
590 alpha_memory_latency = lat;
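/* Worked examples of the -mmemory-latency parsing above: a plain number
   such as "-mmemory-latency=5" is used directly; "-mmemory-latency=L2"
   while tuning for EV5 yields cache_latency[EV5][1] == 12 cycles;
   "-mmemory-latency=L3" on EV4 hits the -1 entry, warns, and falls back
   to 3; and "main" uses the fixed guess of 150.  With no option at all
   the default is "L1", i.e. 3, 2 or 3 cycles depending on the tuning
   target.  */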
593 /* Default the definition of "small data" to 8 bytes. */
594 if (!global_options_set.x_g_switch_value)
595 g_switch_value = 8;
597 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
598 if (flag_pic == 1)
599 target_flags |= MASK_SMALL_DATA;
600 else if (flag_pic == 2)
601 target_flags &= ~MASK_SMALL_DATA;
603 /* Align labels and loops for optimal branching. */
604 /* ??? Kludge these by not doing anything if we don't optimize. */
605 if (optimize > 0)
607 if (align_loops <= 0)
608 align_loops = 16;
609 if (align_jumps <= 0)
610 align_jumps = 16;
612 if (align_functions <= 0)
613 align_functions = 16;
615 /* Register variables and functions with the garbage collector. */
617 /* Set up function hooks. */
618 init_machine_status = alpha_init_machine_status;
620 /* Tell the compiler when we're using VAX floating point. */
621 if (TARGET_FLOAT_VAX)
623 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
624 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
625 REAL_MODE_FORMAT (TFmode) = NULL;
628 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
629 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
630 target_flags |= MASK_LONG_DOUBLE_128;
631 #endif
633 /* This needs to be done at start up. It's convenient to do it here. */
634 register_pass (&handle_trap_shadows_info);
635 register_pass (&align_insns_info);
638 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
641 zap_mask (HOST_WIDE_INT value)
643 int i;
645 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
646 i++, value >>= 8)
647 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
648 return 0;
650 return 1;
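/* For example, zap_mask accepts 0x00000000ffffffff and 0xff00ff00ff00ff00
   (every byte is either 0x00 or 0xff, so the value is producible by a
   ZAP/ZAPNOT byte mask), but rejects 0x00000000000000f0, whose low byte
   is only partially set.  */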
653 /* Return true if OP is valid for a particular TLS relocation.
654 We are already guaranteed that OP is a CONST. */
657 tls_symbolic_operand_1 (rtx op, int size, int unspec)
659 op = XEXP (op, 0);
661 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
662 return 0;
663 op = XVECEXP (op, 0, 0);
665 if (GET_CODE (op) != SYMBOL_REF)
666 return 0;
668 switch (SYMBOL_REF_TLS_MODEL (op))
670 case TLS_MODEL_LOCAL_DYNAMIC:
671 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
672 case TLS_MODEL_INITIAL_EXEC:
673 return unspec == UNSPEC_TPREL && size == 64;
674 case TLS_MODEL_LOCAL_EXEC:
675 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
676 default:
677 gcc_unreachable ();
681 /* Used by aligned_memory_operand and unaligned_memory_operand to
682 resolve what reload is going to do with OP if it's a register. */
685 resolve_reload_operand (rtx op)
687 if (reload_in_progress)
689 rtx tmp = op;
690 if (GET_CODE (tmp) == SUBREG)
691 tmp = SUBREG_REG (tmp);
692 if (REG_P (tmp)
693 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
695 op = reg_equiv_memory_loc (REGNO (tmp));
696 if (op == 0)
697 return 0;
700 return op;
703 /* The scalar modes supported differ from the default check-what-c-supports
704 version in that sometimes TFmode is available even when long double
705 indicates only DFmode. */
707 static bool
708 alpha_scalar_mode_supported_p (enum machine_mode mode)
710 switch (mode)
712 case QImode:
713 case HImode:
714 case SImode:
715 case DImode:
716 case TImode: /* via optabs.c */
717 return true;
719 case SFmode:
720 case DFmode:
721 return true;
723 case TFmode:
724 return TARGET_HAS_XFLOATING_LIBS;
726 default:
727 return false;
731 /* Alpha implements a couple of integer vector mode operations when
732 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
733 which allows the vectorizer to operate on e.g. move instructions,
734 or when expand_vector_operations can do something useful. */
736 static bool
737 alpha_vector_mode_supported_p (enum machine_mode mode)
739 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
742 /* Return 1 if this function can directly return via $26. */
745 direct_return (void)
747 return (TARGET_ABI_OSF
748 && reload_completed
749 && alpha_sa_size () == 0
750 && get_frame_size () == 0
751 && crtl->outgoing_args_size == 0
752 && crtl->args.pretend_args_size == 0);
755 /* Return the TLS model to use for SYMBOL. */
757 static enum tls_model
758 tls_symbolic_operand_type (rtx symbol)
760 enum tls_model model;
762 if (GET_CODE (symbol) != SYMBOL_REF)
763 return TLS_MODEL_NONE;
764 model = SYMBOL_REF_TLS_MODEL (symbol);
766 /* Local-exec with a 64-bit size is the same code as initial-exec. */
767 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
768 model = TLS_MODEL_INITIAL_EXEC;
770 return model;
773 /* Return true if the function DECL will share the same GP as any
774 function in the current unit of translation. */
776 static bool
777 decl_has_samegp (const_tree decl)
779 /* Functions that are not local can be overridden, and thus may
780 not share the same gp. */
781 if (!(*targetm.binds_local_p) (decl))
782 return false;
784 /* If -msmall-data is in effect, assume that there is only one GP
785 for the module, and so any local symbol has this property. We
786 need explicit relocations to be able to enforce this for symbols
787 not defined in this unit of translation, however. */
788 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
789 return true;
791 /* Functions that are not external are defined in this UoT. */
792 /* ??? Irritatingly, static functions not yet emitted are still
793 marked "external". Apply this to non-static functions only. */
794 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
797 /* Return true if EXP should be placed in the small data section. */
799 static bool
800 alpha_in_small_data_p (const_tree exp)
802 /* We want to merge strings, so we never consider them small data. */
803 if (TREE_CODE (exp) == STRING_CST)
804 return false;
806 /* Functions are never in the small data area. Duh. */
807 if (TREE_CODE (exp) == FUNCTION_DECL)
808 return false;
810 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
812 const char *section = DECL_SECTION_NAME (exp);
813 if (strcmp (section, ".sdata") == 0
814 || strcmp (section, ".sbss") == 0)
815 return true;
817 else
819 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
821 /* If this is an incomplete type with size 0, then we can't put it
822 in sdata because it might be too big when completed. */
823 if (size > 0 && size <= g_switch_value)
824 return true;
827 return false;
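/* With the default g_switch_value of 8 established in
   alpha_option_override, a global such as "int pair[2]" (8 bytes) is
   placed in small data, while "int triple[3]" (12 bytes) is not; an
   object explicitly given a ".sdata" or ".sbss" section attribute
   qualifies regardless of its size.  */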
830 #if TARGET_ABI_OPEN_VMS
831 static bool
832 vms_valid_pointer_mode (enum machine_mode mode)
834 return (mode == SImode || mode == DImode);
837 static bool
838 alpha_linkage_symbol_p (const char *symname)
840 int symlen = strlen (symname);
842 if (symlen > 4)
843 return strcmp (&symname [symlen - 4], "..lk") == 0;
845 return false;
848 #define LINKAGE_SYMBOL_REF_P(X) \
849 ((GET_CODE (X) == SYMBOL_REF \
850 && alpha_linkage_symbol_p (XSTR (X, 0))) \
851 || (GET_CODE (X) == CONST \
852 && GET_CODE (XEXP (X, 0)) == PLUS \
853 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
854 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
855 #endif
857 /* legitimate_address_p recognizes an RTL expression that is a valid
858 memory address for an instruction. The MODE argument is the
859 machine mode for the MEM expression that wants to use this address.
861 For Alpha, we have either a constant address or the sum of a
862 register and a constant address, or just a register. For DImode,
863 any of those forms can be surrounded with an AND that clears the
864 low-order three bits; this is an "unaligned" access. */
866 static bool
867 alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
869 /* If this is an ldq_u type address, discard the outer AND. */
870 if (mode == DImode
871 && GET_CODE (x) == AND
872 && CONST_INT_P (XEXP (x, 1))
873 && INTVAL (XEXP (x, 1)) == -8)
874 x = XEXP (x, 0);
876 /* Discard non-paradoxical subregs. */
877 if (GET_CODE (x) == SUBREG
878 && (GET_MODE_SIZE (GET_MODE (x))
879 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
880 x = SUBREG_REG (x);
882 /* Unadorned general registers are valid. */
883 if (REG_P (x)
884 && (strict
885 ? STRICT_REG_OK_FOR_BASE_P (x)
886 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
887 return true;
889 /* Constant addresses (i.e. +/- 32k) are valid. */
890 if (CONSTANT_ADDRESS_P (x))
891 return true;
893 #if TARGET_ABI_OPEN_VMS
894 if (LINKAGE_SYMBOL_REF_P (x))
895 return true;
896 #endif
898 /* Register plus a small constant offset is valid. */
899 if (GET_CODE (x) == PLUS)
901 rtx ofs = XEXP (x, 1);
902 x = XEXP (x, 0);
904 /* Discard non-paradoxical subregs. */
905 if (GET_CODE (x) == SUBREG
906 && (GET_MODE_SIZE (GET_MODE (x))
907 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
908 x = SUBREG_REG (x);
910 if (REG_P (x))
912 if (! strict
913 && NONSTRICT_REG_OK_FP_BASE_P (x)
914 && CONST_INT_P (ofs))
915 return true;
916 if ((strict
917 ? STRICT_REG_OK_FOR_BASE_P (x)
918 : NONSTRICT_REG_OK_FOR_BASE_P (x))
919 && CONSTANT_ADDRESS_P (ofs))
920 return true;
924 /* If we're managing explicit relocations, LO_SUM is valid, as are small
925 data symbols. Avoid explicit relocations of modes larger than word
926 mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */
927 else if (TARGET_EXPLICIT_RELOCS
928 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
930 if (small_symbolic_operand (x, Pmode))
931 return true;
933 if (GET_CODE (x) == LO_SUM)
935 rtx ofs = XEXP (x, 1);
936 x = XEXP (x, 0);
938 /* Discard non-paradoxical subregs. */
939 if (GET_CODE (x) == SUBREG
940 && (GET_MODE_SIZE (GET_MODE (x))
941 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
942 x = SUBREG_REG (x);
944 /* Must have a valid base register. */
945 if (! (REG_P (x)
946 && (strict
947 ? STRICT_REG_OK_FOR_BASE_P (x)
948 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
949 return false;
951 /* The symbol must be local. */
952 if (local_symbolic_operand (ofs, Pmode)
953 || dtp32_symbolic_operand (ofs, Pmode)
954 || tp32_symbolic_operand (ofs, Pmode))
955 return true;
959 return false;
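/* Concretely, the forms accepted above include, roughly:

     (reg $16)                                  plain base register
     (plus (reg $16) (const_int 4000))          base plus 16-bit offset
     (and (plus (reg $16) (const_int 4)) -8)    DImode ldq_u-style access
     (lo_sum (reg $29) (symbol_ref "x"))        explicit-relocation low part

   while (plus (reg $16) (const_int 0x10000)) is rejected, since the
   offset does not fit a signed 16-bit displacement and must first be
   split by alpha_legitimize_address.  */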
962 /* Build the SYMBOL_REF for __tls_get_addr. */
964 static GTY(()) rtx tls_get_addr_libfunc;
966 static rtx
967 get_tls_get_addr (void)
969 if (!tls_get_addr_libfunc)
970 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
971 return tls_get_addr_libfunc;
974 /* Try machine-dependent ways of modifying an illegitimate address
975 to be legitimate. If we find one, return the new, valid address. */
977 static rtx
978 alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode)
980 HOST_WIDE_INT addend;
982 /* If the address is (plus reg const_int) and the CONST_INT is not a
983 valid offset, compute the high part of the constant and add it to
984 the register. Then our address is (plus temp low-part-const). */
985 if (GET_CODE (x) == PLUS
986 && REG_P (XEXP (x, 0))
987 && CONST_INT_P (XEXP (x, 1))
988 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
990 addend = INTVAL (XEXP (x, 1));
991 x = XEXP (x, 0);
992 goto split_addend;
995 /* If the address is (const (plus FOO const_int)), find the low-order
996 part of the CONST_INT. Then load FOO plus any high-order part of the
997 CONST_INT into a register. Our address is (plus reg low-part-const).
998 This is done to reduce the number of GOT entries. */
999 if (can_create_pseudo_p ()
1000 && GET_CODE (x) == CONST
1001 && GET_CODE (XEXP (x, 0)) == PLUS
1002 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
1004 addend = INTVAL (XEXP (XEXP (x, 0), 1));
1005 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
1006 goto split_addend;
1009 /* If we have a (plus reg const), emit the load as in (2), then add
1010 the two registers, and finally generate (plus reg low-part-const) as
1011 our address. */
1012 if (can_create_pseudo_p ()
1013 && GET_CODE (x) == PLUS
1014 && REG_P (XEXP (x, 0))
1015 && GET_CODE (XEXP (x, 1)) == CONST
1016 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1017 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1019 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1020 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1021 XEXP (XEXP (XEXP (x, 1), 0), 0),
1022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1023 goto split_addend;
1026 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1027 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1028 around +/- 32k offset. */
1029 if (TARGET_EXPLICIT_RELOCS
1030 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1031 && symbolic_operand (x, Pmode))
1033 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
1035 switch (tls_symbolic_operand_type (x))
1037 case TLS_MODEL_NONE:
1038 break;
1040 case TLS_MODEL_GLOBAL_DYNAMIC:
1041 start_sequence ();
1043 r0 = gen_rtx_REG (Pmode, 0);
1044 r16 = gen_rtx_REG (Pmode, 16);
1045 tga = get_tls_get_addr ();
1046 dest = gen_reg_rtx (Pmode);
1047 seq = GEN_INT (alpha_next_sequence_number++);
1049 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1050 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
1051 insn = emit_call_insn (insn);
1052 RTL_CONST_CALL_P (insn) = 1;
1053 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1055 insn = get_insns ();
1056 end_sequence ();
1058 emit_libcall_block (insn, dest, r0, x);
1059 return dest;
1061 case TLS_MODEL_LOCAL_DYNAMIC:
1062 start_sequence ();
1064 r0 = gen_rtx_REG (Pmode, 0);
1065 r16 = gen_rtx_REG (Pmode, 16);
1066 tga = get_tls_get_addr ();
1067 scratch = gen_reg_rtx (Pmode);
1068 seq = GEN_INT (alpha_next_sequence_number++);
1070 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1071 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1072 insn = emit_call_insn (insn);
1073 RTL_CONST_CALL_P (insn) = 1;
1074 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1076 insn = get_insns ();
1077 end_sequence ();
1079 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1080 UNSPEC_TLSLDM_CALL);
1081 emit_libcall_block (insn, scratch, r0, eqv);
1083 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1084 eqv = gen_rtx_CONST (Pmode, eqv);
1086 if (alpha_tls_size == 64)
1088 dest = gen_reg_rtx (Pmode);
1089 emit_insn (gen_rtx_SET (VOIDmode, dest, eqv));
1090 emit_insn (gen_adddi3 (dest, dest, scratch));
1091 return dest;
1093 if (alpha_tls_size == 32)
1095 insn = gen_rtx_HIGH (Pmode, eqv);
1096 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1097 scratch = gen_reg_rtx (Pmode);
1098 emit_insn (gen_rtx_SET (VOIDmode, scratch, insn));
1100 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1102 case TLS_MODEL_INITIAL_EXEC:
1103 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1104 eqv = gen_rtx_CONST (Pmode, eqv);
1105 tp = gen_reg_rtx (Pmode);
1106 scratch = gen_reg_rtx (Pmode);
1107 dest = gen_reg_rtx (Pmode);
1109 emit_insn (gen_get_thread_pointerdi (tp));
1110 emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv));
1111 emit_insn (gen_adddi3 (dest, tp, scratch));
1112 return dest;
1114 case TLS_MODEL_LOCAL_EXEC:
1115 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1116 eqv = gen_rtx_CONST (Pmode, eqv);
1117 tp = gen_reg_rtx (Pmode);
1119 emit_insn (gen_get_thread_pointerdi (tp));
1120 if (alpha_tls_size == 32)
1122 insn = gen_rtx_HIGH (Pmode, eqv);
1123 insn = gen_rtx_PLUS (Pmode, tp, insn);
1124 tp = gen_reg_rtx (Pmode);
1125 emit_insn (gen_rtx_SET (VOIDmode, tp, insn));
1127 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1129 default:
1130 gcc_unreachable ();
1133 if (local_symbolic_operand (x, Pmode))
1135 if (small_symbolic_operand (x, Pmode))
1136 return x;
1137 else
1139 if (can_create_pseudo_p ())
1140 scratch = gen_reg_rtx (Pmode);
1141 emit_insn (gen_rtx_SET (VOIDmode, scratch,
1142 gen_rtx_HIGH (Pmode, x)));
1143 return gen_rtx_LO_SUM (Pmode, scratch, x);
1148 return NULL;
1150 split_addend:
1152 HOST_WIDE_INT low, high;
1154 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1155 addend -= low;
1156 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1157 addend -= high;
1159 if (addend)
1160 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1161 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1162 1, OPTAB_LIB_WIDEN);
1163 if (high)
1164 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1165 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1166 1, OPTAB_LIB_WIDEN);
1168 return plus_constant (Pmode, x, low);
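/* A worked example of the split_addend arithmetic: for addend == 0x1a000,
   low == ((0xa000 ^ 0x8000) - 0x8000) == -0x6000 and high == 0x20000, so
   the address is rebuilt as (reg + 0x20000) - 0x6000.  The high part is
   added with a single ldah (which adds a sign-extended 16-bit value
   shifted left by 16 bits), and the remaining low part fits the signed
   16-bit displacement of the memory insn itself.  */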
1173 /* Try machine-dependent ways of modifying an illegitimate address
1174 to be legitimate. Return X or the new, valid address. */
1176 static rtx
1177 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1178 enum machine_mode mode)
1180 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1181 return new_x ? new_x : x;
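/* For a local symbol that is not in the small data area, the HIGH/LO_SUM
   split produced by alpha_legitimize_address_1 ends up as an explicit
   relocation pair in the output, conceptually something like

     ldah  $1, sym($29)      !gprelhigh
     lda   $1, sym($1)       !gprellow

   whereas a small-data symbol stays reachable with a single gp-relative
   16-bit displacement and is therefore returned unchanged.  */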
1184 /* Return true if ADDR has an effect that depends on the machine mode it
1185 is used for. On the Alpha this is true only for the unaligned modes.
1186 We can simplify the test since we know that the address must be valid. */
1188 static bool
1189 alpha_mode_dependent_address_p (const_rtx addr,
1190 addr_space_t as ATTRIBUTE_UNUSED)
1192 return GET_CODE (addr) == AND;
1195 /* Primarily this is required for TLS symbols, but given that our move
1196 patterns *ought* to be able to handle any symbol at any time, we
1197 should never be spilling symbolic operands to the constant pool, ever. */
1199 static bool
1200 alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1202 enum rtx_code code = GET_CODE (x);
1203 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1206 /* We do not allow indirect calls to be optimized into sibling calls, nor
1207 can we allow a call to a function with a different GP to be optimized
1208 into a sibcall. */
1210 static bool
1211 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1213 /* Can't do indirect tail calls, since we don't know if the target
1214 uses the same GP. */
1215 if (!decl)
1216 return false;
1218 /* Otherwise, we can make a tail call if the target function shares
1219 the same GP. */
1220 return decl_has_samegp (decl);
1224 some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED)
1226 rtx x = *px;
1228 /* Don't re-split. */
1229 if (GET_CODE (x) == LO_SUM)
1230 return -1;
1232 return small_symbolic_operand (x, Pmode) != 0;
1235 static int
1236 split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
1238 rtx x = *px;
1240 /* Don't re-split. */
1241 if (GET_CODE (x) == LO_SUM)
1242 return -1;
1244 if (small_symbolic_operand (x, Pmode))
1246 x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1247 *px = x;
1248 return -1;
1251 return 0;
1255 split_small_symbolic_operand (rtx x)
1257 x = copy_insn (x);
1258 for_each_rtx (&x, split_small_symbolic_operand_1, NULL);
1259 return x;
1262 /* Indicate that INSN cannot be duplicated. This is true for any insn
1263 that we've marked with gpdisp relocs, since those have to stay in
1264 1-1 correspondence with one another.
1266 Technically we could copy them if we could set up a mapping from one
1267 sequence number to another, across the set of insns to be duplicated.
1268 This seems overly complicated and error-prone since interblock motion
1269 from sched-ebb could move one of the pair of insns to a different block.
1271 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1272 then they'll be in a different block from their ldgp. Which could lead
1273 the bb reorder code to think that it would be ok to copy just the block
1274 containing the call and branch to the block containing the ldgp. */
1276 static bool
1277 alpha_cannot_copy_insn_p (rtx_insn *insn)
1279 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1280 return false;
1281 if (recog_memoized (insn) >= 0)
1282 return get_attr_cannot_copy (insn);
1283 else
1284 return false;
1288 /* Try a machine-dependent way of reloading an illegitimate address
1289 operand. If we find one, push the reload and return the new rtx. */
1292 alpha_legitimize_reload_address (rtx x,
1293 enum machine_mode mode ATTRIBUTE_UNUSED,
1294 int opnum, int type,
1295 int ind_levels ATTRIBUTE_UNUSED)
1297 /* We must recognize output that we have already generated ourselves. */
1298 if (GET_CODE (x) == PLUS
1299 && GET_CODE (XEXP (x, 0)) == PLUS
1300 && REG_P (XEXP (XEXP (x, 0), 0))
1301 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1302 && CONST_INT_P (XEXP (x, 1)))
1304 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1305 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1306 opnum, (enum reload_type) type);
1307 return x;
1310 /* We wish to handle large displacements off a base register by
1311 splitting the addend across an ldah and the mem insn. This
1312 cuts number of extra insns needed from 3 to 1. */
1313 if (GET_CODE (x) == PLUS
1314 && REG_P (XEXP (x, 0))
1315 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1316 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1317 && GET_CODE (XEXP (x, 1)) == CONST_INT)
1319 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1320 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1321 HOST_WIDE_INT high
1322 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1324 /* Check for 32-bit overflow. */
1325 if (high + low != val)
1326 return NULL_RTX;
1328 /* Reload the high part into a base reg; leave the low part
1329 in the mem directly. */
1330 x = gen_rtx_PLUS (GET_MODE (x),
1331 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1332 GEN_INT (high)),
1333 GEN_INT (low));
1335 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1336 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1337 opnum, (enum reload_type) type);
1338 return x;
1341 return NULL_RTX;
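/* For instance, reloading a stack slot at (plus $30 36000): the split
   above gives low == ((36000 & 0xffff) ^ 0x8000) - 0x8000 == -29536 and
   high == 65536, so the address becomes (plus (plus $30 65536) -29536):
   one ldah rebuilds the base plus the high part, and the low part stays
   in the displacement field of the memory reference.  The high + low
   check rejects constants whose decomposition cannot reproduce the
   original value within 32 bits.  */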
1344 /* Compute a (partial) cost for rtx X. Return true if the complete
1345 cost has been computed, and false if subexpressions should be
1346 scanned. In either case, *TOTAL contains the cost result. */
1348 static bool
1349 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1350 bool speed)
1352 enum machine_mode mode = GET_MODE (x);
1353 bool float_mode_p = FLOAT_MODE_P (mode);
1354 const struct alpha_rtx_cost_data *cost_data;
1356 if (!speed)
1357 cost_data = &alpha_rtx_cost_size;
1358 else
1359 cost_data = &alpha_rtx_cost_data[alpha_tune];
1361 switch (code)
1363 case CONST_INT:
1364 /* If this is an 8-bit constant, return zero since it can be used
1365 nearly anywhere with no cost. If it is a valid operand for an
1366 ADD or AND, likewise return 0 if we know it will be used in that
1367 context. Otherwise, return 2 since it might be used there later.
1368 All other constants take at least two insns. */
1369 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1371 *total = 0;
1372 return true;
1374 /* FALLTHRU */
1376 case CONST_DOUBLE:
1377 if (x == CONST0_RTX (mode))
1378 *total = 0;
1379 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1380 || (outer_code == AND && and_operand (x, VOIDmode)))
1381 *total = 0;
1382 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1383 *total = 2;
1384 else
1385 *total = COSTS_N_INSNS (2);
1386 return true;
1388 case CONST:
1389 case SYMBOL_REF:
1390 case LABEL_REF:
1391 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1392 *total = COSTS_N_INSNS (outer_code != MEM);
1393 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1394 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1395 else if (tls_symbolic_operand_type (x))
1396 /* Estimate of cost for call_pal rduniq. */
1397 /* ??? How many insns do we emit here? More than one... */
1398 *total = COSTS_N_INSNS (15);
1399 else
1400 /* Otherwise we do a load from the GOT. */
1401 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1402 return true;
1404 case HIGH:
1405 /* This is effectively an add_operand. */
1406 *total = 2;
1407 return true;
1409 case PLUS:
1410 case MINUS:
1411 if (float_mode_p)
1412 *total = cost_data->fp_add;
1413 else if (GET_CODE (XEXP (x, 0)) == MULT
1414 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1416 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1417 (enum rtx_code) outer_code, opno, speed)
1418 + rtx_cost (XEXP (x, 1),
1419 (enum rtx_code) outer_code, opno, speed)
1420 + COSTS_N_INSNS (1));
1421 return true;
1423 return false;
1425 case MULT:
1426 if (float_mode_p)
1427 *total = cost_data->fp_mult;
1428 else if (mode == DImode)
1429 *total = cost_data->int_mult_di;
1430 else
1431 *total = cost_data->int_mult_si;
1432 return false;
1434 case ASHIFT:
1435 if (CONST_INT_P (XEXP (x, 1))
1436 && INTVAL (XEXP (x, 1)) <= 3)
1438 *total = COSTS_N_INSNS (1);
1439 return false;
1441 /* FALLTHRU */
1443 case ASHIFTRT:
1444 case LSHIFTRT:
1445 *total = cost_data->int_shift;
1446 return false;
1448 case IF_THEN_ELSE:
1449 if (float_mode_p)
1450 *total = cost_data->fp_add;
1451 else
1452 *total = cost_data->int_cmov;
1453 return false;
1455 case DIV:
1456 case UDIV:
1457 case MOD:
1458 case UMOD:
1459 if (!float_mode_p)
1460 *total = cost_data->int_div;
1461 else if (mode == SFmode)
1462 *total = cost_data->fp_div_sf;
1463 else
1464 *total = cost_data->fp_div_df;
1465 return false;
1467 case MEM:
1468 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1469 return true;
1471 case NEG:
1472 if (! float_mode_p)
1474 *total = COSTS_N_INSNS (1);
1475 return false;
1477 /* FALLTHRU */
1479 case ABS:
1480 if (! float_mode_p)
1482 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1483 return false;
1485 /* FALLTHRU */
1487 case FLOAT:
1488 case UNSIGNED_FLOAT:
1489 case FIX:
1490 case UNSIGNED_FIX:
1491 case FLOAT_TRUNCATE:
1492 *total = cost_data->fp_add;
1493 return false;
1495 case FLOAT_EXTEND:
1496 if (MEM_P (XEXP (x, 0)))
1497 *total = 0;
1498 else
1499 *total = cost_data->fp_add;
1500 return false;
1502 default:
1503 return false;
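/* As an illustration of the constant handling above: the constant 200
   costs 0 (it fits in 8 bits and can ride along in almost any insn); a
   constant such as 0x12340000, which is a valid ldah-style add_operand,
   costs 2 when its context is unknown; and a general 32-bit constant
   costs COSTS_N_INSNS (2), reflecting the ldah/lda pair needed to
   materialize it.  */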
1507 /* REF is an alignable memory location. Place an aligned SImode
1508 reference into *PALIGNED_MEM and the number of bits to shift into
1509 *PBITNUM. */
1512 void
1513 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1515 rtx base;
1516 HOST_WIDE_INT disp, offset;
1518 gcc_assert (MEM_P (ref));
1520 if (reload_in_progress
1521 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1523 base = find_replacement (&XEXP (ref, 0));
1524 gcc_assert (memory_address_p (GET_MODE (ref), base));
1526 else
1527 base = XEXP (ref, 0);
1529 if (GET_CODE (base) == PLUS)
1530 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1531 else
1532 disp = 0;
1534 /* Find the byte offset within an aligned word. If the memory itself is
1535 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1536 will have examined the base register and determined it is aligned, and
1537 thus displacements from it are naturally alignable. */
1538 if (MEM_ALIGN (ref) >= 32)
1539 offset = 0;
1540 else
1541 offset = disp & 3;
1543 /* The location should not cross an aligned word boundary. */
1544 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1545 <= GET_MODE_SIZE (SImode));
1547 /* Access the entire aligned word. */
1548 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1550 /* Convert the byte offset within the word to a bit offset. */
1551 offset *= BITS_PER_UNIT;
1552 *pbitnum = GEN_INT (offset);
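/* Example: for a QImode reference at (plus $16 5) with no alignment
   information, disp == 5 gives offset == 1, so *PALIGNED_MEM becomes the
   SImode word at (plus $16 4) and *PBITNUM is 8 -- the byte sits eight
   bits up within the aligned longword that the extract/insert patterns
   will operate on.  */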
1555 /* Similar, but just get the address. Handle the two reload cases. */
1559 get_unaligned_address (rtx ref)
1561 rtx base;
1562 HOST_WIDE_INT offset = 0;
1564 gcc_assert (MEM_P (ref));
1566 if (reload_in_progress
1567 && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0)))
1569 base = find_replacement (&XEXP (ref, 0));
1571 gcc_assert (memory_address_p (GET_MODE (ref), base));
1573 else
1574 base = XEXP (ref, 0);
1576 if (GET_CODE (base) == PLUS)
1577 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1579 return plus_constant (Pmode, base, offset);
1582 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1583 X is always returned in a register. */
1586 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1588 if (GET_CODE (addr) == PLUS)
1590 ofs += INTVAL (XEXP (addr, 1));
1591 addr = XEXP (addr, 0);
1594 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1595 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1598 /* On the Alpha, all (non-symbolic) constants except zero go into
1599 a floating-point register via memory. Note that we cannot
1600 return anything that is not a subset of RCLASS, and that some
1601 symbolic constants cannot be dropped to memory. */
1603 enum reg_class
1604 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1606 /* Zero is present in any register class. */
1607 if (x == CONST0_RTX (GET_MODE (x)))
1608 return rclass;
1610 /* These sorts of constants we can easily drop to memory. */
1611 if (CONST_INT_P (x)
1612 || GET_CODE (x) == CONST_DOUBLE
1613 || GET_CODE (x) == CONST_VECTOR)
1615 if (rclass == FLOAT_REGS)
1616 return NO_REGS;
1617 if (rclass == ALL_REGS)
1618 return GENERAL_REGS;
1619 return rclass;
1622 /* All other kinds of constants should not (and in the case of HIGH
1623 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1624 secondary reload. */
1625 if (CONSTANT_P (x))
1626 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1628 return rclass;
1631 /* Inform reload about cases where moving X with a mode MODE to a register in
1632 RCLASS requires an extra scratch or immediate register. Return the class
1633 needed for the immediate register. */
1635 static reg_class_t
1636 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1637 enum machine_mode mode, secondary_reload_info *sri)
1639 enum reg_class rclass = (enum reg_class) rclass_i;
1641 /* Loading and storing HImode or QImode values to and from memory
1642 usually requires a scratch register. */
1643 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1645 if (any_memory_operand (x, mode))
1647 if (in_p)
1649 if (!aligned_memory_operand (x, mode))
1650 sri->icode = direct_optab_handler (reload_in_optab, mode);
1652 else
1653 sri->icode = direct_optab_handler (reload_out_optab, mode);
1654 return NO_REGS;
1658 /* We also cannot do integral arithmetic into FP regs, as might result
1659 from register elimination into a DImode fp register. */
1660 if (rclass == FLOAT_REGS)
1662 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1663 return GENERAL_REGS;
1664 if (in_p && INTEGRAL_MODE_P (mode)
1665 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1666 return GENERAL_REGS;
1669 return NO_REGS;
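/* For example, without -mbwx a QImode load from an arbitrary (possibly
   unaligned) memory operand is routed through the reload_inqi expander
   selected above, which roughly expands to an ldq_u/extbl pair using the
   reload scratch register; the corresponding store case uses the
   ldq_u/insbl/mskbl/stq_u sequence via reload_outqi.  */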
1672 /* Subfunction of the following function. Update the flags of any MEM
1673 found in part of X. */
1675 static int
1676 alpha_set_memflags_1 (rtx *xp, void *data)
1678 rtx x = *xp, orig = (rtx) data;
1680 if (!MEM_P (x))
1681 return 0;
1683 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig);
1684 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig);
1685 MEM_READONLY_P (x) = MEM_READONLY_P (orig);
1687 /* Sadly, we cannot use alias sets because the extra aliasing
1688 produced by the AND interferes. Given that two-byte quantities
1689 are the only thing we would be able to differentiate anyway,
1690 there does not seem to be any point in convoluting the early
1691 out of the alias check. */
1693 return -1;
1696 /* Given SEQ, which is an INSN list, look for any MEMs in either
1697 a SET_DEST or a SET_SRC and copy the volatile, notrap, and readonly
1698 flags from REF into each of the MEMs found. If REF is not
1699 a MEM, don't do anything. */
1701 void
1702 alpha_set_memflags (rtx seq, rtx ref)
1704 rtx_insn *insn;
1706 if (!MEM_P (ref))
1707 return;
1709 /* This is only called from alpha.md, after having had something
1710 generated from one of the insn patterns. So if everything is
1711 zero, the pattern is already up-to-date. */
1712 if (!MEM_VOLATILE_P (ref)
1713 && !MEM_NOTRAP_P (ref)
1714 && !MEM_READONLY_P (ref))
1715 return;
1717 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1718 if (INSN_P (insn))
1719 for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref);
1720 else
1721 gcc_unreachable ();
1724 static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT,
1725 int, bool);
1727 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1728 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1729 and return pc_rtx if successful. */
1731 static rtx
1732 alpha_emit_set_const_1 (rtx target, enum machine_mode mode,
1733 HOST_WIDE_INT c, int n, bool no_output)
1735 HOST_WIDE_INT new_const;
1736 int i, bits;
1737 /* Use a pseudo if highly optimizing and still generating RTL. */
1738 rtx subtarget
1739 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1740 rtx temp, insn;
1742 /* If this is a sign-extended 32-bit constant, we can do this in at most
1743 three insns, so do it if we have enough insns left. We always have
1744 a sign-extended 32-bit constant when compiling on a narrow machine. */
1746 if (HOST_BITS_PER_WIDE_INT != 64
1747 || c >> 31 == -1 || c >> 31 == 0)
1749 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1750 HOST_WIDE_INT tmp1 = c - low;
1751 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1752 HOST_WIDE_INT extra = 0;
1754 /* If HIGH will be interpreted as negative but the constant is
1755 positive, we must adjust it to do two ldah insns. */
1757 if ((high & 0x8000) != 0 && c >= 0)
1759 extra = 0x4000;
1760 tmp1 -= 0x40000000;
1761 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1764 if (c == low || (low == 0 && extra == 0))
1766 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1767 but that meant that we can't handle INT_MIN on 32-bit machines
1768 (like NT/Alpha), because we recurse indefinitely through
1769 emit_move_insn to gen_movdi. So instead, since we know exactly
1770 what we want, create it explicitly. */
1772 if (no_output)
1773 return pc_rtx;
1774 if (target == NULL)
1775 target = gen_reg_rtx (mode);
1776 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c)));
1777 return target;
1779 else if (n >= 2 + (extra != 0))
1781 if (no_output)
1782 return pc_rtx;
1783 if (!can_create_pseudo_p ())
1785 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16)));
1786 temp = target;
1788 else
1789 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1790 subtarget, mode);
1792 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1793 This means that if we go through expand_binop, we'll try to
1794 generate extensions, etc, which will require new pseudos, which
1795 will fail during some split phases. The SImode add patterns
1796 still exist, but are not named. So build the insns by hand. */
1798 if (extra != 0)
1800 if (! subtarget)
1801 subtarget = gen_reg_rtx (mode);
1802 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1803 insn = gen_rtx_SET (VOIDmode, subtarget, insn);
1804 emit_insn (insn);
1805 temp = subtarget;
1808 if (target == NULL)
1809 target = gen_reg_rtx (mode);
1810 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1811 insn = gen_rtx_SET (VOIDmode, target, insn);
1812 emit_insn (insn);
1813 return target;
1817 /* If we couldn't do it that way, try some other methods. But if we have
1818 no instructions left, don't bother. Likewise, if this is SImode and
1819 we can't make pseudos, we can't do anything since the expand_binop
1820 and expand_unop calls will widen and try to make pseudos. */
1822 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1823 return 0;
1825 /* Next, see if we can load a related constant and then shift and possibly
1826 negate it to get the constant we want. Try this once each increasing
1827 numbers of insns. */
1829 for (i = 1; i < n; i++)
1831 /* First, see if, minus some low bits, we have an easy load of the
1832 high bits. */
1834 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1835 if (new_const != 0)
1837 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1838 if (temp)
1840 if (no_output)
1841 return temp;
1842 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1843 target, 0, OPTAB_WIDEN);
1847 /* Next try complementing. */
1848 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1849 if (temp)
1851 if (no_output)
1852 return temp;
1853 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1856 /* Next try to form a constant and do a left shift. We can do this
1857 if some low-order bits are zero; the exact_log2 call below tells
1858 us that information. The bits we are shifting out could be any
1859 value, but here we'll just try the 0- and sign-extended forms of
1860 the constant. To try to increase the chance of having the same
1861 constant in more than one insn, start at the highest number of
1862 bits to shift, but try all possibilities in case a ZAPNOT will
1863 be useful. */
1865 bits = exact_log2 (c & -c);
1866 if (bits > 0)
1867 for (; bits > 0; bits--)
1869 new_const = c >> bits;
1870 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1871 if (!temp && c < 0)
1873 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1874 temp = alpha_emit_set_const (subtarget, mode, new_const,
1875 i, no_output);
1877 if (temp)
1879 if (no_output)
1880 return temp;
1881 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1882 target, 0, OPTAB_WIDEN);
1886 /* Now try high-order zero bits. Here we try the shifted-in bits as
1887 all zero and all ones. Be careful to avoid shifting outside the
1888 mode and to avoid shifting outside the host wide int size. */
1889 /* On narrow hosts, don't shift a 1 into the high bit, since we'll
1890 confuse the recursive call and set all of the high 32 bits. */
1892 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1893 - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64));
1894 if (bits > 0)
1895 for (; bits > 0; bits--)
1897 new_const = c << bits;
1898 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1899 if (!temp)
1901 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1902 temp = alpha_emit_set_const (subtarget, mode, new_const,
1903 i, no_output);
1905 if (temp)
1907 if (no_output)
1908 return temp;
1909 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1910 target, 1, OPTAB_WIDEN);
1914 /* Now try high-order 1 bits. We get that with a sign-extension.
1915 But one bit isn't enough here. Be careful to avoid shifting outside
1916 the mode and to avoid shifting outside the host wide int size. */
1918 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1919 - floor_log2 (~ c) - 2);
1920 if (bits > 0)
1921 for (; bits > 0; bits--)
1923 new_const = c << bits;
1924 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1925 if (!temp)
1927 new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1);
1928 temp = alpha_emit_set_const (subtarget, mode, new_const,
1929 i, no_output);
1931 if (temp)
1933 if (no_output)
1934 return temp;
1935 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1936 target, 0, OPTAB_WIDEN);
1941 #if HOST_BITS_PER_WIDE_INT == 64
1942 /* Finally, see if we can load a value into the target that is the same as the
1943 constant except that all bytes that are 0 are changed to be 0xff. If we
1944 can, then we can do a ZAPNOT to obtain the desired constant. */
1946 new_const = c;
1947 for (i = 0; i < 64; i += 8)
1948 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1949 new_const |= (HOST_WIDE_INT) 0xff << i;
1951 /* We are only called for SImode and DImode. If this is SImode, ensure that
1952 we are sign extended to a full word. */
1954 if (mode == SImode)
1955 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1957 if (new_const != c)
1959 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1960 if (temp)
1962 if (no_output)
1963 return temp;
1964 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1965 target, 0, OPTAB_WIDEN);
1968 #endif
1970 return 0;
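/* A typical two-insn result of the search above: for c == 0x1234abcd the
   decomposition gives low == -0x5433 and high == 0x1235, so the constant
   is built as

     ldah  $r, 0x1235($31)        -- 0x12350000
     lda   $r, -0x5433($r)        -- 0x12350000 - 0x5433 == 0x1234abcd

   Constants that need a third step add a shift, a complement, a zapnot or
   another lda on top of such a pair, which is what the increasing-N loops
   above search for.  */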
1973 /* Try to output insns to set TARGET equal to the constant C if it can be
1974 done in at most N insns. Do all computations in MODE. Returns the place
1975 where the output has been placed if it can be done and the insns have been
1976 emitted. If it would take more than N insns, zero is returned and no
1977 insns are emitted. */
1979 static rtx
1980 alpha_emit_set_const (rtx target, enum machine_mode mode,
1981 HOST_WIDE_INT c, int n, bool no_output)
1983 enum machine_mode orig_mode = mode;
1984 rtx orig_target = target;
1985 rtx result = 0;
1986 int i;
1988 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
1989 can't load this constant in one insn, do this in DImode. */
1990 if (!can_create_pseudo_p () && mode == SImode
1991 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
1993 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
1994 if (result)
1995 return result;
1997 target = no_output ? NULL : gen_lowpart (DImode, target);
1998 mode = DImode;
2000 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2002 target = no_output ? NULL : gen_lowpart (DImode, target);
2003 mode = DImode;
2006 /* Try 1 insn, then 2, then up to N. */
2007 for (i = 1; i <= n; i++)
2009 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2010 if (result)
2012 rtx_insn *insn;
2013 rtx set;
2015 if (no_output)
2016 return result;
2018 insn = get_last_insn ();
2019 set = single_set (insn);
2020 if (! CONSTANT_P (SET_SRC (set)))
2021 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2022 break;
2026 /* Allow for the case where we changed the mode of TARGET. */
2027 if (result)
2029 if (result == target)
2030 result = orig_target;
2031 else if (mode != orig_mode)
2032 result = gen_lowpart (orig_mode, result);
2035 return result;
2038 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2039 fall back to a straightforward decomposition. We do this to avoid
2040 exponential run times encountered when looking for longer sequences
2041 with alpha_emit_set_const. */
2043 static rtx
2044 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
2046 HOST_WIDE_INT d1, d2, d3, d4;
2048 /* Decompose the entire word */
2049 #if HOST_BITS_PER_WIDE_INT >= 64
2050 gcc_assert (c2 == -(c1 < 0));
2051 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2052 c1 -= d1;
2053 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2054 c1 = (c1 - d2) >> 32;
2055 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2056 c1 -= d3;
2057 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2058 gcc_assert (c1 == d4);
2059 #else
2060 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2061 c1 -= d1;
2062 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2063 gcc_assert (c1 == d2);
2064 c2 += (d2 < 0);
2065 d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000;
2066 c2 -= d3;
2067 d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2068 gcc_assert (c2 == d4);
2069 #endif
2071 /* Construct the high word */
2072 if (d4)
2074 emit_move_insn (target, GEN_INT (d4));
2075 if (d3)
2076 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2078 else
2079 emit_move_insn (target, GEN_INT (d3));
2081 /* Shift it into place */
2082 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2084 /* Add in the low bits. */
2085 if (d2)
2086 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2087 if (d1)
2088 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2090 return target;
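/* A hand-worked example of the decomposition above (constant picked
   only for illustration):

	c  = 0x1234567887654321
	d1 = 0x4321			low 16 bits, sign-extended
	d2 = 0xffffffff87650000		low 32 bits of (c - d1), sign-extended
	d3 = 0x5679			next 16 bits, after shifting right 32
	d4 = 0x12340000			what remains of the high half

   so c == ((d4 + d3) << 32) + d2 + d1.  D1 and D3 fit lda's signed
   16-bit immediate, while D2 and D4 have zero low halves and so fit
   ldah's shifted immediate, which is what the construction above
   relies on.  */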
2093 /* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return
2094 the low 64 bits. */
2096 static void
2097 alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1)
2099 HOST_WIDE_INT i0, i1;
2101 if (GET_CODE (x) == CONST_VECTOR)
2102 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2105 if (CONST_INT_P (x))
2107 i0 = INTVAL (x);
2108 i1 = -(i0 < 0);
2110 else if (HOST_BITS_PER_WIDE_INT >= 64)
2112 i0 = CONST_DOUBLE_LOW (x);
2113 i1 = -(i0 < 0);
2115 else
2117 i0 = CONST_DOUBLE_LOW (x);
2118 i1 = CONST_DOUBLE_HIGH (x);
2121 *p0 = i0;
2122 *p1 = i1;
2125 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2126 we are willing to load the value into a register via a move pattern.
2127 Normally this is all symbolic constants, integral constants that
2128 take three or fewer instructions, and floating-point zero. */
2130 bool
2131 alpha_legitimate_constant_p (enum machine_mode mode, rtx x)
2133 HOST_WIDE_INT i0, i1;
2135 switch (GET_CODE (x))
2137 case LABEL_REF:
2138 case HIGH:
2139 return true;
2141 case CONST:
2142 if (GET_CODE (XEXP (x, 0)) == PLUS
2143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2144 x = XEXP (XEXP (x, 0), 0);
2145 else
2146 return true;
2148 if (GET_CODE (x) != SYMBOL_REF)
2149 return true;
2151 /* FALLTHRU */
2153 case SYMBOL_REF:
2154 /* TLS symbols are never valid. */
2155 return SYMBOL_REF_TLS_MODEL (x) == 0;
2157 case CONST_DOUBLE:
2158 if (x == CONST0_RTX (mode))
2159 return true;
2160 if (FLOAT_MODE_P (mode))
2161 return false;
2162 goto do_integer;
2164 case CONST_VECTOR:
2165 if (x == CONST0_RTX (mode))
2166 return true;
2167 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2168 return false;
2169 if (GET_MODE_SIZE (mode) != 8)
2170 return false;
2171 goto do_integer;
2173 case CONST_INT:
2174 do_integer:
2175 if (TARGET_BUILD_CONSTANTS)
2176 return true;
2177 alpha_extract_integer (x, &i0, &i1);
2178 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0))
2179 return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL;
2180 return false;
2182 default:
2183 return false;
2187 /* Operand 1 is known to be a constant, and should require more than one
2188 instruction to load. Emit that multi-part load. */
2190 bool
2191 alpha_split_const_mov (enum machine_mode mode, rtx *operands)
2193 HOST_WIDE_INT i0, i1;
2194 rtx temp = NULL_RTX;
2196 alpha_extract_integer (operands[1], &i0, &i1);
2198 if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0))
2199 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2201 if (!temp && TARGET_BUILD_CONSTANTS)
2202 temp = alpha_emit_set_long_const (operands[0], i0, i1);
2204 if (temp)
2206 if (!rtx_equal_p (operands[0], temp))
2207 emit_move_insn (operands[0], temp);
2208 return true;
2211 return false;
2214 /* Expand a move instruction; return true if all work is done.
2215 We don't handle non-bwx subword loads here. */
2217 bool
2218 alpha_expand_mov (enum machine_mode mode, rtx *operands)
2220 rtx tmp;
2222 /* If the output is not a register, the input must be. */
2223 if (MEM_P (operands[0])
2224 && ! reg_or_0_operand (operands[1], mode))
2225 operands[1] = force_reg (mode, operands[1]);
2227 /* Allow legitimize_address to perform some simplifications. */
2228 if (mode == Pmode && symbolic_operand (operands[1], mode))
2230 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2231 if (tmp)
2233 if (tmp == operands[0])
2234 return true;
2235 operands[1] = tmp;
2236 return false;
2240 /* Early out for non-constants and valid constants. */
2241 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2242 return false;
2244 /* Split large integers. */
2245 if (CONST_INT_P (operands[1])
2246 || GET_CODE (operands[1]) == CONST_DOUBLE
2247 || GET_CODE (operands[1]) == CONST_VECTOR)
2249 if (alpha_split_const_mov (mode, operands))
2250 return true;
2253 /* Otherwise we've nothing left but to drop the thing to memory. */
2254 tmp = force_const_mem (mode, operands[1]);
2256 if (tmp == NULL_RTX)
2257 return false;
2259 if (reload_in_progress)
2261 emit_move_insn (operands[0], XEXP (tmp, 0));
2262 operands[1] = replace_equiv_address (tmp, operands[0]);
2264 else
2265 operands[1] = validize_mem (tmp);
2266 return false;
2269 /* Expand a non-bwx QImode or HImode move instruction;
2270 return true if all work is done. */
2272 bool
2273 alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands)
2275 rtx seq;
2277 /* If the output is not a register, the input must be. */
2278 if (MEM_P (operands[0]))
2279 operands[1] = force_reg (mode, operands[1]);
2281 /* Handle four memory cases, unaligned and aligned for either the input
2282 or the output. The only case where we can be called during reload is
2283 for aligned loads; all other cases require temporaries. */
2285 if (any_memory_operand (operands[1], mode))
2287 if (aligned_memory_operand (operands[1], mode))
2289 if (reload_in_progress)
2291 if (mode == QImode)
2292 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2293 else
2294 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2295 emit_insn (seq);
2297 else
2299 rtx aligned_mem, bitnum;
2300 rtx scratch = gen_reg_rtx (SImode);
2301 rtx subtarget;
2302 bool copyout;
2304 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2306 subtarget = operands[0];
2307 if (REG_P (subtarget))
2308 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2309 else
2310 subtarget = gen_reg_rtx (DImode), copyout = true;
2312 if (mode == QImode)
2313 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2314 bitnum, scratch);
2315 else
2316 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2317 bitnum, scratch);
2318 emit_insn (seq);
2320 if (copyout)
2321 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2324 else
2326 /* Don't pass these as parameters since that makes the generated
2327 code depend on parameter evaluation order which will cause
2328 bootstrap failures. */
2330 rtx temp1, temp2, subtarget, ua;
2331 bool copyout;
2333 temp1 = gen_reg_rtx (DImode);
2334 temp2 = gen_reg_rtx (DImode);
2336 subtarget = operands[0];
2337 if (REG_P (subtarget))
2338 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2339 else
2340 subtarget = gen_reg_rtx (DImode), copyout = true;
2342 ua = get_unaligned_address (operands[1]);
2343 if (mode == QImode)
2344 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2345 else
2346 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2348 alpha_set_memflags (seq, operands[1]);
2349 emit_insn (seq);
2351 if (copyout)
2352 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2354 return true;
2357 if (any_memory_operand (operands[0], mode))
2359 if (aligned_memory_operand (operands[0], mode))
2361 rtx aligned_mem, bitnum;
2362 rtx temp1 = gen_reg_rtx (SImode);
2363 rtx temp2 = gen_reg_rtx (SImode);
2365 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2367 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2368 temp1, temp2));
2370 else
2372 rtx temp1 = gen_reg_rtx (DImode);
2373 rtx temp2 = gen_reg_rtx (DImode);
2374 rtx temp3 = gen_reg_rtx (DImode);
2375 rtx ua = get_unaligned_address (operands[0]);
2377 if (mode == QImode)
2378 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2379 else
2380 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2382 alpha_set_memflags (seq, operands[0]);
2383 emit_insn (seq);
2385 return true;
2388 return false;
2391 /* Implement the movmisalign patterns. One of the operands is a memory
2392 that is not naturally aligned. Emit instructions to load it. */
2394 void
2395 alpha_expand_movmisalign (enum machine_mode mode, rtx *operands)
2397 /* Honor misaligned loads; these are the cases we promised to handle. */
2398 if (MEM_P (operands[1]))
2400 rtx tmp;
2402 if (register_operand (operands[0], mode))
2403 tmp = operands[0];
2404 else
2405 tmp = gen_reg_rtx (mode);
2407 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2408 if (tmp != operands[0])
2409 emit_move_insn (operands[0], tmp);
2411 else if (MEM_P (operands[0]))
2413 if (!reg_or_0_operand (operands[1], mode))
2414 operands[1] = force_reg (mode, operands[1]);
2415 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2417 else
2418 gcc_unreachable ();
2421 /* Generate an unsigned DImode to FP conversion. This is the same code
2422 optabs would emit if we didn't have TFmode patterns.
2424 For SFmode, this is the only construction I've found that can pass
2425 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2426 intermediates will work, because you'll get intermediate rounding
2427 that ruins the end result. Some of this could be fixed by turning
2428 on round-to-positive-infinity, but that requires diddling the fpsr,
2429 which kills performance. I tried turning this around and converting
2430 to a negative number, so that I could turn on /m, but either I did
2431 it wrong or there's something else, because I wound up with the exact
2432 same single-bit error. There is a branch-less form of this same code:
2434 srl $16,1,$1
2435 and $16,1,$2
2436 cmplt $16,0,$3
2437 or $1,$2,$2
2438 cmovge $16,$16,$2
2439 itoft $3,$f10
2440 itoft $2,$f11
2441 cvtqs $f11,$f11
2442 adds $f11,$f11,$f0
2443 fcmoveq $f10,$f11,$f0
2445 I'm not using it because it's the same number of instructions as
2446 this branch-full form, and it has more serialized long latency
2447 instructions on the critical path.
2449 For DFmode, we can avoid rounding errors by breaking up the word
2450 into two pieces, converting them separately, and adding them back:
2452 LC0: .long 0,0x5f800000
2454 itoft $16,$f11
2455 lda $2,LC0
2456 cmplt $16,0,$1
2457 cpyse $f11,$f31,$f10
2458 cpyse $f31,$f11,$f11
2459 s4addq $1,$2,$1
2460 lds $f12,0($1)
2461 cvtqt $f10,$f10
2462 cvtqt $f11,$f11
2463 addt $f12,$f10,$f0
2464 addt $f0,$f11,$f0
2466 This doesn't seem to be a clear-cut win over the optabs form.
2467 It probably all depends on the distribution of numbers being
2468 converted -- in the optabs form, all but high-bit-set has a
2469 much lower minimum execution time. */
2471 void
2472 alpha_emit_floatuns (rtx operands[2])
2474 rtx neglab, donelab, i0, i1, f0, in, out;
2475 enum machine_mode mode;
2477 out = operands[0];
2478 in = force_reg (DImode, operands[1]);
2479 mode = GET_MODE (out);
2480 neglab = gen_label_rtx ();
2481 donelab = gen_label_rtx ();
2482 i0 = gen_reg_rtx (DImode);
2483 i1 = gen_reg_rtx (DImode);
2484 f0 = gen_reg_rtx (mode);
2486 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2488 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
2489 emit_jump_insn (gen_jump (donelab));
2490 emit_barrier ();
2492 emit_label (neglab);
2494 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2495 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2496 emit_insn (gen_iordi3 (i0, i0, i1));
2497 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
2498 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
2500 emit_label (donelab);
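/* In plain C, the sequence emitted above for the DFmode case is roughly
   equivalent to the following sketch (illustrative only; the real code
   works on RTL, and SFmode has the same shape):

	double floatuns (unsigned long x)
	{
	  if ((long) x >= 0)
	    return (double) (long) x;

	  unsigned long half = (x >> 1) | (x & 1);
	  double d = (double) (long) half;
	  return d + d;
	}

   Folding the discarded low bit back in ("round to odd") preserves
   enough information that the final addition rounds to the result a
   direct unsigned conversion would have produced.  */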
2503 /* Generate the comparison for a conditional branch. */
2505 void
2506 alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode)
2508 enum rtx_code cmp_code, branch_code;
2509 enum machine_mode branch_mode = VOIDmode;
2510 enum rtx_code code = GET_CODE (operands[0]);
2511 rtx op0 = operands[1], op1 = operands[2];
2512 rtx tem;
2514 if (cmp_mode == TFmode)
2516 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2517 op1 = const0_rtx;
2518 cmp_mode = DImode;
2521 /* The general case: fold the comparison code to the types of compares
2522 that we have, choosing the branch as necessary. */
2523 switch (code)
2525 case EQ: case LE: case LT: case LEU: case LTU:
2526 case UNORDERED:
2527 /* We have these compares. */
2528 cmp_code = code, branch_code = NE;
2529 break;
2531 case NE:
2532 case ORDERED:
2533 /* These must be reversed. */
2534 cmp_code = reverse_condition (code), branch_code = EQ;
2535 break;
2537 case GE: case GT: case GEU: case GTU:
2538 /* For FP, we swap them, for INT, we reverse them. */
2539 if (cmp_mode == DFmode)
2541 cmp_code = swap_condition (code);
2542 branch_code = NE;
2543 tem = op0, op0 = op1, op1 = tem;
2545 else
2547 cmp_code = reverse_condition (code);
2548 branch_code = EQ;
2550 break;
2552 default:
2553 gcc_unreachable ();
2556 if (cmp_mode == DFmode)
2558 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2560 /* When we are not as concerned about non-finite values, and we
2561 are comparing against zero, we can branch directly. */
2562 if (op1 == CONST0_RTX (DFmode))
2563 cmp_code = UNKNOWN, branch_code = code;
2564 else if (op0 == CONST0_RTX (DFmode))
2566 /* Undo the swap we probably did just above. */
2567 tem = op0, op0 = op1, op1 = tem;
2568 branch_code = swap_condition (cmp_code);
2569 cmp_code = UNKNOWN;
2572 else
2574 /* ??? We mark the branch mode to be CCmode to prevent the
2575 compare and branch from being combined, since the compare
2576 insn follows IEEE rules that the branch does not. */
2577 branch_mode = CCmode;
2580 else
2582 /* The following optimizations are only for signed compares. */
2583 if (code != LEU && code != LTU && code != GEU && code != GTU)
2585 /* Whee. Compare and branch against 0 directly. */
2586 if (op1 == const0_rtx)
2587 cmp_code = UNKNOWN, branch_code = code;
2589 /* If the constant doesn't fit into an immediate, but can
2590 be generated by lda/ldah, we adjust the argument and
2591 compare against zero, so we can use beq/bne directly. */
2592 /* ??? Don't do this when comparing against symbols, otherwise
2593 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2594 be declared false out of hand (at least for non-weak). */
2595 else if (CONST_INT_P (op1)
2596 && (code == EQ || code == NE)
2597 && !(symbolic_operand (op0, VOIDmode)
2598 || (REG_P (op0) && REG_POINTER (op0))))
2600 rtx n_op1 = GEN_INT (-INTVAL (op1));
2602 if (! satisfies_constraint_I (op1)
2603 && (satisfies_constraint_K (n_op1)
2604 || satisfies_constraint_L (n_op1)))
2605 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2609 if (!reg_or_0_operand (op0, DImode))
2610 op0 = force_reg (DImode, op0);
2611 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2612 op1 = force_reg (DImode, op1);
2615 /* Emit an initial compare instruction, if necessary. */
2616 tem = op0;
2617 if (cmp_code != UNKNOWN)
2619 tem = gen_reg_rtx (cmp_mode);
2620 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2623 /* Emit the branch instruction. */
2624 tem = gen_rtx_SET (VOIDmode, pc_rtx,
2625 gen_rtx_IF_THEN_ELSE (VOIDmode,
2626 gen_rtx_fmt_ee (branch_code,
2627 branch_mode, tem,
2628 CONST0_RTX (cmp_mode)),
2629 gen_rtx_LABEL_REF (VOIDmode,
2630 operands[3]),
2631 pc_rtx));
2632 emit_jump_insn (tem);
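/* For example (illustrative asm, register numbers made up), a DImode
   "a > b" branch has no direct compare, so the test is reversed:

	cmple	$1,$2,$3	compute a <= b
	beq	$3,L		branch when that fails, i.e. a > b

   while a DFmode "a > b" branch swaps the operands instead:

	cmptlt	$f2,$f1,$f3	compute b < a
	fbne	$f3,L  */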
2635 /* Certain simplifications can be done to make invalid setcc operations
2636 valid. Return true if the setcc was emitted, false if we can't handle the comparison. */
2638 bool
2639 alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode)
2641 enum rtx_code cmp_code;
2642 enum rtx_code code = GET_CODE (operands[1]);
2643 rtx op0 = operands[2], op1 = operands[3];
2644 rtx tmp;
2646 if (cmp_mode == TFmode)
2648 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2649 op1 = const0_rtx;
2650 cmp_mode = DImode;
2653 if (cmp_mode == DFmode && !TARGET_FIX)
2654 return 0;
2656 /* The general case: fold the comparison code to the types of compares
2657 that we have, choosing the branch as necessary. */
2659 cmp_code = UNKNOWN;
2660 switch (code)
2662 case EQ: case LE: case LT: case LEU: case LTU:
2663 case UNORDERED:
2664 /* We have these compares. */
2665 if (cmp_mode == DFmode)
2666 cmp_code = code, code = NE;
2667 break;
2669 case NE:
2670 if (cmp_mode == DImode && op1 == const0_rtx)
2671 break;
2672 /* FALLTHRU */
2674 case ORDERED:
2675 cmp_code = reverse_condition (code);
2676 code = EQ;
2677 break;
2679 case GE: case GT: case GEU: case GTU:
2680 /* These normally need swapping, but for integer zero we have
2681 special patterns that recognize swapped operands. */
2682 if (cmp_mode == DImode && op1 == const0_rtx)
2683 break;
2684 code = swap_condition (code);
2685 if (cmp_mode == DFmode)
2686 cmp_code = code, code = NE;
2687 tmp = op0, op0 = op1, op1 = tmp;
2688 break;
2690 default:
2691 gcc_unreachable ();
2694 if (cmp_mode == DImode)
2696 if (!register_operand (op0, DImode))
2697 op0 = force_reg (DImode, op0);
2698 if (!reg_or_8bit_operand (op1, DImode))
2699 op1 = force_reg (DImode, op1);
2702 /* Emit an initial compare instruction, if necessary. */
2703 if (cmp_code != UNKNOWN)
2705 tmp = gen_reg_rtx (cmp_mode);
2706 emit_insn (gen_rtx_SET (VOIDmode, tmp,
2707 gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)));
2709 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2710 op1 = const0_rtx;
2713 /* Emit the setcc instruction. */
2714 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2715 gen_rtx_fmt_ee (code, DImode, op0, op1)));
2716 return true;
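/* For example (illustrative), "x = (a != b)" with a nonzero B has no
   direct pattern, so the comparison is reversed and then tested
   against zero:

	cmpeq	$1,$2,$3	t = (a == b)
	cmpeq	$3,0,$4		x = (t == 0)

   whereas "x = (a <= b)" maps directly onto a single cmple.  */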
2720 /* Rewrite a comparison against zero CMP of the form
2721 (CODE (cc0) (const_int 0)) so it can be written validly in
2722 a conditional move (if_then_else CMP ...).
2723 If both of the operands that set cc0 are nonzero we must emit
2724 an insn to perform the compare (it can't be done within
2725 the conditional move). */
2727 rtx
2728 alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
2730 enum rtx_code code = GET_CODE (cmp);
2731 enum rtx_code cmov_code = NE;
2732 rtx op0 = XEXP (cmp, 0);
2733 rtx op1 = XEXP (cmp, 1);
2734 enum machine_mode cmp_mode
2735 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2736 enum machine_mode cmov_mode = VOIDmode;
2737 int local_fast_math = flag_unsafe_math_optimizations;
2738 rtx tem;
2740 if (cmp_mode == TFmode)
2742 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2743 op1 = const0_rtx;
2744 cmp_mode = DImode;
2747 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2749 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2751 enum rtx_code cmp_code;
2753 if (! TARGET_FIX)
2754 return 0;
2756 /* If we have fp<->int register move instructions, do a cmov by
2757 performing the comparison in fp registers, and move the
2758 zero/nonzero value to integer registers, where we can then
2759 use a normal cmov, or vice-versa. */
2761 switch (code)
2763 case EQ: case LE: case LT: case LEU: case LTU:
2764 case UNORDERED:
2765 /* We have these compares. */
2766 cmp_code = code, code = NE;
2767 break;
2769 case NE:
2770 case ORDERED:
2771 /* These must be reversed. */
2772 cmp_code = reverse_condition (code), code = EQ;
2773 break;
2775 case GE: case GT: case GEU: case GTU:
2776 /* These normally need swapping, but for integer zero we have
2777 special patterns that recognize swapped operands. */
2778 if (cmp_mode == DImode && op1 == const0_rtx)
2779 cmp_code = code, code = NE;
2780 else
2782 cmp_code = swap_condition (code);
2783 code = NE;
2784 tem = op0, op0 = op1, op1 = tem;
2786 break;
2788 default:
2789 gcc_unreachable ();
2792 if (cmp_mode == DImode)
2794 if (!reg_or_0_operand (op0, DImode))
2795 op0 = force_reg (DImode, op0);
2796 if (!reg_or_8bit_operand (op1, DImode))
2797 op1 = force_reg (DImode, op1);
2800 tem = gen_reg_rtx (cmp_mode);
2801 emit_insn (gen_rtx_SET (VOIDmode, tem,
2802 gen_rtx_fmt_ee (cmp_code, cmp_mode,
2803 op0, op1)));
2805 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2806 op0 = gen_lowpart (cmp_mode, tem);
2807 op1 = CONST0_RTX (cmp_mode);
2808 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2809 local_fast_math = 1;
2812 if (cmp_mode == DImode)
2814 if (!reg_or_0_operand (op0, DImode))
2815 op0 = force_reg (DImode, op0);
2816 if (!reg_or_8bit_operand (op1, DImode))
2817 op1 = force_reg (DImode, op1);
2820 /* We may be able to use a conditional move directly.
2821 This avoids emitting spurious compares. */
2822 if (signed_comparison_operator (cmp, VOIDmode)
2823 && (cmp_mode == DImode || local_fast_math)
2824 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2825 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2827 /* We can't put the comparison inside the conditional move;
2828 emit a compare instruction and put that inside the
2829 conditional move. Make sure we emit only comparisons we have;
2830 swap or reverse as necessary. */
2832 if (!can_create_pseudo_p ())
2833 return NULL_RTX;
2835 switch (code)
2837 case EQ: case LE: case LT: case LEU: case LTU:
2838 case UNORDERED:
2839 /* We have these compares: */
2840 break;
2842 case NE:
2843 case ORDERED:
2844 /* These must be reversed. */
2845 code = reverse_condition (code);
2846 cmov_code = EQ;
2847 break;
2849 case GE: case GT: case GEU: case GTU:
2850 /* These normally need swapping, but for integer zero we have
2851 special patterns that recognize swapped operands. */
2852 if (cmp_mode == DImode && op1 == const0_rtx)
2853 break;
2854 code = swap_condition (code);
2855 tem = op0, op0 = op1, op1 = tem;
2856 break;
2858 default:
2859 gcc_unreachable ();
2862 if (cmp_mode == DImode)
2864 if (!reg_or_0_operand (op0, DImode))
2865 op0 = force_reg (DImode, op0);
2866 if (!reg_or_8bit_operand (op1, DImode))
2867 op1 = force_reg (DImode, op1);
2870 /* ??? We mark the branch mode to be CCmode to prevent the compare
2871 and cmov from being combined, since the compare insn follows IEEE
2872 rules that the cmov does not. */
2873 if (cmp_mode == DFmode && !local_fast_math)
2874 cmov_mode = CCmode;
2876 tem = gen_reg_rtx (cmp_mode);
2877 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2878 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
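/* As an illustration of the TARGET_FIX path above, selecting between
   two DImode values on a DFmode comparison might come out as (sketch
   only, register numbers made up):

	cmptlt	$f1,$f2,$f10	FP compare, result 2.0 or 0.0
	ftoit	$f10,$3		move the bit pattern to an integer reg
	cmovne	$3,$4,$5	then a normal integer cmov

   so the comparison itself stays in the floating-point domain and only
   the zero/nonzero result crosses over.  */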
2881 /* Simplify a conditional move of two constants into a setcc with
2882 arithmetic. This is done with a splitter since combine would
2883 just undo the work if done during code generation. It also catches
2884 cases we wouldn't have before cse. */
2886 int
2887 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2888 rtx t_rtx, rtx f_rtx)
2890 HOST_WIDE_INT t, f, diff;
2891 enum machine_mode mode;
2892 rtx target, subtarget, tmp;
2894 mode = GET_MODE (dest);
2895 t = INTVAL (t_rtx);
2896 f = INTVAL (f_rtx);
2897 diff = t - f;
2899 if (((code == NE || code == EQ) && diff < 0)
2900 || (code == GE || code == GT))
2902 code = reverse_condition (code);
2903 diff = t, t = f, f = diff;
2904 diff = t - f;
2907 subtarget = target = dest;
2908 if (mode != DImode)
2910 target = gen_lowpart (DImode, dest);
2911 if (can_create_pseudo_p ())
2912 subtarget = gen_reg_rtx (DImode);
2913 else
2914 subtarget = target;
2916 /* Below, we must be careful to use copy_rtx on target and subtarget
2917 in intermediate insns, as they may be a subreg rtx, which may not
2918 be shared. */
2920 if (f == 0 && exact_log2 (diff) > 0
2921 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2922 viable over a longer latency cmove. On EV5, the E0 slot is a
2923 scarce resource, and on EV4 shift has the same latency as a cmove. */
2924 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2926 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2927 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2929 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2930 GEN_INT (exact_log2 (t)));
2931 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2933 else if (f == 0 && t == -1)
2935 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2936 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2938 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2940 else if (diff == 1 || diff == 4 || diff == 8)
2942 rtx add_op;
2944 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2945 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp));
2947 if (diff == 1)
2948 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2949 else
2951 add_op = GEN_INT (f);
2952 if (sext_add_operand (add_op, mode))
2954 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2955 GEN_INT (diff));
2956 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2957 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
2959 else
2960 return 0;
2963 else
2964 return 0;
2966 return 1;
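/* Illustrative examples of the transformations above (hand-picked
   constants, shown only to make the cases concrete):

	x = (a < b) ?  8 :  0	->  t = (a < b);  x = t << 3;
	x = (a < b) ? -1 :  0	->  t = (a < b);  x = -t;
	x = (a < b) ? 21 : 17	->  t = (a < b);  x = t * 4 + 17;   (s4addq)

   i.e. the conditional move disappears entirely in favour of a setcc
   plus one arithmetic instruction.  */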
2969 /* Look up the X_floating library function name for the
2970 given operation. */
2972 struct GTY(()) xfloating_op
2974 const enum rtx_code code;
2975 const char *const GTY((skip)) osf_func;
2976 const char *const GTY((skip)) vms_func;
2977 rtx libcall;
2980 static GTY(()) struct xfloating_op xfloating_ops[] =
2982 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2983 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2984 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2985 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2986 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2987 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2988 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2989 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2990 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2991 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2992 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2993 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2994 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2995 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2996 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2999 static GTY(()) struct xfloating_op vax_cvt_ops[] =
3001 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3002 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3005 static rtx
3006 alpha_lookup_xfloating_lib_func (enum rtx_code code)
3008 struct xfloating_op *ops = xfloating_ops;
3009 long n = ARRAY_SIZE (xfloating_ops);
3010 long i;
3012 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3014 /* How irritating. Nothing to key off for the main table. */
3015 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3017 ops = vax_cvt_ops;
3018 n = ARRAY_SIZE (vax_cvt_ops);
3021 for (i = 0; i < n; ++i, ++ops)
3022 if (ops->code == code)
3024 rtx func = ops->libcall;
3025 if (!func)
3027 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3028 ? ops->vms_func : ops->osf_func);
3029 ops->libcall = func;
3031 return func;
3034 gcc_unreachable ();
3037 /* Most X_floating operations take the rounding mode as an argument.
3038 Compute that here. */
3040 static int
3041 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3042 enum alpha_fp_rounding_mode round)
3044 int mode;
3046 switch (round)
3048 case ALPHA_FPRM_NORM:
3049 mode = 2;
3050 break;
3051 case ALPHA_FPRM_MINF:
3052 mode = 1;
3053 break;
3054 case ALPHA_FPRM_CHOP:
3055 mode = 0;
3056 break;
3057 case ALPHA_FPRM_DYN:
3058 mode = 4;
3059 break;
3060 default:
3061 gcc_unreachable ();
3063 /* XXX For reference, round to +inf is mode = 3. */
3066 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3067 mode |= 0x10000;
3069 return mode;
3072 /* Emit an X_floating library function call.
3074 Note that these functions do not follow normal calling conventions:
3075 TFmode arguments are passed in two integer registers (as opposed to
3076 indirect); TFmode return values appear in R16+R17.
3078 FUNC is the function to call.
3079 TARGET is where the output belongs.
3080 OPERANDS are the inputs.
3081 NOPERANDS is the count of inputs.
3082 EQUIV is the expression equivalent for the function.
3085 static void
3086 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3087 int noperands, rtx equiv)
3089 rtx usage = NULL_RTX, tmp, reg;
3090 int regno = 16, i;
3092 start_sequence ();
3094 for (i = 0; i < noperands; ++i)
3096 switch (GET_MODE (operands[i]))
3098 case TFmode:
3099 reg = gen_rtx_REG (TFmode, regno);
3100 regno += 2;
3101 break;
3103 case DFmode:
3104 reg = gen_rtx_REG (DFmode, regno + 32);
3105 regno += 1;
3106 break;
3108 case VOIDmode:
3109 gcc_assert (CONST_INT_P (operands[i]));
3110 /* FALLTHRU */
3111 case DImode:
3112 reg = gen_rtx_REG (DImode, regno);
3113 regno += 1;
3114 break;
3116 default:
3117 gcc_unreachable ();
3120 emit_move_insn (reg, operands[i]);
3121 use_reg (&usage, reg);
3124 switch (GET_MODE (target))
3126 case TFmode:
3127 reg = gen_rtx_REG (TFmode, 16);
3128 break;
3129 case DFmode:
3130 reg = gen_rtx_REG (DFmode, 32);
3131 break;
3132 case DImode:
3133 reg = gen_rtx_REG (DImode, 0);
3134 break;
3135 default:
3136 gcc_unreachable ();
3139 tmp = gen_rtx_MEM (QImode, func);
3140 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3141 const0_rtx, const0_rtx));
3142 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3143 RTL_CONST_CALL_P (tmp) = 1;
3145 tmp = get_insns ();
3146 end_sequence ();
3148 emit_libcall_block (tmp, target, reg, equiv);
3151 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3153 void
3154 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3156 rtx func;
3157 int mode;
3158 rtx out_operands[3];
3160 func = alpha_lookup_xfloating_lib_func (code);
3161 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3163 out_operands[0] = operands[1];
3164 out_operands[1] = operands[2];
3165 out_operands[2] = GEN_INT (mode);
3166 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3167 gen_rtx_fmt_ee (code, TFmode, operands[1],
3168 operands[2]));
3171 /* Emit an X_floating library function call for a comparison. */
3173 static rtx
3174 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3176 enum rtx_code cmp_code, res_code;
3177 rtx func, out, operands[2], note;
3179 /* X_floating library comparison functions return
3180 -1 unordered
3181 0 false
3182 1 true
3183 Convert the compare against the raw return value. */
3185 cmp_code = *pcode;
3186 switch (cmp_code)
3188 case UNORDERED:
3189 cmp_code = EQ;
3190 res_code = LT;
3191 break;
3192 case ORDERED:
3193 cmp_code = EQ;
3194 res_code = GE;
3195 break;
3196 case NE:
3197 res_code = NE;
3198 break;
3199 case EQ:
3200 case LT:
3201 case GT:
3202 case LE:
3203 case GE:
3204 res_code = GT;
3205 break;
3206 default:
3207 gcc_unreachable ();
3209 *pcode = res_code;
3211 func = alpha_lookup_xfloating_lib_func (cmp_code);
3213 operands[0] = op0;
3214 operands[1] = op1;
3215 out = gen_reg_rtx (DImode);
3217 /* What's actually returned is -1,0,1, not a proper boolean value. */
3218 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3219 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3220 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3222 return out;
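/* For instance (reading the table above):

	UNORDERED (a, b)   ->	_OtsEqlX (a, b) <  0
	ORDERED (a, b)	   ->	_OtsEqlX (a, b) >= 0
	LT (a, b)	   ->	_OtsLssX (a, b) >  0
	NE (a, b)	   ->	_OtsNeqX (a, b) != 0

   only the unordered case ever yields -1, which is why the first two
   can key off the sign of the raw result.  */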
3225 /* Emit an X_floating library function call for a conversion. */
3227 void
3228 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3230 int noperands = 1, mode;
3231 rtx out_operands[2];
3232 rtx func;
3233 enum rtx_code code = orig_code;
3235 if (code == UNSIGNED_FIX)
3236 code = FIX;
3238 func = alpha_lookup_xfloating_lib_func (code);
3240 out_operands[0] = operands[1];
3242 switch (code)
3244 case FIX:
3245 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3246 out_operands[1] = GEN_INT (mode);
3247 noperands = 2;
3248 break;
3249 case FLOAT_TRUNCATE:
3250 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3251 out_operands[1] = GEN_INT (mode);
3252 noperands = 2;
3253 break;
3254 default:
3255 break;
3258 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3259 gen_rtx_fmt_e (orig_code,
3260 GET_MODE (operands[0]),
3261 operands[1]));
3264 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3265 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3266 guarantee that the sequence
3267 set (OP[0] OP[2])
3268 set (OP[1] OP[3])
3269 is valid. Naturally, output operand ordering is little-endian.
3270 This is used by *movtf_internal and *movti_internal. */
3272 void
3273 alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode,
3274 bool fixup_overlap)
3276 switch (GET_CODE (operands[1]))
3278 case REG:
3279 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3280 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3281 break;
3283 case MEM:
3284 operands[3] = adjust_address (operands[1], DImode, 8);
3285 operands[2] = adjust_address (operands[1], DImode, 0);
3286 break;
3288 case CONST_INT:
3289 case CONST_DOUBLE:
3290 gcc_assert (operands[1] == CONST0_RTX (mode));
3291 operands[2] = operands[3] = const0_rtx;
3292 break;
3294 default:
3295 gcc_unreachable ();
3298 switch (GET_CODE (operands[0]))
3300 case REG:
3301 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3302 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3303 break;
3305 case MEM:
3306 operands[1] = adjust_address (operands[0], DImode, 8);
3307 operands[0] = adjust_address (operands[0], DImode, 0);
3308 break;
3310 default:
3311 gcc_unreachable ();
3314 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3316 rtx tmp;
3317 tmp = operands[0], operands[0] = operands[1], operands[1] = tmp;
3318 tmp = operands[2], operands[2] = operands[3], operands[3] = tmp;
3322 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3323 op2 is a register containing the sign bit, operation is the
3324 logical operation to be performed. */
3326 void
3327 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3329 rtx high_bit = operands[2];
3330 rtx scratch;
3331 int move;
3333 alpha_split_tmode_pair (operands, TFmode, false);
3335 /* Detect three flavors of operand overlap. */
3336 move = 1;
3337 if (rtx_equal_p (operands[0], operands[2]))
3338 move = 0;
3339 else if (rtx_equal_p (operands[1], operands[2]))
3341 if (rtx_equal_p (operands[0], high_bit))
3342 move = 2;
3343 else
3344 move = -1;
3347 if (move < 0)
3348 emit_move_insn (operands[0], operands[2]);
3350 /* ??? If the destination overlaps both source tf and high_bit, then
3351 assume source tf is dead in its entirety and use the other half
3352 for a scratch register. Otherwise "scratch" is just the proper
3353 destination register. */
3354 scratch = operands[move < 2 ? 1 : 3];
3356 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3358 if (move > 0)
3360 emit_move_insn (operands[0], operands[2]);
3361 if (move > 1)
3362 emit_move_insn (operands[1], scratch);
3366 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3367 unaligned data:
3369 unsigned: signed:
3370 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3371 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3372 lda r3,X(r11) lda r3,X+2(r11)
3373 extwl r1,r3,r1 extql r1,r3,r1
3374 extwh r2,r3,r2 extqh r2,r3,r2
3375 or r1,r2,r1 or r1,r2,r1
3376 sra r1,48,r1
3378 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3379 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3380 lda r3,X(r11) lda r3,X(r11)
3381 extll r1,r3,r1 extll r1,r3,r1
3382 extlh r2,r3,r2 extlh r2,r3,r2
3383 or r1,r2,r1 addl r1,r2,r1
3385 quad: ldq_u r1,X(r11)
3386 ldq_u r2,X+7(r11)
3387 lda r3,X(r11)
3388 extql r1,r3,r1
3389 extqh r2,r3,r2
3390 or r1,r2,r1
3393 void
3394 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3395 HOST_WIDE_INT ofs, int sign)
3397 rtx meml, memh, addr, extl, exth, tmp, mema;
3398 enum machine_mode mode;
3400 if (TARGET_BWX && size == 2)
3402 meml = adjust_address (mem, QImode, ofs);
3403 memh = adjust_address (mem, QImode, ofs+1);
3404 extl = gen_reg_rtx (DImode);
3405 exth = gen_reg_rtx (DImode);
3406 emit_insn (gen_zero_extendqidi2 (extl, meml));
3407 emit_insn (gen_zero_extendqidi2 (exth, memh));
3408 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3409 NULL, 1, OPTAB_LIB_WIDEN);
3410 addr = expand_simple_binop (DImode, IOR, extl, exth,
3411 NULL, 1, OPTAB_LIB_WIDEN);
3413 if (sign && GET_MODE (tgt) != HImode)
3415 addr = gen_lowpart (HImode, addr);
3416 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3418 else
3420 if (GET_MODE (tgt) != DImode)
3421 addr = gen_lowpart (GET_MODE (tgt), addr);
3422 emit_move_insn (tgt, addr);
3424 return;
3427 meml = gen_reg_rtx (DImode);
3428 memh = gen_reg_rtx (DImode);
3429 addr = gen_reg_rtx (DImode);
3430 extl = gen_reg_rtx (DImode);
3431 exth = gen_reg_rtx (DImode);
3433 mema = XEXP (mem, 0);
3434 if (GET_CODE (mema) == LO_SUM)
3435 mema = force_reg (Pmode, mema);
3437 /* AND addresses cannot be in any alias set, since they may implicitly
3438 alias surrounding code. Ideally we'd have some alias set that
3439 covered all types except those with alignment 8 or higher. */
3441 tmp = change_address (mem, DImode,
3442 gen_rtx_AND (DImode,
3443 plus_constant (DImode, mema, ofs),
3444 GEN_INT (-8)));
3445 set_mem_alias_set (tmp, 0);
3446 emit_move_insn (meml, tmp);
3448 tmp = change_address (mem, DImode,
3449 gen_rtx_AND (DImode,
3450 plus_constant (DImode, mema,
3451 ofs + size - 1),
3452 GEN_INT (-8)));
3453 set_mem_alias_set (tmp, 0);
3454 emit_move_insn (memh, tmp);
3456 if (sign && size == 2)
3458 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3460 emit_insn (gen_extql (extl, meml, addr));
3461 emit_insn (gen_extqh (exth, memh, addr));
3463 /* We must use tgt here for the target. The alpha-vms port fails if we use
3464 addr for the target, because addr is marked as a pointer and combine
3465 knows that pointers are always sign-extended 32-bit values. */
3466 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3467 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3468 addr, 1, OPTAB_WIDEN);
3470 else
3472 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3473 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3474 switch ((int) size)
3476 case 2:
3477 emit_insn (gen_extwh (exth, memh, addr));
3478 mode = HImode;
3479 break;
3480 case 4:
3481 emit_insn (gen_extlh (exth, memh, addr));
3482 mode = SImode;
3483 break;
3484 case 8:
3485 emit_insn (gen_extqh (exth, memh, addr));
3486 mode = DImode;
3487 break;
3488 default:
3489 gcc_unreachable ();
3492 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3493 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3494 sign, OPTAB_WIDEN);
3497 if (addr != tgt)
3498 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3501 /* Similarly, use ins and msk instructions to perform unaligned stores. */
3503 void
3504 alpha_expand_unaligned_store (rtx dst, rtx src,
3505 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3507 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3509 if (TARGET_BWX && size == 2)
3511 if (src != const0_rtx)
3513 dstl = gen_lowpart (QImode, src);
3514 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3515 NULL, 1, OPTAB_LIB_WIDEN);
3516 dsth = gen_lowpart (QImode, dsth);
3518 else
3519 dstl = dsth = const0_rtx;
3521 meml = adjust_address (dst, QImode, ofs);
3522 memh = adjust_address (dst, QImode, ofs+1);
3524 emit_move_insn (meml, dstl);
3525 emit_move_insn (memh, dsth);
3526 return;
3529 dstl = gen_reg_rtx (DImode);
3530 dsth = gen_reg_rtx (DImode);
3531 insl = gen_reg_rtx (DImode);
3532 insh = gen_reg_rtx (DImode);
3534 dsta = XEXP (dst, 0);
3535 if (GET_CODE (dsta) == LO_SUM)
3536 dsta = force_reg (Pmode, dsta);
3538 /* AND addresses cannot be in any alias set, since they may implicitly
3539 alias surrounding code. Ideally we'd have some alias set that
3540 covered all types except those with alignment 8 or higher. */
3542 meml = change_address (dst, DImode,
3543 gen_rtx_AND (DImode,
3544 plus_constant (DImode, dsta, ofs),
3545 GEN_INT (-8)));
3546 set_mem_alias_set (meml, 0);
3548 memh = change_address (dst, DImode,
3549 gen_rtx_AND (DImode,
3550 plus_constant (DImode, dsta,
3551 ofs + size - 1),
3552 GEN_INT (-8)));
3553 set_mem_alias_set (memh, 0);
3555 emit_move_insn (dsth, memh);
3556 emit_move_insn (dstl, meml);
3558 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3560 if (src != CONST0_RTX (GET_MODE (src)))
3562 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3563 GEN_INT (size*8), addr));
3565 switch ((int) size)
3567 case 2:
3568 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3569 break;
3570 case 4:
3571 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3572 break;
3573 case 8:
3574 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3575 break;
3576 default:
3577 gcc_unreachable ();
3581 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3583 switch ((int) size)
3585 case 2:
3586 emit_insn (gen_mskwl (dstl, dstl, addr));
3587 break;
3588 case 4:
3589 emit_insn (gen_mskll (dstl, dstl, addr));
3590 break;
3591 case 8:
3592 emit_insn (gen_mskql (dstl, dstl, addr));
3593 break;
3594 default:
3595 gcc_unreachable ();
3598 if (src != CONST0_RTX (GET_MODE (src)))
3600 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3601 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3604 /* Must store high before low for degenerate case of aligned. */
3605 emit_move_insn (memh, dsth);
3606 emit_move_insn (meml, dstl);
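/* For reference, the non-BWX path above for an unaligned word (16-bit)
   store expands to roughly this sequence (register numbers are
   illustrative):

	ldq_u	r2,X+1(r11)	load both enclosing quadwords
	ldq_u	r1,X(r11)
	lda	r3,X(r11)	low three bits select the byte lane
	inswh	r5,r3,r6	position the new data (r5 holds the source)
	inswl	r5,r3,r4
	mskwh	r2,r3,r2	clear the bytes being replaced
	mskwl	r1,r3,r1
	or	r2,r6,r2	merge old and new
	or	r1,r4,r1
	stq_u	r2,X+1(r11)	store high before low, so the aligned
	stq_u	r1,X(r11)	case (both quadwords the same) is safe  */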
3609 /* The block move code tries to maximize speed by separating loads and
3610 stores at the expense of register pressure: we load all of the data
3611 before we store it back out. There are two secondary effects worth
3612 mentioning, that this speeds copying to/from aligned and unaligned
3613 buffers, and that it makes the code significantly easier to write. */
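/* For example (illustrative), an 11-byte copy whose source is known to
   be 32-bit aligned but whose destination is not first loads

	2 x SImode, 1 x HImode, 1 x QImode

   registers from the source, and then writes them back with unaligned
   4-byte stores for the SImode parts, an unaligned 2-byte store, and a
   final byte store, following the chunking loops below.  */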
3615 #define MAX_MOVE_WORDS 8
3617 /* Load an integral number of consecutive unaligned quadwords. */
3619 static void
3620 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3621 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3623 rtx const im8 = GEN_INT (-8);
3624 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3625 rtx sreg, areg, tmp, smema;
3626 HOST_WIDE_INT i;
3628 smema = XEXP (smem, 0);
3629 if (GET_CODE (smema) == LO_SUM)
3630 smema = force_reg (Pmode, smema);
3632 /* Generate all the tmp registers we need. */
3633 for (i = 0; i < words; ++i)
3635 data_regs[i] = out_regs[i];
3636 ext_tmps[i] = gen_reg_rtx (DImode);
3638 data_regs[words] = gen_reg_rtx (DImode);
3640 if (ofs != 0)
3641 smem = adjust_address (smem, GET_MODE (smem), ofs);
3643 /* Load up all of the source data. */
3644 for (i = 0; i < words; ++i)
3646 tmp = change_address (smem, DImode,
3647 gen_rtx_AND (DImode,
3648 plus_constant (DImode, smema, 8*i),
3649 im8));
3650 set_mem_alias_set (tmp, 0);
3651 emit_move_insn (data_regs[i], tmp);
3654 tmp = change_address (smem, DImode,
3655 gen_rtx_AND (DImode,
3656 plus_constant (DImode, smema,
3657 8*words - 1),
3658 im8));
3659 set_mem_alias_set (tmp, 0);
3660 emit_move_insn (data_regs[words], tmp);
3662 /* Extract the half-word fragments. Unfortunately DEC decided to make
3663 extxh with offset zero a noop instead of zeroing the register, so
3664 we must take care of that edge condition ourselves with cmov. */
3666 sreg = copy_addr_to_reg (smema);
3667 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3668 1, OPTAB_WIDEN);
3669 for (i = 0; i < words; ++i)
3671 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3672 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3673 emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i],
3674 gen_rtx_IF_THEN_ELSE (DImode,
3675 gen_rtx_EQ (DImode, areg,
3676 const0_rtx),
3677 const0_rtx, ext_tmps[i])));
3680 /* Merge the half-words into whole words. */
3681 for (i = 0; i < words; ++i)
3683 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3684 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3688 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3689 may be NULL to store zeros. */
3691 static void
3692 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3693 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3695 rtx const im8 = GEN_INT (-8);
3696 rtx ins_tmps[MAX_MOVE_WORDS];
3697 rtx st_tmp_1, st_tmp_2, dreg;
3698 rtx st_addr_1, st_addr_2, dmema;
3699 HOST_WIDE_INT i;
3701 dmema = XEXP (dmem, 0);
3702 if (GET_CODE (dmema) == LO_SUM)
3703 dmema = force_reg (Pmode, dmema);
3705 /* Generate all the tmp registers we need. */
3706 if (data_regs != NULL)
3707 for (i = 0; i < words; ++i)
3708 ins_tmps[i] = gen_reg_rtx(DImode);
3709 st_tmp_1 = gen_reg_rtx(DImode);
3710 st_tmp_2 = gen_reg_rtx(DImode);
3712 if (ofs != 0)
3713 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3715 st_addr_2 = change_address (dmem, DImode,
3716 gen_rtx_AND (DImode,
3717 plus_constant (DImode, dmema,
3718 words*8 - 1),
3719 im8));
3720 set_mem_alias_set (st_addr_2, 0);
3722 st_addr_1 = change_address (dmem, DImode,
3723 gen_rtx_AND (DImode, dmema, im8));
3724 set_mem_alias_set (st_addr_1, 0);
3726 /* Load up the destination end bits. */
3727 emit_move_insn (st_tmp_2, st_addr_2);
3728 emit_move_insn (st_tmp_1, st_addr_1);
3730 /* Shift the input data into place. */
3731 dreg = copy_addr_to_reg (dmema);
3732 if (data_regs != NULL)
3734 for (i = words-1; i >= 0; --i)
3736 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3737 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3739 for (i = words-1; i > 0; --i)
3741 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3742 ins_tmps[i-1], ins_tmps[i-1], 1,
3743 OPTAB_WIDEN);
3747 /* Split and merge the ends with the destination data. */
3748 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3749 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3751 if (data_regs != NULL)
3753 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3754 st_tmp_2, 1, OPTAB_WIDEN);
3755 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3756 st_tmp_1, 1, OPTAB_WIDEN);
3759 /* Store it all. */
3760 emit_move_insn (st_addr_2, st_tmp_2);
3761 for (i = words-1; i > 0; --i)
3763 rtx tmp = change_address (dmem, DImode,
3764 gen_rtx_AND (DImode,
3765 plus_constant (DImode,
3766 dmema, i*8),
3767 im8));
3768 set_mem_alias_set (tmp, 0);
3769 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3771 emit_move_insn (st_addr_1, st_tmp_1);
3775 /* Expand string/block move operations.
3777 operands[0] is the pointer to the destination.
3778 operands[1] is the pointer to the source.
3779 operands[2] is the number of bytes to move.
3780 operands[3] is the alignment. */
3782 int
3783 alpha_expand_block_move (rtx operands[])
3785 rtx bytes_rtx = operands[2];
3786 rtx align_rtx = operands[3];
3787 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3788 HOST_WIDE_INT bytes = orig_bytes;
3789 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3790 HOST_WIDE_INT dst_align = src_align;
3791 rtx orig_src = operands[1];
3792 rtx orig_dst = operands[0];
3793 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3794 rtx tmp;
3795 unsigned int i, words, ofs, nregs = 0;
3797 if (orig_bytes <= 0)
3798 return 1;
3799 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3800 return 0;
3802 /* Look for additional alignment information from recorded register info. */
3804 tmp = XEXP (orig_src, 0);
3805 if (REG_P (tmp))
3806 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3807 else if (GET_CODE (tmp) == PLUS
3808 && REG_P (XEXP (tmp, 0))
3809 && CONST_INT_P (XEXP (tmp, 1)))
3811 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3812 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3814 if (a > src_align)
3816 if (a >= 64 && c % 8 == 0)
3817 src_align = 64;
3818 else if (a >= 32 && c % 4 == 0)
3819 src_align = 32;
3820 else if (a >= 16 && c % 2 == 0)
3821 src_align = 16;
3825 tmp = XEXP (orig_dst, 0);
3826 if (REG_P (tmp))
3827 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3828 else if (GET_CODE (tmp) == PLUS
3829 && REG_P (XEXP (tmp, 0))
3830 && CONST_INT_P (XEXP (tmp, 1)))
3832 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3833 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3835 if (a > dst_align)
3837 if (a >= 64 && c % 8 == 0)
3838 dst_align = 64;
3839 else if (a >= 32 && c % 4 == 0)
3840 dst_align = 32;
3841 else if (a >= 16 && c % 2 == 0)
3842 dst_align = 16;
3846 ofs = 0;
3847 if (src_align >= 64 && bytes >= 8)
3849 words = bytes / 8;
3851 for (i = 0; i < words; ++i)
3852 data_regs[nregs + i] = gen_reg_rtx (DImode);
3854 for (i = 0; i < words; ++i)
3855 emit_move_insn (data_regs[nregs + i],
3856 adjust_address (orig_src, DImode, ofs + i * 8));
3858 nregs += words;
3859 bytes -= words * 8;
3860 ofs += words * 8;
3863 if (src_align >= 32 && bytes >= 4)
3865 words = bytes / 4;
3867 for (i = 0; i < words; ++i)
3868 data_regs[nregs + i] = gen_reg_rtx (SImode);
3870 for (i = 0; i < words; ++i)
3871 emit_move_insn (data_regs[nregs + i],
3872 adjust_address (orig_src, SImode, ofs + i * 4));
3874 nregs += words;
3875 bytes -= words * 4;
3876 ofs += words * 4;
3879 if (bytes >= 8)
3881 words = bytes / 8;
3883 for (i = 0; i < words+1; ++i)
3884 data_regs[nregs + i] = gen_reg_rtx (DImode);
3886 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3887 words, ofs);
3889 nregs += words;
3890 bytes -= words * 8;
3891 ofs += words * 8;
3894 if (! TARGET_BWX && bytes >= 4)
3896 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3897 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3898 bytes -= 4;
3899 ofs += 4;
3902 if (bytes >= 2)
3904 if (src_align >= 16)
3906 do {
3907 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3908 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3909 bytes -= 2;
3910 ofs += 2;
3911 } while (bytes >= 2);
3913 else if (! TARGET_BWX)
3915 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3916 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3917 bytes -= 2;
3918 ofs += 2;
3922 while (bytes > 0)
3924 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3925 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3926 bytes -= 1;
3927 ofs += 1;
3930 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3932 /* Now save it back out again. */
3934 i = 0, ofs = 0;
3936 /* Write out the data in whatever chunks reading the source allowed. */
3937 if (dst_align >= 64)
3939 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3941 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3942 data_regs[i]);
3943 ofs += 8;
3944 i++;
3948 if (dst_align >= 32)
3950 /* If the source has remaining DImode regs, write them out in
3951 two pieces. */
3952 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3954 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3955 NULL_RTX, 1, OPTAB_WIDEN);
3957 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3958 gen_lowpart (SImode, data_regs[i]));
3959 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3960 gen_lowpart (SImode, tmp));
3961 ofs += 8;
3962 i++;
3965 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3967 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3968 data_regs[i]);
3969 ofs += 4;
3970 i++;
3974 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3976 /* Write out a remaining block of words using unaligned methods. */
3978 for (words = 1; i + words < nregs; words++)
3979 if (GET_MODE (data_regs[i + words]) != DImode)
3980 break;
3982 if (words == 1)
3983 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3984 else
3985 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3986 words, ofs);
3988 i += words;
3989 ofs += words * 8;
3992 /* Due to the above, this won't be aligned. */
3993 /* ??? If we have more than one of these, consider constructing full
3994 words in registers and using alpha_expand_unaligned_store_words. */
3995 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3997 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3998 ofs += 4;
3999 i++;
4002 if (dst_align >= 16)
4003 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4005 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4006 i++;
4007 ofs += 2;
4009 else
4010 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4012 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4013 i++;
4014 ofs += 2;
4017 /* The remainder must be byte copies. */
4018 while (i < nregs)
4020 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4021 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4022 i++;
4023 ofs += 1;
4026 return 1;
4029 int
4030 alpha_expand_block_clear (rtx operands[])
4032 rtx bytes_rtx = operands[1];
4033 rtx align_rtx = operands[3];
4034 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4035 HOST_WIDE_INT bytes = orig_bytes;
4036 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4037 HOST_WIDE_INT alignofs = 0;
4038 rtx orig_dst = operands[0];
4039 rtx tmp;
4040 int i, words, ofs = 0;
4042 if (orig_bytes <= 0)
4043 return 1;
4044 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4045 return 0;
4047 /* Look for stricter alignment. */
4048 tmp = XEXP (orig_dst, 0);
4049 if (REG_P (tmp))
4050 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4051 else if (GET_CODE (tmp) == PLUS
4052 && REG_P (XEXP (tmp, 0))
4053 && CONST_INT_P (XEXP (tmp, 1)))
4055 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4056 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4058 if (a > align)
4060 if (a >= 64)
4061 align = a, alignofs = 8 - c % 8;
4062 else if (a >= 32)
4063 align = a, alignofs = 4 - c % 4;
4064 else if (a >= 16)
4065 align = a, alignofs = 2 - c % 2;
4069 /* Handle an unaligned prefix first. */
4071 if (alignofs > 0)
4073 #if HOST_BITS_PER_WIDE_INT >= 64
4074 /* Given that alignofs is bounded by align, the only time BWX could
4075 generate three stores is for a 7 byte fill. Prefer two individual
4076 stores over a load/mask/store sequence. */
4077 if ((!TARGET_BWX || alignofs == 7)
4078 && align >= 32
4079 && !(alignofs == 4 && bytes >= 4))
4081 enum machine_mode mode = (align >= 64 ? DImode : SImode);
4082 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4083 rtx mem, tmp;
4084 HOST_WIDE_INT mask;
4086 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4087 set_mem_alias_set (mem, 0);
4089 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4090 if (bytes < alignofs)
4092 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4093 ofs += bytes;
4094 bytes = 0;
4096 else
4098 bytes -= alignofs;
4099 ofs += alignofs;
4101 alignofs = 0;
4103 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4104 NULL_RTX, 1, OPTAB_WIDEN);
4106 emit_move_insn (mem, tmp);
4108 #endif
4110 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4112 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4113 bytes -= 1;
4114 ofs += 1;
4115 alignofs -= 1;
4117 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4119 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4120 bytes -= 2;
4121 ofs += 2;
4122 alignofs -= 2;
4124 if (alignofs == 4 && bytes >= 4)
4126 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4127 bytes -= 4;
4128 ofs += 4;
4129 alignofs = 0;
4132 /* If we've not used the extra lead alignment information by now,
4133 we won't be able to. Downgrade align to match what's left over. */
4134 if (alignofs > 0)
4136 alignofs = alignofs & -alignofs;
4137 align = MIN (align, alignofs * BITS_PER_UNIT);
4141 /* Handle a block of contiguous long-words. */
4143 if (align >= 64 && bytes >= 8)
4145 words = bytes / 8;
4147 for (i = 0; i < words; ++i)
4148 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4149 const0_rtx);
4151 bytes -= words * 8;
4152 ofs += words * 8;
4155 /* If the block is large and appropriately aligned, emit a single
4156 store followed by a sequence of stq_u insns. */
4158 if (align >= 32 && bytes > 16)
4160 rtx orig_dsta;
4162 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4163 bytes -= 4;
4164 ofs += 4;
4166 orig_dsta = XEXP (orig_dst, 0);
4167 if (GET_CODE (orig_dsta) == LO_SUM)
4168 orig_dsta = force_reg (Pmode, orig_dsta);
4170 words = bytes / 8;
4171 for (i = 0; i < words; ++i)
4173 rtx mem
4174 = change_address (orig_dst, DImode,
4175 gen_rtx_AND (DImode,
4176 plus_constant (DImode, orig_dsta,
4177 ofs + i*8),
4178 GEN_INT (-8)));
4179 set_mem_alias_set (mem, 0);
4180 emit_move_insn (mem, const0_rtx);
4183 /* Depending on the alignment, the first stq_u may have overlapped
4184 with the initial stl, which means that the last stq_u didn't
4185 write as much as it would appear. Leave those questionable bytes
4186 unaccounted for. */
4187 bytes -= words * 8 - 4;
4188 ofs += words * 8 - 4;
4191 /* Handle a smaller block of aligned words. */
4193 if ((align >= 64 && bytes == 4)
4194 || (align == 32 && bytes >= 4))
4196 words = bytes / 4;
4198 for (i = 0; i < words; ++i)
4199 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4200 const0_rtx);
4202 bytes -= words * 4;
4203 ofs += words * 4;
4206 /* An unaligned block uses stq_u stores for as many as possible. */
4208 if (bytes >= 8)
4210 words = bytes / 8;
4212 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4214 bytes -= words * 8;
4215 ofs += words * 8;
4218 /* Next clean up any trailing pieces. */
4220 #if HOST_BITS_PER_WIDE_INT >= 64
4221 /* Count the number of bits in BYTES for which aligned stores could
4222 be emitted. */
4223 words = 0;
4224 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4225 if (bytes & i)
4226 words += 1;
4228 /* If we have appropriate alignment (and it wouldn't take too many
4229 instructions otherwise), mask out the bytes we need. */
4230 if (TARGET_BWX ? words > 2 : bytes > 0)
4232 if (align >= 64)
4234 rtx mem, tmp;
4235 HOST_WIDE_INT mask;
4237 mem = adjust_address (orig_dst, DImode, ofs);
4238 set_mem_alias_set (mem, 0);
4240 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4242 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4243 NULL_RTX, 1, OPTAB_WIDEN);
4245 emit_move_insn (mem, tmp);
4246 return 1;
4248 else if (align >= 32 && bytes < 4)
4250 rtx mem, tmp;
4251 HOST_WIDE_INT mask;
4253 mem = adjust_address (orig_dst, SImode, ofs);
4254 set_mem_alias_set (mem, 0);
4256 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4258 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4259 NULL_RTX, 1, OPTAB_WIDEN);
4261 emit_move_insn (mem, tmp);
4262 return 1;
4265 #endif
4267 if (!TARGET_BWX && bytes >= 4)
4269 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4270 bytes -= 4;
4271 ofs += 4;
4274 if (bytes >= 2)
4276 if (align >= 16)
4278 do {
4279 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4280 const0_rtx);
4281 bytes -= 2;
4282 ofs += 2;
4283 } while (bytes >= 2);
4285 else if (! TARGET_BWX)
4287 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4288 bytes -= 2;
4289 ofs += 2;
4293 while (bytes > 0)
4295 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4296 bytes -= 1;
4297 ofs += 1;
4300 return 1;
4303 /* Returns a mask so that zap(x, value) == x & mask. */
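/* Editorial example, not part of the original source: for value == 0x0f
   the zap clears bytes 0-3, so the mask built here is 0xffffffff00000000
   and zap (x, 0x0f) == x & 0xffffffff00000000.  */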
4306 alpha_expand_zap_mask (HOST_WIDE_INT value)
4308 rtx result;
4309 int i;
4311 if (HOST_BITS_PER_WIDE_INT >= 64)
4313 HOST_WIDE_INT mask = 0;
4315 for (i = 7; i >= 0; --i)
4317 mask <<= 8;
4318 if (!((value >> i) & 1))
4319 mask |= 0xff;
4322 result = gen_int_mode (mask, DImode);
4324 else
4326 HOST_WIDE_INT mask_lo = 0, mask_hi = 0;
4328 gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
4330 for (i = 7; i >= 4; --i)
4332 mask_hi <<= 8;
4333 if (!((value >> i) & 1))
4334 mask_hi |= 0xff;
4337 for (i = 3; i >= 0; --i)
4339 mask_lo <<= 8;
4340 if (!((value >> i) & 1))
4341 mask_lo |= 0xff;
4344 result = immed_double_const (mask_lo, mask_hi, DImode);
4347 return result;
4350 void
4351 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4352 enum machine_mode mode,
4353 rtx op0, rtx op1, rtx op2)
4355 op0 = gen_lowpart (mode, op0);
4357 if (op1 == const0_rtx)
4358 op1 = CONST0_RTX (mode);
4359 else
4360 op1 = gen_lowpart (mode, op1);
4362 if (op2 == const0_rtx)
4363 op2 = CONST0_RTX (mode);
4364 else
4365 op2 = gen_lowpart (mode, op2);
4367 emit_insn ((*gen) (op0, op1, op2));
4370 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4371 COND is true. Mark the jump as unlikely to be taken. */
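/* Editorial note, not part of the original source: the REG_BR_PROB value
   used here is REG_BR_PROB_BASE / 100 - 1, i.e. a predicted probability of
   the branch being taken of just under 1%.  */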
4373 static void
4374 emit_unlikely_jump (rtx cond, rtx label)
4376 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4377 rtx x;
4379 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4380 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
4381 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
4384 /* A subroutine of the atomic operation splitters. Emit a load-locked
4385 instruction in MODE. */
4387 static void
4388 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
4390 rtx (*fn) (rtx, rtx) = NULL;
4391 if (mode == SImode)
4392 fn = gen_load_locked_si;
4393 else if (mode == DImode)
4394 fn = gen_load_locked_di;
4395 emit_insn (fn (reg, mem));
4398 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4399 instruction in MODE. */
4401 static void
4402 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
4404 rtx (*fn) (rtx, rtx, rtx) = NULL;
4405 if (mode == SImode)
4406 fn = gen_store_conditional_si;
4407 else if (mode == DImode)
4408 fn = gen_store_conditional_di;
4409 emit_insn (fn (res, mem, val));
4412 /* Subroutines of the atomic operation splitters. Emit barriers
4413 as needed for the memory MODEL. */
4415 static void
4416 alpha_pre_atomic_barrier (enum memmodel model)
4418 if (need_atomic_barrier_p (model, true))
4419 emit_insn (gen_memory_barrier ());
4422 static void
4423 alpha_post_atomic_barrier (enum memmodel model)
4425 if (need_atomic_barrier_p (model, false))
4426 emit_insn (gen_memory_barrier ());
4429 /* A subroutine of the atomic operation splitters. Emit an insxl
4430 instruction in MODE. */
4432 static rtx
4433 emit_insxl (enum machine_mode mode, rtx op1, rtx op2)
4435 rtx ret = gen_reg_rtx (DImode);
4436 rtx (*fn) (rtx, rtx, rtx);
4438 switch (mode)
4440 case QImode:
4441 fn = gen_insbl;
4442 break;
4443 case HImode:
4444 fn = gen_inswl;
4445 break;
4446 case SImode:
4447 fn = gen_insll;
4448 break;
4449 case DImode:
4450 fn = gen_insql;
4451 break;
4452 default:
4453 gcc_unreachable ();
4456 op1 = force_reg (mode, op1);
4457 emit_insn (fn (ret, op1, op2));
4459 return ret;
4462 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4463 to perform. MEM is the memory on which to operate. VAL is the second
4464 operand of the binary operator. BEFORE and AFTER are optional locations to
4465 return the value of MEM either before or after the operation. SCRATCH is
4466 a scratch register. */
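/* Editorial sketch, not part of the original source: for a DImode
   fetch-and-add this splitter emits, schematically,

       <pre-barrier as MODEL requires>
   1:  ldq_l   before, mem
       addq    before, val, scratch
       stq_c   scratch, mem        (scratch becomes 0 on failure)
       beq     scratch, 1b         (marked very unlikely)
       <post-barrier as MODEL requires>

   with the old and new values copied to BEFORE and AFTER when requested.  */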
4468 void
4469 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4470 rtx after, rtx scratch, enum memmodel model)
4472 enum machine_mode mode = GET_MODE (mem);
4473 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4475 alpha_pre_atomic_barrier (model);
4477 label = gen_label_rtx ();
4478 emit_label (label);
4479 label = gen_rtx_LABEL_REF (DImode, label);
4481 if (before == NULL)
4482 before = scratch;
4483 emit_load_locked (mode, before, mem);
4485 if (code == NOT)
4487 x = gen_rtx_AND (mode, before, val);
4488 emit_insn (gen_rtx_SET (VOIDmode, val, x));
4490 x = gen_rtx_NOT (mode, val);
4492 else
4493 x = gen_rtx_fmt_ee (code, mode, before, val);
4494 if (after)
4495 emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x)));
4496 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
4498 emit_store_conditional (mode, cond, mem, scratch);
4500 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4501 emit_unlikely_jump (x, label);
4503 alpha_post_atomic_barrier (model);
4506 /* Expand a compare and swap operation. */
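/* Editorial sketch, not part of the original source: for a strong DImode
   compare-and-swap the split below emits, roughly,

   1:  ldq_l   retval, mem
       cmpeq   retval, oldval, cond
       beq     cond, 2f             (unlikely)
       mov     newval, cond
       stq_c   cond, mem
       beq     cond, 1b             (unlikely; omitted for a weak CAS)
   2:

   bracketed by the pre/post barriers the memory model demands.  */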
4508 void
4509 alpha_split_compare_and_swap (rtx operands[])
4511 rtx cond, retval, mem, oldval, newval;
4512 bool is_weak;
4513 enum memmodel mod_s, mod_f;
4514 enum machine_mode mode;
4515 rtx label1, label2, x;
4517 cond = operands[0];
4518 retval = operands[1];
4519 mem = operands[2];
4520 oldval = operands[3];
4521 newval = operands[4];
4522 is_weak = (operands[5] != const0_rtx);
4523 mod_s = (enum memmodel) INTVAL (operands[6]);
4524 mod_f = (enum memmodel) INTVAL (operands[7]);
4525 mode = GET_MODE (mem);
4527 alpha_pre_atomic_barrier (mod_s);
4529 label1 = NULL_RTX;
4530 if (!is_weak)
4532 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4533 emit_label (XEXP (label1, 0));
4535 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4537 emit_load_locked (mode, retval, mem);
4539 x = gen_lowpart (DImode, retval);
4540 if (oldval == const0_rtx)
4542 emit_move_insn (cond, const0_rtx);
4543 x = gen_rtx_NE (DImode, x, const0_rtx);
4545 else
4547 x = gen_rtx_EQ (DImode, x, oldval);
4548 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4549 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4551 emit_unlikely_jump (x, label2);
4553 emit_move_insn (cond, newval);
4554 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4556 if (!is_weak)
4558 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4559 emit_unlikely_jump (x, label1);
4562 if (mod_f != MEMMODEL_RELAXED)
4563 emit_label (XEXP (label2, 0));
4565 alpha_post_atomic_barrier (mod_s);
4567 if (mod_f == MEMMODEL_RELAXED)
4568 emit_label (XEXP (label2, 0));
4571 void
4572 alpha_expand_compare_and_swap_12 (rtx operands[])
4574 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4575 enum machine_mode mode;
4576 rtx addr, align, wdst;
4577 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4579 cond = operands[0];
4580 dst = operands[1];
4581 mem = operands[2];
4582 oldval = operands[3];
4583 newval = operands[4];
4584 is_weak = operands[5];
4585 mod_s = operands[6];
4586 mod_f = operands[7];
4587 mode = GET_MODE (mem);
4589 /* We forced the address into a register via mem_noofs_operand. */
4590 addr = XEXP (mem, 0);
4591 gcc_assert (register_operand (addr, DImode));
4593 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4594 NULL_RTX, 1, OPTAB_DIRECT);
4596 oldval = convert_modes (DImode, mode, oldval, 1);
4598 if (newval != const0_rtx)
4599 newval = emit_insxl (mode, newval, addr);
4601 wdst = gen_reg_rtx (DImode);
4602 if (mode == QImode)
4603 gen = gen_atomic_compare_and_swapqi_1;
4604 else
4605 gen = gen_atomic_compare_and_swaphi_1;
4606 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4607 is_weak, mod_s, mod_f));
4609 emit_move_insn (dst, gen_lowpart (mode, wdst));
4612 void
4613 alpha_split_compare_and_swap_12 (rtx operands[])
4615 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4616 enum machine_mode mode;
4617 bool is_weak;
4618 enum memmodel mod_s, mod_f;
4619 rtx label1, label2, mem, addr, width, mask, x;
4621 cond = operands[0];
4622 dest = operands[1];
4623 orig_mem = operands[2];
4624 oldval = operands[3];
4625 newval = operands[4];
4626 align = operands[5];
4627 is_weak = (operands[6] != const0_rtx);
4628 mod_s = (enum memmodel) INTVAL (operands[7]);
4629 mod_f = (enum memmodel) INTVAL (operands[8]);
4630 scratch = operands[9];
4631 mode = GET_MODE (orig_mem);
4632 addr = XEXP (orig_mem, 0);
4634 mem = gen_rtx_MEM (DImode, align);
4635 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4636 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4637 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4639 alpha_pre_atomic_barrier (mod_s);
4641 label1 = NULL_RTX;
4642 if (!is_weak)
4644 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4645 emit_label (XEXP (label1, 0));
4647 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4649 emit_load_locked (DImode, scratch, mem);
4651 width = GEN_INT (GET_MODE_BITSIZE (mode));
4652 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4653 emit_insn (gen_extxl (dest, scratch, width, addr));
4655 if (oldval == const0_rtx)
4657 emit_move_insn (cond, const0_rtx);
4658 x = gen_rtx_NE (DImode, dest, const0_rtx);
4660 else
4662 x = gen_rtx_EQ (DImode, dest, oldval);
4663 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
4664 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4666 emit_unlikely_jump (x, label2);
4668 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4670 if (newval != const0_rtx)
4671 emit_insn (gen_iordi3 (cond, cond, newval));
4673 emit_store_conditional (DImode, cond, mem, cond);
4675 if (!is_weak)
4677 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4678 emit_unlikely_jump (x, label1);
4681 if (mod_f != MEMMODEL_RELAXED)
4682 emit_label (XEXP (label2, 0));
4684 alpha_post_atomic_barrier (mod_s);
4686 if (mod_f == MEMMODEL_RELAXED)
4687 emit_label (XEXP (label2, 0));
4690 /* Expand an atomic exchange operation. */
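/* Editorial sketch, not part of the original source: the full-word
   exchange below reduces to

   1:  ldq_l / ldl_l   retval, mem
       mov             val, scratch
       stq_c / stl_c   scratch, mem
       beq             scratch, 1b      (marked very unlikely)

   wrapped in whatever barriers the memory model requires.  */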
4692 void
4693 alpha_split_atomic_exchange (rtx operands[])
4695 rtx retval, mem, val, scratch;
4696 enum memmodel model;
4697 enum machine_mode mode;
4698 rtx label, x, cond;
4700 retval = operands[0];
4701 mem = operands[1];
4702 val = operands[2];
4703 model = (enum memmodel) INTVAL (operands[3]);
4704 scratch = operands[4];
4705 mode = GET_MODE (mem);
4706 cond = gen_lowpart (DImode, scratch);
4708 alpha_pre_atomic_barrier (model);
4710 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4711 emit_label (XEXP (label, 0));
4713 emit_load_locked (mode, retval, mem);
4714 emit_move_insn (scratch, val);
4715 emit_store_conditional (mode, cond, mem, scratch);
4717 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4718 emit_unlikely_jump (x, label);
4720 alpha_post_atomic_barrier (model);
4723 void
4724 alpha_expand_atomic_exchange_12 (rtx operands[])
4726 rtx dst, mem, val, model;
4727 enum machine_mode mode;
4728 rtx addr, align, wdst;
4729 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4731 dst = operands[0];
4732 mem = operands[1];
4733 val = operands[2];
4734 model = operands[3];
4735 mode = GET_MODE (mem);
4737 /* We forced the address into a register via mem_noofs_operand. */
4738 addr = XEXP (mem, 0);
4739 gcc_assert (register_operand (addr, DImode));
4741 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4742 NULL_RTX, 1, OPTAB_DIRECT);
4744 /* Insert val into the correct byte location within the word. */
4745 if (val != const0_rtx)
4746 val = emit_insxl (mode, val, addr);
4748 wdst = gen_reg_rtx (DImode);
4749 if (mode == QImode)
4750 gen = gen_atomic_exchangeqi_1;
4751 else
4752 gen = gen_atomic_exchangehi_1;
4753 emit_insn (gen (wdst, mem, val, align, model));
4755 emit_move_insn (dst, gen_lowpart (mode, wdst));
4758 void
4759 alpha_split_atomic_exchange_12 (rtx operands[])
4761 rtx dest, orig_mem, addr, val, align, scratch;
4762 rtx label, mem, width, mask, x;
4763 enum machine_mode mode;
4764 enum memmodel model;
4766 dest = operands[0];
4767 orig_mem = operands[1];
4768 val = operands[2];
4769 align = operands[3];
4770 model = (enum memmodel) INTVAL (operands[4]);
4771 scratch = operands[5];
4772 mode = GET_MODE (orig_mem);
4773 addr = XEXP (orig_mem, 0);
4775 mem = gen_rtx_MEM (DImode, align);
4776 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4777 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4778 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4780 alpha_pre_atomic_barrier (model);
4782 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4783 emit_label (XEXP (label, 0));
4785 emit_load_locked (DImode, scratch, mem);
4787 width = GEN_INT (GET_MODE_BITSIZE (mode));
4788 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4789 emit_insn (gen_extxl (dest, scratch, width, addr));
4790 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4791 if (val != const0_rtx)
4792 emit_insn (gen_iordi3 (scratch, scratch, val));
4794 emit_store_conditional (DImode, scratch, mem, scratch);
4796 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4797 emit_unlikely_jump (x, label);
4799 alpha_post_atomic_barrier (model);
4802 /* Adjust the cost of a scheduling dependency. Return the new cost of
4803 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */
4805 static int
4806 alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4808 enum attr_type dep_insn_type;
4810 /* If the dependence is an anti-dependence, there is no cost. For an
4811 output dependence, there is sometimes a cost, but it doesn't seem
4812 worth handling those few cases. */
4813 if (REG_NOTE_KIND (link) != 0)
4814 return cost;
4816 /* If we can't recognize the insns, we can't really do anything. */
4817 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4818 return cost;
4820 dep_insn_type = get_attr_type (dep_insn);
4822 /* Bring in the user-defined memory latency. */
4823 if (dep_insn_type == TYPE_ILD
4824 || dep_insn_type == TYPE_FLD
4825 || dep_insn_type == TYPE_LDSYM)
4826 cost += alpha_memory_latency-1;
4828 /* Everything else handled in DFA bypasses now. */
4830 return cost;
4833 /* The number of instructions that can be issued per cycle. */
4835 static int
4836 alpha_issue_rate (void)
4838 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4841 /* How many alternative schedules to try. This should be as wide as the
4842 scheduling freedom in the DFA, but no wider. Making this value too
4843 large results in extra work for the scheduler.
4845 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4846 alternative schedules. For EV5, we can choose between E0/E1 and
4847 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4849 static int
4850 alpha_multipass_dfa_lookahead (void)
4852 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4855 /* Machine-specific function data. */
4857 struct GTY(()) alpha_links;
4859 struct GTY(()) machine_function
4861 /* For flag_reorder_blocks_and_partition. */
4862 rtx gp_save_rtx;
4864 /* For VMS condition handlers. */
4865 bool uses_condition_handler;
4867 /* Linkage entries. */
4868 splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *)))
4869 links;
4872 /* How to allocate a 'struct machine_function'. */
4874 static struct machine_function *
4875 alpha_init_machine_status (void)
4877 return ggc_cleared_alloc<machine_function> ();
4880 /* Support for frame based VMS condition handlers. */
4882 /* A VMS condition handler may be established for a function with a call to
4883 __builtin_establish_vms_condition_handler, and cancelled with a call to
4884 __builtin_revert_vms_condition_handler.
4886 The VMS Condition Handling Facility knows about the existence of a handler
4887 from the procedure descriptor .handler field. As the VMS native compilers
4888 do, we store the user-specified handler's address at a fixed location in the
4889 stack frame and point the procedure descriptor at a common wrapper which
4890 fetches the real handler's address and issues an indirect call.
4892 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4894 We force the procedure kind to PT_STACK, and the fixed frame location is
4895 fp+8, just before the register save area. We use the handler_data field in
4896 the procedure descriptor to state the fp offset at which the installed
4897 handler address can be found. */
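/* Editorial usage sketch, not part of the original source; the handler
   name is hypothetical and the prototypes are abbreviated:

     void *prev = __builtin_establish_vms_condition_handler (my_handler);
     ...
     __builtin_revert_vms_condition_handler ();

   Establish returns whatever handler was previously stored in the frame
   slot; revert just installs a null handler through the same mechanism.  */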
4899 #define VMS_COND_HANDLER_FP_OFFSET 8
4901 /* Expand code to store the currently installed user VMS condition handler
4902 into TARGET and install HANDLER as the new condition handler. */
4904 void
4905 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4907 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4908 VMS_COND_HANDLER_FP_OFFSET);
4910 rtx handler_slot
4911 = gen_rtx_MEM (DImode, handler_slot_address);
4913 emit_move_insn (target, handler_slot);
4914 emit_move_insn (handler_slot, handler);
4916 /* Notify the start/prologue/epilogue emitters that the condition handler
4917 slot is needed. In addition to reserving the slot space, this will force
4918 the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4919 use above is correct. */
4920 cfun->machine->uses_condition_handler = true;
4923 /* Expand code to store the current VMS condition handler into TARGET and
4924 nullify it. */
4926 void
4927 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4929 /* We implement this by establishing a null condition handler, with the tiny
4930 side effect of setting uses_condition_handler. This is a little bit
4931 pessimistic if no actual builtin_establish call is ever issued, which is
4932 not a real problem and is expected never to happen anyway. */
4934 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4937 /* Functions to save and restore alpha_return_addr_rtx. */
4939 /* Start the ball rolling with RETURN_ADDR_RTX. */
4942 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4944 if (count != 0)
4945 return const0_rtx;
4947 return get_hard_reg_initial_val (Pmode, REG_RA);
4950 /* Return or create a memory slot containing the gp value for the current
4951 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4954 alpha_gp_save_rtx (void)
4956 rtx_insn *seq;
4957 rtx m = cfun->machine->gp_save_rtx;
4959 if (m == NULL)
4961 start_sequence ();
4963 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4964 m = validize_mem (m);
4965 emit_move_insn (m, pic_offset_table_rtx);
4967 seq = get_insns ();
4968 end_sequence ();
4970 /* We used to simply emit the sequence after entry_of_function.
4971 However this breaks the CFG if the first instruction in the
4972 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4973 label. Emit the sequence properly on the edge. We are only
4974 invoked from dw2_build_landing_pads and finish_eh_generation
4975 will call commit_edge_insertions thanks to a kludge. */
4976 insert_insn_on_edge (seq,
4977 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4979 cfun->machine->gp_save_rtx = m;
4982 return m;
4985 static void
4986 alpha_instantiate_decls (void)
4988 if (cfun->machine->gp_save_rtx != NULL_RTX)
4989 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4992 static int
4993 alpha_ra_ever_killed (void)
4995 rtx_insn *top;
4997 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4998 return (int)df_regs_ever_live_p (REG_RA);
5000 push_topmost_sequence ();
5001 top = get_insns ();
5002 pop_topmost_sequence ();
5004 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
5008 /* Return the trap mode suffix applicable to the current
5009 instruction, or NULL. */
5011 static const char *
5012 get_trap_mode_suffix (void)
5014 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
5016 switch (s)
5018 case TRAP_SUFFIX_NONE:
5019 return NULL;
5021 case TRAP_SUFFIX_SU:
5022 if (alpha_fptm >= ALPHA_FPTM_SU)
5023 return "su";
5024 return NULL;
5026 case TRAP_SUFFIX_SUI:
5027 if (alpha_fptm >= ALPHA_FPTM_SUI)
5028 return "sui";
5029 return NULL;
5031 case TRAP_SUFFIX_V_SV:
5032 switch (alpha_fptm)
5034 case ALPHA_FPTM_N:
5035 return NULL;
5036 case ALPHA_FPTM_U:
5037 return "v";
5038 case ALPHA_FPTM_SU:
5039 case ALPHA_FPTM_SUI:
5040 return "sv";
5041 default:
5042 gcc_unreachable ();
5045 case TRAP_SUFFIX_V_SV_SVI:
5046 switch (alpha_fptm)
5048 case ALPHA_FPTM_N:
5049 return NULL;
5050 case ALPHA_FPTM_U:
5051 return "v";
5052 case ALPHA_FPTM_SU:
5053 return "sv";
5054 case ALPHA_FPTM_SUI:
5055 return "svi";
5056 default:
5057 gcc_unreachable ();
5059 break;
5061 case TRAP_SUFFIX_U_SU_SUI:
5062 switch (alpha_fptm)
5064 case ALPHA_FPTM_N:
5065 return NULL;
5066 case ALPHA_FPTM_U:
5067 return "u";
5068 case ALPHA_FPTM_SU:
5069 return "su";
5070 case ALPHA_FPTM_SUI:
5071 return "sui";
5072 default:
5073 gcc_unreachable ();
5075 break;
5077 default:
5078 gcc_unreachable ();
5080 gcc_unreachable ();
5083 /* Return the rounding mode suffix applicable to the current
5084 instruction, or NULL. */
5086 static const char *
5087 get_round_mode_suffix (void)
5089 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5091 switch (s)
5093 case ROUND_SUFFIX_NONE:
5094 return NULL;
5095 case ROUND_SUFFIX_NORMAL:
5096 switch (alpha_fprm)
5098 case ALPHA_FPRM_NORM:
5099 return NULL;
5100 case ALPHA_FPRM_MINF:
5101 return "m";
5102 case ALPHA_FPRM_CHOP:
5103 return "c";
5104 case ALPHA_FPRM_DYN:
5105 return "d";
5106 default:
5107 gcc_unreachable ();
5109 break;
5111 case ROUND_SUFFIX_C:
5112 return "c";
5114 default:
5115 gcc_unreachable ();
5117 gcc_unreachable ();
5120 /* Print an operand. Recognize special options, documented below. */
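/* Editorial example, not part of the original source: in an insn template
   such as "addq %r1,%2,%0", the %r code below prints "$31" when operand 1
   is the constant zero, while %0 and %2 go through the default case.  */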
5122 void
5123 print_operand (FILE *file, rtx x, int code)
5125 int i;
5127 switch (code)
5129 case '~':
5130 /* Print the assembler name of the current function. */
5131 assemble_name (file, alpha_fnname);
5132 break;
5134 case '&':
5135 if (const char *name = get_some_local_dynamic_name ())
5136 assemble_name (file, name);
5137 else
5138 output_operand_lossage ("'%%&' used without any "
5139 "local dynamic TLS references");
5140 break;
5142 case '/':
5144 const char *trap = get_trap_mode_suffix ();
5145 const char *round = get_round_mode_suffix ();
5147 if (trap || round)
5148 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5149 break;
5152 case ',':
5153 /* Generates single precision instruction suffix. */
5154 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5155 break;
5157 case '-':
5158 /* Generates double precision instruction suffix. */
5159 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5160 break;
5162 case '#':
5163 if (alpha_this_literal_sequence_number == 0)
5164 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5165 fprintf (file, "%d", alpha_this_literal_sequence_number);
5166 break;
5168 case '*':
5169 if (alpha_this_gpdisp_sequence_number == 0)
5170 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5171 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5172 break;
5174 case 'H':
5175 if (GET_CODE (x) == HIGH)
5176 output_addr_const (file, XEXP (x, 0));
5177 else
5178 output_operand_lossage ("invalid %%H value");
5179 break;
5181 case 'J':
5183 const char *lituse;
5185 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5187 x = XVECEXP (x, 0, 0);
5188 lituse = "lituse_tlsgd";
5190 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5192 x = XVECEXP (x, 0, 0);
5193 lituse = "lituse_tlsldm";
5195 else if (CONST_INT_P (x))
5196 lituse = "lituse_jsr";
5197 else
5199 output_operand_lossage ("invalid %%J value");
5200 break;
5203 if (x != const0_rtx)
5204 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5206 break;
5208 case 'j':
5210 const char *lituse;
5212 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5213 lituse = "lituse_jsrdirect";
5214 #else
5215 lituse = "lituse_jsr";
5216 #endif
5218 gcc_assert (INTVAL (x) != 0);
5219 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5221 break;
5222 case 'r':
5223 /* If this operand is the constant zero, write it as "$31". */
5224 if (REG_P (x))
5225 fprintf (file, "%s", reg_names[REGNO (x)]);
5226 else if (x == CONST0_RTX (GET_MODE (x)))
5227 fprintf (file, "$31");
5228 else
5229 output_operand_lossage ("invalid %%r value");
5230 break;
5232 case 'R':
5233 /* Similar, but for floating-point. */
5234 if (REG_P (x))
5235 fprintf (file, "%s", reg_names[REGNO (x)]);
5236 else if (x == CONST0_RTX (GET_MODE (x)))
5237 fprintf (file, "$f31");
5238 else
5239 output_operand_lossage ("invalid %%R value");
5240 break;
5242 case 'N':
5243 /* Write the 1's complement of a constant. */
5244 if (!CONST_INT_P (x))
5245 output_operand_lossage ("invalid %%N value");
5247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5248 break;
5250 case 'P':
5251 /* Write 1 << C, for a constant C. */
5252 if (!CONST_INT_P (x))
5253 output_operand_lossage ("invalid %%P value");
5255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x));
5256 break;
5258 case 'h':
5259 /* Write the high-order 16 bits of a constant, sign-extended. */
5260 if (!CONST_INT_P (x))
5261 output_operand_lossage ("invalid %%h value");
5263 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5264 break;
5266 case 'L':
5267 /* Write the low-order 16 bits of a constant, sign-extended. */
5268 if (!CONST_INT_P (x))
5269 output_operand_lossage ("invalid %%L value");
5271 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5272 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5273 break;
5275 case 'm':
5276 /* Write mask for ZAP insn. */
5277 if (GET_CODE (x) == CONST_DOUBLE)
5279 HOST_WIDE_INT mask = 0;
5280 HOST_WIDE_INT value;
5282 value = CONST_DOUBLE_LOW (x);
5283 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5284 i++, value >>= 8)
5285 if (value & 0xff)
5286 mask |= (1 << i);
5288 value = CONST_DOUBLE_HIGH (x);
5289 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
5290 i++, value >>= 8)
5291 if (value & 0xff)
5292 mask |= (1 << (i + sizeof (int)));
5294 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff);
5297 else if (CONST_INT_P (x))
5299 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5301 for (i = 0; i < 8; i++, value >>= 8)
5302 if (value & 0xff)
5303 mask |= (1 << i);
5305 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5307 else
5308 output_operand_lossage ("invalid %%m value");
5309 break;
5311 case 'M':
5312 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5313 if (!CONST_INT_P (x)
5314 || (INTVAL (x) != 8 && INTVAL (x) != 16
5315 && INTVAL (x) != 32 && INTVAL (x) != 64))
5316 output_operand_lossage ("invalid %%M value");
5318 fprintf (file, "%s",
5319 (INTVAL (x) == 8 ? "b"
5320 : INTVAL (x) == 16 ? "w"
5321 : INTVAL (x) == 32 ? "l"
5322 : "q"));
5323 break;
5325 case 'U':
5326 /* Similar, except do it from the mask. */
5327 if (CONST_INT_P (x))
5329 HOST_WIDE_INT value = INTVAL (x);
5331 if (value == 0xff)
5333 fputc ('b', file);
5334 break;
5336 if (value == 0xffff)
5338 fputc ('w', file);
5339 break;
5341 if (value == 0xffffffff)
5343 fputc ('l', file);
5344 break;
5346 if (value == -1)
5348 fputc ('q', file);
5349 break;
5352 else if (HOST_BITS_PER_WIDE_INT == 32
5353 && GET_CODE (x) == CONST_DOUBLE
5354 && CONST_DOUBLE_LOW (x) == 0xffffffff
5355 && CONST_DOUBLE_HIGH (x) == 0)
5357 fputc ('l', file);
5358 break;
5360 output_operand_lossage ("invalid %%U value");
5361 break;
5363 case 's':
5364 /* Write the constant value divided by 8. */
5365 if (!CONST_INT_P (x)
5366 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5367 || (INTVAL (x) & 7) != 0)
5368 output_operand_lossage ("invalid %%s value");
5370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5371 break;
5373 case 'S':
5374 /* Same, except compute (64 - c) / 8. */
5376 if (!CONST_INT_P (x)
5377 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5378 || (INTVAL (x) & 7) != 0)
5379 output_operand_lossage ("invalid %%S value");
5381 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5382 break;
5384 case 'C': case 'D': case 'c': case 'd':
5385 /* Write out comparison name. */
5387 enum rtx_code c = GET_CODE (x);
5389 if (!COMPARISON_P (x))
5390 output_operand_lossage ("invalid %%C value");
5392 else if (code == 'D')
5393 c = reverse_condition (c);
5394 else if (code == 'c')
5395 c = swap_condition (c);
5396 else if (code == 'd')
5397 c = swap_condition (reverse_condition (c));
5399 if (c == LEU)
5400 fprintf (file, "ule");
5401 else if (c == LTU)
5402 fprintf (file, "ult");
5403 else if (c == UNORDERED)
5404 fprintf (file, "un");
5405 else
5406 fprintf (file, "%s", GET_RTX_NAME (c));
5408 break;
5410 case 'E':
5411 /* Write the divide or modulus operator. */
5412 switch (GET_CODE (x))
5414 case DIV:
5415 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5416 break;
5417 case UDIV:
5418 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5419 break;
5420 case MOD:
5421 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5422 break;
5423 case UMOD:
5424 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5425 break;
5426 default:
5427 output_operand_lossage ("invalid %%E value");
5428 break;
5430 break;
5432 case 'A':
5433 /* Write "_u" for unaligned access. */
5434 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5435 fprintf (file, "_u");
5436 break;
5438 case 0:
5439 if (REG_P (x))
5440 fprintf (file, "%s", reg_names[REGNO (x)]);
5441 else if (MEM_P (x))
5442 output_address (XEXP (x, 0));
5443 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5445 switch (XINT (XEXP (x, 0), 1))
5447 case UNSPEC_DTPREL:
5448 case UNSPEC_TPREL:
5449 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5450 break;
5451 default:
5452 output_operand_lossage ("unknown relocation unspec");
5453 break;
5456 else
5457 output_addr_const (file, x);
5458 break;
5460 default:
5461 output_operand_lossage ("invalid %%xn code");
5465 void
5466 print_operand_address (FILE *file, rtx addr)
5468 int basereg = 31;
5469 HOST_WIDE_INT offset = 0;
5471 if (GET_CODE (addr) == AND)
5472 addr = XEXP (addr, 0);
5474 if (GET_CODE (addr) == PLUS
5475 && CONST_INT_P (XEXP (addr, 1)))
5477 offset = INTVAL (XEXP (addr, 1));
5478 addr = XEXP (addr, 0);
5481 if (GET_CODE (addr) == LO_SUM)
5483 const char *reloc16, *reloclo;
5484 rtx op1 = XEXP (addr, 1);
5486 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5488 op1 = XEXP (op1, 0);
5489 switch (XINT (op1, 1))
5491 case UNSPEC_DTPREL:
5492 reloc16 = NULL;
5493 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5494 break;
5495 case UNSPEC_TPREL:
5496 reloc16 = NULL;
5497 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5498 break;
5499 default:
5500 output_operand_lossage ("unknown relocation unspec");
5501 return;
5504 output_addr_const (file, XVECEXP (op1, 0, 0));
5506 else
5508 reloc16 = "gprel";
5509 reloclo = "gprellow";
5510 output_addr_const (file, op1);
5513 if (offset)
5514 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5516 addr = XEXP (addr, 0);
5517 switch (GET_CODE (addr))
5519 case REG:
5520 basereg = REGNO (addr);
5521 break;
5523 case SUBREG:
5524 basereg = subreg_regno (addr);
5525 break;
5527 default:
5528 gcc_unreachable ();
5531 fprintf (file, "($%d)\t\t!%s", basereg,
5532 (basereg == 29 ? reloc16 : reloclo));
5533 return;
5536 switch (GET_CODE (addr))
5538 case REG:
5539 basereg = REGNO (addr);
5540 break;
5542 case SUBREG:
5543 basereg = subreg_regno (addr);
5544 break;
5546 case CONST_INT:
5547 offset = INTVAL (addr);
5548 break;
5550 case SYMBOL_REF:
5551 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5552 fprintf (file, "%s", XSTR (addr, 0));
5553 return;
5555 case CONST:
5556 gcc_assert(TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5557 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5558 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5559 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5560 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5561 INTVAL (XEXP (XEXP (addr, 0), 1)));
5562 return;
5564 default:
5565 output_operand_lossage ("invalid operand address");
5566 return;
5569 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5572 /* Emit RTL insns to initialize the variable parts of a trampoline at
5573 M_TRAMP. FNDECL is the target function's decl. CHAIN_VALUE is an rtx
5574 for the static chain value for the function. */
5576 static void
5577 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5579 rtx fnaddr, mem, word1, word2;
5581 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5583 #ifdef POINTERS_EXTEND_UNSIGNED
5584 fnaddr = convert_memory_address (Pmode, fnaddr);
5585 chain_value = convert_memory_address (Pmode, chain_value);
5586 #endif
5588 if (TARGET_ABI_OPEN_VMS)
5590 const char *fnname;
5591 char *trname;
5593 /* Construct the name of the trampoline entry point. */
5594 fnname = XSTR (fnaddr, 0);
5595 trname = (char *) alloca (strlen (fnname) + 5);
5596 strcpy (trname, fnname);
5597 strcat (trname, "..tr");
5598 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5599 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5601 /* Trampoline (or "bounded") procedure descriptor is constructed from
5602 the function's procedure descriptor with certain fields zeroed in
5603 accordance with the VMS calling standard. This is stored in the first quadword. */
5604 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5605 word1 = expand_and (DImode, word1,
5606 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5607 NULL);
5609 else
5611 /* These 4 instructions are:
5612 ldq $1,24($27)
5613 ldq $27,16($27)
5614 jmp $31,($27),0
5615 nop
5616 We don't bother setting the HINT field of the jump; the nop
5617 is merely there for padding. */
5618 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5619 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5622 /* Store the first two words, as computed above. */
5623 mem = adjust_address (m_tramp, DImode, 0);
5624 emit_move_insn (mem, word1);
5625 mem = adjust_address (m_tramp, DImode, 8);
5626 emit_move_insn (mem, word2);
5628 /* Store function address and static chain value. */
5629 mem = adjust_address (m_tramp, Pmode, 16);
5630 emit_move_insn (mem, fnaddr);
5631 mem = adjust_address (m_tramp, Pmode, 24);
5632 emit_move_insn (mem, chain_value);
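/* Editorial note, not part of the original source: on OSF the finished
   32-byte trampoline is laid out as

      0:  ldq $1,24($27); ldq $27,16($27)     (word1)
      8:  jmp $31,($27),0; nop                (word2)
     16:  target function address
     24:  static chain value

   so calling it through $27 loads the static chain into $1 and jumps to
   the real function.  */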
5634 if (TARGET_ABI_OSF)
5636 emit_insn (gen_imb ());
5637 #ifdef HAVE_ENABLE_EXECUTE_STACK
5638 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5639 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5640 #endif
5644 /* Determine where to put an argument to a function.
5645 Value is zero to push the argument on the stack,
5646 or a hard register in which to store the argument.
5648 MODE is the argument's machine mode.
5649 TYPE is the data type of the argument (as a tree).
5650 This is null for libcalls where that information may
5651 not be available.
5652 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5653 the preceding args and about the function being called.
5654 NAMED is nonzero if this argument is a named parameter
5655 (otherwise it is an extra parameter matching an ellipsis).
5657 On Alpha the first 6 words of args are normally in registers
5658 and the rest are pushed. */
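/* Editorial example, not part of the original source: on OSF, for
   f (int a, double b, char *c) the first argument is passed in $16
   (basereg 16, slot 0), the second in $f17 (basereg 48, slot 1) and the
   third in $18; a seventh one-word argument would get NULL_RTX and be
   pushed on the stack.  */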
5660 static rtx
5661 alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
5662 const_tree type, bool named ATTRIBUTE_UNUSED)
5664 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5665 int basereg;
5666 int num_args;
5668 /* Don't get confused and pass small structures in FP registers. */
5669 if (type && AGGREGATE_TYPE_P (type))
5670 basereg = 16;
5671 else
5673 #ifdef ENABLE_CHECKING
5674 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5675 values here. */
5676 gcc_assert (!COMPLEX_MODE_P (mode));
5677 #endif
5679 /* Set up defaults for FP operands passed in FP registers, and
5680 integral operands passed in integer registers. */
5681 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5682 basereg = 32 + 16;
5683 else
5684 basereg = 16;
5687 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5688 the two platforms, so we can't avoid conditional compilation. */
5689 #if TARGET_ABI_OPEN_VMS
5691 if (mode == VOIDmode)
5692 return alpha_arg_info_reg_val (*cum);
5694 num_args = cum->num_args;
5695 if (num_args >= 6
5696 || targetm.calls.must_pass_in_stack (mode, type))
5697 return NULL_RTX;
5699 #elif TARGET_ABI_OSF
5701 if (*cum >= 6)
5702 return NULL_RTX;
5703 num_args = *cum;
5705 /* VOID is passed as a special flag for "last argument". */
5706 if (type == void_type_node)
5707 basereg = 16;
5708 else if (targetm.calls.must_pass_in_stack (mode, type))
5709 return NULL_RTX;
5711 #else
5712 #error Unhandled ABI
5713 #endif
5715 return gen_rtx_REG (mode, num_args + basereg);
5718 /* Update the data in CUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5722 static void
5723 alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
5724 const_tree type, bool named ATTRIBUTE_UNUSED)
5726 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5727 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5728 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5730 #if TARGET_ABI_OSF
5731 *cum += increment;
5732 #else
5733 if (!onstack && cum->num_args < 6)
5734 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5735 cum->num_args += increment;
5736 #endif
5739 static int
5740 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5741 enum machine_mode mode ATTRIBUTE_UNUSED,
5742 tree type ATTRIBUTE_UNUSED,
5743 bool named ATTRIBUTE_UNUSED)
5745 int words = 0;
5746 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5748 #if TARGET_ABI_OPEN_VMS
5749 if (cum->num_args < 6
5750 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5751 words = 6 - cum->num_args;
5752 #elif TARGET_ABI_OSF
5753 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5754 words = 6 - *cum;
5755 #else
5756 #error Unhandled ABI
5757 #endif
5759 return words * UNITS_PER_WORD;
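/* Editorial example, not part of the original source: an argument
   occupying four words that starts in slot 4 has only slots 4 and 5 left
   in registers, so the hook returns 16 and the remaining two words are
   passed on the stack.  */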
5763 /* Return true if TYPE must be returned in memory, instead of in registers. */
5765 static bool
5766 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5768 enum machine_mode mode = VOIDmode;
5769 int size;
5771 if (type)
5773 mode = TYPE_MODE (type);
5775 /* All aggregates are returned in memory, except on OpenVMS where
5776 records that fit in 64 bits should be returned by immediate value
5777 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5778 if (TARGET_ABI_OPEN_VMS
5779 && TREE_CODE (type) != ARRAY_TYPE
5780 && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8)
5781 return false;
5783 if (AGGREGATE_TYPE_P (type))
5784 return true;
5787 size = GET_MODE_SIZE (mode);
5788 switch (GET_MODE_CLASS (mode))
5790 case MODE_VECTOR_FLOAT:
5791 /* Pass all float vectors in memory, like an aggregate. */
5792 return true;
5794 case MODE_COMPLEX_FLOAT:
5795 /* We judge complex floats on the size of their element,
5796 not the size of the whole type. */
5797 size = GET_MODE_UNIT_SIZE (mode);
5798 break;
5800 case MODE_INT:
5801 case MODE_FLOAT:
5802 case MODE_COMPLEX_INT:
5803 case MODE_VECTOR_INT:
5804 break;
5806 default:
5807 /* ??? We get called on all sorts of random stuff from
5808 aggregate_value_p. We must return something, but it's not
5809 clear what's safe to return. Pretend it's a struct I
5810 guess. */
5811 return true;
5814 /* Otherwise types must fit in one register. */
5815 return size > UNITS_PER_WORD;
5818 /* Return true if TYPE should be passed by invisible reference. */
5820 static bool
5821 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5822 enum machine_mode mode,
5823 const_tree type ATTRIBUTE_UNUSED,
5824 bool named ATTRIBUTE_UNUSED)
5826 return mode == TFmode || mode == TCmode;
5829 /* Define how to find the value returned by a function. VALTYPE is the
5830 data type of the value (as a tree). If the precise function being
5831 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5832 MODE is set instead of VALTYPE for libcalls.
5834 On Alpha the value is found in $0 for integer functions and
5835 $f0 for floating-point functions. */
5838 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5839 enum machine_mode mode)
5841 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5842 enum mode_class mclass;
5844 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5846 if (valtype)
5847 mode = TYPE_MODE (valtype);
5849 mclass = GET_MODE_CLASS (mode);
5850 switch (mclass)
5852 case MODE_INT:
5853 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5854 where we have them returning both SImode and DImode. */
5855 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5856 PROMOTE_MODE (mode, dummy, valtype);
5857 /* FALLTHRU */
5859 case MODE_COMPLEX_INT:
5860 case MODE_VECTOR_INT:
5861 regnum = 0;
5862 break;
5864 case MODE_FLOAT:
5865 regnum = 32;
5866 break;
5868 case MODE_COMPLEX_FLOAT:
5870 enum machine_mode cmode = GET_MODE_INNER (mode);
5872 return gen_rtx_PARALLEL
5873 (VOIDmode,
5874 gen_rtvec (2,
5875 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5876 const0_rtx),
5877 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5878 GEN_INT (GET_MODE_SIZE (cmode)))));
5881 case MODE_RANDOM:
5882 /* We should only reach here for BLKmode on VMS. */
5883 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5884 regnum = 0;
5885 break;
5887 default:
5888 gcc_unreachable ();
5891 return gen_rtx_REG (mode, regnum);
5894 /* TCmode complex values are passed by invisible reference. We
5895 should not split these values. */
5897 static bool
5898 alpha_split_complex_arg (const_tree type)
5900 return TYPE_MODE (type) != TCmode;
5903 static tree
5904 alpha_build_builtin_va_list (void)
5906 tree base, ofs, space, record, type_decl;
5908 if (TARGET_ABI_OPEN_VMS)
5909 return ptr_type_node;
5911 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5912 type_decl = build_decl (BUILTINS_LOCATION,
5913 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5914 TYPE_STUB_DECL (record) = type_decl;
5915 TYPE_NAME (record) = type_decl;
5917 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5919 /* Dummy field to prevent alignment warnings. */
5920 space = build_decl (BUILTINS_LOCATION,
5921 FIELD_DECL, NULL_TREE, integer_type_node);
5922 DECL_FIELD_CONTEXT (space) = record;
5923 DECL_ARTIFICIAL (space) = 1;
5924 DECL_IGNORED_P (space) = 1;
5926 ofs = build_decl (BUILTINS_LOCATION,
5927 FIELD_DECL, get_identifier ("__offset"),
5928 integer_type_node);
5929 DECL_FIELD_CONTEXT (ofs) = record;
5930 DECL_CHAIN (ofs) = space;
5931 /* ??? This is a hack, __offset is marked volatile to prevent
5932 DCE that confuses stdarg optimization and results in
5933 gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */
5934 TREE_THIS_VOLATILE (ofs) = 1;
5936 base = build_decl (BUILTINS_LOCATION,
5937 FIELD_DECL, get_identifier ("__base"),
5938 ptr_type_node);
5939 DECL_FIELD_CONTEXT (base) = record;
5940 DECL_CHAIN (base) = ofs;
5942 TYPE_FIELDS (record) = base;
5943 layout_type (record);
5945 va_list_gpr_counter_field = ofs;
5946 return record;
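/* Editorial note, not part of the original source: on OSF the record
   built above amounts to

     struct __va_list_tag {
       void *__base;      base of the saved-register / overflow area
       int __offset;      byte offset of the next argument
       int <unnamed>;     dummy field guarding against alignment warnings
     };

   while VMS uses a plain pointer instead.  */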
5949 #if TARGET_ABI_OSF
5950 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5951 and constant additions. */
5953 static gimple
5954 va_list_skip_additions (tree lhs)
5956 gimple stmt;
5958 for (;;)
5960 enum tree_code code;
5962 stmt = SSA_NAME_DEF_STMT (lhs);
5964 if (gimple_code (stmt) == GIMPLE_PHI)
5965 return stmt;
5967 if (!is_gimple_assign (stmt)
5968 || gimple_assign_lhs (stmt) != lhs)
5969 return NULL;
5971 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5972 return stmt;
5973 code = gimple_assign_rhs_code (stmt);
5974 if (!CONVERT_EXPR_CODE_P (code)
5975 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5976 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5977 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5978 return stmt;
5980 lhs = gimple_assign_rhs1 (stmt);
5984 /* Check if LHS = RHS statement is
5985 LHS = *(ap.__base + ap.__offset + cst)
5986 or
5987 LHS = *(ap.__base
5988 + ((ap.__offset + cst <= 47)
5989 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5990 If the former, indicate that GPR registers are needed,
5991 if the latter, indicate that FPR registers are needed.
5993 Also look for LHS = (*ptr).field, where ptr is one of the forms
5994 listed above.
5996 On alpha, cfun->va_list_gpr_size is used as size of the needed
5997 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5998 registers are needed and bit 1 set if FPR registers are needed.
5999 Return true if va_list references should not be scanned for the
6000 current statement. */
6002 static bool
6003 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
6005 tree base, offset, rhs;
6006 int offset_arg = 1;
6007 gimple base_stmt;
6009 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
6010 != GIMPLE_SINGLE_RHS)
6011 return false;
6013 rhs = gimple_assign_rhs1 (stmt);
6014 while (handled_component_p (rhs))
6015 rhs = TREE_OPERAND (rhs, 0);
6016 if (TREE_CODE (rhs) != MEM_REF
6017 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
6018 return false;
6020 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
6021 if (stmt == NULL
6022 || !is_gimple_assign (stmt)
6023 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
6024 return false;
6026 base = gimple_assign_rhs1 (stmt);
6027 if (TREE_CODE (base) == SSA_NAME)
6029 base_stmt = va_list_skip_additions (base);
6030 if (base_stmt
6031 && is_gimple_assign (base_stmt)
6032 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6033 base = gimple_assign_rhs1 (base_stmt);
6036 if (TREE_CODE (base) != COMPONENT_REF
6037 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6039 base = gimple_assign_rhs2 (stmt);
6040 if (TREE_CODE (base) == SSA_NAME)
6042 base_stmt = va_list_skip_additions (base);
6043 if (base_stmt
6044 && is_gimple_assign (base_stmt)
6045 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
6046 base = gimple_assign_rhs1 (base_stmt);
6049 if (TREE_CODE (base) != COMPONENT_REF
6050 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
6051 return false;
6053 offset_arg = 0;
6056 base = get_base_address (base);
6057 if (TREE_CODE (base) != VAR_DECL
6058 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
6059 return false;
6061 offset = gimple_op (stmt, 1 + offset_arg);
6062 if (TREE_CODE (offset) == SSA_NAME)
6064 gimple offset_stmt = va_list_skip_additions (offset);
6066 if (offset_stmt
6067 && gimple_code (offset_stmt) == GIMPLE_PHI)
6069 HOST_WIDE_INT sub;
6070 gimple arg1_stmt, arg2_stmt;
6071 tree arg1, arg2;
6072 enum tree_code code1, code2;
6074 if (gimple_phi_num_args (offset_stmt) != 2)
6075 goto escapes;
6077 arg1_stmt
6078 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6079 arg2_stmt
6080 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6081 if (arg1_stmt == NULL
6082 || !is_gimple_assign (arg1_stmt)
6083 || arg2_stmt == NULL
6084 || !is_gimple_assign (arg2_stmt))
6085 goto escapes;
6087 code1 = gimple_assign_rhs_code (arg1_stmt);
6088 code2 = gimple_assign_rhs_code (arg2_stmt);
6089 if (code1 == COMPONENT_REF
6090 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6091 /* Do nothing. */;
6092 else if (code2 == COMPONENT_REF
6093 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6095 gimple tem = arg1_stmt;
6096 code2 = code1;
6097 arg1_stmt = arg2_stmt;
6098 arg2_stmt = tem;
6100 else
6101 goto escapes;
6103 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6104 goto escapes;
6106 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6107 if (code2 == MINUS_EXPR)
6108 sub = -sub;
6109 if (sub < -48 || sub > -32)
6110 goto escapes;
6112 arg1 = gimple_assign_rhs1 (arg1_stmt);
6113 arg2 = gimple_assign_rhs1 (arg2_stmt);
6114 if (TREE_CODE (arg2) == SSA_NAME)
6116 arg2_stmt = va_list_skip_additions (arg2);
6117 if (arg2_stmt == NULL
6118 || !is_gimple_assign (arg2_stmt)
6119 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6120 goto escapes;
6121 arg2 = gimple_assign_rhs1 (arg2_stmt);
6123 if (arg1 != arg2)
6124 goto escapes;
6126 if (TREE_CODE (arg1) != COMPONENT_REF
6127 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6128 || get_base_address (arg1) != base)
6129 goto escapes;
6131 /* Need floating point regs. */
6132 cfun->va_list_fpr_size |= 2;
6133 return false;
6135 if (offset_stmt
6136 && is_gimple_assign (offset_stmt)
6137 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6138 offset = gimple_assign_rhs1 (offset_stmt);
6140 if (TREE_CODE (offset) != COMPONENT_REF
6141 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6142 || get_base_address (offset) != base)
6143 goto escapes;
6144 else
6145 /* Need general regs. */
6146 cfun->va_list_fpr_size |= 1;
6147 return false;
6149 escapes:
6150 si->va_list_escapes = true;
6151 return false;
6153 #endif
6155 /* Perform any actions needed for a function that is receiving a
6156 variable number of arguments. */
6158 static void
6159 alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode,
6160 tree type, int *pretend_size, int no_rtl)
6162 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6164 /* Skip the current argument. */
6165 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6166 true);
6168 #if TARGET_ABI_OPEN_VMS
6169 /* For VMS, we allocate space for all 6 arg registers plus a count.
6171 However, if NO registers need to be saved, don't allocate any space.
6172 This is not only because we won't need the space, but because AP
6173 includes the current_pretend_args_size and we don't want to mess up
6174 any ap-relative addresses already made. */
6175 if (cum.num_args < 6)
6177 if (!no_rtl)
6179 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6180 emit_insn (gen_arg_home ());
6182 *pretend_size = 7 * UNITS_PER_WORD;
6184 #else
6185 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6186 only push those that are remaining. However, if NO registers need to
6187 be saved, don't allocate any space. This is not only because we won't
6188 need the space, but because AP includes the current_pretend_args_size
6189 and we don't want to mess up any ap-relative addresses already made.
6191 If we are not to use the floating-point registers, save the integer
6192 registers where we would put the floating-point registers. This is
6193 not the most efficient way to implement varargs with just one register
6194 class, but it isn't worth doing anything more efficient in this rare
6195 case. */
6196 if (cum >= 6)
6197 return;
6199 if (!no_rtl)
6201 int count;
6202 alias_set_type set = get_varargs_alias_set ();
6203 rtx tmp;
6205 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6206 if (count > 6 - cum)
6207 count = 6 - cum;
6209 /* Detect whether integer registers or floating-point registers
6210 are needed by the detected va_arg statements. See above for
6211 how these values are computed. Note that the "escape" value
6212 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6213 these bits set. */
6214 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6216 if (cfun->va_list_fpr_size & 1)
6218 tmp = gen_rtx_MEM (BLKmode,
6219 plus_constant (Pmode, virtual_incoming_args_rtx,
6220 (cum + 6) * UNITS_PER_WORD));
6221 MEM_NOTRAP_P (tmp) = 1;
6222 set_mem_alias_set (tmp, set);
6223 move_block_from_reg (16 + cum, tmp, count);
6226 if (cfun->va_list_fpr_size & 2)
6228 tmp = gen_rtx_MEM (BLKmode,
6229 plus_constant (Pmode, virtual_incoming_args_rtx,
6230 cum * UNITS_PER_WORD));
6231 MEM_NOTRAP_P (tmp) = 1;
6232 set_mem_alias_set (tmp, set);
6233 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6236 *pretend_size = 12 * UNITS_PER_WORD;
6237 #endif
6240 static void
6241 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6243 HOST_WIDE_INT offset;
6244 tree t, offset_field, base_field;
6246 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6247 return;
6249 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6250 up by 48, storing fp arg registers in the first 48 bytes, and the
6251 integer arg registers in the next 48 bytes. This is only done,
6252 however, if any integer registers need to be stored.
6254 If no integer registers need be stored, then we must subtract 48
6255 in order to account for the integer arg registers which are counted
6256 in argsize above, but which are not actually stored on the stack.
6257 Must further be careful here about structures straddling the last
6258 integer argument register; that futzes with pretend_args_size,
6259 which changes the meaning of AP. */
6261 if (NUM_ARGS < 6)
6262 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6263 else
6264 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6266 if (TARGET_ABI_OPEN_VMS)
6268 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6269 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6270 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6271 TREE_SIDE_EFFECTS (t) = 1;
6272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6274 else
6276 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6277 offset_field = DECL_CHAIN (base_field);
6279 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6280 valist, base_field, NULL_TREE);
6281 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6282 valist, offset_field, NULL_TREE);
6284 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6285 t = fold_build_pointer_plus_hwi (t, offset);
6286 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6287 TREE_SIDE_EFFECTS (t) = 1;
6288 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6290 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6291 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6292 TREE_SIDE_EFFECTS (t) = 1;
6293 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6297 static tree
6298 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6299 gimple_seq *pre_p)
6301 tree type_size, ptr_type, addend, t, addr;
6302 gimple_seq internal_post;
6304 /* If the type could not be passed in registers, skip the block
6305 reserved for the registers. */
6306 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6308 t = build_int_cst (TREE_TYPE (offset), 6*8);
6309 gimplify_assign (offset,
6310 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6311 pre_p);
6314 addend = offset;
6315 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6317 if (TREE_CODE (type) == COMPLEX_TYPE)
6319 tree real_part, imag_part, real_temp;
6321 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6322 offset, pre_p);
6324 /* Copy the value into a new temporary, lest the formal temporary
6325 be reused out from under us. */
6326 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6328 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6329 offset, pre_p);
6331 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6333 else if (TREE_CODE (type) == REAL_TYPE)
6335 tree fpaddend, cond, fourtyeight;
6337 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6338 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6339 addend, fourtyeight);
6340 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6341 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6342 fpaddend, addend);
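  /* That is, relative to the va_list base field: FP argument registers
     occupy the 48 bytes below it, integer argument registers the 48 bytes
     at and above it, and stack arguments start 48 bytes above it.  */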
6345 /* Build the final address and force that value into a temporary. */
6346 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6347 internal_post = NULL;
6348 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6349 gimple_seq_add_seq (pre_p, internal_post);
6351 /* Update the offset field. */
6352 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6353 if (type_size == NULL || TREE_OVERFLOW (type_size))
6354 t = size_zero_node;
6355 else
6357 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6358 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6359 t = size_binop (MULT_EXPR, t, size_int (8));
6361 t = fold_convert (TREE_TYPE (offset), t);
6362 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6363 pre_p);
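  /* The (size + 7) / 8 * 8 computation above rounds the consumed size up
     to whole 8-byte slots; a 12-byte structure, for example, advances the
     offset by 16.  */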
6365 return build_va_arg_indirect_ref (addr);
6368 static tree
6369 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6370 gimple_seq *post_p)
6372 tree offset_field, base_field, offset, base, t, r;
6373 bool indirect;
6375 if (TARGET_ABI_OPEN_VMS)
6376 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6378 base_field = TYPE_FIELDS (va_list_type_node);
6379 offset_field = DECL_CHAIN (base_field);
6380 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6381 valist, base_field, NULL_TREE);
6382 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6383 valist, offset_field, NULL_TREE);
6385 /* Pull the fields of the structure out into temporaries. Since we never
6386 modify the base field, we can use a formal temporary. Sign-extend the
6387 offset field so that it's the proper width for pointer arithmetic. */
6388 base = get_formal_tmp_var (base_field, pre_p);
6390 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6391 offset = get_initialized_tmp_var (t, pre_p, NULL);
6393 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6394 if (indirect)
6395 type = build_pointer_type_for_mode (type, ptr_mode, true);
6397 /* Find the value. Note that this will be a stable indirection, or
6398 a composite of stable indirections in the case of complex. */
6399 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6401 /* Stuff the offset temporary back into its field. */
6402 gimplify_assign (unshare_expr (offset_field),
6403 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6405 if (indirect)
6406 r = build_va_arg_indirect_ref (r);
6408 return r;
6411 /* Builtins. */
6413 enum alpha_builtin
6415 ALPHA_BUILTIN_CMPBGE,
6416 ALPHA_BUILTIN_EXTBL,
6417 ALPHA_BUILTIN_EXTWL,
6418 ALPHA_BUILTIN_EXTLL,
6419 ALPHA_BUILTIN_EXTQL,
6420 ALPHA_BUILTIN_EXTWH,
6421 ALPHA_BUILTIN_EXTLH,
6422 ALPHA_BUILTIN_EXTQH,
6423 ALPHA_BUILTIN_INSBL,
6424 ALPHA_BUILTIN_INSWL,
6425 ALPHA_BUILTIN_INSLL,
6426 ALPHA_BUILTIN_INSQL,
6427 ALPHA_BUILTIN_INSWH,
6428 ALPHA_BUILTIN_INSLH,
6429 ALPHA_BUILTIN_INSQH,
6430 ALPHA_BUILTIN_MSKBL,
6431 ALPHA_BUILTIN_MSKWL,
6432 ALPHA_BUILTIN_MSKLL,
6433 ALPHA_BUILTIN_MSKQL,
6434 ALPHA_BUILTIN_MSKWH,
6435 ALPHA_BUILTIN_MSKLH,
6436 ALPHA_BUILTIN_MSKQH,
6437 ALPHA_BUILTIN_UMULH,
6438 ALPHA_BUILTIN_ZAP,
6439 ALPHA_BUILTIN_ZAPNOT,
6440 ALPHA_BUILTIN_AMASK,
6441 ALPHA_BUILTIN_IMPLVER,
6442 ALPHA_BUILTIN_RPCC,
6443 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6444 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6446 /* TARGET_MAX */
6447 ALPHA_BUILTIN_MINUB8,
6448 ALPHA_BUILTIN_MINSB8,
6449 ALPHA_BUILTIN_MINUW4,
6450 ALPHA_BUILTIN_MINSW4,
6451 ALPHA_BUILTIN_MAXUB8,
6452 ALPHA_BUILTIN_MAXSB8,
6453 ALPHA_BUILTIN_MAXUW4,
6454 ALPHA_BUILTIN_MAXSW4,
6455 ALPHA_BUILTIN_PERR,
6456 ALPHA_BUILTIN_PKLB,
6457 ALPHA_BUILTIN_PKWB,
6458 ALPHA_BUILTIN_UNPKBL,
6459 ALPHA_BUILTIN_UNPKBW,
6461 /* TARGET_CIX */
6462 ALPHA_BUILTIN_CTTZ,
6463 ALPHA_BUILTIN_CTLZ,
6464 ALPHA_BUILTIN_CTPOP,
6466 ALPHA_BUILTIN_max
6469 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6470 CODE_FOR_builtin_cmpbge,
6471 CODE_FOR_extbl,
6472 CODE_FOR_extwl,
6473 CODE_FOR_extll,
6474 CODE_FOR_extql,
6475 CODE_FOR_extwh,
6476 CODE_FOR_extlh,
6477 CODE_FOR_extqh,
6478 CODE_FOR_builtin_insbl,
6479 CODE_FOR_builtin_inswl,
6480 CODE_FOR_builtin_insll,
6481 CODE_FOR_insql,
6482 CODE_FOR_inswh,
6483 CODE_FOR_inslh,
6484 CODE_FOR_insqh,
6485 CODE_FOR_mskbl,
6486 CODE_FOR_mskwl,
6487 CODE_FOR_mskll,
6488 CODE_FOR_mskql,
6489 CODE_FOR_mskwh,
6490 CODE_FOR_msklh,
6491 CODE_FOR_mskqh,
6492 CODE_FOR_umuldi3_highpart,
6493 CODE_FOR_builtin_zap,
6494 CODE_FOR_builtin_zapnot,
6495 CODE_FOR_builtin_amask,
6496 CODE_FOR_builtin_implver,
6497 CODE_FOR_builtin_rpcc,
6498 CODE_FOR_builtin_establish_vms_condition_handler,
6499 CODE_FOR_builtin_revert_vms_condition_handler,
6501 /* TARGET_MAX */
6502 CODE_FOR_builtin_minub8,
6503 CODE_FOR_builtin_minsb8,
6504 CODE_FOR_builtin_minuw4,
6505 CODE_FOR_builtin_minsw4,
6506 CODE_FOR_builtin_maxub8,
6507 CODE_FOR_builtin_maxsb8,
6508 CODE_FOR_builtin_maxuw4,
6509 CODE_FOR_builtin_maxsw4,
6510 CODE_FOR_builtin_perr,
6511 CODE_FOR_builtin_pklb,
6512 CODE_FOR_builtin_pkwb,
6513 CODE_FOR_builtin_unpkbl,
6514 CODE_FOR_builtin_unpkbw,
6516 /* TARGET_CIX */
6517 CODE_FOR_ctzdi2,
6518 CODE_FOR_clzdi2,
6519 CODE_FOR_popcountdi2
6522 struct alpha_builtin_def
6524 const char *name;
6525 enum alpha_builtin code;
6526 unsigned int target_mask;
6527 bool is_const;
6530 static struct alpha_builtin_def const zero_arg_builtins[] = {
6531 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6532 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6535 static struct alpha_builtin_def const one_arg_builtins[] = {
6536 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6537 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6538 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6539 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6540 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6541 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6542 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6543 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6546 static struct alpha_builtin_def const two_arg_builtins[] = {
6547 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6548 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6549 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6550 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6551 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6552 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6553 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6554 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6555 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6556 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6557 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6558 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6559 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6560 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6561 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6562 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6563 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6564 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6565 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6566 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6567 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6568 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6569 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6570 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6571 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6572 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6573 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6574 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6575 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6576 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6577 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6578 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6579 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6580 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
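/* To illustrate how these tables are used: a call such as

     unsigned long m = __builtin_alpha_cmpbge (x, y);

   is registered by alpha_add_builtins below as ALPHA_BUILTIN_CMPBGE and is
   either folded at compile time (alpha_fold_builtin_cmpbge) or expanded
   via CODE_FOR_builtin_cmpbge.  */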
6583 static GTY(()) tree alpha_dimode_u;
6584 static GTY(()) tree alpha_v8qi_u;
6585 static GTY(()) tree alpha_v8qi_s;
6586 static GTY(()) tree alpha_v4hi_u;
6587 static GTY(()) tree alpha_v4hi_s;
6589 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6591 /* Return the alpha builtin for CODE. */
6593 static tree
6594 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6596 if (code >= ALPHA_BUILTIN_max)
6597 return error_mark_node;
6598 return alpha_builtins[code];
6601 /* Helper function of alpha_init_builtins. Add the built-in specified
6602 by NAME, TYPE, CODE, and ECF. */
6604 static void
6605 alpha_builtin_function (const char *name, tree ftype,
6606 enum alpha_builtin code, unsigned ecf)
6608 tree decl = add_builtin_function (name, ftype, (int) code,
6609 BUILT_IN_MD, NULL, NULL_TREE);
6611 if (ecf & ECF_CONST)
6612 TREE_READONLY (decl) = 1;
6613 if (ecf & ECF_NOTHROW)
6614 TREE_NOTHROW (decl) = 1;
6616 alpha_builtins [(int) code] = decl;
6619 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6620 functions pointed to by P, with function type FTYPE. */
6622 static void
6623 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6624 tree ftype)
6626 size_t i;
6628 for (i = 0; i < count; ++i, ++p)
6629 if ((target_flags & p->target_mask) == p->target_mask)
6630 alpha_builtin_function (p->name, ftype, p->code,
6631 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6634 static void
6635 alpha_init_builtins (void)
6637 tree ftype;
6639 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6640 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6641 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6642 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6643 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6645 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6646 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6648 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6649 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6651 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6652 alpha_dimode_u, NULL_TREE);
6653 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6655 if (TARGET_ABI_OPEN_VMS)
6657 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6658 NULL_TREE);
6659 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6660 ftype,
6661 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6664 ftype = build_function_type_list (ptr_type_node, void_type_node,
6665 NULL_TREE);
6666 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6667 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6669 vms_patch_builtins ();
6673 /* Expand an expression EXP that calls a built-in function,
6674 with result going to TARGET if that's convenient
6675 (and in mode MODE if that's convenient).
6676 SUBTARGET may be used as the target for computing one of EXP's operands.
6677 IGNORE is nonzero if the value is to be ignored. */
6679 static rtx
6680 alpha_expand_builtin (tree exp, rtx target,
6681 rtx subtarget ATTRIBUTE_UNUSED,
6682 enum machine_mode mode ATTRIBUTE_UNUSED,
6683 int ignore ATTRIBUTE_UNUSED)
6685 #define MAX_ARGS 2
6687 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6688 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6689 tree arg;
6690 call_expr_arg_iterator iter;
6691 enum insn_code icode;
6692 rtx op[MAX_ARGS], pat;
6693 int arity;
6694 bool nonvoid;
6696 if (fcode >= ALPHA_BUILTIN_max)
6697 internal_error ("bad builtin fcode");
6698 icode = code_for_builtin[fcode];
6699 if (icode == 0)
6700 internal_error ("bad builtin fcode");
6702 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6704 arity = 0;
6705 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6707 const struct insn_operand_data *insn_op;
6709 if (arg == error_mark_node)
6710 return NULL_RTX;
6711 if (arity > MAX_ARGS)
6712 return NULL_RTX;
6714 insn_op = &insn_data[icode].operand[arity + nonvoid];
6716 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6718 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6719 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6720 arity++;
6723 if (nonvoid)
6725 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6726 if (!target
6727 || GET_MODE (target) != tmode
6728 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6729 target = gen_reg_rtx (tmode);
6732 switch (arity)
6734 case 0:
6735 pat = GEN_FCN (icode) (target);
6736 break;
6737 case 1:
6738 if (nonvoid)
6739 pat = GEN_FCN (icode) (target, op[0]);
6740 else
6741 pat = GEN_FCN (icode) (op[0]);
6742 break;
6743 case 2:
6744 pat = GEN_FCN (icode) (target, op[0], op[1]);
6745 break;
6746 default:
6747 gcc_unreachable ();
6749 if (!pat)
6750 return NULL_RTX;
6751 emit_insn (pat);
6753 if (nonvoid)
6754 return target;
6755 else
6756 return const0_rtx;
6760 /* Several bits below assume HWI >= 64 bits. This should be enforced
6761 by config.gcc. */
6762 #if HOST_BITS_PER_WIDE_INT < 64
6763 # error "HOST_WIDE_INT too small"
6764 #endif
6766 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6767 with an 8-bit output vector. OPINT contains the integer operands; bit N
6768 of OP_CONST is set if OPINT[N] is valid. */
6770 static tree
6771 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6773 if (op_const == 3)
6775 int i, val;
6776 for (i = 0, val = 0; i < 8; ++i)
6778 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6779 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6780 if (c0 >= c1)
6781 val |= 1 << i;
6783 return build_int_cst (alpha_dimode_u, val);
6785 else if (op_const == 2 && opint[1] == 0)
6786 return build_int_cst (alpha_dimode_u, 0xff);
6787 return NULL;
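/* For example, with both operands constant the fold above turns
   __builtin_alpha_cmpbge (0x1122334455667788, 0x1122334455667799)
   into 0xfe: only the low byte (0x88 vs 0x99) fails the >= test.  */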
6790 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6791 specialized form of an AND operation. Other byte manipulation instructions
6792 are defined in terms of this instruction, so this is also used as a
6793 subroutine for other builtins.
6795 OP contains the tree operands; OPINT contains the extracted integer values.
6796 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6797 OPINT may be considered. */
6799 static tree
6800 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6801 long op_const)
6803 if (op_const & 2)
6805 unsigned HOST_WIDE_INT mask = 0;
6806 int i;
6808 for (i = 0; i < 8; ++i)
6809 if ((opint[1] >> i) & 1)
6810 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6812 if (op_const & 1)
6813 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6815 if (op)
6816 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6817 build_int_cst (alpha_dimode_u, mask));
6819 else if ((op_const & 1) && opint[0] == 0)
6820 return build_int_cst (alpha_dimode_u, 0);
6821 return NULL;
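/* For example, a constant byte selector of 0x0f expands to the mask
   0x00000000ffffffff, so zapnot (x, 0x0f) becomes x & 0xffffffff.  */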
6824 /* Fold the builtins for the EXT family of instructions. */
6826 static tree
6827 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6828 long op_const, unsigned HOST_WIDE_INT bytemask,
6829 bool is_high)
6831 long zap_const = 2;
6832 tree *zap_op = NULL;
6834 if (op_const & 2)
6836 unsigned HOST_WIDE_INT loc;
6838 loc = opint[1] & 7;
6839 loc *= BITS_PER_UNIT;
6841 if (loc != 0)
6843 if (op_const & 1)
6845 unsigned HOST_WIDE_INT temp = opint[0];
6846 if (is_high)
6847 temp <<= loc;
6848 else
6849 temp >>= loc;
6850 opint[0] = temp;
6851 zap_const = 3;
6854 else
6855 zap_op = op;
6858 opint[1] = bytemask;
6859 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
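/* For example, with constant operands __builtin_alpha_extwl (x, 2) has
   bytemask 0x03 and a byte offset of 2, so it folds to (x >> 16) & 0xffff.  */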
6862 /* Fold the builtins for the INS family of instructions. */
6864 static tree
6865 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6866 long op_const, unsigned HOST_WIDE_INT bytemask,
6867 bool is_high)
6869 if ((op_const & 1) && opint[0] == 0)
6870 return build_int_cst (alpha_dimode_u, 0);
6872 if (op_const & 2)
6874 unsigned HOST_WIDE_INT temp, loc, byteloc;
6875 tree *zap_op = NULL;
6877 loc = opint[1] & 7;
6878 bytemask <<= loc;
6880 temp = opint[0];
6881 if (is_high)
6883 byteloc = (64 - (loc * 8)) & 0x3f;
6884 if (byteloc == 0)
6885 zap_op = op;
6886 else
6887 temp >>= byteloc;
6888 bytemask >>= 8;
6890 else
6892 byteloc = loc * 8;
6893 if (byteloc == 0)
6894 zap_op = op;
6895 else
6896 temp <<= byteloc;
6899 opint[0] = temp;
6900 opint[1] = bytemask;
6901 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6904 return NULL;
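/* For example, with constant operands __builtin_alpha_inswl (x, 2) shifts
   X left by 16 bits and keeps bytes 2 and 3, i.e. it folds to
   (x << 16) & 0xffff0000.  */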
6907 static tree
6908 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6909 long op_const, unsigned HOST_WIDE_INT bytemask,
6910 bool is_high)
6912 if (op_const & 2)
6914 unsigned HOST_WIDE_INT loc;
6916 loc = opint[1] & 7;
6917 bytemask <<= loc;
6919 if (is_high)
6920 bytemask >>= 8;
6922 opint[1] = bytemask ^ 0xff;
6925 return alpha_fold_builtin_zapnot (op, opint, op_const);
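/* For example, with a constant second operand __builtin_alpha_mskbl (x, 3)
   reduces to zapnot (x, 0xf7), i.e. X with byte 3 cleared.  */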
6928 static tree
6929 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6931 tree op0 = fold_convert (vtype, op[0]);
6932 tree op1 = fold_convert (vtype, op[1]);
6933 tree val = fold_build2 (code, vtype, op0, op1);
6934 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6937 static tree
6938 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6940 unsigned HOST_WIDE_INT temp = 0;
6941 int i;
6943 if (op_const != 3)
6944 return NULL;
6946 for (i = 0; i < 8; ++i)
6948 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6949 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6950 if (a >= b)
6951 temp += a - b;
6952 else
6953 temp += b - a;
6956 return build_int_cst (alpha_dimode_u, temp);
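/* For example, __builtin_alpha_perr (0x0500, 0x0200) folds to 3: byte 1
   contributes |0x05 - 0x02| and every other byte pair is equal.  */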
6959 static tree
6960 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6962 unsigned HOST_WIDE_INT temp;
6964 if (op_const == 0)
6965 return NULL;
6967 temp = opint[0] & 0xff;
6968 temp |= (opint[0] >> 24) & 0xff00;
6970 return build_int_cst (alpha_dimode_u, temp);
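/* PKLB packs bytes 0 and 4 of the source into bytes 0 and 1, so e.g.
   a constant operand of 0x000000aa000000bb folds to 0xaabb.  */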
6973 static tree
6974 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6976 unsigned HOST_WIDE_INT temp;
6978 if (op_const == 0)
6979 return NULL;
6981 temp = opint[0] & 0xff;
6982 temp |= (opint[0] >> 8) & 0xff00;
6983 temp |= (opint[0] >> 16) & 0xff0000;
6984 temp |= (opint[0] >> 24) & 0xff000000;
6986 return build_int_cst (alpha_dimode_u, temp);
6989 static tree
6990 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6992 unsigned HOST_WIDE_INT temp;
6994 if (op_const == 0)
6995 return NULL;
6997 temp = opint[0] & 0xff;
6998 temp |= (opint[0] & 0xff00) << 24;
7000 return build_int_cst (alpha_dimode_u, temp);
7003 static tree
7004 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
7006 unsigned HOST_WIDE_INT temp;
7008 if (op_const == 0)
7009 return NULL;
7011 temp = opint[0] & 0xff;
7012 temp |= (opint[0] & 0x0000ff00) << 8;
7013 temp |= (opint[0] & 0x00ff0000) << 16;
7014 temp |= (opint[0] & 0xff000000) << 24;
7016 return build_int_cst (alpha_dimode_u, temp);
7019 static tree
7020 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
7022 unsigned HOST_WIDE_INT temp;
7024 if (op_const == 0)
7025 return NULL;
7027 if (opint[0] == 0)
7028 temp = 64;
7029 else
7030 temp = exact_log2 (opint[0] & -opint[0]);
7032 return build_int_cst (alpha_dimode_u, temp);
7035 static tree
7036 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
7038 unsigned HOST_WIDE_INT temp;
7040 if (op_const == 0)
7041 return NULL;
7043 if (opint[0] == 0)
7044 temp = 64;
7045 else
7046 temp = 64 - floor_log2 (opint[0]) - 1;
7048 return build_int_cst (alpha_dimode_u, temp);
7051 static tree
7052 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7054 unsigned HOST_WIDE_INT temp, op;
7056 if (op_const == 0)
7057 return NULL;
7059 op = opint[0];
7060 temp = 0;
7061 while (op)
7062 temp++, op &= op - 1;
7064 return build_int_cst (alpha_dimode_u, temp);
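/* For a constant operand of 0x10, the three folds above yield 4 (cttz),
   59 (ctlz) and 1 (ctpop); a zero operand yields 64 for cttz and ctlz.  */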
7067 /* Fold one of our builtin functions. */
7069 static tree
7070 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7071 bool ignore ATTRIBUTE_UNUSED)
7073 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7074 long op_const = 0;
7075 int i;
7077 if (n_args > MAX_ARGS)
7078 return NULL;
7080 for (i = 0; i < n_args; i++)
7082 tree arg = op[i];
7083 if (arg == error_mark_node)
7084 return NULL;
7086 opint[i] = 0;
7087 if (TREE_CODE (arg) == INTEGER_CST)
7089 op_const |= 1L << i;
7090 opint[i] = int_cst_value (arg);
7094 switch (DECL_FUNCTION_CODE (fndecl))
7096 case ALPHA_BUILTIN_CMPBGE:
7097 return alpha_fold_builtin_cmpbge (opint, op_const);
7099 case ALPHA_BUILTIN_EXTBL:
7100 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7101 case ALPHA_BUILTIN_EXTWL:
7102 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7103 case ALPHA_BUILTIN_EXTLL:
7104 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7105 case ALPHA_BUILTIN_EXTQL:
7106 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7107 case ALPHA_BUILTIN_EXTWH:
7108 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7109 case ALPHA_BUILTIN_EXTLH:
7110 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7111 case ALPHA_BUILTIN_EXTQH:
7112 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7114 case ALPHA_BUILTIN_INSBL:
7115 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7116 case ALPHA_BUILTIN_INSWL:
7117 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7118 case ALPHA_BUILTIN_INSLL:
7119 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7120 case ALPHA_BUILTIN_INSQL:
7121 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7122 case ALPHA_BUILTIN_INSWH:
7123 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7124 case ALPHA_BUILTIN_INSLH:
7125 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7126 case ALPHA_BUILTIN_INSQH:
7127 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7129 case ALPHA_BUILTIN_MSKBL:
7130 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7131 case ALPHA_BUILTIN_MSKWL:
7132 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7133 case ALPHA_BUILTIN_MSKLL:
7134 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7135 case ALPHA_BUILTIN_MSKQL:
7136 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7137 case ALPHA_BUILTIN_MSKWH:
7138 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7139 case ALPHA_BUILTIN_MSKLH:
7140 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7141 case ALPHA_BUILTIN_MSKQH:
7142 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7144 case ALPHA_BUILTIN_ZAP:
7145 opint[1] ^= 0xff;
7146 /* FALLTHRU */
7147 case ALPHA_BUILTIN_ZAPNOT:
7148 return alpha_fold_builtin_zapnot (op, opint, op_const);
7150 case ALPHA_BUILTIN_MINUB8:
7151 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7152 case ALPHA_BUILTIN_MINSB8:
7153 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7154 case ALPHA_BUILTIN_MINUW4:
7155 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7156 case ALPHA_BUILTIN_MINSW4:
7157 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7158 case ALPHA_BUILTIN_MAXUB8:
7159 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7160 case ALPHA_BUILTIN_MAXSB8:
7161 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7162 case ALPHA_BUILTIN_MAXUW4:
7163 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7164 case ALPHA_BUILTIN_MAXSW4:
7165 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7167 case ALPHA_BUILTIN_PERR:
7168 return alpha_fold_builtin_perr (opint, op_const);
7169 case ALPHA_BUILTIN_PKLB:
7170 return alpha_fold_builtin_pklb (opint, op_const);
7171 case ALPHA_BUILTIN_PKWB:
7172 return alpha_fold_builtin_pkwb (opint, op_const);
7173 case ALPHA_BUILTIN_UNPKBL:
7174 return alpha_fold_builtin_unpkbl (opint, op_const);
7175 case ALPHA_BUILTIN_UNPKBW:
7176 return alpha_fold_builtin_unpkbw (opint, op_const);
7178 case ALPHA_BUILTIN_CTTZ:
7179 return alpha_fold_builtin_cttz (opint, op_const);
7180 case ALPHA_BUILTIN_CTLZ:
7181 return alpha_fold_builtin_ctlz (opint, op_const);
7182 case ALPHA_BUILTIN_CTPOP:
7183 return alpha_fold_builtin_ctpop (opint, op_const);
7185 case ALPHA_BUILTIN_AMASK:
7186 case ALPHA_BUILTIN_IMPLVER:
7187 case ALPHA_BUILTIN_RPCC:
7188 /* None of these are foldable at compile-time. */
7189 default:
7190 return NULL;
7194 bool
7195 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7197 bool changed = false;
7198 gimple stmt = gsi_stmt (*gsi);
7199 tree call = gimple_call_fn (stmt);
7200 gimple new_stmt = NULL;
7202 if (call)
7204 tree fndecl = gimple_call_fndecl (stmt);
7206 if (fndecl)
7208 tree arg0, arg1;
7210 switch (DECL_FUNCTION_CODE (fndecl))
7212 case ALPHA_BUILTIN_UMULH:
7213 arg0 = gimple_call_arg (stmt, 0);
7214 arg1 = gimple_call_arg (stmt, 1);
7216 new_stmt
7217 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR,
7218 gimple_call_lhs (stmt),
7219 arg0,
7220 arg1);
7221 break;
7222 default:
7223 break;
7228 if (new_stmt)
7230 gsi_replace (gsi, new_stmt, true);
7231 changed = true;
7234 return changed;
7237 /* This page contains routines that are used to determine what the function
7238 prologue and epilogue code will do and write them out. */
7240 /* Compute the size of the save area in the stack. */
7242 /* These variables are used for communication between the following functions.
7243 They indicate various things about the current function being compiled
7244 that are used to tell what kind of prologue, epilogue and procedure
7245 descriptor to generate. */
7247 /* Nonzero if we need a stack procedure. */
7248 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7249 static enum alpha_procedure_types alpha_procedure_type;
7251 /* Register number (either FP or SP) that is used to unwind the frame. */
7252 static int vms_unwind_regno;
7254 /* Register number used to save FP. We need not have one for RA since
7255 we don't modify it for register procedures. This is only defined
7256 for register frame procedures. */
7257 static int vms_save_fp_regno;
7259 /* Register number used to reference objects off our PV. */
7260 static int vms_base_regno;
7262 /* Compute register masks for saved registers. */
7264 static void
7265 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7267 unsigned long imask = 0;
7268 unsigned long fmask = 0;
7269 unsigned int i;
7271 /* When outputting a thunk, we don't have valid register life info,
7272 but assemble_start_function wants to output .frame and .mask
7273 directives. */
7274 if (cfun->is_thunk)
7276 *imaskP = 0;
7277 *fmaskP = 0;
7278 return;
7281 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7282 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7284 /* One for every register we have to save. */
7285 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7286 if (! fixed_regs[i] && ! call_used_regs[i]
7287 && df_regs_ever_live_p (i) && i != REG_RA)
7289 if (i < 32)
7290 imask |= (1UL << i);
7291 else
7292 fmask |= (1UL << (i - 32));
7295 /* We need to restore these for the handler. */
7296 if (crtl->calls_eh_return)
7298 for (i = 0; ; ++i)
7300 unsigned regno = EH_RETURN_DATA_REGNO (i);
7301 if (regno == INVALID_REGNUM)
7302 break;
7303 imask |= 1UL << regno;
7307 /* If any register spilled, then spill the return address also. */
7308 /* ??? This is required by the Digital stack unwind specification
7309 and isn't needed if we're doing Dwarf2 unwinding. */
7310 if (imask || fmask || alpha_ra_ever_killed ())
7311 imask |= (1UL << REG_RA);
7313 *imaskP = imask;
7314 *fmaskP = fmask;
7317 int
7318 alpha_sa_size (void)
7320 unsigned long mask[2];
7321 int sa_size = 0;
7322 int i, j;
7324 alpha_sa_mask (&mask[0], &mask[1]);
7326 for (j = 0; j < 2; ++j)
7327 for (i = 0; i < 32; ++i)
7328 if ((mask[j] >> i) & 1)
7329 sa_size++;
7331 if (TARGET_ABI_OPEN_VMS)
7333 /* Start with a stack procedure if we make any calls (REG_RA used), or
7334 need a frame pointer, with a register procedure if we otherwise need
7335 at least a slot, and with a null procedure in other cases. */
7336 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7337 alpha_procedure_type = PT_STACK;
7338 else if (get_frame_size() != 0)
7339 alpha_procedure_type = PT_REGISTER;
7340 else
7341 alpha_procedure_type = PT_NULL;
7343 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7344 made the final decision on stack procedure vs register procedure. */
7345 if (alpha_procedure_type == PT_STACK)
7346 sa_size -= 2;
7348 /* Decide whether to refer to objects off our PV via FP or PV.
7349 If we need FP for something else or if we receive a nonlocal
7350 goto (which expects PV to contain the value), we must use PV.
7351 Otherwise, start by assuming we can use FP. */
7353 vms_base_regno
7354 = (frame_pointer_needed
7355 || cfun->has_nonlocal_label
7356 || alpha_procedure_type == PT_STACK
7357 || crtl->outgoing_args_size)
7358 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7360 /* If we want to copy PV into FP, we need to find some register
7361 in which to save FP. */
7363 vms_save_fp_regno = -1;
7364 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7365 for (i = 0; i < 32; i++)
7366 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7367 vms_save_fp_regno = i;
7369 /* A VMS condition handler requires a stack procedure in our
7370 implementation (not required by the calling standard). */
7371 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7372 || cfun->machine->uses_condition_handler)
7373 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7374 else if (alpha_procedure_type == PT_NULL)
7375 vms_base_regno = REG_PV;
7377 /* Stack unwinding should be done via FP unless we use it for PV. */
7378 vms_unwind_regno = (vms_base_regno == REG_PV
7379 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7381 /* If this is a stack procedure, allow space for saving FP, RA and
7382 a condition handler slot if needed. */
7383 if (alpha_procedure_type == PT_STACK)
7384 sa_size += 2 + cfun->machine->uses_condition_handler;
7386 else
7388 /* Our size must be even (multiple of 16 bytes). */
7389 if (sa_size & 1)
7390 sa_size++;
7393 return sa_size * 8;
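/* For example, a non-VMS function that must save $9, $10 and $26 (RA)
   needs 3 slots, padded to 4 for 16-byte alignment, so this returns 32.  */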
7396 /* Define the offset between two registers, one to be eliminated,
7397 and the other its replacement, at the start of a routine. */
7399 HOST_WIDE_INT
7400 alpha_initial_elimination_offset (unsigned int from,
7401 unsigned int to ATTRIBUTE_UNUSED)
7403 HOST_WIDE_INT ret;
7405 ret = alpha_sa_size ();
7406 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7408 switch (from)
7410 case FRAME_POINTER_REGNUM:
7411 break;
7413 case ARG_POINTER_REGNUM:
7414 ret += (ALPHA_ROUND (get_frame_size ()
7415 + crtl->args.pretend_args_size)
7416 - crtl->args.pretend_args_size);
7417 break;
7419 default:
7420 gcc_unreachable ();
7423 return ret;
7426 #if TARGET_ABI_OPEN_VMS
7428 /* Worker function for TARGET_CAN_ELIMINATE. */
7430 static bool
7431 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7433 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7434 alpha_sa_size ();
7436 switch (alpha_procedure_type)
7438 case PT_NULL:
7439 /* NULL procedures have no frame of their own and we only
7440 know how to resolve from the current stack pointer. */
7441 return to == STACK_POINTER_REGNUM;
7443 case PT_REGISTER:
7444 case PT_STACK:
7445 /* We always eliminate except to the stack pointer if there is no
7446 usable frame pointer at hand. */
7447 return (to != STACK_POINTER_REGNUM
7448 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7451 gcc_unreachable ();
7454 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7455 designates the same location as FROM. */
7457 HOST_WIDE_INT
7458 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7460 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7461 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7462 on the proper computations and will need the register save area size
7463 in most cases. */
7465 HOST_WIDE_INT sa_size = alpha_sa_size ();
7467 /* PT_NULL procedures have no frame of their own and we only allow
7468 elimination to the stack pointer. This is the argument pointer and we
7469 resolve the soft frame pointer to that as well. */
7471 if (alpha_procedure_type == PT_NULL)
7472 return 0;
7474 /* For a PT_STACK procedure the frame layout looks as follows
7476 -----> decreasing addresses
7478            <           size rounded up to 16            |   likewise    >
7479 --------------#------------------------------+++--------------+++-------#
7480 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7481 --------------#---------------------------------------------------------#
7482               ^                          ^              ^               ^
7483            ARG_PTR                   FRAME_PTR   HARD_FRAME_PTR     STACK_PTR
7486 PT_REGISTER procedures are similar in that they may have a frame of their
7487 own. They have no regs-sa/pv/outgoing-args area.
7489 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7490 to STACK_PTR if need be. */
7493 HOST_WIDE_INT offset;
7494 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7496 switch (from)
7498 case FRAME_POINTER_REGNUM:
7499 offset = ALPHA_ROUND (sa_size + pv_save_size);
7500 break;
7501 case ARG_POINTER_REGNUM:
7502 offset = (ALPHA_ROUND (sa_size + pv_save_size
7503 + get_frame_size ()
7504 + crtl->args.pretend_args_size)
7505 - crtl->args.pretend_args_size);
7506 break;
7507 default:
7508 gcc_unreachable ();
7511 if (to == STACK_POINTER_REGNUM)
7512 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7514 return offset;
7518 #define COMMON_OBJECT "common_object"
7520 static tree
7521 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7522 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7523 bool *no_add_attrs ATTRIBUTE_UNUSED)
7525 tree decl = *node;
7526 gcc_assert (DECL_P (decl));
7528 DECL_COMMON (decl) = 1;
7529 return NULL_TREE;
7532 static const struct attribute_spec vms_attribute_table[] =
7534 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7535 affects_type_identity } */
7536 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7537 { NULL, 0, 0, false, false, false, NULL, false }
7540 void
7541 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7542 unsigned HOST_WIDE_INT size,
7543 unsigned int align)
7545 tree attr = DECL_ATTRIBUTES (decl);
7546 fprintf (file, "%s", COMMON_ASM_OP);
7547 assemble_name (file, name);
7548 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7549 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7550 fprintf (file, ",%u", align / BITS_PER_UNIT);
7551 if (attr)
7553 attr = lookup_attribute (COMMON_OBJECT, attr);
7554 if (attr)
7555 fprintf (file, ",%s",
7556 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7558 fputc ('\n', file);
7561 #undef COMMON_OBJECT
7563 #endif
7565 static int
7566 find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED)
7568 return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
7571 int
7572 alpha_find_lo_sum_using_gp (rtx insn)
7574 return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
7577 static int
7578 alpha_does_function_need_gp (void)
7580 rtx_insn *insn;
7582 /* The GP being variable is an OSF abi thing. */
7583 if (! TARGET_ABI_OSF)
7584 return 0;
7586 /* We need the gp to load the address of __mcount. */
7587 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7588 return 1;
7590 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7591 if (cfun->is_thunk)
7592 return 1;
7594 /* The nonlocal receiver pattern assumes that the gp is valid for
7595 the nested function. Reasonable because it's almost always set
7596 correctly already. For the cases where that's wrong, make sure
7597 the nested function loads its gp on entry. */
7598 if (crtl->has_nonlocal_goto)
7599 return 1;
7601 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7602 Even if we are a static function, we still need to do this in case
7603 our address is taken and passed to something like qsort. */
7605 push_topmost_sequence ();
7606 insn = get_insns ();
7607 pop_topmost_sequence ();
7609 for (; insn; insn = NEXT_INSN (insn))
7610 if (NONDEBUG_INSN_P (insn)
7611 && GET_CODE (PATTERN (insn)) != USE
7612 && GET_CODE (PATTERN (insn)) != CLOBBER
7613 && get_attr_usegp (insn))
7614 return 1;
7616 return 0;
7620 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7621 sequences. */
7623 static rtx_insn *
7624 set_frame_related_p (void)
7626 rtx_insn *seq = get_insns ();
7627 rtx_insn *insn;
7629 end_sequence ();
7631 if (!seq)
7632 return NULL;
7634 if (INSN_P (seq))
7636 insn = seq;
7637 while (insn != NULL_RTX)
7639 RTX_FRAME_RELATED_P (insn) = 1;
7640 insn = NEXT_INSN (insn);
7642 seq = emit_insn (seq);
7644 else
7646 seq = emit_insn (seq);
7647 RTX_FRAME_RELATED_P (seq) = 1;
7649 return seq;
7652 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7654 /* Generates a store with the proper unwind info attached. VALUE is
7655 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7656 contains SP+FRAME_BIAS, and that is the unwind info that should be
7657 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7658 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7660 static void
7661 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7662 HOST_WIDE_INT base_ofs, rtx frame_reg)
7664 rtx addr, mem;
7665 rtx_insn *insn;
7667 addr = plus_constant (Pmode, base_reg, base_ofs);
7668 mem = gen_frame_mem (DImode, addr);
7670 insn = emit_move_insn (mem, value);
7671 RTX_FRAME_RELATED_P (insn) = 1;
7673 if (frame_bias || value != frame_reg)
7675 if (frame_bias)
7677 addr = plus_constant (Pmode, stack_pointer_rtx,
7678 frame_bias + base_ofs);
7679 mem = gen_rtx_MEM (DImode, addr);
7682 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7683 gen_rtx_SET (VOIDmode, mem, frame_reg));
7687 static void
7688 emit_frame_store (unsigned int regno, rtx base_reg,
7689 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7691 rtx reg = gen_rtx_REG (DImode, regno);
7692 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7695 /* Compute the frame size. SIZE is the size of the "naked" frame
7696 and SA_SIZE is the size of the register save area. */
7698 static HOST_WIDE_INT
7699 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7701 if (TARGET_ABI_OPEN_VMS)
7702 return ALPHA_ROUND (sa_size
7703 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7704 + size
7705 + crtl->args.pretend_args_size);
7706 else
7707 return ALPHA_ROUND (crtl->outgoing_args_size)
7708 + sa_size
7709 + ALPHA_ROUND (size
7710 + crtl->args.pretend_args_size);
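/* For example, an OSF function with 40 bytes of outgoing arguments, a
   32-byte save area, a 24-byte frame and no pretend args needs
   ALPHA_ROUND (40) + 32 + ALPHA_ROUND (24) = 48 + 32 + 32 = 112 bytes,
   ALPHA_ROUND rounding up to a multiple of 16.  */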
7713 /* Write function prologue. */
7715 /* On vms we have two kinds of functions:
7717 - stack frame (PROC_STACK)
7718 these are 'normal' functions with local vars and which are
7719 calling other functions
7720 - register frame (PROC_REGISTER)
7721 keeps all data in registers, needs no stack
7723 We must pass this to the assembler so it can generate the
7724 proper pdsc (procedure descriptor).
7725 This is done with the '.pdesc' command.
7727 On non-VMS targets, we don't really differentiate between the two, as we can
7728 simply allocate stack without saving registers. */
7730 void
7731 alpha_expand_prologue (void)
7733 /* Registers to save. */
7734 unsigned long imask = 0;
7735 unsigned long fmask = 0;
7736 /* Stack space needed for pushing registers clobbered by us. */
7737 HOST_WIDE_INT sa_size, sa_bias;
7738 /* Complete stack size needed. */
7739 HOST_WIDE_INT frame_size;
7740 /* Probed stack size; it additionally includes the size of
7741 the "reserve region" if any. */
7742 HOST_WIDE_INT probed_size;
7743 /* Offset from base reg to register save area. */
7744 HOST_WIDE_INT reg_offset;
7745 rtx sa_reg;
7746 int i;
7748 sa_size = alpha_sa_size ();
7749 frame_size = compute_frame_size (get_frame_size (), sa_size);
7751 if (flag_stack_usage_info)
7752 current_function_static_stack_size = frame_size;
7754 if (TARGET_ABI_OPEN_VMS)
7755 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7756 else
7757 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7759 alpha_sa_mask (&imask, &fmask);
7761 /* Emit an insn to reload GP, if needed. */
7762 if (TARGET_ABI_OSF)
7764 alpha_function_needs_gp = alpha_does_function_need_gp ();
7765 if (alpha_function_needs_gp)
7766 emit_insn (gen_prologue_ldgp ());
7769 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7770 the call to mcount ourselves, rather than having the linker do it
7771 magically in response to -pg. Since _mcount has special linkage,
7772 don't represent the call as a call. */
7773 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7774 emit_insn (gen_prologue_mcount ());
7776 /* Adjust the stack by the frame size. If the frame size is > 4096
7777 bytes, we need to be sure we probe somewhere in the first and last
7778 4096 bytes (we can probably get away without the latter test) and
7779 every 8192 bytes in between. If the frame size is > 32768, we
7780 do this in a loop. Otherwise, we generate the explicit probe
7781 instructions.
7783 Note that we are only allowed to adjust sp once in the prologue. */
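  /* For example, with no registers to save, -fstack-check disabled and a
     20000-byte frame, this emits probes at sp-4096 and sp-12288, a final
     probe at sp-20000, and then the single sp adjustment of -20000.  */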
7785 probed_size = frame_size;
7786 if (flag_stack_check)
7787 probed_size += STACK_CHECK_PROTECT;
7789 if (probed_size <= 32768)
7791 if (probed_size > 4096)
7793 int probed;
7795 for (probed = 4096; probed < probed_size; probed += 8192)
7796 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7798 /* We only have to do this probe if we aren't saving registers or
7799 if we are probing beyond the frame because of -fstack-check. */
7800 if ((sa_size == 0 && probed_size > probed - 4096)
7801 || flag_stack_check)
7802 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7805 if (frame_size != 0)
7806 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7807 GEN_INT (-frame_size))));
7809 else
7811 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7812 number of 8192 byte blocks to probe. We then probe each block
7813 in the loop and then set SP to the proper location. If the
7814 amount remaining is > 4096, we have to do one more probe if we
7815 are not saving any registers or if we are probing beyond the
7816 frame because of -fstack-check. */
7818 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7819 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7820 rtx ptr = gen_rtx_REG (DImode, 22);
7821 rtx count = gen_rtx_REG (DImode, 23);
7822 rtx seq;
7824 emit_move_insn (count, GEN_INT (blocks));
7825 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7827 /* Because of the difficulty in emitting a new basic block this
7828 late in the compilation, generate the loop as a single insn. */
7829 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7831 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7833 rtx last = gen_rtx_MEM (DImode,
7834 plus_constant (Pmode, ptr, -leftover));
7835 MEM_VOLATILE_P (last) = 1;
7836 emit_move_insn (last, const0_rtx);
7839 if (flag_stack_check)
7841 /* If -fstack-check is specified we have to load the entire
7842 constant into a register and subtract from the sp in one go,
7843 because the probed stack size is not equal to the frame size. */
7844 HOST_WIDE_INT lo, hi;
7845 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7846 hi = frame_size - lo;
7848 emit_move_insn (ptr, GEN_INT (hi));
7849 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7850 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7851 ptr));
7853 else
7855 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7856 GEN_INT (-leftover)));
7859 /* This alternative is special, because the DWARF code cannot
7860 possibly intuit through the loop above. So we invent this
7861 note for it to look at instead. */
7862 RTX_FRAME_RELATED_P (seq) = 1;
7863 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7864 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7865 plus_constant (Pmode, stack_pointer_rtx,
7866 -frame_size)));
7869 /* Cope with very large offsets to the register save area. */
7870 sa_bias = 0;
7871 sa_reg = stack_pointer_rtx;
7872 if (reg_offset + sa_size > 0x8000)
7874 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7875 rtx sa_bias_rtx;
7877 if (low + sa_size <= 0x8000)
7878 sa_bias = reg_offset - low, reg_offset = low;
7879 else
7880 sa_bias = reg_offset, reg_offset = 0;
7882 sa_reg = gen_rtx_REG (DImode, 24);
7883 sa_bias_rtx = GEN_INT (sa_bias);
7885 if (add_operand (sa_bias_rtx, DImode))
7886 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7887 else
7889 emit_move_insn (sa_reg, sa_bias_rtx);
7890 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
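  /* For example, with reg_offset == 0x9000 and a modest save area, the
     stores would not fit in a 16-bit displacement, so $24 is set to
     sp + 0x10000 and the saves then use the in-range offset -0x7000.  */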
7894 /* Save regs in stack order. Beginning with VMS PV. */
7895 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7896 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7898 /* Save register RA next. */
7899 if (imask & (1UL << REG_RA))
7901 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7902 imask &= ~(1UL << REG_RA);
7903 reg_offset += 8;
7906 /* Now save any other registers required to be saved. */
7907 for (i = 0; i < 31; i++)
7908 if (imask & (1UL << i))
7910 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7911 reg_offset += 8;
7914 for (i = 0; i < 31; i++)
7915 if (fmask & (1UL << i))
7917 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7918 reg_offset += 8;
7921 if (TARGET_ABI_OPEN_VMS)
7923 /* Register frame procedures save the fp. */
7924 if (alpha_procedure_type == PT_REGISTER)
7926 rtx_insn *insn =
7927 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7928 hard_frame_pointer_rtx);
7929 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7930 RTX_FRAME_RELATED_P (insn) = 1;
7933 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7934 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7935 gen_rtx_REG (DImode, REG_PV)));
7937 if (alpha_procedure_type != PT_NULL
7938 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7939 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7941 /* If we have to allocate space for outgoing args, do it now. */
7942 if (crtl->outgoing_args_size != 0)
7944 rtx_insn *seq
7945 = emit_move_insn (stack_pointer_rtx,
7946 plus_constant
7947 (Pmode, hard_frame_pointer_rtx,
7948 - (ALPHA_ROUND
7949 (crtl->outgoing_args_size))));
7951 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7952 if ! frame_pointer_needed. Setting the bit will change the CFA
7953 computation rule to use sp again, which would be wrong if we had
7954 frame_pointer_needed, as this means sp might move unpredictably
7955 later on.
7957 Also, note that
7958 frame_pointer_needed
7959 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7961 crtl->outgoing_args_size != 0
7962 => alpha_procedure_type != PT_NULL,
7964 so when we are not setting the bit here, we are guaranteed to
7965 have emitted an FRP frame pointer update just before. */
7966 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7969 else
7971 /* If we need a frame pointer, set it from the stack pointer. */
7972 if (frame_pointer_needed)
7974 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7975 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7976 else
7977 /* This must always be the last instruction in the
7978 prologue, thus we emit a special move + clobber. */
7979 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7980 stack_pointer_rtx, sa_reg)));
7984 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7985 the prologue, for exception handling reasons, we cannot do this for
7986 any insn that might fault. We could prevent this for mems with a
7987 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7988 have to prevent all such scheduling with a blockage.
7990 Linux, on the other hand, never bothered to implement OSF/1's
7991 exception handling, and so doesn't care about such things. Anyone
7992 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7994 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7995 emit_insn (gen_blockage ());
7998 /* Count the number of .file directives, so that .loc is up to date. */
7999 int num_source_filenames = 0;
8001 /* Output the textual info surrounding the prologue. */
8003 void
8004 alpha_start_function (FILE *file, const char *fnname,
8005 tree decl ATTRIBUTE_UNUSED)
8007 unsigned long imask = 0;
8008 unsigned long fmask = 0;
8009 /* Stack space needed for pushing registers clobbered by us. */
8010 HOST_WIDE_INT sa_size;
8011 /* Complete stack size needed. */
8012 unsigned HOST_WIDE_INT frame_size;
8013 /* The maximum debuggable frame size. */
8014 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
8015 /* Offset from base reg to register save area. */
8016 HOST_WIDE_INT reg_offset;
8017 char *entry_label = (char *) alloca (strlen (fnname) + 6);
8018 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8019 int i;
8021 #if TARGET_ABI_OPEN_VMS
8022 vms_start_function (fnname);
8023 #endif
8025 alpha_fnname = fnname;
8026 sa_size = alpha_sa_size ();
8027 frame_size = compute_frame_size (get_frame_size (), sa_size);
8029 if (TARGET_ABI_OPEN_VMS)
8030 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8031 else
8032 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8034 alpha_sa_mask (&imask, &fmask);
8036 /* Issue function start and label. */
8037 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8039 fputs ("\t.ent ", file);
8040 assemble_name (file, fnname);
8041 putc ('\n', file);
8043 /* If the function needs GP, we'll write the "..ng" label there.
8044 Otherwise, do it here. */
8045 if (TARGET_ABI_OSF
8046 && ! alpha_function_needs_gp
8047 && ! cfun->is_thunk)
8049 putc ('$', file);
8050 assemble_name (file, fnname);
8051 fputs ("..ng:\n", file);
8054 /* Nested functions on VMS that are potentially called via trampoline
8055 get a special transfer entry point that loads the called function's
8056 procedure descriptor and static chain. */
8057 if (TARGET_ABI_OPEN_VMS
8058 && !TREE_PUBLIC (decl)
8059 && DECL_CONTEXT (decl)
8060 && !TYPE_P (DECL_CONTEXT (decl))
8061 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8063 strcpy (tramp_label, fnname);
8064 strcat (tramp_label, "..tr");
8065 ASM_OUTPUT_LABEL (file, tramp_label);
8066 fprintf (file, "\tldq $1,24($27)\n");
8067 fprintf (file, "\tldq $27,16($27)\n");
8070 strcpy (entry_label, fnname);
8071 if (TARGET_ABI_OPEN_VMS)
8072 strcat (entry_label, "..en");
8074 ASM_OUTPUT_LABEL (file, entry_label);
8075 inside_function = TRUE;
8077 if (TARGET_ABI_OPEN_VMS)
8078 fprintf (file, "\t.base $%d\n", vms_base_regno);
8080 if (TARGET_ABI_OSF
8081 && TARGET_IEEE_CONFORMANT
8082 && !flag_inhibit_size_directive)
8084 /* Set flags in procedure descriptor to request IEEE-conformant
8085 math-library routines. The value we set it to is PDSC_EXC_IEEE
8086 (/usr/include/pdsc.h). */
8087 fputs ("\t.eflag 48\n", file);
8090 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8091 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8092 alpha_arg_offset = -frame_size + 48;
8094 /* Describe our frame. If the frame size is larger than an integer,
8095 print it as zero to avoid an assembler error. We won't be
8096 properly describing such a frame, but that's the best we can do. */
8097 if (TARGET_ABI_OPEN_VMS)
8098 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8099 HOST_WIDE_INT_PRINT_DEC "\n",
8100 vms_unwind_regno,
8101 frame_size >= (1UL << 31) ? 0 : frame_size,
8102 reg_offset);
8103 else if (!flag_inhibit_size_directive)
8104 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8105 (frame_pointer_needed
8106 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8107 frame_size >= max_frame_size ? 0 : frame_size,
8108 crtl->args.pretend_args_size);
8110 /* Describe which registers were spilled. */
8111 if (TARGET_ABI_OPEN_VMS)
8113 if (imask)
8114 /* ??? Does VMS care if mask contains ra? The old code didn't
8115 set it, so I don't here. */
8116 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8117 if (fmask)
8118 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8119 if (alpha_procedure_type == PT_REGISTER)
8120 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8122 else if (!flag_inhibit_size_directive)
8124 if (imask)
8126 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8127 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8129 for (i = 0; i < 32; ++i)
8130 if (imask & (1UL << i))
8131 reg_offset += 8;
8134 if (fmask)
8135 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8136 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8139 #if TARGET_ABI_OPEN_VMS
8140 /* If a user condition handler has been installed at some point, emit
8141 the procedure descriptor bits to point the Condition Handling Facility
8142 at the indirection wrapper, and state the fp offset at which the user
8143 handler may be found. */
8144 if (cfun->machine->uses_condition_handler)
8146 fprintf (file, "\t.handler __gcc_shell_handler\n");
8147 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8150 #ifdef TARGET_VMS_CRASH_DEBUG
8151 /* Support of minimal traceback info. */
8152 switch_to_section (readonly_data_section);
8153 fprintf (file, "\t.align 3\n");
8154 assemble_name (file, fnname); fputs ("..na:\n", file);
8155 fputs ("\t.ascii \"", file);
8156 assemble_name (file, fnname);
8157 fputs ("\\0\"\n", file);
8158 switch_to_section (text_section);
8159 #endif
8160 #endif /* TARGET_ABI_OPEN_VMS */
8163 /* Emit the .prologue note at the scheduled end of the prologue. */
8165 static void
8166 alpha_output_function_end_prologue (FILE *file)
8168 if (TARGET_ABI_OPEN_VMS)
8169 fputs ("\t.prologue\n", file);
8170 else if (!flag_inhibit_size_directive)
8171 fprintf (file, "\t.prologue %d\n",
8172 alpha_function_needs_gp || cfun->is_thunk);
8175 /* Write function epilogue. */
8177 void
8178 alpha_expand_epilogue (void)
8180 /* Registers to save. */
8181 unsigned long imask = 0;
8182 unsigned long fmask = 0;
8183 /* Stack space needed for pushing registers clobbered by us. */
8184 HOST_WIDE_INT sa_size;
8185 /* Complete stack size needed. */
8186 HOST_WIDE_INT frame_size;
8187 /* Offset from base reg to register save area. */
8188 HOST_WIDE_INT reg_offset;
8189 int fp_is_frame_pointer, fp_offset;
8190 rtx sa_reg, sa_reg_exp = NULL;
8191 rtx sp_adj1, sp_adj2, mem, reg, insn;
8192 rtx eh_ofs;
8193 rtx cfa_restores = NULL_RTX;
8194 int i;
8196 sa_size = alpha_sa_size ();
8197 frame_size = compute_frame_size (get_frame_size (), sa_size);
8199 if (TARGET_ABI_OPEN_VMS)
8201 if (alpha_procedure_type == PT_STACK)
8202 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8203 else
8204 reg_offset = 0;
8206 else
8207 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8209 alpha_sa_mask (&imask, &fmask);
8211 fp_is_frame_pointer
8212 = (TARGET_ABI_OPEN_VMS
8213 ? alpha_procedure_type == PT_STACK
8214 : frame_pointer_needed);
8215 fp_offset = 0;
8216 sa_reg = stack_pointer_rtx;
8218 if (crtl->calls_eh_return)
8219 eh_ofs = EH_RETURN_STACKADJ_RTX;
8220 else
8221 eh_ofs = NULL_RTX;
8223 if (sa_size)
8225 /* If we have a frame pointer, restore SP from it. */
8226 if (TARGET_ABI_OPEN_VMS
8227 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8228 : frame_pointer_needed)
8229 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8231 /* Cope with very large offsets to the register save area. */
8232 if (reg_offset + sa_size > 0x8000)
8234 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8235 HOST_WIDE_INT bias;
8237 if (low + sa_size <= 0x8000)
8238 bias = reg_offset - low, reg_offset = low;
8239 else
8240 bias = reg_offset, reg_offset = 0;
8242 sa_reg = gen_rtx_REG (DImode, 22);
8243 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8245 emit_move_insn (sa_reg, sa_reg_exp);
8248 /* Restore registers in order, excepting a true frame pointer. */
8250 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8251 reg = gen_rtx_REG (DImode, REG_RA);
8252 emit_move_insn (reg, mem);
8253 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8255 reg_offset += 8;
8256 imask &= ~(1UL << REG_RA);
8258 for (i = 0; i < 31; ++i)
8259 if (imask & (1UL << i))
8261 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8262 fp_offset = reg_offset;
8263 else
8265 mem = gen_frame_mem (DImode,
8266 plus_constant (Pmode, sa_reg,
8267 reg_offset));
8268 reg = gen_rtx_REG (DImode, i);
8269 emit_move_insn (reg, mem);
8270 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8271 cfa_restores);
8273 reg_offset += 8;
8276 for (i = 0; i < 31; ++i)
8277 if (fmask & (1UL << i))
8279 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8280 reg_offset));
8281 reg = gen_rtx_REG (DFmode, i+32);
8282 emit_move_insn (reg, mem);
8283 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8284 reg_offset += 8;
8288 if (frame_size || eh_ofs)
8290 sp_adj1 = stack_pointer_rtx;
8292 if (eh_ofs)
8294 sp_adj1 = gen_rtx_REG (DImode, 23);
8295 emit_move_insn (sp_adj1,
8296 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8299 /* If the stack size is large, begin computation into a temporary
8300 register so as not to interfere with a potential fp restore,
8301 which must be consecutive with an SP restore. */
8302 if (frame_size < 32768 && !cfun->calls_alloca)
8303 sp_adj2 = GEN_INT (frame_size);
8304 else if (frame_size < 0x40007fffL)
8306 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8308 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8309 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8310 sp_adj1 = sa_reg;
8311 else
8313 sp_adj1 = gen_rtx_REG (DImode, 23);
8314 emit_move_insn (sp_adj1, sp_adj2);
8316 sp_adj2 = GEN_INT (low);
8318 else
8320 rtx tmp = gen_rtx_REG (DImode, 23);
8321 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8322 if (!sp_adj2)
8324 /* We can't drop new things to memory this late, afaik,
8325 so build it up by pieces. */
8326 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size,
8327 -(frame_size < 0));
8328 gcc_assert (sp_adj2);
8332 /* From now on, things must be in order. So emit blockages. */
8334 /* Restore the frame pointer. */
8335 if (fp_is_frame_pointer)
8337 emit_insn (gen_blockage ());
8338 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8339 fp_offset));
8340 emit_move_insn (hard_frame_pointer_rtx, mem);
8341 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8342 hard_frame_pointer_rtx, cfa_restores);
8344 else if (TARGET_ABI_OPEN_VMS)
8346 emit_insn (gen_blockage ());
8347 emit_move_insn (hard_frame_pointer_rtx,
8348 gen_rtx_REG (DImode, vms_save_fp_regno));
8349 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8350 hard_frame_pointer_rtx, cfa_restores);
8353 /* Restore the stack pointer. */
8354 emit_insn (gen_blockage ());
8355 if (sp_adj2 == const0_rtx)
8356 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8357 else
8358 insn = emit_move_insn (stack_pointer_rtx,
8359 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8360 REG_NOTES (insn) = cfa_restores;
8361 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8362 RTX_FRAME_RELATED_P (insn) = 1;
8364 else
8366 gcc_assert (cfa_restores == NULL);
8368 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8370 emit_insn (gen_blockage ());
8371 insn = emit_move_insn (hard_frame_pointer_rtx,
8372 gen_rtx_REG (DImode, vms_save_fp_regno));
8373 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8374 RTX_FRAME_RELATED_P (insn) = 1;
8379 /* Output the rest of the textual info surrounding the epilogue. */
8381 void
8382 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8384 rtx_insn *insn;
8386 /* We output a nop after noreturn calls at the very end of the function to
8387 ensure that the return address always remains in the caller's code range,
8388 as not doing so might confuse unwinding engines. */
8389 insn = get_last_insn ();
8390 if (!INSN_P (insn))
8391 insn = prev_active_insn (insn);
8392 if (insn && CALL_P (insn))
8393 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8395 #if TARGET_ABI_OPEN_VMS
8396 /* Write the linkage entries. */
8397 alpha_write_linkage (file, fnname);
8398 #endif
8400 /* End the function. */
8401 if (TARGET_ABI_OPEN_VMS
8402 || !flag_inhibit_size_directive)
8404 fputs ("\t.end ", file);
8405 assemble_name (file, fnname);
8406 putc ('\n', file);
8408 inside_function = FALSE;
8411 #if TARGET_ABI_OSF
8412 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8414 In order to avoid the hordes of differences between generated code
8415 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8416 lots of code loading up large constants, generate rtl and emit it
8417 instead of going straight to text.
8419 Not sure why this idea hasn't been explored before... */
8421 static void
8422 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8423 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8424 tree function)
8426 HOST_WIDE_INT hi, lo;
8427 rtx this_rtx, funexp;
8428 rtx_insn *insn;
8430 /* We always require a valid GP. */
8431 emit_insn (gen_prologue_ldgp ());
8432 emit_note (NOTE_INSN_PROLOGUE_END);
8434 /* Find the "this" pointer. If the function returns a structure,
8435 the structure return pointer is in $16. */
8436 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8437 this_rtx = gen_rtx_REG (Pmode, 17);
8438 else
8439 this_rtx = gen_rtx_REG (Pmode, 16);
8441 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8442 entire constant for the add. */
8443 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8444 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8445 if (hi + lo == delta)
8447 if (hi)
8448 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8449 if (lo)
8450 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8452 else
8454 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0),
8455 delta, -(delta < 0));
8456 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
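/* A standalone sketch (not part of the build) of the ldah+lda split used
   above: LO is DELTA's low 16 bits sign-extended, HI is the remainder
   sign-extended to 32 bits, and when HI + LO == DELTA the constant can be
   added with at most one ldah (adds HI, a multiple of 65536) and one lda
   (adds LO).  The same low-half extraction appears in the epilogue stack
   adjustment above.  Function and variable names here are illustrative.  */
#if 0
#include <stdio.h>

static int
split_ldah_lda (long long delta, long long *hi, long long *lo)
{
  *lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
  *hi = (((delta - *lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
  return *hi + *lo == delta;    /* Nonzero if ldah+lda suffice.  */
}

int
main (void)
{
  long long hi, lo;
  long long delta = 0x12345678; /* Splits as hi = 0x12340000, lo = 0x5678.  */

  if (split_ldah_lda (delta, &hi, &lo))
    printf ("ldah %#llx, lda %#llx\n",
            (unsigned long long) hi, (unsigned long long) lo);
  return 0;
}
#endif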
8459 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8460 if (vcall_offset)
8462 rtx tmp, tmp2;
8464 tmp = gen_rtx_REG (Pmode, 0);
8465 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8467 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8468 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8469 if (hi + lo == vcall_offset)
8471 if (hi)
8472 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8474 else
8476 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8477 vcall_offset, -(vcall_offset < 0));
8478 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8479 lo = 0;
8481 if (lo)
8482 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8483 else
8484 tmp2 = tmp;
8485 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8487 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8490 /* Generate a tail call to the target function. */
8491 if (! TREE_USED (function))
8493 assemble_external (function);
8494 TREE_USED (function) = 1;
8496 funexp = XEXP (DECL_RTL (function), 0);
8497 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8498 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8499 SIBLING_CALL_P (insn) = 1;
8501 /* Run just enough of rest_of_compilation to get the insns emitted.
8502 There's not really enough bulk here to make other passes such as
8503      instruction scheduling worthwhile.  Note that use_thunk calls
8504 assemble_start_function and assemble_end_function. */
8505 insn = get_insns ();
8506 shorten_branches (insn);
8507 final_start_function (insn, file, 1);
8508 final (insn, file, 1);
8509 final_end_function ();
8511 #endif /* TARGET_ABI_OSF */
8513 /* Debugging support. */
8515 #include "gstab.h"
8517 /* Name of the file containing the current function. */
8519 static const char *current_function_file = "";
8521 /* Offsets to alpha virtual arg/local debugging pointers. */
8523 long alpha_arg_offset;
8524 long alpha_auto_offset;
8526 /* Emit a new filename to a stream. */
8528 void
8529 alpha_output_filename (FILE *stream, const char *name)
8531 static int first_time = TRUE;
8533 if (first_time)
8535 first_time = FALSE;
8536 ++num_source_filenames;
8537 current_function_file = name;
8538 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8539 output_quoted_string (stream, name);
8540 fprintf (stream, "\n");
8543 else if (name != current_function_file
8544 && strcmp (name, current_function_file) != 0)
8546 ++num_source_filenames;
8547 current_function_file = name;
8548 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8550 output_quoted_string (stream, name);
8551 fprintf (stream, "\n");
8555 /* Structure to show the current status of registers and memory. */
8557 struct shadow_summary
8559 struct {
8560 unsigned int i : 31; /* Mask of int regs */
8561 unsigned int fp : 31; /* Mask of fp regs */
8562 unsigned int mem : 1; /* mem == imem | fpmem */
8563 } used, defd;
8566 /* Summarize the effects of expression X on the machine.  Update SUM, a pointer
8567 to the summary structure. SET is nonzero if the insn is setting the
8568 object, otherwise zero. */
8570 static void
8571 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8573 const char *format_ptr;
8574 int i, j;
8576 if (x == 0)
8577 return;
8579 switch (GET_CODE (x))
8581 /* ??? Note that this case would be incorrect if the Alpha had a
8582 ZERO_EXTRACT in SET_DEST. */
8583 case SET:
8584 summarize_insn (SET_SRC (x), sum, 0);
8585 summarize_insn (SET_DEST (x), sum, 1);
8586 break;
8588 case CLOBBER:
8589 summarize_insn (XEXP (x, 0), sum, 1);
8590 break;
8592 case USE:
8593 summarize_insn (XEXP (x, 0), sum, 0);
8594 break;
8596 case ASM_OPERANDS:
8597 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8598 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8599 break;
8601 case PARALLEL:
8602 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8603 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8604 break;
8606 case SUBREG:
8607 summarize_insn (SUBREG_REG (x), sum, 0);
8608 break;
8610 case REG:
8612 int regno = REGNO (x);
8613 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8615 if (regno == 31 || regno == 63)
8616 break;
8618 if (set)
8620 if (regno < 32)
8621 sum->defd.i |= mask;
8622 else
8623 sum->defd.fp |= mask;
8625 else
8627 if (regno < 32)
8628 sum->used.i |= mask;
8629 else
8630 sum->used.fp |= mask;
8633 break;
8635 case MEM:
8636 if (set)
8637 sum->defd.mem = 1;
8638 else
8639 sum->used.mem = 1;
8641 /* Find the regs used in memory address computation: */
8642 summarize_insn (XEXP (x, 0), sum, 0);
8643 break;
8645 case CONST_INT: case CONST_DOUBLE:
8646 case SYMBOL_REF: case LABEL_REF: case CONST:
8647 case SCRATCH: case ASM_INPUT:
8648 break;
8650 /* Handle common unary and binary ops for efficiency. */
8651 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8652 case MOD: case UDIV: case UMOD: case AND: case IOR:
8653 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8654 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8655 case NE: case EQ: case GE: case GT: case LE:
8656 case LT: case GEU: case GTU: case LEU: case LTU:
8657 summarize_insn (XEXP (x, 0), sum, 0);
8658 summarize_insn (XEXP (x, 1), sum, 0);
8659 break;
8661 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8662 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8663 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8664 case SQRT: case FFS:
8665 summarize_insn (XEXP (x, 0), sum, 0);
8666 break;
8668 default:
8669 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8670 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8671 switch (format_ptr[i])
8673 case 'e':
8674 summarize_insn (XEXP (x, i), sum, 0);
8675 break;
8677 case 'E':
8678 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8679 summarize_insn (XVECEXP (x, i, j), sum, 0);
8680 break;
8682 case 'i':
8683 break;
8685 default:
8686 gcc_unreachable ();
8691 /* Ensure a sufficient number of `trapb' insns are in the code when
8692 the user requests code with a trap precision of functions or
8693 instructions.
8695 In naive mode, when the user requests a trap-precision of
8696 "instruction", a trapb is needed after every instruction that may
8697 generate a trap. This ensures that the code is resumption safe but
8698 it is also slow.
8700 When optimizations are turned on, we delay issuing a trapb as long
8701 as possible. In this context, a trap shadow is the sequence of
8702 instructions that starts with a (potentially) trap generating
8703 instruction and extends to the next trapb or call_pal instruction
8704 (but GCC never generates call_pal by itself). We can delay (and
8705 therefore sometimes omit) a trapb subject to the following
8706 conditions:
8708 (a) On entry to the trap shadow, if any Alpha register or memory
8709 location contains a value that is used as an operand value by some
8710 instruction in the trap shadow (live on entry), then no instruction
8711 in the trap shadow may modify the register or memory location.
8713 (b) Within the trap shadow, the computation of the base register
8714 for a memory load or store instruction may not involve using the
8715 result of an instruction that might generate an UNPREDICTABLE
8716 result.
8718 (c) Within the trap shadow, no register may be used more than once
8719 as a destination register. (This is to make life easier for the
8720 trap-handler.)
8722 (d) The trap shadow may not include any branch instructions. */
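/* A standalone toy sketch (not part of the build) of the bookkeeping the
   pass below performs.  Each insn in an open trap shadow is summarized as
   a pair of register bitmasks (USED, DEFD); the shadow must be closed with
   a trapb before an insn that would reuse a destination already defined in
   the shadow (condition (c)) or clobber a value the shadow has already
   used (condition (a)).  The struct, function, and register choices here
   are illustrative only; the real pass also tracks memory.  */
#if 0
#include <stdio.h>

struct toy_summary
{
  unsigned int used;   /* Registers read within the shadow.  */
  unsigned int defd;   /* Registers written within the shadow.  */
};

/* Merge INSN into SHADOW; return 1 if a trapb must be emitted first.  */
static int
extend_shadow (struct toy_summary *shadow, const struct toy_summary *insn)
{
  if ((insn->defd & shadow->defd)       /* (c): destination reused.  */
      || (insn->defd & shadow->used))   /* (a): live-on-entry value killed.  */
    return 1;

  shadow->used |= insn->used;
  shadow->defd |= insn->defd;
  return 0;
}

int
main (void)
{
  struct toy_summary shadow = { 0, 0 };
  /* addt $f1,$f2,$f3: uses f1 and f2, defines f3.  */
  struct toy_summary i1 = { (1u << 1) | (1u << 2), 1u << 3 };
  /* mult $f4,$f5,$f3: reuses f3 as a destination, so the shadow closes.  */
  struct toy_summary i2 = { (1u << 4) | (1u << 5), 1u << 3 };
  int r1 = extend_shadow (&shadow, &i1);
  int r2 = extend_shadow (&shadow, &i2);

  printf ("%d %d\n", r1, r2);   /* Prints "0 1".  */
  return 0;
}
#endif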
8724 static void
8725 alpha_handle_trap_shadows (void)
8727 struct shadow_summary shadow;
8728 int trap_pending, exception_nesting;
8729 rtx_insn *i, *n;
8731 trap_pending = 0;
8732 exception_nesting = 0;
8733 shadow.used.i = 0;
8734 shadow.used.fp = 0;
8735 shadow.used.mem = 0;
8736 shadow.defd = shadow.used;
8738 for (i = get_insns (); i ; i = NEXT_INSN (i))
8740 if (NOTE_P (i))
8742 switch (NOTE_KIND (i))
8744 case NOTE_INSN_EH_REGION_BEG:
8745 exception_nesting++;
8746 if (trap_pending)
8747 goto close_shadow;
8748 break;
8750 case NOTE_INSN_EH_REGION_END:
8751 exception_nesting--;
8752 if (trap_pending)
8753 goto close_shadow;
8754 break;
8756 case NOTE_INSN_EPILOGUE_BEG:
8757 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8758 goto close_shadow;
8759 break;
8762 else if (trap_pending)
8764 if (alpha_tp == ALPHA_TP_FUNC)
8766 if (JUMP_P (i)
8767 && GET_CODE (PATTERN (i)) == RETURN)
8768 goto close_shadow;
8770 else if (alpha_tp == ALPHA_TP_INSN)
8772 if (optimize > 0)
8774 struct shadow_summary sum;
8776 sum.used.i = 0;
8777 sum.used.fp = 0;
8778 sum.used.mem = 0;
8779 sum.defd = sum.used;
8781 switch (GET_CODE (i))
8783 case INSN:
8784 /* Annoyingly, get_attr_trap will die on these. */
8785 if (GET_CODE (PATTERN (i)) == USE
8786 || GET_CODE (PATTERN (i)) == CLOBBER)
8787 break;
8789 summarize_insn (PATTERN (i), &sum, 0);
8791 if ((sum.defd.i & shadow.defd.i)
8792 || (sum.defd.fp & shadow.defd.fp))
8794 /* (c) would be violated */
8795 goto close_shadow;
8798 /* Combine shadow with summary of current insn: */
8799 shadow.used.i |= sum.used.i;
8800 shadow.used.fp |= sum.used.fp;
8801 shadow.used.mem |= sum.used.mem;
8802 shadow.defd.i |= sum.defd.i;
8803 shadow.defd.fp |= sum.defd.fp;
8804 shadow.defd.mem |= sum.defd.mem;
8806 if ((sum.defd.i & shadow.used.i)
8807 || (sum.defd.fp & shadow.used.fp)
8808 || (sum.defd.mem & shadow.used.mem))
8810 /* (a) would be violated (also takes care of (b)) */
8811 gcc_assert (get_attr_trap (i) != TRAP_YES
8812 || (!(sum.defd.i & sum.used.i)
8813 && !(sum.defd.fp & sum.used.fp)));
8815 goto close_shadow;
8817 break;
8819 case BARRIER:
8820 /* __builtin_unreachable can expand to no code at all,
8821 leaving (barrier) RTXes in the instruction stream. */
8822 goto close_shadow_notrapb;
8824 case JUMP_INSN:
8825 case CALL_INSN:
8826 case CODE_LABEL:
8827 goto close_shadow;
8829 default:
8830 gcc_unreachable ();
8833 else
8835 close_shadow:
8836 n = emit_insn_before (gen_trapb (), i);
8837 PUT_MODE (n, TImode);
8838 PUT_MODE (i, TImode);
8839 close_shadow_notrapb:
8840 trap_pending = 0;
8841 shadow.used.i = 0;
8842 shadow.used.fp = 0;
8843 shadow.used.mem = 0;
8844 shadow.defd = shadow.used;
8849 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8850 && NONJUMP_INSN_P (i)
8851 && GET_CODE (PATTERN (i)) != USE
8852 && GET_CODE (PATTERN (i)) != CLOBBER
8853 && get_attr_trap (i) == TRAP_YES)
8855 if (optimize && !trap_pending)
8856 summarize_insn (PATTERN (i), &shadow, 0);
8857 trap_pending = 1;
8862 /* Alpha can only issue instruction groups simultaneously if they are
8863 suitably aligned. This is very processor-specific. */
8864 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8865 that are marked "fake". These instructions do not exist on that target,
8866 but it is possible to see these insns with deranged combinations of
8867 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8868 choose a result at random. */
8870 enum alphaev4_pipe {
8871 EV4_STOP = 0,
8872 EV4_IB0 = 1,
8873 EV4_IB1 = 2,
8874 EV4_IBX = 4
8877 enum alphaev5_pipe {
8878 EV5_STOP = 0,
8879 EV5_NONE = 1,
8880 EV5_E01 = 2,
8881 EV5_E0 = 4,
8882 EV5_E1 = 8,
8883 EV5_FAM = 16,
8884 EV5_FA = 32,
8885 EV5_FM = 64
8888 static enum alphaev4_pipe
8889 alphaev4_insn_pipe (rtx_insn *insn)
8891 if (recog_memoized (insn) < 0)
8892 return EV4_STOP;
8893 if (get_attr_length (insn) != 4)
8894 return EV4_STOP;
8896 switch (get_attr_type (insn))
8898 case TYPE_ILD:
8899 case TYPE_LDSYM:
8900 case TYPE_FLD:
8901 case TYPE_LD_L:
8902 return EV4_IBX;
8904 case TYPE_IADD:
8905 case TYPE_ILOG:
8906 case TYPE_ICMOV:
8907 case TYPE_ICMP:
8908 case TYPE_FST:
8909 case TYPE_SHIFT:
8910 case TYPE_IMUL:
8911 case TYPE_FBR:
8912 case TYPE_MVI: /* fake */
8913 return EV4_IB0;
8915 case TYPE_IST:
8916 case TYPE_MISC:
8917 case TYPE_IBR:
8918 case TYPE_JSR:
8919 case TYPE_CALLPAL:
8920 case TYPE_FCPYS:
8921 case TYPE_FCMOV:
8922 case TYPE_FADD:
8923 case TYPE_FDIV:
8924 case TYPE_FMUL:
8925 case TYPE_ST_C:
8926 case TYPE_MB:
8927 case TYPE_FSQRT: /* fake */
8928 case TYPE_FTOI: /* fake */
8929 case TYPE_ITOF: /* fake */
8930 return EV4_IB1;
8932 default:
8933 gcc_unreachable ();
8937 static enum alphaev5_pipe
8938 alphaev5_insn_pipe (rtx_insn *insn)
8940 if (recog_memoized (insn) < 0)
8941 return EV5_STOP;
8942 if (get_attr_length (insn) != 4)
8943 return EV5_STOP;
8945 switch (get_attr_type (insn))
8947 case TYPE_ILD:
8948 case TYPE_FLD:
8949 case TYPE_LDSYM:
8950 case TYPE_IADD:
8951 case TYPE_ILOG:
8952 case TYPE_ICMOV:
8953 case TYPE_ICMP:
8954 return EV5_E01;
8956 case TYPE_IST:
8957 case TYPE_FST:
8958 case TYPE_SHIFT:
8959 case TYPE_IMUL:
8960 case TYPE_MISC:
8961 case TYPE_MVI:
8962 case TYPE_LD_L:
8963 case TYPE_ST_C:
8964 case TYPE_MB:
8965 case TYPE_FTOI: /* fake */
8966 case TYPE_ITOF: /* fake */
8967 return EV5_E0;
8969 case TYPE_IBR:
8970 case TYPE_JSR:
8971 case TYPE_CALLPAL:
8972 return EV5_E1;
8974 case TYPE_FCPYS:
8975 return EV5_FAM;
8977 case TYPE_FBR:
8978 case TYPE_FCMOV:
8979 case TYPE_FADD:
8980 case TYPE_FDIV:
8981 case TYPE_FSQRT: /* fake */
8982 return EV5_FA;
8984 case TYPE_FMUL:
8985 return EV5_FM;
8987 default:
8988 gcc_unreachable ();
8992 /* IN_USE is a mask of the slots currently filled within the insn group.
8993 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8994 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8996 LEN is, of course, the length of the group in bytes. */
8998 static rtx_insn *
8999 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
9001 int len, in_use;
9003 len = in_use = 0;
9005 if (! INSN_P (insn)
9006 || GET_CODE (PATTERN (insn)) == CLOBBER
9007 || GET_CODE (PATTERN (insn)) == USE)
9008 goto next_and_done;
9010 while (1)
9012 enum alphaev4_pipe pipe;
9014 pipe = alphaev4_insn_pipe (insn);
9015 switch (pipe)
9017 case EV4_STOP:
9018 /* Force complex instructions to start new groups. */
9019 if (in_use)
9020 goto done;
9022 /* If this is a completely unrecognized insn, it's an asm.
9023 We don't know how long it is, so record length as -1 to
9024 signal a needed realignment. */
9025 if (recog_memoized (insn) < 0)
9026 len = -1;
9027 else
9028 len = get_attr_length (insn);
9029 goto next_and_done;
9031 case EV4_IBX:
9032 if (in_use & EV4_IB0)
9034 if (in_use & EV4_IB1)
9035 goto done;
9036 in_use |= EV4_IB1;
9038 else
9039 in_use |= EV4_IB0 | EV4_IBX;
9040 break;
9042 case EV4_IB0:
9043 if (in_use & EV4_IB0)
9045 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
9046 goto done;
9047 in_use |= EV4_IB1;
9049 in_use |= EV4_IB0;
9050 break;
9052 case EV4_IB1:
9053 if (in_use & EV4_IB1)
9054 goto done;
9055 in_use |= EV4_IB1;
9056 break;
9058 default:
9059 gcc_unreachable ();
9061 len += 4;
9063 /* Haifa doesn't do well scheduling branches. */
9064 if (JUMP_P (insn))
9065 goto next_and_done;
9067 next:
9068 insn = next_nonnote_insn (insn);
9070 if (!insn || ! INSN_P (insn))
9071 goto done;
9073 /* Let Haifa tell us where it thinks insn group boundaries are. */
9074 if (GET_MODE (insn) == TImode)
9075 goto done;
9077 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9078 goto next;
9081 next_and_done:
9082 insn = next_nonnote_insn (insn);
9084 done:
9085 *plen = len;
9086 *pin_use = in_use;
9087 return insn;
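/* A standalone sketch (not part of the build) of the EV4 slot accounting
   done by alphaev4_next_group above: a group has two issue slots, an IBX
   insn may sit in either one, and a later slot-0 insn can displace an IBX
   occupant into slot 1.  The claim function returns 0 if the insn still
   fits (updating *IN_USE) and 1 if the group must end.  Names and the
   sample sequence are illustrative.  */
#if 0
#include <stdio.h>

enum { IB0 = 1, IB1 = 2, IBX = 4 };     /* Same encoding as alphaev4_pipe.  */

static int
ev4_claim_slot (int *in_use, int pipe)
{
  switch (pipe)
    {
    case IBX:                   /* Insn may issue in either slot.  */
      if (*in_use & IB0)
        {
          if (*in_use & IB1)
            return 1;
          *in_use |= IB1;
        }
      else
        *in_use |= IB0 | IBX;
      return 0;

    case IB0:                   /* Insn needs slot 0...  */
      if (*in_use & IB0)
        {
          /* ...but a swappable (IBX) occupant can move to slot 1.  */
          if (!(*in_use & IBX) || (*in_use & IB1))
            return 1;
          *in_use |= IB1;
        }
      *in_use |= IB0;
      return 0;

    case IB1:                   /* Insn needs slot 1.  */
      if (*in_use & IB1)
        return 1;
      *in_use |= IB1;
      return 0;
    }
  return 1;
}

int
main (void)
{
  int in_use = 0;
  /* A load (IBX) followed by an add (IB0): both fit in one group.  */
  int r1 = ev4_claim_slot (&in_use, IBX);
  int r2 = ev4_claim_slot (&in_use, IB0);

  printf ("%d %d\n", r1, r2);   /* Prints "0 0".  */
  return 0;
}
#endif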
9090 /* IN_USE is a mask of the slots currently filled within the insn group.
9091 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9092 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9094 LEN is, of course, the length of the group in bytes. */
9096 static rtx_insn *
9097 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9099 int len, in_use;
9101 len = in_use = 0;
9103 if (! INSN_P (insn)
9104 || GET_CODE (PATTERN (insn)) == CLOBBER
9105 || GET_CODE (PATTERN (insn)) == USE)
9106 goto next_and_done;
9108 while (1)
9110 enum alphaev5_pipe pipe;
9112 pipe = alphaev5_insn_pipe (insn);
9113 switch (pipe)
9115 case EV5_STOP:
9116 /* Force complex instructions to start new groups. */
9117 if (in_use)
9118 goto done;
9120 /* If this is a completely unrecognized insn, it's an asm.
9121 We don't know how long it is, so record length as -1 to
9122 signal a needed realignment. */
9123 if (recog_memoized (insn) < 0)
9124 len = -1;
9125 else
9126 len = get_attr_length (insn);
9127 goto next_and_done;
9129 	/* ??? In most of the cases below we would like to assert that this never
9130 	   happens, as it would indicate an error either in Haifa or
9131 	   in the scheduling description.  Unfortunately, Haifa never
9132 	   schedules the last instruction of the BB, so we don't have
9133 	   an accurate TI bit to go by.  */
9134 case EV5_E01:
9135 if (in_use & EV5_E0)
9137 if (in_use & EV5_E1)
9138 goto done;
9139 in_use |= EV5_E1;
9141 else
9142 in_use |= EV5_E0 | EV5_E01;
9143 break;
9145 case EV5_E0:
9146 if (in_use & EV5_E0)
9148 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9149 goto done;
9150 in_use |= EV5_E1;
9152 in_use |= EV5_E0;
9153 break;
9155 case EV5_E1:
9156 if (in_use & EV5_E1)
9157 goto done;
9158 in_use |= EV5_E1;
9159 break;
9161 case EV5_FAM:
9162 if (in_use & EV5_FA)
9164 if (in_use & EV5_FM)
9165 goto done;
9166 in_use |= EV5_FM;
9168 else
9169 in_use |= EV5_FA | EV5_FAM;
9170 break;
9172 case EV5_FA:
9173 if (in_use & EV5_FA)
9174 goto done;
9175 in_use |= EV5_FA;
9176 break;
9178 case EV5_FM:
9179 if (in_use & EV5_FM)
9180 goto done;
9181 in_use |= EV5_FM;
9182 break;
9184 case EV5_NONE:
9185 break;
9187 default:
9188 gcc_unreachable ();
9190 len += 4;
9192 /* Haifa doesn't do well scheduling branches. */
9193 /* ??? If this is predicted not-taken, slotting continues, except
9194 that no more IBR, FBR, or JSR insns may be slotted. */
9195 if (JUMP_P (insn))
9196 goto next_and_done;
9198 next:
9199 insn = next_nonnote_insn (insn);
9201 if (!insn || ! INSN_P (insn))
9202 goto done;
9204 /* Let Haifa tell us where it thinks insn group boundaries are. */
9205 if (GET_MODE (insn) == TImode)
9206 goto done;
9208 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9209 goto next;
9212 next_and_done:
9213 insn = next_nonnote_insn (insn);
9215 done:
9216 *plen = len;
9217 *pin_use = in_use;
9218 return insn;
9221 static rtx
9222 alphaev4_next_nop (int *pin_use)
9224 int in_use = *pin_use;
9225 rtx nop;
9227 if (!(in_use & EV4_IB0))
9229 in_use |= EV4_IB0;
9230 nop = gen_nop ();
9232 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9234 in_use |= EV4_IB1;
9235 nop = gen_nop ();
9237 else if (TARGET_FP && !(in_use & EV4_IB1))
9239 in_use |= EV4_IB1;
9240 nop = gen_fnop ();
9242 else
9243 nop = gen_unop ();
9245 *pin_use = in_use;
9246 return nop;
9249 static rtx
9250 alphaev5_next_nop (int *pin_use)
9252 int in_use = *pin_use;
9253 rtx nop;
9255 if (!(in_use & EV5_E1))
9257 in_use |= EV5_E1;
9258 nop = gen_nop ();
9260 else if (TARGET_FP && !(in_use & EV5_FA))
9262 in_use |= EV5_FA;
9263 nop = gen_fnop ();
9265 else if (TARGET_FP && !(in_use & EV5_FM))
9267 in_use |= EV5_FM;
9268 nop = gen_fnop ();
9270 else
9271 nop = gen_unop ();
9273 *pin_use = in_use;
9274 return nop;
9277 /* The instruction group alignment main loop. */
9279 static void
9280 alpha_align_insns_1 (unsigned int max_align,
9281 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9282 rtx (*next_nop) (int *))
9284 /* ALIGN is the known alignment for the insn group. */
9285 unsigned int align;
9286 /* OFS is the offset of the current insn in the insn group. */
9287 int ofs;
9288 int prev_in_use, in_use, len, ldgp;
9289 rtx_insn *i, *next;
9291   /* Let shorten_branches take care of assigning alignments to code labels.  */
9292 shorten_branches (get_insns ());
9294 if (align_functions < 4)
9295 align = 4;
9296 else if ((unsigned int) align_functions < max_align)
9297 align = align_functions;
9298 else
9299 align = max_align;
9301 ofs = prev_in_use = 0;
9302 i = get_insns ();
9303 if (NOTE_P (i))
9304 i = next_nonnote_insn (i);
9306 ldgp = alpha_function_needs_gp ? 8 : 0;
9308 while (i)
9310 next = (*next_group) (i, &in_use, &len);
9312 /* When we see a label, resync alignment etc. */
9313 if (LABEL_P (i))
9315 unsigned int new_align = 1 << label_to_alignment (i);
9317 if (new_align >= align)
9319 align = new_align < max_align ? new_align : max_align;
9320 ofs = 0;
9323 else if (ofs & (new_align-1))
9324 ofs = (ofs | (new_align-1)) + 1;
9325 gcc_assert (!len);
9328       /* Handle complex instructions specially.  */
9329 else if (in_use == 0)
9331 /* Asms will have length < 0. This is a signal that we have
9332 lost alignment knowledge. Assume, however, that the asm
9333 will not mis-align instructions. */
9334 if (len < 0)
9336 ofs = 0;
9337 align = 4;
9338 len = 0;
9342 /* If the known alignment is smaller than the recognized insn group,
9343 realign the output. */
9344 else if ((int) align < len)
9346 unsigned int new_log_align = len > 8 ? 4 : 3;
9347 rtx_insn *prev, *where;
9349 where = prev = prev_nonnote_insn (i);
9350 if (!where || !LABEL_P (where))
9351 where = i;
9353 /* Can't realign between a call and its gp reload. */
9354 if (! (TARGET_EXPLICIT_RELOCS
9355 && prev && CALL_P (prev)))
9357 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9358 align = 1 << new_log_align;
9359 ofs = 0;
9363 /* We may not insert padding inside the initial ldgp sequence. */
9364 else if (ldgp > 0)
9365 ldgp -= len;
9367 /* If the group won't fit in the same INT16 as the previous,
9368 we need to add padding to keep the group together. Rather
9369 than simply leaving the insn filling to the assembler, we
9370 can make use of the knowledge of what sorts of instructions
9371 were issued in the previous group to make sure that all of
9372 the added nops are really free. */
9373 else if (ofs + len > (int) align)
9375 int nop_count = (align - ofs) / 4;
9376 rtx_insn *where;
9378 /* Insert nops before labels, branches, and calls to truly merge
9379 the execution of the nops with the previous instruction group. */
9380 where = prev_nonnote_insn (i);
9381 if (where)
9383 if (LABEL_P (where))
9385 rtx_insn *where2 = prev_nonnote_insn (where);
9386 if (where2 && JUMP_P (where2))
9387 where = where2;
9389 else if (NONJUMP_INSN_P (where))
9390 where = i;
9392 else
9393 where = i;
9396 emit_insn_before ((*next_nop)(&prev_in_use), where);
9397 while (--nop_count);
9398 ofs = 0;
9401 ofs = (ofs + len) & (align - 1);
9402 prev_in_use = in_use;
9403 i = next;
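/* A standalone sketch (not part of the build) of the offset arithmetic in
   the loop above: ALIGN is the fetch-block size in bytes, OFS the current
   offset within it, and LEN the size of the next insn group.  A group that
   would straddle the boundary is preceded by (ALIGN - OFS) / 4 nops
   (4 bytes each); either way the new offset is taken modulo ALIGN.  Names
   and sample values are illustrative.  */
#if 0
#include <stdio.h>

static int
advance_offset (int *ofs, int len, int align)
{
  int nop_count = 0;

  if (*ofs + len > align)       /* Group would cross the boundary.  */
    {
      nop_count = (align - *ofs) / 4;
      *ofs = 0;
    }
  *ofs = (*ofs + len) & (align - 1);
  return nop_count;
}

int
main (void)
{
  int ofs = 8;

  /* An 8-byte group at offset 8 of a 16-byte block fits exactly...  */
  printf ("%d ", advance_offset (&ofs, 8, 16));   /* 0 nops, ofs -> 0.  */
  /* ...but a 12-byte group at offset 8 needs 2 nops of padding.  */
  ofs = 8;
  printf ("%d\n", advance_offset (&ofs, 12, 16)); /* 2 nops, ofs -> 12.  */
  return 0;
}
#endif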
9407 static void
9408 alpha_align_insns (void)
9410 if (alpha_tune == PROCESSOR_EV4)
9411 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9412 else if (alpha_tune == PROCESSOR_EV5)
9413 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9414 else
9415 gcc_unreachable ();
9418 /* Insert an unop between a sibcall or noreturn function call and the GP load.  */
9420 static void
9421 alpha_pad_function_end (void)
9423 rtx_insn *insn, *next;
9425 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9427 if (!CALL_P (insn)
9428 || !(SIBLING_CALL_P (insn)
9429 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9430 continue;
9432 /* Make sure we do not split a call and its corresponding
9433 CALL_ARG_LOCATION note. */
9434 next = NEXT_INSN (insn);
9435 if (next == NULL)
9436 continue;
9437 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9438 insn = next;
9440 next = next_active_insn (insn);
9441 if (next)
9443 rtx pat = PATTERN (next);
9445 if (GET_CODE (pat) == SET
9446 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9447 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9448 emit_insn_after (gen_unop (), insn);
9453 /* Machine dependent reorg pass. */
9455 static void
9456 alpha_reorg (void)
9458 /* Workaround for a linker error that triggers when an exception
9459      handler immediately follows a sibcall or a noreturn function.
9461 In the sibcall case:
9463 The instruction stream from an object file:
9465 1d8: 00 00 fb 6b jmp (t12)
9466 1dc: 00 00 ba 27 ldah gp,0(ra)
9467 1e0: 00 00 bd 23 lda gp,0(gp)
9468 1e4: 00 00 7d a7 ldq t12,0(gp)
9469 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9471 was converted in the final link pass to:
9473 12003aa88: 67 fa ff c3 br 120039428 <...>
9474 12003aa8c: 00 00 fe 2f unop
9475 12003aa90: 00 00 fe 2f unop
9476 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9477 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9479 And in the noreturn case:
9481 The instruction stream from an object file:
9483 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9484 58: 00 00 ba 27 ldah gp,0(ra)
9485 5c: 00 00 bd 23 lda gp,0(gp)
9486 60: 00 00 7d a7 ldq t12,0(gp)
9487 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9489 was converted in the final link pass to:
9491 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9492 fdb28: 00 00 fe 2f unop
9493 fdb2c: 00 00 fe 2f unop
9494 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9495 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9497 GP load instructions were wrongly cleared by the linker relaxation
9498 pass. This workaround prevents removal of GP loads by inserting
9499 an unop instruction between a sibcall or noreturn function call and
9500      the exception handler prologue.  */
9502 if (current_function_has_exception_handlers ())
9503 alpha_pad_function_end ();
9506 static void
9507 alpha_file_start (void)
9509 default_file_start ();
9511 fputs ("\t.set noreorder\n", asm_out_file);
9512 fputs ("\t.set volatile\n", asm_out_file);
9513 if (TARGET_ABI_OSF)
9514 fputs ("\t.set noat\n", asm_out_file);
9515 if (TARGET_EXPLICIT_RELOCS)
9516 fputs ("\t.set nomacro\n", asm_out_file);
9517 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9519 const char *arch;
9521 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9522 arch = "ev6";
9523 else if (TARGET_MAX)
9524 arch = "pca56";
9525 else if (TARGET_BWX)
9526 arch = "ev56";
9527 else if (alpha_cpu == PROCESSOR_EV5)
9528 arch = "ev5";
9529 else
9530 arch = "ev4";
9532 fprintf (asm_out_file, "\t.arch %s\n", arch);
9536 /* Since we don't have a .dynbss section, we should not allow global
9537 relocations in the .rodata section. */
9539 static int
9540 alpha_elf_reloc_rw_mask (void)
9542 return flag_pic ? 3 : 2;
9545 /* Return a section for X. The only special thing we do here is to
9546 honor small data. */
9548 static section *
9549 alpha_elf_select_rtx_section (enum machine_mode mode, rtx x,
9550 unsigned HOST_WIDE_INT align)
9552 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9553 /* ??? Consider using mergeable sdata sections. */
9554 return sdata_section;
9555 else
9556 return default_elf_select_rtx_section (mode, x, align);
9559 static unsigned int
9560 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9562 unsigned int flags = 0;
9564 if (strcmp (name, ".sdata") == 0
9565 || strncmp (name, ".sdata.", 7) == 0
9566 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9567 || strcmp (name, ".sbss") == 0
9568 || strncmp (name, ".sbss.", 6) == 0
9569 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9570 flags = SECTION_SMALL;
9572 flags |= default_section_type_flags (decl, name, reloc);
9573 return flags;
9576 /* Structure to collect function names for final output in link section. */
9577 /* Note that items marked with GTY can't be ifdef'ed out. */
9579 enum reloc_kind
9581 KIND_LINKAGE,
9582 KIND_CODEADDR
9585 struct GTY(()) alpha_links
9587 rtx func;
9588 rtx linkage;
9589 enum reloc_kind rkind;
9592 #if TARGET_ABI_OPEN_VMS
9594 /* Return the VMS argument type corresponding to MODE. */
9596 enum avms_arg_type
9597 alpha_arg_type (enum machine_mode mode)
9599 switch (mode)
9601 case SFmode:
9602 return TARGET_FLOAT_VAX ? FF : FS;
9603 case DFmode:
9604 return TARGET_FLOAT_VAX ? FD : FT;
9605 default:
9606 return I64;
9610 /* Return an rtx for an integer representing the VMS Argument Information
9611 register value. */
9614 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9616 unsigned HOST_WIDE_INT regval = cum.num_args;
9617 int i;
9619 for (i = 0; i < 6; i++)
9620 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9622 return GEN_INT (regval);
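/* A standalone sketch (not part of the build) of the packing above: the
   VMS Argument Information register carries the argument count in its low
   8 bits and a 3-bit type code for each of the first six arguments in the
   bits above that.  The all-zero type codes below are placeholders; the
   real values come from enum avms_arg_type.  */
#if 0
#include <stdio.h>

static unsigned long long
pack_arg_info (int num_args, const int atypes[6])
{
  unsigned long long regval = (unsigned int) num_args;
  int i;

  for (i = 0; i < 6; i++)
    regval |= (unsigned long long) atypes[i] << (i * 3 + 8);
  return regval;
}

int
main (void)
{
  int atypes[6] = { 0, 0, 0, 0, 0, 0 };   /* Illustrative type codes.  */

  printf ("%#llx\n", pack_arg_info (2, atypes));   /* Prints "0x2".  */
  return 0;
}
#endif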
9626 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9627 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9628 this is the reference to the linkage pointer value, 0 if this is the
9629    reference to the function entry value.  RFLAG is 1 if this is a reduced
9630 reference (code address only), 0 if this is a full reference. */
9633 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9635 struct alpha_links *al = NULL;
9636 const char *name = XSTR (func, 0);
9638 if (cfun->machine->links)
9640 splay_tree_node lnode;
9642 /* Is this name already defined? */
9643 lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name);
9644 if (lnode)
9645 al = (struct alpha_links *) lnode->value;
9647 else
9648 cfun->machine->links = splay_tree_new_ggc
9649 ((splay_tree_compare_fn) strcmp,
9650 ggc_alloc_splay_tree_str_alpha_links_splay_tree_s,
9651 ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s);
9653 if (al == NULL)
9655 size_t buf_len;
9656 char *linksym;
9657 tree id;
9659 if (name[0] == '*')
9660 name++;
9662 /* Follow transparent alias, as this is used for CRTL translations. */
9663 id = maybe_get_identifier (name);
9664 if (id)
9666 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9667 id = TREE_CHAIN (id);
9668 name = IDENTIFIER_POINTER (id);
9671 buf_len = strlen (name) + 8 + 9;
9672 linksym = (char *) alloca (buf_len);
9673 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
9675 al = ggc_alloc<alpha_links> ();
9676 al->func = func;
9677 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9679 splay_tree_insert (cfun->machine->links,
9680 (splay_tree_key) ggc_strdup (name),
9681 (splay_tree_value) al);
9684 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9686 if (lflag)
9687 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9688 else
9689 return al->linkage;
9692 static int
9693 alpha_write_one_linkage (splay_tree_node node, void *data)
9695 const char *const name = (const char *) node->key;
9696 struct alpha_links *link = (struct alpha_links *) node->value;
9697 FILE *stream = (FILE *) data;
9699 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9700 if (link->rkind == KIND_CODEADDR)
9702 /* External and used, request code address. */
9703 fprintf (stream, "\t.code_address ");
9705 else
9707 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9708 && SYMBOL_REF_LOCAL_P (link->func))
9710 /* Locally defined, build linkage pair. */
9711 fprintf (stream, "\t.quad %s..en\n", name);
9712 fprintf (stream, "\t.quad ");
9714 else
9716 /* External, request linkage pair. */
9717 fprintf (stream, "\t.linkage ");
9720 assemble_name (stream, name);
9721 fputs ("\n", stream);
9723 return 0;
9726 static void
9727 alpha_write_linkage (FILE *stream, const char *funname)
9729 fprintf (stream, "\t.link\n");
9730 fprintf (stream, "\t.align 3\n");
9731 in_section = NULL;
9733 #ifdef TARGET_VMS_CRASH_DEBUG
9734 fputs ("\t.name ", stream);
9735 assemble_name (stream, funname);
9736 fputs ("..na\n", stream);
9737 #endif
9739 ASM_OUTPUT_LABEL (stream, funname);
9740 fprintf (stream, "\t.pdesc ");
9741 assemble_name (stream, funname);
9742 fprintf (stream, "..en,%s\n",
9743 alpha_procedure_type == PT_STACK ? "stack"
9744 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9746 if (cfun->machine->links)
9748 splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream);
9749 /* splay_tree_delete (func->links); */
9753 /* Switch to an arbitrary section NAME with attributes as specified
9754 by FLAGS. ALIGN specifies any known alignment requirements for
9755 the section; 0 if the default should be used. */
9757 static void
9758 vms_asm_named_section (const char *name, unsigned int flags,
9759 tree decl ATTRIBUTE_UNUSED)
9761 fputc ('\n', asm_out_file);
9762 fprintf (asm_out_file, ".section\t%s", name);
9764 if (flags & SECTION_DEBUG)
9765 fprintf (asm_out_file, ",NOWRT");
9767 fputc ('\n', asm_out_file);
9770 /* Record an element in the table of global constructors. SYMBOL is
9771 a SYMBOL_REF of the function to be called; PRIORITY is a number
9772 between 0 and MAX_INIT_PRIORITY.
9774 Differs from default_ctors_section_asm_out_constructor in that the
9775 width of the .ctors entry is always 64 bits, rather than the 32 bits
9776 used by a normal pointer. */
9778 static void
9779 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9781 switch_to_section (ctors_section);
9782 assemble_align (BITS_PER_WORD);
9783 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9786 static void
9787 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9789 switch_to_section (dtors_section);
9790 assemble_align (BITS_PER_WORD);
9791 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9793 #else
9795 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9796 bool lflag ATTRIBUTE_UNUSED,
9797 bool rflag ATTRIBUTE_UNUSED)
9799 return NULL_RTX;
9802 #endif /* TARGET_ABI_OPEN_VMS */
9804 static void
9805 alpha_init_libfuncs (void)
9807 if (TARGET_ABI_OPEN_VMS)
9809 /* Use the VMS runtime library functions for division and
9810 remainder. */
9811 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9812 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9813 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9814 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9815 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9816 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9817 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9818 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9819 abort_libfunc = init_one_libfunc ("decc$abort");
9820 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9821 #ifdef MEM_LIBFUNCS_INIT
9822 MEM_LIBFUNCS_INIT;
9823 #endif
9827 /* On the Alpha, we use this to disable the floating-point registers
9828 when they don't exist. */
9830 static void
9831 alpha_conditional_register_usage (void)
9833 int i;
9834 if (! TARGET_FPREGS)
9835 for (i = 32; i < 63; i++)
9836 fixed_regs[i] = call_used_regs[i] = 1;
9839 /* Canonicalize a comparison from one we don't have to one we do have. */
9841 static void
9842 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9843 bool op0_preserve_value)
9845 if (!op0_preserve_value
9846 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9847 && (REG_P (*op1) || *op1 == const0_rtx))
9849 rtx tem = *op0;
9850 *op0 = *op1;
9851 *op1 = tem;
9852 *code = (int)swap_condition ((enum rtx_code)*code);
9855 if ((*code == LT || *code == LTU)
9856 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9858 *code = *code == LT ? LE : LEU;
9859 *op1 = GEN_INT (255);
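/* A standalone sketch (not part of the build) of the second rewrite above.
   The Alpha operate-format literal field is 8 bits (0..255), so 255 fits
   as an immediate where 256 does not, which is presumably why "x < 256" is
   rewritten as the equivalent "x <= 255" (and LTU as LEU).  The toy enum
   and names below are illustrative.  */
#if 0
#include <stdio.h>

enum toy_code { TOY_LT, TOY_LTU, TOY_LE, TOY_LEU };

static void
canonicalize_lt_256 (enum toy_code *code, long *op1)
{
  if ((*code == TOY_LT || *code == TOY_LTU) && *op1 == 256)
    {
      *code = (*code == TOY_LT ? TOY_LE : TOY_LEU);
      *op1 = 255;
    }
}

int
main (void)
{
  enum toy_code code = TOY_LT;
  long op1 = 256;

  canonicalize_lt_256 (&code, &op1);
  printf ("%d %ld\n", code == TOY_LE, op1);   /* Prints "1 255".  */
  return 0;
}
#endif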
9863 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9865 static void
9866 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9868 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9870 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9871 tree new_fenv_var, reload_fenv, restore_fnenv;
9872 tree update_call, atomic_feraiseexcept, hold_fnclex;
9874 /* Assume OSF/1 compatible interfaces. */
9875 if (!TARGET_ABI_OSF)
9876 return;
9878   /* Generate the equivalent of:
9879 unsigned long fenv_var;
9880 fenv_var = __ieee_get_fp_control ();
9882 unsigned long masked_fenv;
9883 masked_fenv = fenv_var & mask;
9885 __ieee_set_fp_control (masked_fenv); */
9887 fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
9888 get_fpscr
9889 = build_fn_decl ("__ieee_get_fp_control",
9890 build_function_type_list (long_unsigned_type_node, NULL));
9891 set_fpscr
9892 = build_fn_decl ("__ieee_set_fp_control",
9893 build_function_type_list (void_type_node, NULL));
9894 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9895 ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9896 fenv_var, build_call_expr (get_fpscr, 0));
9897 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9898 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9899 *hold = build2 (COMPOUND_EXPR, void_type_node,
9900 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9901 hold_fnclex);
9903 /* Store the value of masked_fenv to clear the exceptions:
9904 __ieee_set_fp_control (masked_fenv); */
9906 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9908   /* Generate the equivalent of:
9909 unsigned long new_fenv_var;
9910 new_fenv_var = __ieee_get_fp_control ();
9912 __ieee_set_fp_control (fenv_var);
9914 __atomic_feraiseexcept (new_fenv_var); */
9916 new_fenv_var = create_tmp_var (long_unsigned_type_node, NULL);
9917 reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9918 build_call_expr (get_fpscr, 0));
9919 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9920 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9921 update_call
9922 = build_call_expr (atomic_feraiseexcept, 1,
9923 fold_convert (integer_type_node, new_fenv_var));
9924 *update = build2 (COMPOUND_EXPR, void_type_node,
9925 build2 (COMPOUND_EXPR, void_type_node,
9926 reload_fenv, restore_fnenv), update_call);
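/* A standalone sketch (not part of the build) of the C code the three
   trees built above correspond to, written in terms of the OSF/1 control
   routines this function assumes (__ieee_get_fp_control and
   __ieee_set_fp_control) and the helper named in the comments above
   (__atomic_feraiseexcept).  The declarations are illustrative; the
   routines exist only on the relevant targets.  */
#if 0
extern unsigned long __ieee_get_fp_control (void);
extern void __ieee_set_fp_control (unsigned long);
extern void __atomic_feraiseexcept (int);

#define SWCR_STATUS_MASK (0x3fUL << 17)

static unsigned long fenv_var, masked_fenv;

static void
hold (void)                     /* *hold  */
{
  fenv_var = __ieee_get_fp_control ();
  masked_fenv = fenv_var & ~SWCR_STATUS_MASK;
  __ieee_set_fp_control (masked_fenv);
}

static void
clear (void)                    /* *clear  */
{
  __ieee_set_fp_control (masked_fenv);
}

static void
update (void)                   /* *update  */
{
  unsigned long new_fenv_var = __ieee_get_fp_control ();
  __ieee_set_fp_control (fenv_var);
  __atomic_feraiseexcept ((int) new_fenv_var);
}
#endif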
9929 /* Initialize the GCC target structure. */
9930 #if TARGET_ABI_OPEN_VMS
9931 # undef TARGET_ATTRIBUTE_TABLE
9932 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9933 # undef TARGET_CAN_ELIMINATE
9934 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9935 #endif
9937 #undef TARGET_IN_SMALL_DATA_P
9938 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9940 #undef TARGET_ASM_ALIGNED_HI_OP
9941 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9942 #undef TARGET_ASM_ALIGNED_DI_OP
9943 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9945 /* Default unaligned ops are provided for ELF systems. To get unaligned
9946 data for non-ELF systems, we have to turn off auto alignment. */
9947 #if TARGET_ABI_OPEN_VMS
9948 #undef TARGET_ASM_UNALIGNED_HI_OP
9949 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9950 #undef TARGET_ASM_UNALIGNED_SI_OP
9951 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9952 #undef TARGET_ASM_UNALIGNED_DI_OP
9953 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9954 #endif
9956 #undef TARGET_ASM_RELOC_RW_MASK
9957 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9958 #undef TARGET_ASM_SELECT_RTX_SECTION
9959 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9960 #undef TARGET_SECTION_TYPE_FLAGS
9961 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9963 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9964 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9966 #undef TARGET_INIT_LIBFUNCS
9967 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9969 #undef TARGET_LEGITIMIZE_ADDRESS
9970 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9971 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
9972 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9974 #undef TARGET_ASM_FILE_START
9975 #define TARGET_ASM_FILE_START alpha_file_start
9977 #undef TARGET_SCHED_ADJUST_COST
9978 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9979 #undef TARGET_SCHED_ISSUE_RATE
9980 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9981 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9982 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9983 alpha_multipass_dfa_lookahead
9985 #undef TARGET_HAVE_TLS
9986 #define TARGET_HAVE_TLS HAVE_AS_TLS
9988 #undef TARGET_BUILTIN_DECL
9989 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9990 #undef TARGET_INIT_BUILTINS
9991 #define TARGET_INIT_BUILTINS alpha_init_builtins
9992 #undef TARGET_EXPAND_BUILTIN
9993 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9994 #undef TARGET_FOLD_BUILTIN
9995 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9996 #undef TARGET_GIMPLE_FOLD_BUILTIN
9997 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9999 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10000 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
10001 #undef TARGET_CANNOT_COPY_INSN_P
10002 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
10003 #undef TARGET_LEGITIMATE_CONSTANT_P
10004 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
10005 #undef TARGET_CANNOT_FORCE_CONST_MEM
10006 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
10008 #if TARGET_ABI_OSF
10009 #undef TARGET_ASM_OUTPUT_MI_THUNK
10010 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10011 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10012 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10013 #undef TARGET_STDARG_OPTIMIZE_HOOK
10014 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10015 #endif
10017 /* Use 16-bit anchors.  */
10018 #undef TARGET_MIN_ANCHOR_OFFSET
10019 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10020 #undef TARGET_MAX_ANCHOR_OFFSET
10021 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
10022 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10023 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10025 #undef TARGET_RTX_COSTS
10026 #define TARGET_RTX_COSTS alpha_rtx_costs
10027 #undef TARGET_ADDRESS_COST
10028 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10030 #undef TARGET_MACHINE_DEPENDENT_REORG
10031 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10033 #undef TARGET_PROMOTE_FUNCTION_MODE
10034 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10035 #undef TARGET_PROMOTE_PROTOTYPES
10036 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10037 #undef TARGET_RETURN_IN_MEMORY
10038 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10039 #undef TARGET_PASS_BY_REFERENCE
10040 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10041 #undef TARGET_SETUP_INCOMING_VARARGS
10042 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10043 #undef TARGET_STRICT_ARGUMENT_NAMING
10044 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10045 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10046 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10047 #undef TARGET_SPLIT_COMPLEX_ARG
10048 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10049 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10050 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10051 #undef TARGET_ARG_PARTIAL_BYTES
10052 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10053 #undef TARGET_FUNCTION_ARG
10054 #define TARGET_FUNCTION_ARG alpha_function_arg
10055 #undef TARGET_FUNCTION_ARG_ADVANCE
10056 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10057 #undef TARGET_TRAMPOLINE_INIT
10058 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10060 #undef TARGET_INSTANTIATE_DECLS
10061 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10063 #undef TARGET_SECONDARY_RELOAD
10064 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10066 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10067 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10068 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10069 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10071 #undef TARGET_BUILD_BUILTIN_VA_LIST
10072 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10074 #undef TARGET_EXPAND_BUILTIN_VA_START
10075 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10077 /* The Alpha architecture does not require sequential consistency. See
10078 http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
10079 for an example of how it can be violated in practice. */
10080 #undef TARGET_RELAXED_ORDERING
10081 #define TARGET_RELAXED_ORDERING true
10083 #undef TARGET_OPTION_OVERRIDE
10084 #define TARGET_OPTION_OVERRIDE alpha_option_override
10086 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10087 #undef TARGET_MANGLE_TYPE
10088 #define TARGET_MANGLE_TYPE alpha_mangle_type
10089 #endif
10091 #undef TARGET_LEGITIMATE_ADDRESS_P
10092 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10094 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10095 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10097 #undef TARGET_CANONICALIZE_COMPARISON
10098 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10100 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10101 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10103 struct gcc_target targetm = TARGET_INITIALIZER;
10106 #include "gt-alpha.h"