Implement ASM_DECLARE_FUNCTION_NAME using ASM_OUTPUT_FUNCTION_LABEL
[official-gcc.git] / gcc / config / alpha / alpha.cc
1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2024 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "stringpool.h"
32 #include "attribs.h"
33 #include "memmodel.h"
34 #include "gimple.h"
35 #include "df.h"
36 #include "predict.h"
37 #include "tm_p.h"
38 #include "ssa.h"
39 #include "expmed.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "reload.h"
55 #include "except.h"
56 #include "common/common-target.h"
57 #include "debug.h"
58 #include "langhooks.h"
59 #include "cfgrtl.h"
60 #include "tree-pass.h"
61 #include "context.h"
62 #include "gimple-iterator.h"
63 #include "gimplify.h"
64 #include "tree-stdarg.h"
65 #include "tm-constrs.h"
66 #include "libfuncs.h"
67 #include "builtins.h"
68 #include "rtl-iter.h"
69 #include "flags.h"
70 #include "opts.h"
72 /* This file should be included last. */
73 #include "target-def.h"
75 /* Specify which cpu to schedule for. */
76 enum processor_type alpha_tune;
78 /* Which cpu we're generating code for. */
79 enum processor_type alpha_cpu;
81 static const char * const alpha_cpu_name[] =
83 "ev4", "ev5", "ev6"
86 /* Specify how accurate floating-point traps need to be. */
88 enum alpha_trap_precision alpha_tp;
90 /* Specify the floating-point rounding mode. */
92 enum alpha_fp_rounding_mode alpha_fprm;
94 /* Specify which things cause traps. */
96 enum alpha_fp_trap_mode alpha_fptm;
98 /* Nonzero if inside of a function, because the Alpha asm can't
99 handle .files inside of functions. */
101 static int inside_function = FALSE;
103 /* The number of cycles of latency we should assume on memory reads. */
105 static int alpha_memory_latency = 3;
107 /* Whether the function needs the GP. */
109 static int alpha_function_needs_gp;
111 /* The assembler name of the current function. */
113 static const char *alpha_fnname;
115 /* The next explicit relocation sequence number. */
116 extern GTY(()) int alpha_next_sequence_number;
117 int alpha_next_sequence_number = 1;
119 /* The literal and gpdisp sequence numbers for this insn, as printed
120 by %# and %* respectively. */
121 extern GTY(()) int alpha_this_literal_sequence_number;
122 extern GTY(()) int alpha_this_gpdisp_sequence_number;
123 int alpha_this_literal_sequence_number;
124 int alpha_this_gpdisp_sequence_number;
126 /* Costs of various operations on the different architectures. */
128 struct alpha_rtx_cost_data
130 unsigned char fp_add;
131 unsigned char fp_mult;
132 unsigned char fp_div_sf;
133 unsigned char fp_div_df;
134 unsigned char int_mult_si;
135 unsigned char int_mult_di;
136 unsigned char int_shift;
137 unsigned char int_cmov;
138 unsigned short int_div;
141 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
143 { /* EV4 */
144 COSTS_N_INSNS (6), /* fp_add */
145 COSTS_N_INSNS (6), /* fp_mult */
146 COSTS_N_INSNS (34), /* fp_div_sf */
147 COSTS_N_INSNS (63), /* fp_div_df */
148 COSTS_N_INSNS (23), /* int_mult_si */
149 COSTS_N_INSNS (23), /* int_mult_di */
150 COSTS_N_INSNS (2), /* int_shift */
151 COSTS_N_INSNS (2), /* int_cmov */
152 COSTS_N_INSNS (97), /* int_div */
154 { /* EV5 */
155 COSTS_N_INSNS (4), /* fp_add */
156 COSTS_N_INSNS (4), /* fp_mult */
157 COSTS_N_INSNS (15), /* fp_div_sf */
158 COSTS_N_INSNS (22), /* fp_div_df */
159 COSTS_N_INSNS (8), /* int_mult_si */
160 COSTS_N_INSNS (12), /* int_mult_di */
161 COSTS_N_INSNS (1) + 1, /* int_shift */
162 COSTS_N_INSNS (1), /* int_cmov */
163 COSTS_N_INSNS (83), /* int_div */
165 { /* EV6 */
166 COSTS_N_INSNS (4), /* fp_add */
167 COSTS_N_INSNS (4), /* fp_mult */
168 COSTS_N_INSNS (12), /* fp_div_sf */
169 COSTS_N_INSNS (15), /* fp_div_df */
170 COSTS_N_INSNS (7), /* int_mult_si */
171 COSTS_N_INSNS (7), /* int_mult_di */
172 COSTS_N_INSNS (1), /* int_shift */
173 COSTS_N_INSNS (2), /* int_cmov */
174 COSTS_N_INSNS (86), /* int_div */
178 /* Similar but tuned for code size instead of execution latency. The
179 extra +N is fractional cost tuning based on latency. It's used to
180 encourage use of cheaper insns like shift, but only if there's just
181 one of them. */
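/* For instance, int_mult_di below is COSTS_N_INSNS (1) + 2: dearer than a
   single ordinary insn but still cheaper than two, so replacing a multiply
   with one shift-and-add is a size win while a two-insn sequence is not.  */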
183 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
185 COSTS_N_INSNS (1), /* fp_add */
186 COSTS_N_INSNS (1), /* fp_mult */
187 COSTS_N_INSNS (1), /* fp_div_sf */
188 COSTS_N_INSNS (1) + 1, /* fp_div_df */
189 COSTS_N_INSNS (1) + 1, /* int_mult_si */
190 COSTS_N_INSNS (1) + 2, /* int_mult_di */
191 COSTS_N_INSNS (1), /* int_shift */
192 COSTS_N_INSNS (1), /* int_cmov */
193 COSTS_N_INSNS (6), /* int_div */
196 /* Get the number of args of a function in one of two ways. */
197 #if TARGET_ABI_OPEN_VMS
198 #define NUM_ARGS crtl->args.info.num_args
199 #else
200 #define NUM_ARGS crtl->args.info
201 #endif
203 #define REG_PV 27
204 #define REG_RA 26
206 /* Declarations of static functions. */
207 static struct machine_function *alpha_init_machine_status (void);
208 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
209 static void alpha_handle_trap_shadows (void);
210 static void alpha_align_insns (void);
211 static void alpha_override_options_after_change (void);
213 #if TARGET_ABI_OPEN_VMS
214 static void alpha_write_linkage (FILE *, const char *);
215 static bool vms_valid_pointer_mode (scalar_int_mode);
216 #else
217 #define vms_patch_builtins() gcc_unreachable()
218 #endif
220 static unsigned int
221 rest_of_handle_trap_shadows (void)
223 alpha_handle_trap_shadows ();
224 return 0;
227 namespace {
229 const pass_data pass_data_handle_trap_shadows =
231 RTL_PASS,
232 "trap_shadows", /* name */
233 OPTGROUP_NONE, /* optinfo_flags */
234 TV_NONE, /* tv_id */
235 0, /* properties_required */
236 0, /* properties_provided */
237 0, /* properties_destroyed */
238 0, /* todo_flags_start */
239 TODO_df_finish, /* todo_flags_finish */
242 class pass_handle_trap_shadows : public rtl_opt_pass
244 public:
245 pass_handle_trap_shadows(gcc::context *ctxt)
246 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
249 /* opt_pass methods: */
250 virtual bool gate (function *)
252 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
255 virtual unsigned int execute (function *)
257 return rest_of_handle_trap_shadows ();
260 }; // class pass_handle_trap_shadows
262 } // anon namespace
264 rtl_opt_pass *
265 make_pass_handle_trap_shadows (gcc::context *ctxt)
267 return new pass_handle_trap_shadows (ctxt);
270 static unsigned int
271 rest_of_align_insns (void)
273 alpha_align_insns ();
274 return 0;
277 namespace {
279 const pass_data pass_data_align_insns =
281 RTL_PASS,
282 "align_insns", /* name */
283 OPTGROUP_NONE, /* optinfo_flags */
284 TV_NONE, /* tv_id */
285 0, /* properties_required */
286 0, /* properties_provided */
287 0, /* properties_destroyed */
288 0, /* todo_flags_start */
289 TODO_df_finish, /* todo_flags_finish */
292 class pass_align_insns : public rtl_opt_pass
294 public:
295 pass_align_insns(gcc::context *ctxt)
296 : rtl_opt_pass(pass_data_align_insns, ctxt)
299 /* opt_pass methods: */
300 virtual bool gate (function *)
302 /* Due to the number of extra trapb insns, don't bother fixing up
303 alignment when trap precision is instruction. Moreover, we can
304 only do our job when sched2 is run. */
305 return ((alpha_tune == PROCESSOR_EV4
306 || alpha_tune == PROCESSOR_EV5)
307 && optimize && !optimize_size
308 && alpha_tp != ALPHA_TP_INSN
309 && flag_schedule_insns_after_reload);
312 virtual unsigned int execute (function *)
314 return rest_of_align_insns ();
317 }; // class pass_align_insns
319 } // anon namespace
321 rtl_opt_pass *
322 make_pass_align_insns (gcc::context *ctxt)
324 return new pass_align_insns (ctxt);
327 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
328 /* Implement TARGET_MANGLE_TYPE. */
330 static const char *
331 alpha_mangle_type (const_tree type)
333 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
334 && TARGET_LONG_DOUBLE_128)
335 return "g";
337 /* For all other types, use normal C++ mangling. */
338 return NULL;
340 #endif
342 /* Parse target option strings. */
344 static void
345 alpha_option_override (void)
347 static const struct cpu_table {
348 const char *const name;
349 const enum processor_type processor;
350 const int flags;
351 const unsigned short line_size; /* in bytes */
352 const unsigned short l1_size; /* in kb. */
353 const unsigned short l2_size; /* in kb. */
354 } cpu_table[] = {
355 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
356 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
357 had 64k to 8M 8-byte direct Bcache. */
358 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
359 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
360 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
362 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
363 and 1M to 16M 64 byte L3 (not modeled).
364 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
365 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
366 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
367 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
368 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
369 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
370 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
371 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
372 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
374 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
375 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
376 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
377 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
378 64, 64, 16*1024 },
379 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
380 64, 64, 16*1024 }
383 int const ct_size = ARRAY_SIZE (cpu_table);
384 int line_size = 0, l1_size = 0, l2_size = 0;
385 int i;
387 #ifdef SUBTARGET_OVERRIDE_OPTIONS
388 SUBTARGET_OVERRIDE_OPTIONS;
389 #endif
391 /* Default to full IEEE compliance mode for Go language. */
392 if (strcmp (lang_hooks.name, "GNU Go") == 0
393 && !(target_flags_explicit & MASK_IEEE))
394 target_flags |= MASK_IEEE;
396 alpha_fprm = ALPHA_FPRM_NORM;
397 alpha_tp = ALPHA_TP_PROG;
398 alpha_fptm = ALPHA_FPTM_N;
400 if (TARGET_IEEE)
402 alpha_tp = ALPHA_TP_INSN;
403 alpha_fptm = ALPHA_FPTM_SU;
405 if (TARGET_IEEE_WITH_INEXACT)
407 alpha_tp = ALPHA_TP_INSN;
408 alpha_fptm = ALPHA_FPTM_SUI;
411 if (alpha_tp_string)
413 if (! strcmp (alpha_tp_string, "p"))
414 alpha_tp = ALPHA_TP_PROG;
415 else if (! strcmp (alpha_tp_string, "f"))
416 alpha_tp = ALPHA_TP_FUNC;
417 else if (! strcmp (alpha_tp_string, "i"))
418 alpha_tp = ALPHA_TP_INSN;
419 else
420 error ("bad value %qs for %<-mtrap-precision%> switch",
421 alpha_tp_string);
424 if (alpha_fprm_string)
426 if (! strcmp (alpha_fprm_string, "n"))
427 alpha_fprm = ALPHA_FPRM_NORM;
428 else if (! strcmp (alpha_fprm_string, "m"))
429 alpha_fprm = ALPHA_FPRM_MINF;
430 else if (! strcmp (alpha_fprm_string, "c"))
431 alpha_fprm = ALPHA_FPRM_CHOP;
432 else if (! strcmp (alpha_fprm_string,"d"))
433 alpha_fprm = ALPHA_FPRM_DYN;
434 else
435 error ("bad value %qs for %<-mfp-rounding-mode%> switch",
436 alpha_fprm_string);
439 if (alpha_fptm_string)
441 if (strcmp (alpha_fptm_string, "n") == 0)
442 alpha_fptm = ALPHA_FPTM_N;
443 else if (strcmp (alpha_fptm_string, "u") == 0)
444 alpha_fptm = ALPHA_FPTM_U;
445 else if (strcmp (alpha_fptm_string, "su") == 0)
446 alpha_fptm = ALPHA_FPTM_SU;
447 else if (strcmp (alpha_fptm_string, "sui") == 0)
448 alpha_fptm = ALPHA_FPTM_SUI;
449 else
450 error ("bad value %qs for %<-mfp-trap-mode%> switch",
451 alpha_fptm_string);
454 if (alpha_cpu_string)
456 for (i = 0; i < ct_size; i++)
457 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
459 alpha_tune = alpha_cpu = cpu_table[i].processor;
460 line_size = cpu_table[i].line_size;
461 l1_size = cpu_table[i].l1_size;
462 l2_size = cpu_table[i].l2_size;
463 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
464 target_flags |= cpu_table[i].flags;
465 break;
467 if (i == ct_size)
468 error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
471 if (alpha_tune_string)
473 for (i = 0; i < ct_size; i++)
474 if (! strcmp (alpha_tune_string, cpu_table [i].name))
476 alpha_tune = cpu_table[i].processor;
477 line_size = cpu_table[i].line_size;
478 l1_size = cpu_table[i].l1_size;
479 l2_size = cpu_table[i].l2_size;
480 break;
482 if (i == ct_size)
483 error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
486 if (line_size)
487 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
488 param_l1_cache_line_size, line_size);
489 if (l1_size)
490 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
491 param_l1_cache_size, l1_size);
492 if (l2_size)
493 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
494 param_l2_cache_size, l2_size);
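/* For example, -mcpu=ev6 seeds the l1-cache-line-size, l1-cache-size and
   l2-cache-size params with 64, 64 and 16384 from the table above, unless
   the user has already set them explicitly.  */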
496 /* Do some sanity checks on the above options. */
498 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
499 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
501 warning (0, "fp software completion requires %<-mtrap-precision=i%>");
502 alpha_tp = ALPHA_TP_INSN;
505 if (alpha_cpu == PROCESSOR_EV6)
507 /* Except for EV6 pass 1 (not released), we always have precise
508 arithmetic traps. Which means we can do software completion
509 without minding trap shadows. */
510 alpha_tp = ALPHA_TP_PROG;
513 if (TARGET_FLOAT_VAX)
515 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
517 warning (0, "rounding mode not supported for VAX floats");
518 alpha_fprm = ALPHA_FPRM_NORM;
520 if (alpha_fptm == ALPHA_FPTM_SUI)
522 warning (0, "trap mode not supported for VAX floats");
523 alpha_fptm = ALPHA_FPTM_SU;
525 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
526 warning (0, "128-bit %<long double%> not supported for VAX floats");
527 target_flags &= ~MASK_LONG_DOUBLE_128;
531 char *end;
532 int lat;
534 if (!alpha_mlat_string)
535 alpha_mlat_string = "L1";
537 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
538 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
540 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
541 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
542 && alpha_mlat_string[2] == '\0')
544 static int const cache_latency[][4] =
546 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
547 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
548 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
551 lat = alpha_mlat_string[1] - '0';
552 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
554 warning (0, "L%d cache latency unknown for %s",
555 lat, alpha_cpu_name[alpha_tune]);
556 lat = 3;
558 else
559 lat = cache_latency[alpha_tune][lat-1];
561 else if (! strcmp (alpha_mlat_string, "main"))
563 /* Most current memories have about 370ns latency. This is
564 a reasonable guess for a fast cpu. */
565 lat = 150;
567 else
569 warning (0, "bad value %qs for %<-mmemory-latency%>",
570 alpha_mlat_string);
571 lat = 3;
574 alpha_memory_latency = lat;
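/* So -mmemory-latency=L2 with ev5 tuning yields a latency of 12 cycles,
   "main" yields 150, and a bare number is used as given.  */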
577 /* Default the definition of "small data" to 8 bytes. */
578 if (!OPTION_SET_P (g_switch_value))
579 g_switch_value = 8;
581 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
582 if (flag_pic == 1)
583 target_flags |= MASK_SMALL_DATA;
584 else if (flag_pic == 2)
585 target_flags &= ~MASK_SMALL_DATA;
587 alpha_override_options_after_change ();
589 /* Register variables and functions with the garbage collector. */
591 /* Set up function hooks. */
592 init_machine_status = alpha_init_machine_status;
594 /* Tell the compiler when we're using VAX floating point. */
595 if (TARGET_FLOAT_VAX)
597 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
598 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
599 REAL_MODE_FORMAT (TFmode) = NULL;
602 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
603 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
604 target_flags |= MASK_LONG_DOUBLE_128;
605 #endif
609 /* Implement targetm.override_options_after_change. */
611 static void
612 alpha_override_options_after_change (void)
614 /* Align labels and loops for optimal branching. */
615 /* ??? Kludge these by not doing anything if we don't optimize. */
616 if (optimize > 0)
618 if (flag_align_loops && !str_align_loops)
619 str_align_loops = "16";
620 if (flag_align_jumps && !str_align_jumps)
621 str_align_jumps = "16";
623 if (flag_align_functions && !str_align_functions)
624 str_align_functions = "16";
627 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
630 zap_mask (HOST_WIDE_INT value)
632 int i;
634 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
635 i++, value >>= 8)
636 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
637 return 0;
639 return 1;
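/* For example, zap_mask (0xffffffff00000000) and zap_mask (0x00ff) return 1
   because every byte is 0x00 or 0xff, while zap_mask (0x1200) returns 0;
   such values map onto the 8-bit byte-select masks used by zap/zapnot.  */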
642 /* Return true if OP is valid for a particular TLS relocation.
643 We are already guaranteed that OP is a CONST. */
646 tls_symbolic_operand_1 (rtx op, int size, int unspec)
648 op = XEXP (op, 0);
650 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
651 return 0;
652 op = XVECEXP (op, 0, 0);
654 if (GET_CODE (op) != SYMBOL_REF)
655 return 0;
657 switch (SYMBOL_REF_TLS_MODEL (op))
659 case TLS_MODEL_LOCAL_DYNAMIC:
660 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
661 case TLS_MODEL_INITIAL_EXEC:
662 return unspec == UNSPEC_TPREL && size == 64;
663 case TLS_MODEL_LOCAL_EXEC:
664 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
665 default:
666 gcc_unreachable ();
670 /* Used by aligned_memory_operand and unaligned_memory_operand to
671 resolve what reload is going to do with OP if it's a register. */
674 resolve_reload_operand (rtx op)
676 if (reload_in_progress)
678 rtx tmp = op;
679 if (SUBREG_P (tmp))
680 tmp = SUBREG_REG (tmp);
681 if (REG_P (tmp)
682 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
684 op = reg_equiv_memory_loc (REGNO (tmp));
685 if (op == 0)
686 return 0;
689 return op;
692 /* The scalar modes supported differ from the default check-what-c-supports
693 version in that sometimes TFmode is available even when long double
694 indicates only DFmode. */
696 static bool
697 alpha_scalar_mode_supported_p (scalar_mode mode)
699 switch (mode)
701 case E_QImode:
702 case E_HImode:
703 case E_SImode:
704 case E_DImode:
705 case E_TImode: /* via optabs.cc */
706 return true;
708 case E_SFmode:
709 case E_DFmode:
710 return true;
712 case E_TFmode:
713 return TARGET_HAS_XFLOATING_LIBS;
715 default:
716 return false;
720 /* Alpha implements a couple of integer vector mode operations when
721 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
722 which allows the vectorizer to operate on e.g. move instructions,
723 or when expand_vector_operations can do something useful. */
725 static bool
726 alpha_vector_mode_supported_p (machine_mode mode)
728 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
731 /* Return the TLS model to use for SYMBOL. */
733 static enum tls_model
734 tls_symbolic_operand_type (rtx symbol)
736 enum tls_model model;
738 if (GET_CODE (symbol) != SYMBOL_REF)
739 return TLS_MODEL_NONE;
740 model = SYMBOL_REF_TLS_MODEL (symbol);
742 /* Local-exec with a 64-bit size is the same code as initial-exec. */
743 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
744 model = TLS_MODEL_INITIAL_EXEC;
746 return model;
749 /* Return true if the function DECL will share the same GP as any
750 function in the current unit of translation. */
752 static bool
753 decl_has_samegp (const_tree decl)
755 /* Functions that are not local can be overridden, and thus may
756 not share the same gp. */
757 if (!(*targetm.binds_local_p) (decl))
758 return false;
760 /* If -msmall-data is in effect, assume that there is only one GP
761 for the module, and so any local symbol has this property. We
762 need explicit relocations to be able to enforce this for symbols
763 not defined in this unit of translation, however. */
764 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
765 return true;
767 /* Functions that are not external are defined in this UoT. */
768 /* ??? Irritatingly, static functions not yet emitted are still
769 marked "external". Apply this to non-static functions only. */
770 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
773 /* Return true if EXP should be placed in the small data section. */
775 static bool
776 alpha_in_small_data_p (const_tree exp)
778 /* We want to merge strings, so we never consider them small data. */
779 if (TREE_CODE (exp) == STRING_CST)
780 return false;
782 /* Functions are never in the small data area. Duh. */
783 if (TREE_CODE (exp) == FUNCTION_DECL)
784 return false;
786 /* COMMON symbols are never small data. */
787 if (VAR_P (exp) && DECL_COMMON (exp))
788 return false;
790 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
792 const char *section = DECL_SECTION_NAME (exp);
793 if (strcmp (section, ".sdata") == 0
794 || strcmp (section, ".sbss") == 0)
795 return true;
797 else
799 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
801 /* If this is an incomplete type with size 0, then we can't put it
802 in sdata because it might be too big when completed. */
803 if (size > 0 && size <= g_switch_value)
804 return true;
807 return false;
810 #if TARGET_ABI_OPEN_VMS
811 static bool
812 vms_valid_pointer_mode (scalar_int_mode mode)
814 return (mode == SImode || mode == DImode);
817 static bool
818 alpha_linkage_symbol_p (const char *symname)
820 int symlen = strlen (symname);
822 if (symlen > 4)
823 return strcmp (&symname [symlen - 4], "..lk") == 0;
825 return false;
828 #define LINKAGE_SYMBOL_REF_P(X) \
829 ((GET_CODE (X) == SYMBOL_REF \
830 && alpha_linkage_symbol_p (XSTR (X, 0))) \
831 || (GET_CODE (X) == CONST \
832 && GET_CODE (XEXP (X, 0)) == PLUS \
833 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
834 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
835 #endif
837 /* legitimate_address_p recognizes an RTL expression that is a valid
838 memory address for an instruction. The MODE argument is the
839 machine mode for the MEM expression that wants to use this address.
841 For Alpha, we have either a constant address or the sum of a
842 register and a constant address, or just a register. For DImode,
843    any of those forms can be surrounded with an AND that clears the
844 low-order three bits; this is an "unaligned" access. */
846 static bool
847 alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict,
848 code_helper = ERROR_MARK)
850 /* If this is an ldq_u type address, discard the outer AND. */
851 if (mode == DImode
852 && GET_CODE (x) == AND
853 && CONST_INT_P (XEXP (x, 1))
854 && INTVAL (XEXP (x, 1)) == -8)
855 x = XEXP (x, 0);
857 /* Discard non-paradoxical subregs. */
858 if (SUBREG_P (x)
859 && (GET_MODE_SIZE (GET_MODE (x))
860 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
861 x = SUBREG_REG (x);
863 /* Unadorned general registers are valid. */
864 if (REG_P (x)
865 && (strict
866 ? STRICT_REG_OK_FOR_BASE_P (x)
867 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
868 return true;
870 /* Constant addresses (i.e. +/- 32k) are valid. */
871 if (CONSTANT_ADDRESS_P (x))
872 return true;
874 #if TARGET_ABI_OPEN_VMS
875 if (LINKAGE_SYMBOL_REF_P (x))
876 return true;
877 #endif
879 /* Register plus a small constant offset is valid. */
880 if (GET_CODE (x) == PLUS)
882 rtx ofs = XEXP (x, 1);
883 x = XEXP (x, 0);
885 /* Discard non-paradoxical subregs. */
886 if (SUBREG_P (x)
887 && (GET_MODE_SIZE (GET_MODE (x))
888 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
889 x = SUBREG_REG (x);
891 if (REG_P (x))
893 if (! strict
894 && NONSTRICT_REG_OK_FP_BASE_P (x)
895 && CONST_INT_P (ofs))
896 return true;
897 if ((strict
898 ? STRICT_REG_OK_FOR_BASE_P (x)
899 : NONSTRICT_REG_OK_FOR_BASE_P (x))
900 && CONSTANT_ADDRESS_P (ofs))
901 return true;
905 /* If we're managing explicit relocations, LO_SUM is valid, as are small
906 data symbols. Avoid explicit relocations of modes larger than word
907      mode since e.g. $LC0+8($1) can fold around +/- 32k offset.  */
908 else if (TARGET_EXPLICIT_RELOCS
909 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
911 if (small_symbolic_operand (x, Pmode))
912 return true;
914 if (GET_CODE (x) == LO_SUM)
916 rtx ofs = XEXP (x, 1);
917 x = XEXP (x, 0);
919 /* Discard non-paradoxical subregs. */
920 if (SUBREG_P (x)
921 && (GET_MODE_SIZE (GET_MODE (x))
922 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
923 x = SUBREG_REG (x);
925 /* Must have a valid base register. */
926 if (! (REG_P (x)
927 && (strict
928 ? STRICT_REG_OK_FOR_BASE_P (x)
929 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
930 return false;
932 /* The symbol must be local. */
933 if (local_symbolic_operand (ofs, Pmode)
934 || dtp32_symbolic_operand (ofs, Pmode)
935 || tp32_symbolic_operand (ofs, Pmode))
936 return true;
940 return false;
943 /* Build the SYMBOL_REF for __tls_get_addr. */
945 static GTY(()) rtx tls_get_addr_libfunc;
947 static rtx
948 get_tls_get_addr (void)
950 if (!tls_get_addr_libfunc)
951 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
952 return tls_get_addr_libfunc;
955 /* Try machine-dependent ways of modifying an illegitimate address
956 to be legitimate. If we find one, return the new, valid address. */
958 static rtx
959 alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
961 HOST_WIDE_INT addend;
963 /* If the address is (plus reg const_int) and the CONST_INT is not a
964 valid offset, compute the high part of the constant and add it to
965 the register. Then our address is (plus temp low-part-const). */
966 if (GET_CODE (x) == PLUS
967 && REG_P (XEXP (x, 0))
968 && CONST_INT_P (XEXP (x, 1))
969 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
971 addend = INTVAL (XEXP (x, 1));
972 x = XEXP (x, 0);
973 goto split_addend;
976 /* If the address is (const (plus FOO const_int)), find the low-order
977 part of the CONST_INT. Then load FOO plus any high-order part of the
978 CONST_INT into a register. Our address is (plus reg low-part-const).
979 This is done to reduce the number of GOT entries. */
980 if (can_create_pseudo_p ()
981 && GET_CODE (x) == CONST
982 && GET_CODE (XEXP (x, 0)) == PLUS
983 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
985 addend = INTVAL (XEXP (XEXP (x, 0), 1));
986 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
987 goto split_addend;
990 /* If we have a (plus reg const), emit the load as in (2), then add
991 the two registers, and finally generate (plus reg low-part-const) as
992 our address. */
993 if (can_create_pseudo_p ()
994 && GET_CODE (x) == PLUS
995 && REG_P (XEXP (x, 0))
996 && GET_CODE (XEXP (x, 1)) == CONST
997 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
998 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1000 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1001 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1002 XEXP (XEXP (XEXP (x, 1), 0), 0),
1003 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1004 goto split_addend;
1007 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1008      Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1009 around +/- 32k offset. */
1010 if (TARGET_EXPLICIT_RELOCS
1011 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1012 && symbolic_operand (x, Pmode))
1014 rtx r0, r16, eqv, tga, tp, dest, seq;
1015 rtx_insn *insn;
1017 switch (tls_symbolic_operand_type (x))
1019 case TLS_MODEL_NONE:
1020 break;
1022 case TLS_MODEL_GLOBAL_DYNAMIC:
1024 start_sequence ();
1026 r0 = gen_rtx_REG (Pmode, 0);
1027 r16 = gen_rtx_REG (Pmode, 16);
1028 tga = get_tls_get_addr ();
1029 dest = gen_reg_rtx (Pmode);
1030 seq = GEN_INT (alpha_next_sequence_number++);
1032 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1033 rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1034 insn = emit_call_insn (val);
1035 RTL_CONST_CALL_P (insn) = 1;
1036 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1038 insn = get_insns ();
1039 end_sequence ();
1041 emit_libcall_block (insn, dest, r0, x);
1042 return dest;
1045 case TLS_MODEL_LOCAL_DYNAMIC:
1047 start_sequence ();
1049 r0 = gen_rtx_REG (Pmode, 0);
1050 r16 = gen_rtx_REG (Pmode, 16);
1051 tga = get_tls_get_addr ();
1052 scratch = gen_reg_rtx (Pmode);
1053 seq = GEN_INT (alpha_next_sequence_number++);
1055 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1056 rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1057 insn = emit_call_insn (val);
1058 RTL_CONST_CALL_P (insn) = 1;
1059 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1061 insn = get_insns ();
1062 end_sequence ();
1064 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1065 UNSPEC_TLSLDM_CALL);
1066 emit_libcall_block (insn, scratch, r0, eqv);
1068 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1069 eqv = gen_rtx_CONST (Pmode, eqv);
1071 if (alpha_tls_size == 64)
1073 dest = gen_reg_rtx (Pmode);
1074 emit_insn (gen_rtx_SET (dest, eqv));
1075 emit_insn (gen_adddi3 (dest, dest, scratch));
1076 return dest;
1078 if (alpha_tls_size == 32)
1080 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1081 temp = gen_rtx_PLUS (Pmode, scratch, temp);
1082 scratch = gen_reg_rtx (Pmode);
1083 emit_insn (gen_rtx_SET (scratch, temp));
1085 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1088 case TLS_MODEL_INITIAL_EXEC:
1089 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1090 eqv = gen_rtx_CONST (Pmode, eqv);
1091 tp = gen_reg_rtx (Pmode);
1092 scratch = gen_reg_rtx (Pmode);
1093 dest = gen_reg_rtx (Pmode);
1095 emit_insn (gen_get_thread_pointerdi (tp));
1096 emit_insn (gen_rtx_SET (scratch, eqv));
1097 emit_insn (gen_adddi3 (dest, tp, scratch));
1098 return dest;
1100 case TLS_MODEL_LOCAL_EXEC:
1101 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1102 eqv = gen_rtx_CONST (Pmode, eqv);
1103 tp = gen_reg_rtx (Pmode);
1105 emit_insn (gen_get_thread_pointerdi (tp));
1106 if (alpha_tls_size == 32)
1108 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1109 temp = gen_rtx_PLUS (Pmode, tp, temp);
1110 tp = gen_reg_rtx (Pmode);
1111 emit_insn (gen_rtx_SET (tp, temp));
1113 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1115 default:
1116 gcc_unreachable ();
1119 if (local_symbolic_operand (x, Pmode))
1121 if (small_symbolic_operand (x, Pmode))
1122 return x;
1123 else
1125 if (can_create_pseudo_p ())
1126 scratch = gen_reg_rtx (Pmode);
1127 emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1128 return gen_rtx_LO_SUM (Pmode, scratch, x);
1133 return NULL;
1135 split_addend:
1137 HOST_WIDE_INT low, high;
1139 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1140 addend -= low;
1141 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1142 addend -= high;
1144 if (addend)
1145 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1146 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1147 1, OPTAB_LIB_WIDEN);
1148 if (high)
1149 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1150 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1151 1, OPTAB_LIB_WIDEN);
1153 return plus_constant (Pmode, x, low);
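/* split_addend example: for ADDEND == 0x12348765 we get LOW == -0x789b and
   HIGH == 0x12350000, so the result is in effect
   (plus (plus X 0x12350000) -0x789b); HIGH has its bottom 16 bits clear and
   LOW fits a 16-bit memory displacement.  */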
1158 /* Try machine-dependent ways of modifying an illegitimate address
1159 to be legitimate. Return X or the new, valid address. */
1161 static rtx
1162 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1163 machine_mode mode)
1165 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1166 return new_x ? new_x : x;
1169 /* Return true if ADDR has an effect that depends on the machine mode it
1170 is used for. On the Alpha this is true only for the unaligned modes.
1171 We can simplify the test since we know that the address must be valid. */
1173 static bool
1174 alpha_mode_dependent_address_p (const_rtx addr,
1175 addr_space_t as ATTRIBUTE_UNUSED)
1177 return GET_CODE (addr) == AND;
1180 /* Primarily this is required for TLS symbols, but given that our move
1181 patterns *ought* to be able to handle any symbol at any time, we
1182 should never be spilling symbolic operands to the constant pool, ever. */
1184 static bool
1185 alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1187 enum rtx_code code = GET_CODE (x);
1188 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1191 /* We do not allow indirect calls to be optimized into sibling calls, nor
1192 can we allow a call to a function with a different GP to be optimized
1193 into a sibcall. */
1195 static bool
1196 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1198 /* Can't do indirect tail calls, since we don't know if the target
1199 uses the same GP. */
1200 if (!decl)
1201 return false;
1203 /* Otherwise, we can make a tail call if the target function shares
1204 the same GP. */
1205 return decl_has_samegp (decl);
1208 bool
1209 some_small_symbolic_operand_int (rtx x)
1211 subrtx_var_iterator::array_type array;
1212 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1214 rtx x = *iter;
1215 /* Don't re-split. */
1216 if (GET_CODE (x) == LO_SUM)
1217 iter.skip_subrtxes ();
1218 else if (small_symbolic_operand (x, Pmode))
1219 return true;
1221 return false;
1225 split_small_symbolic_operand (rtx x)
1227 x = copy_insn (x);
1228 subrtx_ptr_iterator::array_type array;
1229 FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1231 rtx *ptr = *iter;
1232 rtx x = *ptr;
1233 /* Don't re-split. */
1234 if (GET_CODE (x) == LO_SUM)
1235 iter.skip_subrtxes ();
1236 else if (small_symbolic_operand (x, Pmode))
1238 *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1239 iter.skip_subrtxes ();
1242 return x;
1245 /* Indicate that INSN cannot be duplicated. This is true for any insn
1246 that we've marked with gpdisp relocs, since those have to stay in
1247 1-1 correspondence with one another.
1249 Technically we could copy them if we could set up a mapping from one
1250 sequence number to another, across the set of insns to be duplicated.
1251 This seems overly complicated and error-prone since interblock motion
1252 from sched-ebb could move one of the pair of insns to a different block.
1254 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1255 then they'll be in a different block from their ldgp. Which could lead
1256 the bb reorder code to think that it would be ok to copy just the block
1257 containing the call and branch to the block containing the ldgp. */
1259 static bool
1260 alpha_cannot_copy_insn_p (rtx_insn *insn)
1262 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1263 return false;
1264 if (recog_memoized (insn) >= 0)
1265 return get_attr_cannot_copy (insn);
1266 else
1267 return false;
1271 /* Try a machine-dependent way of reloading an illegitimate address
1272 operand. If we find one, push the reload and return the new rtx. */
1275 alpha_legitimize_reload_address (rtx x,
1276 machine_mode mode ATTRIBUTE_UNUSED,
1277 int opnum, int type,
1278 int ind_levels ATTRIBUTE_UNUSED)
1280 /* We must recognize output that we have already generated ourselves. */
1281 if (GET_CODE (x) == PLUS
1282 && GET_CODE (XEXP (x, 0)) == PLUS
1283 && REG_P (XEXP (XEXP (x, 0), 0))
1284 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1285 && CONST_INT_P (XEXP (x, 1)))
1287 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1288 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1289 opnum, (enum reload_type) type);
1290 return x;
1293 /* We wish to handle large displacements off a base register by
1294 splitting the addend across an ldah and the mem insn. This
1295 cuts number of extra insns needed from 3 to 1. */
1296 if (GET_CODE (x) == PLUS
1297 && REG_P (XEXP (x, 0))
1298 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1299 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1300 && CONST_INT_P (XEXP (x, 1)))
1302 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1303 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1304 HOST_WIDE_INT high
1305 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1307 /* Check for 32-bit overflow. */
1308 if (high + low != val)
1309 return NULL_RTX;
1311 /* Reload the high part into a base reg; leave the low part
1312 in the mem directly. */
1313 x = gen_rtx_PLUS (GET_MODE (x),
1314 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1315 GEN_INT (high)),
1316 GEN_INT (low));
1318 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1319 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1320 opnum, (enum reload_type) type);
1321 return x;
1324 return NULL_RTX;
1327 /* Return the cost of moving between registers of various classes. Moving
1328 between FLOAT_REGS and anything else except float regs is expensive.
1329 In fact, we make it quite expensive because we really don't want to
1330 do these moves unless it is clearly worth it. Optimizations may
1331 reduce the impact of not being able to allocate a pseudo to a
1332 hard register. */
1334 static int
1335 alpha_register_move_cost (machine_mode /*mode*/,
1336 reg_class_t from, reg_class_t to)
1338 if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1339 return 2;
1341 if (TARGET_FIX)
1342 return (from == FLOAT_REGS) ? 6 : 8;
1344 return 4 + 2 * alpha_memory_latency;
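/* With the default alpha_memory_latency of 3, a GP<->FP move costs 10 when
   the FIX extension is unavailable, versus 6 (from FLOAT_REGS) or 8 (to
   FLOAT_REGS) with FIX; moves within one register file cost 2.  */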
1347 /* Return the cost of moving data of MODE from a register to
1348 or from memory. On the Alpha, bump this up a bit. */
1350 static int
1351 alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1352 bool /*in*/)
1354 return 2 * alpha_memory_latency;
1357 /* Compute a (partial) cost for rtx X. Return true if the complete
1358 cost has been computed, and false if subexpressions should be
1359 scanned. In either case, *TOTAL contains the cost result. */
1361 static bool
1362 alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1363 bool speed)
1365 int code = GET_CODE (x);
1366 bool float_mode_p = FLOAT_MODE_P (mode);
1367 const struct alpha_rtx_cost_data *cost_data;
1369 if (!speed)
1370 cost_data = &alpha_rtx_cost_size;
1371 else
1372 cost_data = &alpha_rtx_cost_data[alpha_tune];
1374 switch (code)
1376 case CONST_INT:
1377 /* If this is an 8-bit constant, return zero since it can be used
1378 nearly anywhere with no cost. If it is a valid operand for an
1379 ADD or AND, likewise return 0 if we know it will be used in that
1380 context. Otherwise, return 2 since it might be used there later.
1381 All other constants take at least two insns. */
1382 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1384 *total = 0;
1385 return true;
1387 /* FALLTHRU */
1389 case CONST_DOUBLE:
1390 case CONST_WIDE_INT:
1391 if (x == CONST0_RTX (mode))
1392 *total = 0;
1393 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1394 || (outer_code == AND && and_operand (x, VOIDmode)))
1395 *total = 0;
1396 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1397 *total = 2;
1398 else
1399 *total = COSTS_N_INSNS (2);
1400 return true;
1402 case CONST:
1403 case SYMBOL_REF:
1404 case LABEL_REF:
1405 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1406 *total = COSTS_N_INSNS (outer_code != MEM);
1407 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1408 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1409 else if (tls_symbolic_operand_type (x))
1410 /* Estimate of cost for call_pal rduniq. */
1411 /* ??? How many insns do we emit here? More than one... */
1412 *total = COSTS_N_INSNS (15);
1413 else
1414 /* Otherwise we do a load from the GOT. */
1415 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1416 return true;
1418 case HIGH:
1419 /* This is effectively an add_operand. */
1420 *total = 2;
1421 return true;
1423 case PLUS:
1424 case MINUS:
1425 if (float_mode_p)
1426 *total = cost_data->fp_add;
1427 else if (GET_CODE (XEXP (x, 0)) == ASHIFT
1428 && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1430 *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1431 (enum rtx_code) outer_code, opno, speed)
1432 + rtx_cost (XEXP (x, 1), mode,
1433 (enum rtx_code) outer_code, opno, speed)
1434 + COSTS_N_INSNS (1));
1435 return true;
1437 return false;
1439 case MULT:
1440 if (float_mode_p)
1441 *total = cost_data->fp_mult;
1442 else if (mode == DImode)
1443 *total = cost_data->int_mult_di;
1444 else
1445 *total = cost_data->int_mult_si;
1446 return false;
1448 case ASHIFT:
1449 if (CONST_INT_P (XEXP (x, 1))
1450 && INTVAL (XEXP (x, 1)) <= 3)
1452 *total = COSTS_N_INSNS (1);
1453 return false;
1455 /* FALLTHRU */
1457 case ASHIFTRT:
1458 case LSHIFTRT:
1459 *total = cost_data->int_shift;
1460 return false;
1462 case IF_THEN_ELSE:
1463 if (float_mode_p)
1464 *total = cost_data->fp_add;
1465 else
1466 *total = cost_data->int_cmov;
1467 return false;
1469 case DIV:
1470 case UDIV:
1471 case MOD:
1472 case UMOD:
1473 if (!float_mode_p)
1474 *total = cost_data->int_div;
1475 else if (mode == SFmode)
1476 *total = cost_data->fp_div_sf;
1477 else
1478 *total = cost_data->fp_div_df;
1479 return false;
1481 case MEM:
1482 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1483 return true;
1485 case NEG:
1486 if (! float_mode_p)
1488 *total = COSTS_N_INSNS (1);
1489 return false;
1491 /* FALLTHRU */
1493 case ABS:
1494 if (! float_mode_p)
1496 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1497 return false;
1499 /* FALLTHRU */
1501 case FLOAT:
1502 case UNSIGNED_FLOAT:
1503 case FIX:
1504 case UNSIGNED_FIX:
1505 case FLOAT_TRUNCATE:
1506 *total = cost_data->fp_add;
1507 return false;
1509 case FLOAT_EXTEND:
1510 if (MEM_P (XEXP (x, 0)))
1511 *total = 0;
1512 else
1513 *total = cost_data->fp_add;
1514 return false;
1516 default:
1517 return false;
1521 /* REF is an alignable memory location. Place an aligned SImode
1522 reference into *PALIGNED_MEM and the number of bits to shift into
1523    *PBITNUM.  */
1526 void
1527 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1529 rtx base;
1530 HOST_WIDE_INT disp, offset;
1532 gcc_assert (MEM_P (ref));
1534 if (reload_in_progress)
1536 base = find_replacement (&XEXP (ref, 0));
1537 gcc_assert (memory_address_p (GET_MODE (ref), base));
1539 else
1540 base = XEXP (ref, 0);
1542 if (GET_CODE (base) == PLUS)
1543 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1544 else
1545 disp = 0;
1547 /* Find the byte offset within an aligned word. If the memory itself is
1548 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1549 will have examined the base register and determined it is aligned, and
1550 thus displacements from it are naturally alignable. */
1551 if (MEM_ALIGN (ref) >= 32)
1552 offset = 0;
1553 else
1554 offset = disp & 3;
1556   /* The location should not cross an aligned word boundary.  */
1557 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1558 <= GET_MODE_SIZE (SImode));
1560 /* Access the entire aligned word. */
1561 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1563 /* Convert the byte offset within the word to a bit offset. */
1564 offset *= BITS_PER_UNIT;
1565 *pbitnum = GEN_INT (offset);
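/* For instance, an HImode reference 2 bytes into an aligned word yields an
   SImode *PALIGNED_MEM covering the whole word and *PBITNUM == 16.  */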
1568 /* Similar, but just get the address.  Handle the two reload cases.  */
1572 get_unaligned_address (rtx ref)
1574 rtx base;
1575 HOST_WIDE_INT offset = 0;
1577 gcc_assert (MEM_P (ref));
1579 if (reload_in_progress)
1581 base = find_replacement (&XEXP (ref, 0));
1582 gcc_assert (memory_address_p (GET_MODE (ref), base));
1584 else
1585 base = XEXP (ref, 0);
1587 if (GET_CODE (base) == PLUS)
1588 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1590 return plus_constant (Pmode, base, offset);
1593 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1594 X is always returned in a register. */
1597 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1599 if (GET_CODE (addr) == PLUS)
1601 ofs += INTVAL (XEXP (addr, 1));
1602 addr = XEXP (addr, 0);
1605 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1606 NULL_RTX, 1, OPTAB_LIB_WIDEN);
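/* E.g. for ADDR == (plus $1 13) and OFS == 2 we return a register holding
   $1 + 7, which shares its low three bits with $1 + 15.  */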
1609 /* On the Alpha, all (non-symbolic) constants except zero go into
1610 a floating-point register via memory. Note that we cannot
1611 return anything that is not a subset of RCLASS, and that some
1612 symbolic constants cannot be dropped to memory. */
1614 enum reg_class
1615 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1617 /* Zero is present in any register class. */
1618 if (x == CONST0_RTX (GET_MODE (x)))
1619 return rclass;
1621 /* These sorts of constants we can easily drop to memory. */
1622 if (CONST_SCALAR_INT_P (x)
1623 || CONST_DOUBLE_P (x)
1624 || GET_CODE (x) == CONST_VECTOR)
1626 if (rclass == FLOAT_REGS)
1627 return NO_REGS;
1628 if (rclass == ALL_REGS)
1629 return GENERAL_REGS;
1630 return rclass;
1633 /* All other kinds of constants should not (and in the case of HIGH
1634 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1635 secondary reload. */
1636 if (CONSTANT_P (x))
1637 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1639 return rclass;
1642 /* Inform reload about cases where moving X with a mode MODE to a register in
1643 RCLASS requires an extra scratch or immediate register. Return the class
1644 needed for the immediate register. */
1646 static reg_class_t
1647 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1648 machine_mode mode, secondary_reload_info *sri)
1650 enum reg_class rclass = (enum reg_class) rclass_i;
1652 /* Loading and storing HImode or QImode values to and from memory
1653 usually requires a scratch register. */
1654 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1656 if (any_memory_operand (x, mode))
1658 if (in_p)
1660 if (!aligned_memory_operand (x, mode))
1661 sri->icode = direct_optab_handler (reload_in_optab, mode);
1663 else
1664 sri->icode = direct_optab_handler (reload_out_optab, mode);
1665 return NO_REGS;
1669 /* We also cannot do integral arithmetic into FP regs, as might result
1670 from register elimination into a DImode fp register. */
1671 if (rclass == FLOAT_REGS)
1673 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1674 return GENERAL_REGS;
1675 if (in_p && INTEGRAL_MODE_P (mode)
1676 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1677 return GENERAL_REGS;
1680 return NO_REGS;
1683 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
1685 If we are copying between general and FP registers, we need a memory
1686 location unless the FIX extension is available. */
1688 static bool
1689 alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
1690 reg_class_t class2)
1692 return (!TARGET_FIX
1693 && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
1694 || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
1697 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is
1698 floating-point, use it. Otherwise, widen to a word like the default.
1699 This is needed because we always store integers in FP registers in
1700 quadword format. This whole area is very tricky! */
1702 static machine_mode
1703 alpha_secondary_memory_needed_mode (machine_mode mode)
1705 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1706 return mode;
1707 if (GET_MODE_SIZE (mode) >= 4)
1708 return mode;
1709 return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
1712 /* Given SEQ, which is an INSN list, look for any MEMs in either
1713 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1714 volatile flags from REF into each of the MEMs found. If REF is not
1715 a MEM, don't do anything. */
1717 void
1718 alpha_set_memflags (rtx seq, rtx ref)
1720 rtx_insn *insn;
1722 if (!MEM_P (ref))
1723 return;
1725 /* This is only called from alpha.md, after having had something
1726 generated from one of the insn patterns. So if everything is
1727 zero, the pattern is already up-to-date. */
1728 if (!MEM_VOLATILE_P (ref)
1729 && !MEM_NOTRAP_P (ref)
1730 && !MEM_READONLY_P (ref))
1731 return;
1733 subrtx_var_iterator::array_type array;
1734 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1735 if (INSN_P (insn))
1736 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1738 rtx x = *iter;
1739 if (MEM_P (x))
1741 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1742 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1743 MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1744 /* Sadly, we cannot use alias sets because the extra
1745 aliasing produced by the AND interferes. Given that
1746 two-byte quantities are the only thing we would be
1747 able to differentiate anyway, there does not seem to
1748 be any point in convoluting the early out of the
1749 alias check. */
1750 iter.skip_subrtxes ();
1753 else
1754 gcc_unreachable ();
1757 static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1758 int, bool);
1760 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1761 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1762 and return pc_rtx if successful. */
1764 static rtx
1765 alpha_emit_set_const_1 (rtx target, machine_mode mode,
1766 HOST_WIDE_INT c, int n, bool no_output)
1768 HOST_WIDE_INT new_const;
1769 int i, bits;
1770 /* Use a pseudo if highly optimizing and still generating RTL. */
1771 rtx subtarget
1772 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1773 rtx temp, insn;
1775 /* If this is a sign-extended 32-bit constant, we can do this in at most
1776 three insns, so do it if we have enough insns left. */
1778 if (c >> 31 == -1 || c >> 31 == 0)
1780 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1781 HOST_WIDE_INT tmp1 = c - low;
1782 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1783 HOST_WIDE_INT extra = 0;
1785 /* If HIGH will be interpreted as negative but the constant is
1786      positive, we must adjust it to do two ldah insns.  */
1788 if ((high & 0x8000) != 0 && c >= 0)
1790 extra = 0x4000;
1791 tmp1 -= 0x40000000;
1792 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1795 if (c == low || (low == 0 && extra == 0))
1797 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1798 but that meant that we can't handle INT_MIN on 32-bit machines
1799 (like NT/Alpha), because we recurse indefinitely through
1800 emit_move_insn to gen_movdi. So instead, since we know exactly
1801 what we want, create it explicitly. */
1803 if (no_output)
1804 return pc_rtx;
1805 if (target == NULL)
1806 target = gen_reg_rtx (mode);
1807 emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1808 return target;
1810 else if (n >= 2 + (extra != 0))
1812 if (no_output)
1813 return pc_rtx;
1814 if (!can_create_pseudo_p ())
1816 emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1817 temp = target;
1819 else
1820 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1821 subtarget, mode);
1823 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1824 This means that if we go through expand_binop, we'll try to
1825 generate extensions, etc, which will require new pseudos, which
1826 will fail during some split phases. The SImode add patterns
1827 still exist, but are not named. So build the insns by hand. */
1829 if (extra != 0)
1831 if (! subtarget)
1832 subtarget = gen_reg_rtx (mode);
1833 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1834 insn = gen_rtx_SET (subtarget, insn);
1835 emit_insn (insn);
1836 temp = subtarget;
1839 if (target == NULL)
1840 target = gen_reg_rtx (mode);
1841 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1842 insn = gen_rtx_SET (target, insn);
1843 emit_insn (insn);
1844 return target;
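/* For example, C == 0x7fff8000 has LOW == -0x8000 and would naively want a
   HIGH field of 0x8000, which sign-extends; with the adjustment we instead
   emit, roughly, two ldah insns of 0x4000 each plus an lda of -0x8000,
   three insns in all.  */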
1848 /* If we couldn't do it that way, try some other methods. But if we have
1849 no instructions left, don't bother. Likewise, if this is SImode and
1850 we can't make pseudos, we can't do anything since the expand_binop
1851 and expand_unop calls will widen and try to make pseudos. */
1853 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1854 return 0;
1856 /* Next, see if we can load a related constant and then shift and possibly
1857 negate it to get the constant we want. Try this once each increasing
1858 numbers of insns. */
1860 for (i = 1; i < n; i++)
1862 /* First, see if minus some low bits, we've an easy load of
1863 high bits. */
1865 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1866 if (new_const != 0)
1868 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1869 if (temp)
1871 if (no_output)
1872 return temp;
1873 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1874 target, 0, OPTAB_WIDEN);
1878 /* Next try complementing. */
1879 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1880 if (temp)
1882 if (no_output)
1883 return temp;
1884 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1887 /* Next try to form a constant and do a left shift. We can do this
1888 if some low-order bits are zero; the exact_log2 call below tells
1889 us that information. The bits we are shifting out could be any
1890 value, but here we'll just try the 0- and sign-extended forms of
1891 the constant. To try to increase the chance of having the same
1892 constant in more than one insn, start at the highest number of
1893 bits to shift, but try all possibilities in case a ZAPNOT will
1894 be useful. */
1896 bits = exact_log2 (c & -c);
1897 if (bits > 0)
1898 for (; bits > 0; bits--)
1900 new_const = c >> bits;
1901 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1902 if (!temp && c < 0)
1904 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1905 temp = alpha_emit_set_const (subtarget, mode, new_const,
1906 i, no_output);
1908 if (temp)
1910 if (no_output)
1911 return temp;
1912 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1913 target, 0, OPTAB_WIDEN);
1917 /* Now try high-order zero bits. Here we try the shifted-in bits as
1918 all zero and all ones. Be careful to avoid shifting outside the
1919 mode and to avoid shifting outside the host wide int size. */
1921 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1922 - floor_log2 (c) - 1);
1923 if (bits > 0)
1924 for (; bits > 0; bits--)
1926 new_const = c << bits;
1927 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1928 if (!temp)
1930 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1931 temp = alpha_emit_set_const (subtarget, mode, new_const,
1932 i, no_output);
1934 if (temp)
1936 if (no_output)
1937 return temp;
1938 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1939 target, 1, OPTAB_WIDEN);
1943 /* Now try high-order 1 bits. We get that with a sign-extension.
1944 But one bit isn't enough here. Be careful to avoid shifting outside
1945 the mode and to avoid shifting outside the host wide int size. */
1947 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1948 - floor_log2 (~ c) - 2);
1949 if (bits > 0)
1950 for (; bits > 0; bits--)
1952 new_const = c << bits;
1953 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1954 if (!temp)
1956 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1957 temp = alpha_emit_set_const (subtarget, mode, new_const,
1958 i, no_output);
1960 if (temp)
1962 if (no_output)
1963 return temp;
1964 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1965 target, 0, OPTAB_WIDEN);
1970 /* Finally, see if can load a value into the target that is the same as the
1971 constant except that all bytes that are 0 are changed to be 0xff. If we
1972 can, then we can do a ZAPNOT to obtain the desired constant. */
1974 new_const = c;
1975 for (i = 0; i < 64; i += 8)
1976 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1977 new_const |= (HOST_WIDE_INT) 0xff << i;
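  /* For example, C == 0x00000000ffff0000 gives NEW_CONST == -1, which loads
     in one insn; the AND below then becomes a ZAPNOT keeping bytes 2 and 3,
     recovering C in two insns total.  */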
1979 /* We are only called for SImode and DImode. If this is SImode, ensure that
1980 we are sign extended to a full word. */
1982 if (mode == SImode)
1983 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1985 if (new_const != c)
1987 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1988 if (temp)
1990 if (no_output)
1991 return temp;
1992 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1993 target, 0, OPTAB_WIDEN);
1997 return 0;
2000 /* Try to output insns to set TARGET equal to the constant C if it can be
2001 done in less than N insns. Do all computations in MODE. Returns the place
2002 where the output has been placed if it can be done and the insns have been
2003 emitted. If it would take more than N insns, zero is returned and no
2004 insns are emitted. */
2006 static rtx
2007 alpha_emit_set_const (rtx target, machine_mode mode,
2008 HOST_WIDE_INT c, int n, bool no_output)
2010 machine_mode orig_mode = mode;
2011 rtx orig_target = target;
2012 rtx result = 0;
2013 int i;
2015 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2016 can't load this constant in one insn, do this in DImode. */
2017 if (!can_create_pseudo_p () && mode == SImode
2018 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2020 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2021 if (result)
2022 return result;
2024 target = no_output ? NULL : gen_lowpart (DImode, target);
2025 mode = DImode;
2027 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2029 target = no_output ? NULL : gen_lowpart (DImode, target);
2030 mode = DImode;
2033 /* Try 1 insn, then 2, then up to N. */
2034 for (i = 1; i <= n; i++)
2036 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2037 if (result)
2039 rtx_insn *insn;
2040 rtx set;
2042 if (no_output)
2043 return result;
2045 insn = get_last_insn ();
2046 set = single_set (insn);
2047 if (! CONSTANT_P (SET_SRC (set)))
2048 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2049 break;
2053 /* Allow for the case where we changed the mode of TARGET. */
2054 if (result)
2056 if (result == target)
2057 result = orig_target;
2058 else if (mode != orig_mode)
2059 result = gen_lowpart (orig_mode, result);
2062 return result;
2065 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2066 fall back to a straightforward decomposition. We do this to avoid
2067 exponential run times encountered when looking for longer sequences
2068 with alpha_emit_set_const. */
2070 static rtx
2071 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2073 HOST_WIDE_INT d1, d2, d3, d4;
2074 machine_mode mode = GET_MODE (target);
2075 rtx orig_target = target;
2077 /* Decompose the entire word */
2079 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2080 c1 -= d1;
2081 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2082 c1 = (c1 - d2) >> 32;
2083 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2084 c1 -= d3;
2085 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2086 gcc_assert (c1 == d4);
2088 if (mode != DImode)
2089 target = gen_lowpart (DImode, target);
2091 /* Construct the high word */
2092 if (d4)
2094 emit_move_insn (target, GEN_INT (d4));
2095 if (d3)
2096 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2098 else
2099 emit_move_insn (target, GEN_INT (d3));
2101 /* Shift it into place */
2102 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2104 /* Add in the low bits. */
2105 if (d2)
2106 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2107 if (d1)
2108 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2110 return orig_target;
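/* Worked example (illustrative): c1 = 0x1234567876543210 decomposes as
   d1 = 0x3210, d2 = 0x76540000, d3 = 0x5678, d4 = 0x12340000, giving
   roughly

	ldah	t,0x1234($31)
	lda	t,0x5678(t)
	sll	t,32,t
	ldah	t,0x7654(t)
	lda	t,0x3210(t)

   The ^ 0x8000 / - 0x8000 dance above is just 16-bit (resp. 32-bit) sign
   extension, so each piece fits the signed displacement of lda/ldah.  */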
2113 /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2115 static HOST_WIDE_INT
2116 alpha_extract_integer (rtx x)
2118 if (GET_CODE (x) == CONST_VECTOR)
2119 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2121 gcc_assert (CONST_INT_P (x));
2123 return INTVAL (x);
2126 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2127 we are willing to load the value into a register via a move pattern.
2128 Normally this is all symbolic constants, integral constants that
2129 take three or fewer instructions, and floating-point zero. */
2131 bool
2132 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2134 HOST_WIDE_INT i0;
2136 switch (GET_CODE (x))
2138 case LABEL_REF:
2139 case HIGH:
2140 return true;
2142 case CONST:
2143 if (GET_CODE (XEXP (x, 0)) == PLUS
2144 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2145 x = XEXP (XEXP (x, 0), 0);
2146 else
2147 return true;
2149 if (GET_CODE (x) != SYMBOL_REF)
2150 return true;
2151 /* FALLTHRU */
2153 case SYMBOL_REF:
2154 /* TLS symbols are never valid. */
2155 return SYMBOL_REF_TLS_MODEL (x) == 0;
2157 case CONST_WIDE_INT:
2158 if (TARGET_BUILD_CONSTANTS)
2159 return true;
2160 if (x == CONST0_RTX (mode))
2161 return true;
2162 mode = DImode;
2163 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2164 i0 = CONST_WIDE_INT_ELT (x, 1);
2165 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2166 return false;
2167 i0 = CONST_WIDE_INT_ELT (x, 0);
2168 goto do_integer;
2170 case CONST_DOUBLE:
2171 if (x == CONST0_RTX (mode))
2172 return true;
2173 return false;
2175 case CONST_VECTOR:
2176 if (x == CONST0_RTX (mode))
2177 return true;
2178 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2179 return false;
2180 if (GET_MODE_SIZE (mode) != 8)
2181 return false;
2182 /* FALLTHRU */
2184 case CONST_INT:
2185 if (TARGET_BUILD_CONSTANTS)
2186 return true;
2187 i0 = alpha_extract_integer (x);
2188 do_integer:
2189 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2191 default:
2192 return false;
2196 /* Operand 1 is known to be a constant, and should require more than one
2197 instruction to load. Emit that multi-part load. */
2199 bool
2200 alpha_split_const_mov (machine_mode mode, rtx *operands)
2202 HOST_WIDE_INT i0;
2203 rtx temp = NULL_RTX;
2205 i0 = alpha_extract_integer (operands[1]);
2207 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2209 if (!temp && TARGET_BUILD_CONSTANTS)
2210 temp = alpha_emit_set_long_const (operands[0], i0);
2212 if (temp)
2214 if (!rtx_equal_p (operands[0], temp))
2215 emit_move_insn (operands[0], temp);
2216 return true;
2219 return false;
2222 /* Expand a move instruction; return true if all work is done.
2223 We don't handle non-bwx subword loads here. */
2225 bool
2226 alpha_expand_mov (machine_mode mode, rtx *operands)
2228 rtx tmp;
2230 /* If the output is not a register, the input must be. */
2231 if (MEM_P (operands[0])
2232 && ! reg_or_0_operand (operands[1], mode))
2233 operands[1] = force_reg (mode, operands[1]);
2235 /* Allow legitimize_address to perform some simplifications. */
2236 if (mode == Pmode && symbolic_operand (operands[1], mode))
2238 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2239 if (tmp)
2241 if (tmp == operands[0])
2242 return true;
2243 operands[1] = tmp;
2244 return false;
2248 /* Early out for non-constants and valid constants. */
2249 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2250 return false;
2252 /* Split large integers. */
2253 if (CONST_INT_P (operands[1])
2254 || GET_CODE (operands[1]) == CONST_VECTOR)
2256 if (alpha_split_const_mov (mode, operands))
2257 return true;
2260 /* Otherwise we've nothing left but to drop the thing to memory. */
2261 tmp = force_const_mem (mode, operands[1]);
2263 if (tmp == NULL_RTX)
2264 return false;
2266 if (reload_in_progress)
2268 emit_move_insn (operands[0], XEXP (tmp, 0));
2269 operands[1] = replace_equiv_address (tmp, operands[0]);
2271 else
2272 operands[1] = validize_mem (tmp);
2273 return false;
2276 /* Expand a non-bwx QImode or HImode move instruction;
2277 return true if all work is done. */
2279 bool
2280 alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2282 rtx seq;
2284 /* If the output is not a register, the input must be. */
2285 if (MEM_P (operands[0]))
2286 operands[1] = force_reg (mode, operands[1]);
2288 /* Handle four memory cases, unaligned and aligned for either the input
2289 or the output. The only case where we can be called during reload is
2290 for aligned loads; all other cases require temporaries. */
2292 if (any_memory_operand (operands[1], mode))
2294 if (aligned_memory_operand (operands[1], mode))
2296 if (reload_in_progress)
2298 seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
2299 emit_insn (seq);
2301 else
2303 rtx aligned_mem, bitnum;
2304 rtx scratch = gen_reg_rtx (SImode);
2305 rtx subtarget;
2306 bool copyout;
2308 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2310 subtarget = operands[0];
2311 if (REG_P (subtarget))
2312 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2313 else
2314 subtarget = gen_reg_rtx (DImode), copyout = true;
2316 if (mode == QImode)
2317 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2318 bitnum, scratch);
2319 else
2320 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2321 bitnum, scratch);
2322 emit_insn (seq);
2324 if (copyout)
2325 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2328 else
2330 /* Don't pass these as parameters since that makes the generated
2331 code depend on parameter evaluation order which will cause
2332 bootstrap failures. */
2334 rtx temp1, temp2, subtarget, ua;
2335 bool copyout;
2337 temp1 = gen_reg_rtx (DImode);
2338 temp2 = gen_reg_rtx (DImode);
2340 subtarget = operands[0];
2341 if (REG_P (subtarget))
2342 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2343 else
2344 subtarget = gen_reg_rtx (DImode), copyout = true;
2346 ua = get_unaligned_address (operands[1]);
2347 if (mode == QImode)
2348 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2349 else
2350 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2352 alpha_set_memflags (seq, operands[1]);
2353 emit_insn (seq);
2355 if (copyout)
2356 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2358 return true;
2361 if (any_memory_operand (operands[0], mode))
2363 if (aligned_memory_operand (operands[0], mode))
2365 rtx aligned_mem, bitnum;
2366 rtx temp1 = gen_reg_rtx (SImode);
2367 rtx temp2 = gen_reg_rtx (SImode);
2369 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2371 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2372 temp1, temp2));
2374 else
2376 rtx temp1 = gen_reg_rtx (DImode);
2377 rtx temp2 = gen_reg_rtx (DImode);
2378 rtx temp3 = gen_reg_rtx (DImode);
2379 rtx ua = get_unaligned_address (operands[0]);
2381 seq = gen_unaligned_store
2382 (mode, ua, operands[1], temp1, temp2, temp3);
2384 alpha_set_memflags (seq, operands[0]);
2385 emit_insn (seq);
2387 return true;
2390 return false;
2393 /* Implement the movmisalign patterns. One of the operands is a memory
2394 that is not naturally aligned. Emit instructions to load it. */
2396 void
2397 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2399 /* Honor misaligned loads, for those we promised to do so. */
2400 if (MEM_P (operands[1]))
2402 rtx tmp;
2404 if (register_operand (operands[0], mode))
2405 tmp = operands[0];
2406 else
2407 tmp = gen_reg_rtx (mode);
2409 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2410 if (tmp != operands[0])
2411 emit_move_insn (operands[0], tmp);
2413 else if (MEM_P (operands[0]))
2415 if (!reg_or_0_operand (operands[1], mode))
2416 operands[1] = force_reg (mode, operands[1]);
2417 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2419 else
2420 gcc_unreachable ();
2423 /* Generate an unsigned DImode to FP conversion. This is the same code
2424 optabs would emit if we didn't have TFmode patterns.
2426 For SFmode, this is the only construction I've found that can pass
2427 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2428 intermediates will work, because you'll get intermediate rounding
2429 that ruins the end result. Some of this could be fixed by turning
2430 on round-to-positive-infinity, but that requires diddling the fpsr,
2431 which kills performance. I tried turning this around and converting
2432 to a negative number, so that I could turn on /m, but either I did
2434 it wrong, or there's something else, because I wound up with the exact
2434 same single-bit error. There is a branch-less form of this same code:
2436 srl $16,1,$1
2437 and $16,1,$2
2438 cmplt $16,0,$3
2439 or $1,$2,$2
2440 cmovge $16,$16,$2
2441 itoft $3,$f10
2442 itoft $2,$f11
2443 cvtqs $f11,$f11
2444 adds $f11,$f11,$f0
2445 fcmoveq $f10,$f11,$f0
2447 I'm not using it because it's the same number of instructions as
2448 this branch-full form, and it has more serialized long latency
2449 instructions on the critical path.
2451 For DFmode, we can avoid rounding errors by breaking up the word
2452 into two pieces, converting them separately, and adding them back:
2454 LC0: .long 0,0x5f800000
2456 itoft $16,$f11
2457 lda $2,LC0
2458 cmplt $16,0,$1
2459 cpyse $f11,$f31,$f10
2460 cpyse $f31,$f11,$f11
2461 s4addq $1,$2,$1
2462 lds $f12,0($1)
2463 cvtqt $f10,$f10
2464 cvtqt $f11,$f11
2465 addt $f12,$f10,$f0
2466 addt $f0,$f11,$f0
2468 This doesn't seem to be a clear-cut win over the optabs form.
2469 It probably all depends on the distribution of numbers being
2470 converted -- in the optabs form, all but high-bit-set has a
2471 much lower minimum execution time. */
2473 void
2474 alpha_emit_floatuns (rtx operands[2])
2476 rtx neglab, donelab, i0, i1, f0, in, out;
2477 machine_mode mode;
2479 out = operands[0];
2480 in = force_reg (DImode, operands[1]);
2481 mode = GET_MODE (out);
2482 neglab = gen_label_rtx ();
2483 donelab = gen_label_rtx ();
2484 i0 = gen_reg_rtx (DImode);
2485 i1 = gen_reg_rtx (DImode);
2486 f0 = gen_reg_rtx (mode);
2488 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2490 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2491 emit_jump_insn (gen_jump (donelab));
2492 emit_barrier ();
2494 emit_label (neglab);
2496 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2497 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2498 emit_insn (gen_iordi3 (i0, i0, i1));
2499 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2500 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2502 emit_label (donelab);
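/* A rough C-level sketch of the negative path above (illustrative only,
   not part of this file): halving with a sticky low bit and doubling the
   converted result preserves IEEE rounding:

     uint64_t half = (x >> 1) | (x & 1);
     double d = (double) (int64_t) half;
     return d + d;  */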
2505 /* Generate the comparison for a conditional branch. */
2507 void
2508 alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2510 enum rtx_code cmp_code, branch_code;
2511 machine_mode branch_mode = VOIDmode;
2512 enum rtx_code code = GET_CODE (operands[0]);
2513 rtx op0 = operands[1], op1 = operands[2];
2514 rtx tem;
2516 if (cmp_mode == TFmode)
2518 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2519 op1 = const0_rtx;
2520 cmp_mode = DImode;
2523 /* The general case: fold the comparison code to the types of compares
2524 that we have, choosing the branch as necessary. */
2525 switch (code)
2527 case EQ: case LE: case LT: case LEU: case LTU:
2528 case UNORDERED:
2529 /* We have these compares. */
2530 cmp_code = code, branch_code = NE;
2531 break;
2533 case NE:
2534 case ORDERED:
2535 /* These must be reversed. */
2536 cmp_code = reverse_condition (code), branch_code = EQ;
2537 break;
2539 case GE: case GT: case GEU: case GTU:
2540 /* For FP, we swap them, for INT, we reverse them. */
2541 if (cmp_mode == DFmode)
2543 cmp_code = swap_condition (code);
2544 branch_code = NE;
2545 std::swap (op0, op1);
2547 else
2549 cmp_code = reverse_condition (code);
2550 branch_code = EQ;
2552 break;
2554 default:
2555 gcc_unreachable ();
2558 if (cmp_mode == DFmode)
2560 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2562 /* When we are not as concerned about non-finite values, and we
2563 are comparing against zero, we can branch directly. */
2564 if (op1 == CONST0_RTX (DFmode))
2565 cmp_code = UNKNOWN, branch_code = code;
2566 else if (op0 == CONST0_RTX (DFmode))
2568 /* Undo the swap we probably did just above. */
2569 std::swap (op0, op1);
2570 branch_code = swap_condition (cmp_code);
2571 cmp_code = UNKNOWN;
2574 else
2576 /* ??? We mark the branch mode to be CCmode to prevent the
2577 compare and branch from being combined, since the compare
2578 insn follows IEEE rules that the branch does not. */
2579 branch_mode = CCmode;
2582 else
2584 /* The following optimizations are only for signed compares. */
2585 if (code != LEU && code != LTU && code != GEU && code != GTU)
2587 /* Whee. Compare and branch against 0 directly. */
2588 if (op1 == const0_rtx)
2589 cmp_code = UNKNOWN, branch_code = code;
2591 /* If the constant doesn't fit into an immediate, but can
2592 be generated by lda/ldah, we adjust the argument and
2593 compare against zero, so we can use beq/bne directly. */
2594 /* ??? Don't do this when comparing against symbols, otherwise
2595 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2596 be declared false out of hand (at least for non-weak). */
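/* For instance (illustrative): for (x == 0x1234), 0x1234 does not fit the
   8-bit operate immediate, but -0x1234 fits lda, so this turns into
   roughly

	lda	t,-4660(x)
	beq	t,L  */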
2597 else if (CONST_INT_P (op1)
2598 && (code == EQ || code == NE)
2599 && !(symbolic_operand (op0, VOIDmode)
2600 || (REG_P (op0) && REG_POINTER (op0))))
2602 rtx n_op1 = GEN_INT (-INTVAL (op1));
2604 if (! satisfies_constraint_I (op1)
2605 && (satisfies_constraint_K (n_op1)
2606 || satisfies_constraint_L (n_op1)))
2607 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2611 if (!reg_or_0_operand (op0, DImode))
2612 op0 = force_reg (DImode, op0);
2613 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2614 op1 = force_reg (DImode, op1);
2617 /* Emit an initial compare instruction, if necessary. */
2618 tem = op0;
2619 if (cmp_code != UNKNOWN)
2621 tem = gen_reg_rtx (cmp_mode);
2622 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2625 /* Emit the branch instruction. */
2626 tem = gen_rtx_SET (pc_rtx,
2627 gen_rtx_IF_THEN_ELSE (VOIDmode,
2628 gen_rtx_fmt_ee (branch_code,
2629 branch_mode, tem,
2630 CONST0_RTX (cmp_mode)),
2631 gen_rtx_LABEL_REF (VOIDmode,
2632 operands[3]),
2633 pc_rtx));
2634 emit_jump_insn (tem);
2637 /* Certain simplifications can be done to make invalid setcc operations
2638 valid. Return true if we could emit the setcc, false otherwise. */
2640 bool
2641 alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2643 enum rtx_code cmp_code;
2644 enum rtx_code code = GET_CODE (operands[1]);
2645 rtx op0 = operands[2], op1 = operands[3];
2646 rtx tmp;
2648 if (cmp_mode == TFmode)
2650 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2651 op1 = const0_rtx;
2652 cmp_mode = DImode;
2655 if (cmp_mode == DFmode && !TARGET_FIX)
2656 return 0;
2658 /* The general case: fold the comparison code to the types of compares
2659 that we have, choosing the branch as necessary. */
2661 cmp_code = UNKNOWN;
2662 switch (code)
2664 case EQ: case LE: case LT: case LEU: case LTU:
2665 case UNORDERED:
2666 /* We have these compares. */
2667 if (cmp_mode == DFmode)
2668 cmp_code = code, code = NE;
2669 break;
2671 case NE:
2672 if (cmp_mode == DImode && op1 == const0_rtx)
2673 break;
2674 /* FALLTHRU */
2676 case ORDERED:
2677 cmp_code = reverse_condition (code);
2678 code = EQ;
2679 break;
2681 case GE: case GT: case GEU: case GTU:
2682 /* These normally need swapping, but for integer zero we have
2683 special patterns that recognize swapped operands. */
2684 if (cmp_mode == DImode && op1 == const0_rtx)
2685 break;
2686 code = swap_condition (code);
2687 if (cmp_mode == DFmode)
2688 cmp_code = code, code = NE;
2689 std::swap (op0, op1);
2690 break;
2692 default:
2693 gcc_unreachable ();
2696 if (cmp_mode == DImode)
2698 if (!register_operand (op0, DImode))
2699 op0 = force_reg (DImode, op0);
2700 if (!reg_or_8bit_operand (op1, DImode))
2701 op1 = force_reg (DImode, op1);
2704 /* Emit an initial compare instruction, if necessary. */
2705 if (cmp_code != UNKNOWN)
2707 tmp = gen_reg_rtx (cmp_mode);
2708 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2709 op0, op1)));
2711 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2712 op1 = const0_rtx;
2715 /* Emit the setcc instruction. */
2716 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2717 op0, op1)));
2718 return true;
2722 /* Rewrite a comparison against zero CMP of the form
2723 (CODE (cc0) (const_int 0)) so it can be written validly in
2724 a conditional move (if_then_else CMP ...).
2725 If both of the operands that set cc0 are nonzero we must emit
2726 an insn to perform the compare (it can't be done within
2727 the conditional move). */
2730 alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2732 enum rtx_code code = GET_CODE (cmp);
2733 enum rtx_code cmov_code = NE;
2734 rtx op0 = XEXP (cmp, 0);
2735 rtx op1 = XEXP (cmp, 1);
2736 machine_mode cmp_mode
2737 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2738 machine_mode cmov_mode = VOIDmode;
2739 int local_fast_math = flag_unsafe_math_optimizations;
2740 rtx tem;
2742 if (cmp_mode == TFmode)
2744 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2745 op1 = const0_rtx;
2746 cmp_mode = DImode;
2749 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2751 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2753 enum rtx_code cmp_code;
2755 if (! TARGET_FIX)
2756 return 0;
2758 /* If we have fp<->int register move instructions, do a cmov by
2759 performing the comparison in fp registers, and move the
2760 zero/nonzero value to integer registers, where we can then
2761 use a normal cmov, or vice-versa. */
2763 switch (code)
2765 case EQ: case LE: case LT: case LEU: case LTU:
2766 case UNORDERED:
2767 /* We have these compares. */
2768 cmp_code = code, code = NE;
2769 break;
2771 case NE:
2772 case ORDERED:
2773 /* These must be reversed. */
2774 cmp_code = reverse_condition (code), code = EQ;
2775 break;
2777 case GE: case GT: case GEU: case GTU:
2778 /* These normally need swapping, but for integer zero we have
2779 special patterns that recognize swapped operands. */
2780 if (cmp_mode == DImode && op1 == const0_rtx)
2781 cmp_code = code, code = NE;
2782 else
2784 cmp_code = swap_condition (code);
2785 code = NE;
2786 std::swap (op0, op1);
2788 break;
2790 default:
2791 gcc_unreachable ();
2794 if (cmp_mode == DImode)
2796 if (!reg_or_0_operand (op0, DImode))
2797 op0 = force_reg (DImode, op0);
2798 if (!reg_or_8bit_operand (op1, DImode))
2799 op1 = force_reg (DImode, op1);
2802 tem = gen_reg_rtx (cmp_mode);
2803 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2804 op0, op1)));
2806 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2807 op0 = gen_lowpart (cmp_mode, tem);
2808 op1 = CONST0_RTX (cmp_mode);
2809 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2810 local_fast_math = 1;
2813 if (cmp_mode == DImode)
2815 if (!reg_or_0_operand (op0, DImode))
2816 op0 = force_reg (DImode, op0);
2817 if (!reg_or_8bit_operand (op1, DImode))
2818 op1 = force_reg (DImode, op1);
2821 /* We may be able to use a conditional move directly.
2822 This avoids emitting spurious compares. */
2823 if (signed_comparison_operator (cmp, VOIDmode)
2824 && (cmp_mode == DImode || local_fast_math)
2825 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2826 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2828 /* We can't put the comparison inside the conditional move;
2829 emit a compare instruction and put that inside the
2830 conditional move. Make sure we emit only comparisons we have;
2831 swap or reverse as necessary. */
2833 if (!can_create_pseudo_p ())
2834 return NULL_RTX;
2836 switch (code)
2838 case EQ: case LE: case LT: case LEU: case LTU:
2839 case UNORDERED:
2840 /* We have these compares: */
2841 break;
2843 case NE:
2844 case ORDERED:
2845 /* These must be reversed. */
2846 code = reverse_condition (code);
2847 cmov_code = EQ;
2848 break;
2850 case GE: case GT: case GEU: case GTU:
2851 /* These normally need swapping, but for integer zero we have
2852 special patterns that recognize swapped operands. */
2853 if (cmp_mode == DImode && op1 == const0_rtx)
2854 break;
2855 code = swap_condition (code);
2856 std::swap (op0, op1);
2857 break;
2859 default:
2860 gcc_unreachable ();
2863 if (cmp_mode == DImode)
2865 if (!reg_or_0_operand (op0, DImode))
2866 op0 = force_reg (DImode, op0);
2867 if (!reg_or_8bit_operand (op1, DImode))
2868 op1 = force_reg (DImode, op1);
2871 /* ??? We mark the branch mode to be CCmode to prevent the compare
2872 and cmov from being combined, since the compare insn follows IEEE
2873 rules that the cmov does not. */
2874 if (cmp_mode == DFmode && !local_fast_math)
2875 cmov_mode = CCmode;
2877 tem = gen_reg_rtx (cmp_mode);
2878 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2879 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2882 /* Simplify a conditional move of two constants into a setcc with
2883 arithmetic. This is done with a splitter since combine would
2884 just undo the work if done during code generation. It also catches
2885 cases we wouldn't have before cse. */
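/* For example (illustrative): (cond ? 8 : 0) becomes a setcc followed by
   a left shift of 3, and (cond ? 5 : 1) becomes a setcc followed by an
   s4addq, instead of materializing both constants and issuing a cmov.  */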
2888 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2889 rtx t_rtx, rtx f_rtx)
2891 HOST_WIDE_INT t, f, diff;
2892 machine_mode mode;
2893 rtx target, subtarget, tmp;
2895 mode = GET_MODE (dest);
2896 t = INTVAL (t_rtx);
2897 f = INTVAL (f_rtx);
2898 diff = t - f;
2900 if (((code == NE || code == EQ) && diff < 0)
2901 || (code == GE || code == GT))
2903 code = reverse_condition (code);
2904 std::swap (t, f);
2905 diff = -diff;
2908 subtarget = target = dest;
2909 if (mode != DImode)
2911 target = gen_lowpart (DImode, dest);
2912 if (can_create_pseudo_p ())
2913 subtarget = gen_reg_rtx (DImode);
2914 else
2915 subtarget = target;
2917 /* Below, we must be careful to use copy_rtx on target and subtarget
2918 in intermediate insns, as they may be a subreg rtx, which may not
2919 be shared. */
2921 if (f == 0 && exact_log2 (diff) > 0
2922 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2923 viable over a longer latency cmove. On EV5, the E0 slot is a
2924 scarce resource, and on EV4 shift has the same latency as a cmove. */
2925 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2927 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2928 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2930 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2931 GEN_INT (exact_log2 (t)));
2932 emit_insn (gen_rtx_SET (target, tmp));
2934 else if (f == 0 && t == -1)
2936 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2937 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2939 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2941 else if (diff == 1 || diff == 4 || diff == 8)
2943 rtx add_op;
2945 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2946 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2948 if (diff == 1)
2949 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2950 else
2952 add_op = GEN_INT (f);
2953 if (sext_add_operand (add_op, mode))
2955 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2956 GEN_INT (exact_log2 (diff)));
2957 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2958 emit_insn (gen_rtx_SET (target, tmp));
2960 else
2961 return 0;
2964 else
2965 return 0;
2967 return 1;
2970 /* Look up the X_floating library function name for the
2971 given operation. */
2973 struct GTY(()) xfloating_op
2975 const enum rtx_code code;
2976 const char *const GTY((skip)) osf_func;
2977 const char *const GTY((skip)) vms_func;
2978 rtx libcall;
2981 static GTY(()) struct xfloating_op xfloating_ops[] =
2983 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2984 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2985 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2986 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2987 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2988 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2989 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2990 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2991 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2992 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2993 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2994 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2995 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2996 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2997 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
3000 static GTY(()) struct xfloating_op vax_cvt_ops[] =
3002 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3003 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3006 static rtx
3007 alpha_lookup_xfloating_lib_func (enum rtx_code code)
3009 struct xfloating_op *ops = xfloating_ops;
3010 long n = ARRAY_SIZE (xfloating_ops);
3011 long i;
3013 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3015 /* How irritating. Nothing to key off for the main table. */
3016 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3018 ops = vax_cvt_ops;
3019 n = ARRAY_SIZE (vax_cvt_ops);
3022 for (i = 0; i < n; ++i, ++ops)
3023 if (ops->code == code)
3025 rtx func = ops->libcall;
3026 if (!func)
3028 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3029 ? ops->vms_func : ops->osf_func);
3030 ops->libcall = func;
3032 return func;
3035 gcc_unreachable ();
3038 /* Most X_floating operations take the rounding mode as an argument.
3039 Compute that here. */
3041 static int
3042 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3043 enum alpha_fp_rounding_mode round)
3045 int mode;
3047 switch (round)
3049 case ALPHA_FPRM_NORM:
3050 mode = 2;
3051 break;
3052 case ALPHA_FPRM_MINF:
3053 mode = 1;
3054 break;
3055 case ALPHA_FPRM_CHOP:
3056 mode = 0;
3057 break;
3058 case ALPHA_FPRM_DYN:
3059 mode = 4;
3060 break;
3061 default:
3062 gcc_unreachable ();
3064 /* XXX For reference, round to +inf is mode = 3. */
3067 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3068 mode |= 0x10000;
3070 return mode;
3073 /* Emit an X_floating library function call.
3075 Note that these functions do not follow normal calling conventions:
3076 TFmode arguments are passed in two integer registers (as opposed to
3077 indirect); TFmode return values appear in R16+R17.
3079 FUNC is the function to call.
3080 TARGET is where the output belongs.
3081 OPERANDS are the inputs.
3082 NOPERANDS is the count of inputs.
3083 EQUIV is the expression equivalent for the function.
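   For instance (illustrative): for _OtsAddX the two TFmode operands go in
   $16-$17 and $18-$19, the rounding-mode argument goes in $20 as a DImode
   value, and the TFmode result comes back in $16-$17.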
3086 static void
3087 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3088 int noperands, rtx equiv)
3090 rtx usage = NULL_RTX, reg;
3091 int regno = 16, i;
3093 start_sequence ();
3095 for (i = 0; i < noperands; ++i)
3097 switch (GET_MODE (operands[i]))
3099 case E_TFmode:
3100 reg = gen_rtx_REG (TFmode, regno);
3101 regno += 2;
3102 break;
3104 case E_DFmode:
3105 reg = gen_rtx_REG (DFmode, regno + 32);
3106 regno += 1;
3107 break;
3109 case E_VOIDmode:
3110 gcc_assert (CONST_INT_P (operands[i]));
3111 /* FALLTHRU */
3112 case E_DImode:
3113 reg = gen_rtx_REG (DImode, regno);
3114 regno += 1;
3115 break;
3117 default:
3118 gcc_unreachable ();
3121 emit_move_insn (reg, operands[i]);
3122 use_reg (&usage, reg);
3125 switch (GET_MODE (target))
3127 case E_TFmode:
3128 reg = gen_rtx_REG (TFmode, 16);
3129 break;
3130 case E_DFmode:
3131 reg = gen_rtx_REG (DFmode, 32);
3132 break;
3133 case E_DImode:
3134 reg = gen_rtx_REG (DImode, 0);
3135 break;
3136 default:
3137 gcc_unreachable ();
3140 rtx mem = gen_rtx_MEM (QImode, func);
3141 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3142 const0_rtx, const0_rtx));
3143 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3144 RTL_CONST_CALL_P (tmp) = 1;
3146 tmp = get_insns ();
3147 end_sequence ();
3149 emit_libcall_block (tmp, target, reg, equiv);
3152 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3154 void
3155 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3157 rtx func;
3158 int mode;
3159 rtx out_operands[3];
3161 func = alpha_lookup_xfloating_lib_func (code);
3162 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3164 out_operands[0] = operands[1];
3165 out_operands[1] = operands[2];
3166 out_operands[2] = GEN_INT (mode);
3167 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3168 gen_rtx_fmt_ee (code, TFmode, operands[1],
3169 operands[2]));
3172 /* Emit an X_floating library function call for a comparison. */
3174 static rtx
3175 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3177 enum rtx_code cmp_code, res_code;
3178 rtx func, out, operands[2], note;
3180 /* X_floating library comparison functions return
3181 -1 unordered
3182 0 false
3183 1 true
3184 Convert the compare against the raw return value. */
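   Concretely (illustrative): UNORDERED (a, b) calls the EQ routine and
   tests the raw result for < 0, ORDERED tests >= 0, NE simply tests for
   != 0, and EQ/LT/GT/LE/GE test > 0 of their own routine, since each
   routine returns -1 only for unordered inputs.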
3186 cmp_code = *pcode;
3187 switch (cmp_code)
3189 case UNORDERED:
3190 cmp_code = EQ;
3191 res_code = LT;
3192 break;
3193 case ORDERED:
3194 cmp_code = EQ;
3195 res_code = GE;
3196 break;
3197 case NE:
3198 res_code = NE;
3199 break;
3200 case EQ:
3201 case LT:
3202 case GT:
3203 case LE:
3204 case GE:
3205 res_code = GT;
3206 break;
3207 default:
3208 gcc_unreachable ();
3210 *pcode = res_code;
3212 func = alpha_lookup_xfloating_lib_func (cmp_code);
3214 operands[0] = op0;
3215 operands[1] = op1;
3216 out = gen_reg_rtx (DImode);
3218 /* What's actually returned is -1,0,1, not a proper boolean value. */
3219 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3220 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3221 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3223 return out;
3226 /* Emit an X_floating library function call for a conversion. */
3228 void
3229 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3231 int noperands = 1, mode;
3232 rtx out_operands[2];
3233 rtx func;
3234 enum rtx_code code = orig_code;
3236 if (code == UNSIGNED_FIX)
3237 code = FIX;
3239 func = alpha_lookup_xfloating_lib_func (code);
3241 out_operands[0] = operands[1];
3243 switch (code)
3245 case FIX:
3246 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3247 out_operands[1] = GEN_INT (mode);
3248 noperands = 2;
3249 break;
3250 case FLOAT_TRUNCATE:
3251 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3252 out_operands[1] = GEN_INT (mode);
3253 noperands = 2;
3254 break;
3255 default:
3256 break;
3259 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3260 gen_rtx_fmt_e (orig_code,
3261 GET_MODE (operands[0]),
3262 operands[1]));
3265 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3266 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3267 guarantee that the sequence
3268 set (OP[0] OP[2])
3269 set (OP[1] OP[3])
3270 is valid. Naturally, output operand ordering is little-endian.
3271 This is used by *movtf_internal and *movti_internal. */
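/* For instance (illustrative): copying a TImode value held in $3 (low)
   and $4 (high) into $4 (low) and $5 (high) must not write $4 before its
   old contents are read; with FIXUP_OVERLAP the two sets are swapped so
   the high half ($5 = $4) is emitted first.  */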
3273 void
3274 alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3275 bool fixup_overlap)
3277 switch (GET_CODE (operands[1]))
3279 case REG:
3280 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3281 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3282 break;
3284 case MEM:
3285 operands[3] = adjust_address (operands[1], DImode, 8);
3286 operands[2] = adjust_address (operands[1], DImode, 0);
3287 break;
3289 CASE_CONST_SCALAR_INT:
3290 case CONST_DOUBLE:
3291 gcc_assert (operands[1] == CONST0_RTX (mode));
3292 operands[2] = operands[3] = const0_rtx;
3293 break;
3295 default:
3296 gcc_unreachable ();
3299 switch (GET_CODE (operands[0]))
3301 case REG:
3302 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3303 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3304 break;
3306 case MEM:
3307 operands[1] = adjust_address (operands[0], DImode, 8);
3308 operands[0] = adjust_address (operands[0], DImode, 0);
3309 break;
3311 default:
3312 gcc_unreachable ();
3315 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3317 std::swap (operands[0], operands[1]);
3318 std::swap (operands[2], operands[3]);
3322 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3323 op2 is a register containing the sign bit, operation is the
3324 logical operation to be performed. */
3326 void
3327 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3329 rtx high_bit = operands[2];
3330 rtx scratch;
3331 int move;
3333 alpha_split_tmode_pair (operands, TFmode, false);
3335 /* Detect three flavors of operand overlap. */
3336 move = 1;
3337 if (rtx_equal_p (operands[0], operands[2]))
3338 move = 0;
3339 else if (rtx_equal_p (operands[1], operands[2]))
3341 if (rtx_equal_p (operands[0], high_bit))
3342 move = 2;
3343 else
3344 move = -1;
3347 if (move < 0)
3348 emit_move_insn (operands[0], operands[2]);
3350 /* ??? If the destination overlaps both source tf and high_bit, then
3351 assume source tf is dead in its entirety and use the other half
3352 for a scratch register. Otherwise "scratch" is just the proper
3353 destination register. */
3354 scratch = operands[move < 2 ? 1 : 3];
3356 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3358 if (move > 0)
3360 emit_move_insn (operands[0], operands[2]);
3361 if (move > 1)
3362 emit_move_insn (operands[1], scratch);
3366 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3367 unaligned data:
3369 unsigned: signed:
3370 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3371 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3372 lda r3,X(r11) lda r3,X+2(r11)
3373 extwl r1,r3,r1 extql r1,r3,r1
3374 extwh r2,r3,r2 extqh r2,r3,r2
3375 or r1,r2,r1 or r1,r2,r1
3376 sra r1,48,r1
3378 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3379 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3380 lda r3,X(r11) lda r3,X(r11)
3381 extll r1,r3,r1 extll r1,r3,r1
3382 extlh r2,r3,r2 extlh r2,r3,r2
3383 or r1,r2,r1 addl r1,r2,r1
3385 quad: ldq_u r1,X(r11)
3386 ldq_u r2,X+7(r11)
3387 lda r3,X(r11)
3388 extql r1,r3,r1
3389 extqh r2,r3,r2
3390 or r1,r2,r1
3393 void
3394 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3395 HOST_WIDE_INT ofs, int sign)
3397 rtx meml, memh, addr, extl, exth, tmp, mema;
3398 machine_mode mode;
3400 if (TARGET_BWX && size == 2)
3402 meml = adjust_address (mem, QImode, ofs);
3403 memh = adjust_address (mem, QImode, ofs+1);
3404 extl = gen_reg_rtx (DImode);
3405 exth = gen_reg_rtx (DImode);
3406 emit_insn (gen_zero_extendqidi2 (extl, meml));
3407 emit_insn (gen_zero_extendqidi2 (exth, memh));
3408 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3409 NULL, 1, OPTAB_LIB_WIDEN);
3410 addr = expand_simple_binop (DImode, IOR, extl, exth,
3411 NULL, 1, OPTAB_LIB_WIDEN);
3413 if (sign && GET_MODE (tgt) != HImode)
3415 addr = gen_lowpart (HImode, addr);
3416 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3418 else
3420 if (GET_MODE (tgt) != DImode)
3421 addr = gen_lowpart (GET_MODE (tgt), addr);
3422 emit_move_insn (tgt, addr);
3424 return;
3427 meml = gen_reg_rtx (DImode);
3428 memh = gen_reg_rtx (DImode);
3429 addr = gen_reg_rtx (DImode);
3430 extl = gen_reg_rtx (DImode);
3431 exth = gen_reg_rtx (DImode);
3433 mema = XEXP (mem, 0);
3434 if (GET_CODE (mema) == LO_SUM)
3435 mema = force_reg (Pmode, mema);
3437 /* AND addresses cannot be in any alias set, since they may implicitly
3438 alias surrounding code. Ideally we'd have some alias set that
3439 covered all types except those with alignment 8 or higher. */
3441 tmp = change_address (mem, DImode,
3442 gen_rtx_AND (DImode,
3443 plus_constant (DImode, mema, ofs),
3444 GEN_INT (-8)));
3445 set_mem_alias_set (tmp, 0);
3446 emit_move_insn (meml, tmp);
3448 tmp = change_address (mem, DImode,
3449 gen_rtx_AND (DImode,
3450 plus_constant (DImode, mema,
3451 ofs + size - 1),
3452 GEN_INT (-8)));
3453 set_mem_alias_set (tmp, 0);
3454 emit_move_insn (memh, tmp);
3456 if (sign && size == 2)
3458 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3460 emit_insn (gen_extql (extl, meml, addr));
3461 emit_insn (gen_extqh (exth, memh, addr));
3463 /* We must use tgt here for the target. Alpha-vms port fails if we use
3464 addr for the target, because addr is marked as a pointer and combine
3465 knows that pointers are always sign-extended 32-bit values. */
3466 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3467 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3468 addr, 1, OPTAB_WIDEN);
3470 else
3472 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3473 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3474 switch ((int) size)
3476 case 2:
3477 emit_insn (gen_extwh (exth, memh, addr));
3478 mode = HImode;
3479 break;
3480 case 4:
3481 emit_insn (gen_extlh (exth, memh, addr));
3482 mode = SImode;
3483 break;
3484 case 8:
3485 emit_insn (gen_extqh (exth, memh, addr));
3486 mode = DImode;
3487 break;
3488 default:
3489 gcc_unreachable ();
3492 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3493 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3494 sign, OPTAB_WIDEN);
3497 if (addr != tgt)
3498 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3501 /* Similarly, use ins and msk instructions to perform unaligned stores. */
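/* The store sequence mirrors the loads above; for a quadword at an
   unaligned X(r11) it is roughly

	ldq_u	r2,X+7(r11)
	ldq_u	r1,X(r11)
	lda	r3,X(r11)
	insqh	r4,r3,r6
	insql	r4,r3,r5
	mskqh	r2,r3,r2
	mskql	r1,r3,r1
	or	r6,r2,r2
	or	r5,r1,r1
	stq_u	r2,X+7(r11)
	stq_u	r1,X(r11)

   (Illustrative ordering, with r4 holding the source; the high half is
   stored first so the aligned degenerate case still ends with the
   correct bytes.)  */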
3503 void
3504 alpha_expand_unaligned_store (rtx dst, rtx src,
3505 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3507 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3509 if (TARGET_BWX && size == 2)
3511 if (src != const0_rtx)
3513 dstl = gen_lowpart (QImode, src);
3514 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3515 NULL, 1, OPTAB_LIB_WIDEN);
3516 dsth = gen_lowpart (QImode, dsth);
3518 else
3519 dstl = dsth = const0_rtx;
3521 meml = adjust_address (dst, QImode, ofs);
3522 memh = adjust_address (dst, QImode, ofs+1);
3524 emit_move_insn (meml, dstl);
3525 emit_move_insn (memh, dsth);
3526 return;
3529 dstl = gen_reg_rtx (DImode);
3530 dsth = gen_reg_rtx (DImode);
3531 insl = gen_reg_rtx (DImode);
3532 insh = gen_reg_rtx (DImode);
3534 dsta = XEXP (dst, 0);
3535 if (GET_CODE (dsta) == LO_SUM)
3536 dsta = force_reg (Pmode, dsta);
3538 /* AND addresses cannot be in any alias set, since they may implicitly
3539 alias surrounding code. Ideally we'd have some alias set that
3540 covered all types except those with alignment 8 or higher. */
3542 meml = change_address (dst, DImode,
3543 gen_rtx_AND (DImode,
3544 plus_constant (DImode, dsta, ofs),
3545 GEN_INT (-8)));
3546 set_mem_alias_set (meml, 0);
3548 memh = change_address (dst, DImode,
3549 gen_rtx_AND (DImode,
3550 plus_constant (DImode, dsta,
3551 ofs + size - 1),
3552 GEN_INT (-8)));
3553 set_mem_alias_set (memh, 0);
3555 emit_move_insn (dsth, memh);
3556 emit_move_insn (dstl, meml);
3558 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3560 if (src != CONST0_RTX (GET_MODE (src)))
3562 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3563 GEN_INT (size*8), addr));
3565 switch ((int) size)
3567 case 2:
3568 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3569 break;
3570 case 4:
3571 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3572 break;
3573 case 8:
3574 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3575 break;
3576 default:
3577 gcc_unreachable ();
3581 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3583 switch ((int) size)
3585 case 2:
3586 emit_insn (gen_mskwl (dstl, dstl, addr));
3587 break;
3588 case 4:
3589 emit_insn (gen_mskll (dstl, dstl, addr));
3590 break;
3591 case 8:
3592 emit_insn (gen_mskql (dstl, dstl, addr));
3593 break;
3594 default:
3595 gcc_unreachable ();
3598 if (src != CONST0_RTX (GET_MODE (src)))
3600 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3601 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3604 /* Must store high before low for degenerate case of aligned. */
3605 emit_move_insn (memh, dsth);
3606 emit_move_insn (meml, dstl);
3609 /* The block move code tries to maximize speed by separating loads and
3610 stores at the expense of register pressure: we load all of the data
3611 before we store it back out. There are two secondary effects worth
3612 mentioning: this speeds copying to/from aligned and unaligned
3613 buffers, and it makes the code significantly easier to write. */
3615 #define MAX_MOVE_WORDS 8
3617 /* Load an integral number of consecutive unaligned quadwords. */
3619 static void
3620 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3621 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3623 rtx const im8 = GEN_INT (-8);
3624 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3625 rtx sreg, areg, tmp, smema;
3626 HOST_WIDE_INT i;
3628 smema = XEXP (smem, 0);
3629 if (GET_CODE (smema) == LO_SUM)
3630 smema = force_reg (Pmode, smema);
3632 /* Generate all the tmp registers we need. */
3633 for (i = 0; i < words; ++i)
3635 data_regs[i] = out_regs[i];
3636 ext_tmps[i] = gen_reg_rtx (DImode);
3638 data_regs[words] = gen_reg_rtx (DImode);
3640 if (ofs != 0)
3641 smem = adjust_address (smem, GET_MODE (smem), ofs);
3643 /* Load up all of the source data. */
3644 for (i = 0; i < words; ++i)
3646 tmp = change_address (smem, DImode,
3647 gen_rtx_AND (DImode,
3648 plus_constant (DImode, smema, 8*i),
3649 im8));
3650 set_mem_alias_set (tmp, 0);
3651 emit_move_insn (data_regs[i], tmp);
3654 tmp = change_address (smem, DImode,
3655 gen_rtx_AND (DImode,
3656 plus_constant (DImode, smema,
3657 8*words - 1),
3658 im8));
3659 set_mem_alias_set (tmp, 0);
3660 emit_move_insn (data_regs[words], tmp);
3662 /* Extract the half-word fragments. Unfortunately DEC decided to make
3663 extxh with offset zero a noop instead of zeroing the register, so
3664 we must take care of that edge condition ourselves with cmov. */
3666 sreg = copy_addr_to_reg (smema);
3667 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3668 1, OPTAB_WIDEN);
3669 for (i = 0; i < words; ++i)
3671 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3672 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3673 emit_insn (gen_rtx_SET (ext_tmps[i],
3674 gen_rtx_IF_THEN_ELSE (DImode,
3675 gen_rtx_EQ (DImode, areg,
3676 const0_rtx),
3677 const0_rtx, ext_tmps[i])));
3680 /* Merge the half-words into whole words. */
3681 for (i = 0; i < words; ++i)
3683 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3684 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3688 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3689 may be NULL to store zeros. */
3691 static void
3692 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3693 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3695 rtx const im8 = GEN_INT (-8);
3696 rtx ins_tmps[MAX_MOVE_WORDS];
3697 rtx st_tmp_1, st_tmp_2, dreg;
3698 rtx st_addr_1, st_addr_2, dmema;
3699 HOST_WIDE_INT i;
3701 dmema = XEXP (dmem, 0);
3702 if (GET_CODE (dmema) == LO_SUM)
3703 dmema = force_reg (Pmode, dmema);
3705 /* Generate all the tmp registers we need. */
3706 if (data_regs != NULL)
3707 for (i = 0; i < words; ++i)
3708 ins_tmps[i] = gen_reg_rtx(DImode);
3709 st_tmp_1 = gen_reg_rtx(DImode);
3710 st_tmp_2 = gen_reg_rtx(DImode);
3712 if (ofs != 0)
3713 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3715 st_addr_2 = change_address (dmem, DImode,
3716 gen_rtx_AND (DImode,
3717 plus_constant (DImode, dmema,
3718 words*8 - 1),
3719 im8));
3720 set_mem_alias_set (st_addr_2, 0);
3722 st_addr_1 = change_address (dmem, DImode,
3723 gen_rtx_AND (DImode, dmema, im8));
3724 set_mem_alias_set (st_addr_1, 0);
3726 /* Load up the destination end bits. */
3727 emit_move_insn (st_tmp_2, st_addr_2);
3728 emit_move_insn (st_tmp_1, st_addr_1);
3730 /* Shift the input data into place. */
3731 dreg = copy_addr_to_reg (dmema);
3732 if (data_regs != NULL)
3734 for (i = words-1; i >= 0; --i)
3736 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3737 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3739 for (i = words-1; i > 0; --i)
3741 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3742 ins_tmps[i-1], ins_tmps[i-1], 1,
3743 OPTAB_WIDEN);
3747 /* Split and merge the ends with the destination data. */
3748 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3749 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3751 if (data_regs != NULL)
3753 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3754 st_tmp_2, 1, OPTAB_WIDEN);
3755 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3756 st_tmp_1, 1, OPTAB_WIDEN);
3759 /* Store it all. */
3760 emit_move_insn (st_addr_2, st_tmp_2);
3761 for (i = words-1; i > 0; --i)
3763 rtx tmp = change_address (dmem, DImode,
3764 gen_rtx_AND (DImode,
3765 plus_constant (DImode,
3766 dmema, i*8),
3767 im8));
3768 set_mem_alias_set (tmp, 0);
3769 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3771 emit_move_insn (st_addr_1, st_tmp_1);
3775 /* Expand string/block move operations.
3777 operands[0] is the pointer to the destination.
3778 operands[1] is the pointer to the source.
3779 operands[2] is the number of bytes to move.
3780 operands[3] is the alignment. */
3783 alpha_expand_block_move (rtx operands[])
3785 rtx bytes_rtx = operands[2];
3786 rtx align_rtx = operands[3];
3787 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3788 HOST_WIDE_INT bytes = orig_bytes;
3789 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3790 HOST_WIDE_INT dst_align = src_align;
3791 rtx orig_src = operands[1];
3792 rtx orig_dst = operands[0];
3793 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3794 rtx tmp;
3795 unsigned int i, words, ofs, nregs = 0;
3797 if (orig_bytes <= 0)
3798 return 1;
3799 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3800 return 0;
3802 /* Look for additional alignment information from recorded register info. */
3804 tmp = XEXP (orig_src, 0);
3805 if (REG_P (tmp))
3806 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3807 else if (GET_CODE (tmp) == PLUS
3808 && REG_P (XEXP (tmp, 0))
3809 && CONST_INT_P (XEXP (tmp, 1)))
3811 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3812 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3814 if (a > src_align)
3816 if (a >= 64 && c % 8 == 0)
3817 src_align = 64;
3818 else if (a >= 32 && c % 4 == 0)
3819 src_align = 32;
3820 else if (a >= 16 && c % 2 == 0)
3821 src_align = 16;
3825 tmp = XEXP (orig_dst, 0);
3826 if (REG_P (tmp))
3827 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3828 else if (GET_CODE (tmp) == PLUS
3829 && REG_P (XEXP (tmp, 0))
3830 && CONST_INT_P (XEXP (tmp, 1)))
3832 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3833 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3835 if (a > dst_align)
3837 if (a >= 64 && c % 8 == 0)
3838 dst_align = 64;
3839 else if (a >= 32 && c % 4 == 0)
3840 dst_align = 32;
3841 else if (a >= 16 && c % 2 == 0)
3842 dst_align = 16;
3846 ofs = 0;
3847 if (src_align >= 64 && bytes >= 8)
3849 words = bytes / 8;
3851 for (i = 0; i < words; ++i)
3852 data_regs[nregs + i] = gen_reg_rtx (DImode);
3854 for (i = 0; i < words; ++i)
3855 emit_move_insn (data_regs[nregs + i],
3856 adjust_address (orig_src, DImode, ofs + i * 8));
3858 nregs += words;
3859 bytes -= words * 8;
3860 ofs += words * 8;
3863 if (src_align >= 32 && bytes >= 4)
3865 words = bytes / 4;
3867 for (i = 0; i < words; ++i)
3868 data_regs[nregs + i] = gen_reg_rtx (SImode);
3870 for (i = 0; i < words; ++i)
3871 emit_move_insn (data_regs[nregs + i],
3872 adjust_address (orig_src, SImode, ofs + i * 4));
3874 nregs += words;
3875 bytes -= words * 4;
3876 ofs += words * 4;
3879 if (bytes >= 8)
3881 words = bytes / 8;
3883 for (i = 0; i < words+1; ++i)
3884 data_regs[nregs + i] = gen_reg_rtx (DImode);
3886 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3887 words, ofs);
3889 nregs += words;
3890 bytes -= words * 8;
3891 ofs += words * 8;
3894 if (! TARGET_BWX && bytes >= 4)
3896 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3897 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3898 bytes -= 4;
3899 ofs += 4;
3902 if (bytes >= 2)
3904 if (src_align >= 16)
3906 do {
3907 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3908 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3909 bytes -= 2;
3910 ofs += 2;
3911 } while (bytes >= 2);
3913 else if (! TARGET_BWX)
3915 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3916 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3917 bytes -= 2;
3918 ofs += 2;
3922 while (bytes > 0)
3924 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3925 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3926 bytes -= 1;
3927 ofs += 1;
3930 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3932 /* Now save it back out again. */
3934 i = 0, ofs = 0;
3936 /* Write out the data in whatever chunks reading the source allowed. */
3937 if (dst_align >= 64)
3939 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3941 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3942 data_regs[i]);
3943 ofs += 8;
3944 i++;
3948 if (dst_align >= 32)
3950 /* If the source has remaining DImode regs, write them out in
3951 two pieces. */
3952 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3954 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3955 NULL_RTX, 1, OPTAB_WIDEN);
3957 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3958 gen_lowpart (SImode, data_regs[i]));
3959 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3960 gen_lowpart (SImode, tmp));
3961 ofs += 8;
3962 i++;
3965 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3967 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3968 data_regs[i]);
3969 ofs += 4;
3970 i++;
3974 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3976 /* Write out a remaining block of words using unaligned methods. */
3978 for (words = 1; i + words < nregs; words++)
3979 if (GET_MODE (data_regs[i + words]) != DImode)
3980 break;
3982 if (words == 1)
3983 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3984 else
3985 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3986 words, ofs);
3988 i += words;
3989 ofs += words * 8;
3992 /* Due to the above, this won't be aligned. */
3993 /* ??? If we have more than one of these, consider constructing full
3994 words in registers and using alpha_expand_unaligned_store_words. */
3995 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3997 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3998 ofs += 4;
3999 i++;
4002 if (dst_align >= 16)
4003 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4005 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4006 i++;
4007 ofs += 2;
4009 else
4010 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4012 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4013 i++;
4014 ofs += 2;
4017 /* The remainder must be byte copies. */
4018 while (i < nregs)
4020 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4021 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4022 i++;
4023 ofs += 1;
4026 return 1;
4030 alpha_expand_block_clear (rtx operands[])
4032 rtx bytes_rtx = operands[1];
4033 rtx align_rtx = operands[3];
4034 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4035 HOST_WIDE_INT bytes = orig_bytes;
4036 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4037 HOST_WIDE_INT alignofs = 0;
4038 rtx orig_dst = operands[0];
4039 rtx tmp;
4040 int i, words, ofs = 0;
4042 if (orig_bytes <= 0)
4043 return 1;
4044 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4045 return 0;
4047 /* Look for stricter alignment. */
4048 tmp = XEXP (orig_dst, 0);
4049 if (REG_P (tmp))
4050 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4051 else if (GET_CODE (tmp) == PLUS
4052 && REG_P (XEXP (tmp, 0))
4053 && CONST_INT_P (XEXP (tmp, 1)))
4055 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4056 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4058 if (a > align)
4060 if (a >= 64)
4061 align = a, alignofs = 8 - c % 8;
4062 else if (a >= 32)
4063 align = a, alignofs = 4 - c % 4;
4064 else if (a >= 16)
4065 align = a, alignofs = 2 - c % 2;
4069 /* Handle an unaligned prefix first. */
4071 if (alignofs > 0)
4073 /* Given that alignofs is bounded by align, the only time BWX could
4074 generate three stores is for a 7 byte fill. Prefer two individual
4075 stores over a load/mask/store sequence. */
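/* Illustrative case: with a 64-bit-aligned quadword, no BWX, and
   alignofs == 3 (the destination starts 5 bytes into the quadword),
   inv_alignofs is 5 and the mask is 0x000000ffffffffff; the ldq/and/stq
   emitted below keeps the first five bytes of the quadword and clears
   the three prefix bytes being filled.  */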
4076 if ((!TARGET_BWX || alignofs == 7)
4077 && align >= 32
4078 && !(alignofs == 4 && bytes >= 4))
4080 machine_mode mode = (align >= 64 ? DImode : SImode);
4081 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4082 rtx mem, tmp;
4083 HOST_WIDE_INT mask;
4085 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4086 set_mem_alias_set (mem, 0);
4088 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4089 if (bytes < alignofs)
4091 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4092 ofs += bytes;
4093 bytes = 0;
4095 else
4097 bytes -= alignofs;
4098 ofs += alignofs;
4100 alignofs = 0;
4102 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4103 NULL_RTX, 1, OPTAB_WIDEN);
4105 emit_move_insn (mem, tmp);
4108 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4110 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4111 bytes -= 1;
4112 ofs += 1;
4113 alignofs -= 1;
4115 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4117 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4118 bytes -= 2;
4119 ofs += 2;
4120 alignofs -= 2;
4122 if (alignofs == 4 && bytes >= 4)
4124 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4125 bytes -= 4;
4126 ofs += 4;
4127 alignofs = 0;
4130 /* If we've not used the extra lead alignment information by now,
4131 we won't be able to. Downgrade align to match what's left over. */
4132 if (alignofs > 0)
4134 alignofs = alignofs & -alignofs;
4135 align = MIN (align, alignofs * BITS_PER_UNIT);
4139 /* Handle a block of contiguous long-words. */
4141 if (align >= 64 && bytes >= 8)
4143 words = bytes / 8;
4145 for (i = 0; i < words; ++i)
4146 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4147 const0_rtx);
4149 bytes -= words * 8;
4150 ofs += words * 8;
4153 /* If the block is large and appropriately aligned, emit a single
4154 store followed by a sequence of stq_u insns. */
4156 if (align >= 32 && bytes > 16)
4158 rtx orig_dsta;
4160 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4161 bytes -= 4;
4162 ofs += 4;
4164 orig_dsta = XEXP (orig_dst, 0);
4165 if (GET_CODE (orig_dsta) == LO_SUM)
4166 orig_dsta = force_reg (Pmode, orig_dsta);
4168 words = bytes / 8;
4169 for (i = 0; i < words; ++i)
4171 rtx mem
4172 = change_address (orig_dst, DImode,
4173 gen_rtx_AND (DImode,
4174 plus_constant (DImode, orig_dsta,
4175 ofs + i*8),
4176 GEN_INT (-8)));
4177 set_mem_alias_set (mem, 0);
4178 emit_move_insn (mem, const0_rtx);
4181 /* Depending on the alignment, the first stq_u may have overlapped
4182 with the initial stl, which means that the last stq_u didn't
4183 write as much as it would appear. Leave those questionable bytes
4184 unaccounted for. */
4185 bytes -= words * 8 - 4;
4186 ofs += words * 8 - 4;
4189 /* Handle a smaller block of aligned words. */
4191 if ((align >= 64 && bytes == 4)
4192 || (align == 32 && bytes >= 4))
4194 words = bytes / 4;
4196 for (i = 0; i < words; ++i)
4197 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4198 const0_rtx);
4200 bytes -= words * 4;
4201 ofs += words * 4;
4204 /* An unaligned block uses stq_u stores for as many as possible. */
4206 if (bytes >= 8)
4208 words = bytes / 8;
4210 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4212 bytes -= words * 8;
4213 ofs += words * 8;
4216 /* Next clean up any trailing pieces. */
4218 /* Count the number of bits in BYTES for which aligned stores could
4219 be emitted. */
4220 words = 0;
4221 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4222 if (bytes & i)
4223 words += 1;
4225 /* If we have appropriate alignment (and it wouldn't take too many
4226 instructions otherwise), mask out the bytes we need. */
4227 if (TARGET_BWX ? words > 2 : bytes > 0)
4229 if (align >= 64)
4231 rtx mem, tmp;
4232 HOST_WIDE_INT mask;
4234 mem = adjust_address (orig_dst, DImode, ofs);
4235 set_mem_alias_set (mem, 0);
4237 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4239 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4240 NULL_RTX, 1, OPTAB_WIDEN);
4242 emit_move_insn (mem, tmp);
4243 return 1;
4245 else if (align >= 32 && bytes < 4)
4247 rtx mem, tmp;
4248 HOST_WIDE_INT mask;
4250 mem = adjust_address (orig_dst, SImode, ofs);
4251 set_mem_alias_set (mem, 0);
4253 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4255 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4256 NULL_RTX, 1, OPTAB_WIDEN);
4258 emit_move_insn (mem, tmp);
4259 return 1;
4263 if (!TARGET_BWX && bytes >= 4)
4265 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4266 bytes -= 4;
4267 ofs += 4;
4270 if (bytes >= 2)
4272 if (align >= 16)
4274 do {
4275 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4276 const0_rtx);
4277 bytes -= 2;
4278 ofs += 2;
4279 } while (bytes >= 2);
4281 else if (! TARGET_BWX)
4283 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4284 bytes -= 2;
4285 ofs += 2;
4289 while (bytes > 0)
4291 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4292 bytes -= 1;
4293 ofs += 1;
4296 return 1;
4299 /* Returns a mask so that zap(x, value) == x & mask. */
4302 alpha_expand_zap_mask (HOST_WIDE_INT value)
4304 rtx result;
4305 int i;
4306 HOST_WIDE_INT mask = 0;
4308 for (i = 7; i >= 0; --i)
4310 mask <<= 8;
4311 if (!((value >> i) & 1))
4312 mask |= 0xff;
4315 result = gen_int_mode (mask, DImode);
4316 return result;
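/* For example: a zap byte-select value of 0x0f marks bytes 0..3 for
   clearing, so the equivalent AND mask keeps only bytes 4..7:
     alpha_expand_zap_mask (0x0f)  ==  gen_int_mode (0xffffffff00000000, DImode)
   since the loop ORs 0xff into the mask for each byte whose select bit
   is clear.  */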
4319 void
4320 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4321 machine_mode mode,
4322 rtx op0, rtx op1, rtx op2)
4324 op0 = gen_lowpart (mode, op0);
4326 if (op1 == const0_rtx)
4327 op1 = CONST0_RTX (mode);
4328 else
4329 op1 = gen_lowpart (mode, op1);
4331 if (op2 == const0_rtx)
4332 op2 = CONST0_RTX (mode);
4333 else
4334 op2 = gen_lowpart (mode, op2);
4336 emit_insn ((*gen) (op0, op1, op2));
4339 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4340 COND is true. Mark the jump as unlikely to be taken. */
4342 static void
4343 emit_unlikely_jump (rtx cond, rtx label)
4345 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4346 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4347 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4350 /* Subroutines of the atomic operation splitters. Emit barriers
4351 as needed for the memory MODEL. */
4353 static void
4354 alpha_pre_atomic_barrier (enum memmodel model)
4356 if (need_atomic_barrier_p (model, true))
4357 emit_insn (gen_memory_barrier ());
4360 static void
4361 alpha_post_atomic_barrier (enum memmodel model)
4363 if (need_atomic_barrier_p (model, false))
4364 emit_insn (gen_memory_barrier ());
4367 /* A subroutine of the atomic operation splitters. Emit an insxl
4368 instruction in MODE. */
4370 static rtx
4371 emit_insxl (machine_mode mode, rtx op1, rtx op2)
4373 rtx ret = gen_reg_rtx (DImode);
4374 rtx (*fn) (rtx, rtx, rtx);
4376 switch (mode)
4378 case E_QImode:
4379 fn = gen_insbl;
4380 break;
4381 case E_HImode:
4382 fn = gen_inswl;
4383 break;
4384 case E_SImode:
4385 fn = gen_insll;
4386 break;
4387 case E_DImode:
4388 fn = gen_insql;
4389 break;
4390 default:
4391 gcc_unreachable ();
4394 op1 = force_reg (mode, op1);
4395 emit_insn (fn (ret, op1, op2));
4397 return ret;
4400 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4401 to perform. MEM is the memory on which to operate. VAL is the second
4402 operand of the binary operator. BEFORE and AFTER are optional locations to
4403 return the value of MEM either before or after the operation. SCRATCH is
4404 a scratch register. */
4406 void
4407 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4408 rtx after, rtx scratch, enum memmodel model)
4410 machine_mode mode = GET_MODE (mem);
4411 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4413 alpha_pre_atomic_barrier (model);
4415 label = gen_label_rtx ();
4416 emit_label (label);
4417 label = gen_rtx_LABEL_REF (DImode, label);
4419 if (before == NULL)
4420 before = scratch;
4421 emit_insn (gen_load_locked (mode, before, mem));
4423 if (code == NOT)
4425 x = gen_rtx_AND (mode, before, val);
4426 emit_insn (gen_rtx_SET (val, x));
4428 x = gen_rtx_NOT (mode, val);
4430 else
4431 x = gen_rtx_fmt_ee (code, mode, before, val);
4432 if (after)
4433 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4434 emit_insn (gen_rtx_SET (scratch, x));
4436 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4438 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4439 emit_unlikely_jump (x, label);
4441 alpha_post_atomic_barrier (model);
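/* Illustrative sketch of the code produced by the split above for an
   aligned SImode fetch-and-add with no BEFORE/AFTER result requested
   (register names are placeholders; the leading and trailing mb depend
   on MODEL):
     1:  ldl_l   $scratch, 0($mem)
         addl    $scratch, $val, $scratch
         stl_c   $scratch, 0($mem)
         beq     $scratch, 1b          # retry if the store-conditional failed
*/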
4444 /* Expand a compare and swap operation. */
4446 void
4447 alpha_split_compare_and_swap (rtx operands[])
4449 rtx cond, retval, mem, oldval, newval;
4450 bool is_weak;
4451 enum memmodel mod_s, mod_f;
4452 machine_mode mode;
4453 rtx label1, label2, x;
4455 cond = operands[0];
4456 retval = operands[1];
4457 mem = operands[2];
4458 oldval = operands[3];
4459 newval = operands[4];
4460 is_weak = (operands[5] != const0_rtx);
4461 mod_s = memmodel_from_int (INTVAL (operands[6]));
4462 mod_f = memmodel_from_int (INTVAL (operands[7]));
4463 mode = GET_MODE (mem);
4465 alpha_pre_atomic_barrier (mod_s);
4467 label1 = NULL_RTX;
4468 if (!is_weak)
4470 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4471 emit_label (XEXP (label1, 0));
4473 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4475 emit_insn (gen_load_locked (mode, retval, mem));
4477 x = gen_lowpart (DImode, retval);
4478 if (oldval == const0_rtx)
4480 emit_move_insn (cond, const0_rtx);
4481 x = gen_rtx_NE (DImode, x, const0_rtx);
4483 else
4485 x = gen_rtx_EQ (DImode, x, oldval);
4486 emit_insn (gen_rtx_SET (cond, x));
4487 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4489 emit_unlikely_jump (x, label2);
4491 emit_move_insn (cond, newval);
4492 emit_insn (gen_store_conditional
4493 (mode, cond, mem, gen_lowpart (mode, cond)));
4495 if (!is_weak)
4497 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4498 emit_unlikely_jump (x, label1);
4501 if (!is_mm_relaxed (mod_f))
4502 emit_label (XEXP (label2, 0));
4504 alpha_post_atomic_barrier (mod_s);
4506 if (is_mm_relaxed (mod_f))
4507 emit_label (XEXP (label2, 0));
4510 void
4511 alpha_expand_compare_and_swap_12 (rtx operands[])
4513 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4514 machine_mode mode;
4515 rtx addr, align, wdst;
4517 cond = operands[0];
4518 dst = operands[1];
4519 mem = operands[2];
4520 oldval = operands[3];
4521 newval = operands[4];
4522 is_weak = operands[5];
4523 mod_s = operands[6];
4524 mod_f = operands[7];
4525 mode = GET_MODE (mem);
4527 /* We forced the address into a register via mem_noofs_operand. */
4528 addr = XEXP (mem, 0);
4529 gcc_assert (register_operand (addr, DImode));
4531 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4532 NULL_RTX, 1, OPTAB_DIRECT);
4534 oldval = convert_modes (DImode, mode, oldval, 1);
4536 if (newval != const0_rtx)
4537 newval = emit_insxl (mode, newval, addr);
4539 wdst = gen_reg_rtx (DImode);
4540 emit_insn (gen_atomic_compare_and_swap_1
4541 (mode, cond, wdst, mem, oldval, newval, align,
4542 is_weak, mod_s, mod_f));
4544 emit_move_insn (dst, gen_lowpart (mode, wdst));
4547 void
4548 alpha_split_compare_and_swap_12 (rtx operands[])
4550 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4551 machine_mode mode;
4552 bool is_weak;
4553 enum memmodel mod_s, mod_f;
4554 rtx label1, label2, mem, addr, width, mask, x;
4556 cond = operands[0];
4557 dest = operands[1];
4558 orig_mem = operands[2];
4559 oldval = operands[3];
4560 newval = operands[4];
4561 align = operands[5];
4562 is_weak = (operands[6] != const0_rtx);
4563 mod_s = memmodel_from_int (INTVAL (operands[7]));
4564 mod_f = memmodel_from_int (INTVAL (operands[8]));
4565 scratch = operands[9];
4566 mode = GET_MODE (orig_mem);
4567 addr = XEXP (orig_mem, 0);
4569 mem = gen_rtx_MEM (DImode, align);
4570 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4571 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4572 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4574 alpha_pre_atomic_barrier (mod_s);
4576 label1 = NULL_RTX;
4577 if (!is_weak)
4579 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4580 emit_label (XEXP (label1, 0));
4582 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4584 emit_insn (gen_load_locked (DImode, scratch, mem));
4586 width = GEN_INT (GET_MODE_BITSIZE (mode));
4587 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4588 emit_insn (gen_extxl (dest, scratch, width, addr));
4590 if (oldval == const0_rtx)
4592 emit_move_insn (cond, const0_rtx);
4593 x = gen_rtx_NE (DImode, dest, const0_rtx);
4595 else
4597 x = gen_rtx_EQ (DImode, dest, oldval);
4598 emit_insn (gen_rtx_SET (cond, x));
4599 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4601 emit_unlikely_jump (x, label2);
4603 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4605 if (newval != const0_rtx)
4606 emit_insn (gen_iordi3 (cond, cond, newval));
4608 emit_insn (gen_store_conditional (DImode, cond, mem, cond));
4610 if (!is_weak)
4612 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4613 emit_unlikely_jump (x, label1);
4616 if (!is_mm_relaxed (mod_f))
4617 emit_label (XEXP (label2, 0));
4619 alpha_post_atomic_barrier (mod_s);
4621 if (is_mm_relaxed (mod_f))
4622 emit_label (XEXP (label2, 0));
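/* Rough shape of the loop split above for a strong QImode compare-and-swap
   (illustrative only; registers are placeholders, and NEWVAL has already
   been shifted into its byte lane by the insbl emitted in
   alpha_expand_compare_and_swap_12; the weak form omits the final retry
   branch):
     1:  ldq_l   scratch, 0(align)     # locked load of the whole quadword
         extbl   scratch, addr, dest   # extract the old byte
         cmpeq   dest, oldval, cond
         beq     cond, 2f              # values differ: fail
         mskbl   scratch, addr, cond   # clear the byte lane
         bis     cond, newval, cond    # merge in the new byte
         stq_c   cond, 0(align)
         beq     cond, 1b              # retry on store-conditional failure
     2:
*/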
4625 /* Expand an atomic exchange operation. */
4627 void
4628 alpha_split_atomic_exchange (rtx operands[])
4630 rtx retval, mem, val, scratch;
4631 enum memmodel model;
4632 machine_mode mode;
4633 rtx label, x, cond;
4635 retval = operands[0];
4636 mem = operands[1];
4637 val = operands[2];
4638 model = (enum memmodel) INTVAL (operands[3]);
4639 scratch = operands[4];
4640 mode = GET_MODE (mem);
4641 cond = gen_lowpart (DImode, scratch);
4643 alpha_pre_atomic_barrier (model);
4645 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4646 emit_label (XEXP (label, 0));
4648 emit_insn (gen_load_locked (mode, retval, mem));
4649 emit_move_insn (scratch, val);
4650 emit_insn (gen_store_conditional (mode, cond, mem, scratch));
4652 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4653 emit_unlikely_jump (x, label);
4655 alpha_post_atomic_barrier (model);
4658 void
4659 alpha_expand_atomic_exchange_12 (rtx operands[])
4661 rtx dst, mem, val, model;
4662 machine_mode mode;
4663 rtx addr, align, wdst;
4665 dst = operands[0];
4666 mem = operands[1];
4667 val = operands[2];
4668 model = operands[3];
4669 mode = GET_MODE (mem);
4671 /* We forced the address into a register via mem_noofs_operand. */
4672 addr = XEXP (mem, 0);
4673 gcc_assert (register_operand (addr, DImode));
4675 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4676 NULL_RTX, 1, OPTAB_DIRECT);
4678 /* Insert val into the correct byte location within the word. */
4679 if (val != const0_rtx)
4680 val = emit_insxl (mode, val, addr);
4682 wdst = gen_reg_rtx (DImode);
4683 emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model));
4685 emit_move_insn (dst, gen_lowpart (mode, wdst));
4688 void
4689 alpha_split_atomic_exchange_12 (rtx operands[])
4691 rtx dest, orig_mem, addr, val, align, scratch;
4692 rtx label, mem, width, mask, x;
4693 machine_mode mode;
4694 enum memmodel model;
4696 dest = operands[0];
4697 orig_mem = operands[1];
4698 val = operands[2];
4699 align = operands[3];
4700 model = (enum memmodel) INTVAL (operands[4]);
4701 scratch = operands[5];
4702 mode = GET_MODE (orig_mem);
4703 addr = XEXP (orig_mem, 0);
4705 mem = gen_rtx_MEM (DImode, align);
4706 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4707 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4708 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4710 alpha_pre_atomic_barrier (model);
4712 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4713 emit_label (XEXP (label, 0));
4715 emit_insn (gen_load_locked (DImode, scratch, mem));
4717 width = GEN_INT (GET_MODE_BITSIZE (mode));
4718 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4719 emit_insn (gen_extxl (dest, scratch, width, addr));
4720 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4721 if (val != const0_rtx)
4722 emit_insn (gen_iordi3 (scratch, scratch, val));
4724 emit_insn (gen_store_conditional (DImode, scratch, mem, scratch));
4726 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4727 emit_unlikely_jump (x, label);
4729 alpha_post_atomic_barrier (model);
4732 /* Adjust the cost of a scheduling dependency. Return the new cost of
4733 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4735 static int
4736 alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4737 unsigned int)
4739 enum attr_type dep_insn_type;
4741 /* If the dependence is an anti-dependence, there is no cost. For an
4742 output dependence, there is sometimes a cost, but it doesn't seem
4743 worth handling those few cases. */
4744 if (dep_type != 0)
4745 return cost;
4747 /* If we can't recognize the insns, we can't really do anything. */
4748 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4749 return cost;
4751 dep_insn_type = get_attr_type (dep_insn);
4753 /* Bring in the user-defined memory latency. */
4754 if (dep_insn_type == TYPE_ILD
4755 || dep_insn_type == TYPE_FLD
4756 || dep_insn_type == TYPE_LDSYM)
4757 cost += alpha_memory_latency-1;
4759 /* Everything else handled in DFA bypasses now. */
4761 return cost;
4764 /* The number of instructions that can be issued per cycle. */
4766 static int
4767 alpha_issue_rate (void)
4769 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4772 /* How many alternative schedules to try. This should be as wide as the
4773 scheduling freedom in the DFA, but no wider. Making this value too
4774 large results in extra work for the scheduler.
4776 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4777 alternative schedules. For EV5, we can choose between E0/E1 and
4778 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4780 static int
4781 alpha_multipass_dfa_lookahead (void)
4783 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4786 /* Machine-specific function data. */
4788 struct GTY(()) alpha_links;
4790 struct GTY(()) machine_function
4792 unsigned HOST_WIDE_INT sa_mask;
4793 HOST_WIDE_INT sa_size;
4794 HOST_WIDE_INT frame_size;
4796 /* For flag_reorder_blocks_and_partition. */
4797 rtx gp_save_rtx;
4799 /* For VMS condition handlers. */
4800 bool uses_condition_handler;
4802 /* Linkage entries. */
4803 hash_map<nofree_string_hash, alpha_links *> *links;
4806 /* How to allocate a 'struct machine_function'. */
4808 static struct machine_function *
4809 alpha_init_machine_status (void)
4811 return ggc_cleared_alloc<machine_function> ();
4814 /* Support for frame based VMS condition handlers. */
4816 /* A VMS condition handler may be established for a function with a call to
4817 __builtin_establish_vms_condition_handler, and cancelled with a call to
4818 __builtin_revert_vms_condition_handler.
4820 The VMS Condition Handling Facility knows about the existence of a handler
4821 from the procedure descriptor .handler field. As with the VMS native compilers,
4822 we store the user-specified handler's address at a fixed location in the
4823 stack frame and point the procedure descriptor at a common wrapper which
4824 fetches the real handler's address and issues an indirect call.
4826 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4828 We force the procedure kind to PT_STACK, and the fixed frame location is
4829 fp+8, just before the register save area. We use the handler_data field in
4830 the procedure descriptor to state the fp offset at which the installed
4831 handler address can be found. */
4833 #define VMS_COND_HANDLER_FP_OFFSET 8
4835 /* Expand code to store the currently installed user VMS condition handler
4836 into TARGET and install HANDLER as the new condition handler. */
4838 void
4839 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4841 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4842 VMS_COND_HANDLER_FP_OFFSET);
4844 rtx handler_slot
4845 = gen_rtx_MEM (DImode, handler_slot_address);
4847 emit_move_insn (target, handler_slot);
4848 emit_move_insn (handler_slot, handler);
4850 /* Notify the start/prologue/epilogue emitters that the condition handler
4851 slot is needed. In addition to reserving the slot space, this will force
4852 the procedure kind to PT_STACK to ensure that the hard_frame_pointer_rtx
4853 use above is correct. */
4854 cfun->machine->uses_condition_handler = true;
4857 /* Expand code to store the current VMS condition handler into TARGET and
4858 nullify it. */
4860 void
4861 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4863 /* We implement this by establishing a null condition handler, with the tiny
4864 side effect of setting uses_condition_handler. This is a little bit
4865 pessimistic if no actual builtin_establish call is ever issued, which is
4866 not a real problem and is expected never to happen anyway. */
4868 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4871 /* Functions to save and restore alpha_return_addr_rtx. */
4873 /* Start the ball rolling with RETURN_ADDR_RTX. */
4876 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4878 if (count != 0)
4879 return const0_rtx;
4881 return get_hard_reg_initial_val (Pmode, REG_RA);
4884 /* Return or create a memory slot containing the gp value for the current
4885 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4888 alpha_gp_save_rtx (void)
4890 rtx_insn *seq;
4891 rtx m = cfun->machine->gp_save_rtx;
4893 if (m == NULL)
4895 start_sequence ();
4897 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4898 m = validize_mem (m);
4899 emit_move_insn (m, pic_offset_table_rtx);
4901 seq = get_insns ();
4902 end_sequence ();
4904 /* We used to simply emit the sequence after entry_of_function.
4905 However, this breaks the CFG if the first instruction in the
4906 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4907 label. Emit the sequence properly on the edge. We are only
4908 invoked from dw2_build_landing_pads and finish_eh_generation
4909 will call commit_edge_insertions thanks to a kludge. */
4910 insert_insn_on_edge (seq,
4911 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4913 cfun->machine->gp_save_rtx = m;
4916 return m;
4919 static void
4920 alpha_instantiate_decls (void)
4922 if (cfun->machine->gp_save_rtx != NULL_RTX)
4923 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4926 static int
4927 alpha_ra_ever_killed (void)
4929 rtx_insn *top;
4931 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4932 return (int)df_regs_ever_live_p (REG_RA);
4934 push_topmost_sequence ();
4935 top = get_insns ();
4936 pop_topmost_sequence ();
4938 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4942 /* Return the trap mode suffix applicable to the current
4943 instruction, or NULL. */
4945 static const char *
4946 get_trap_mode_suffix (void)
4948 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4950 switch (s)
4952 case TRAP_SUFFIX_NONE:
4953 return NULL;
4955 case TRAP_SUFFIX_SU:
4956 if (alpha_fptm >= ALPHA_FPTM_SU)
4957 return "su";
4958 return NULL;
4960 case TRAP_SUFFIX_SUI:
4961 if (alpha_fptm >= ALPHA_FPTM_SUI)
4962 return "sui";
4963 return NULL;
4965 case TRAP_SUFFIX_V_SV:
4966 switch (alpha_fptm)
4968 case ALPHA_FPTM_N:
4969 return NULL;
4970 case ALPHA_FPTM_U:
4971 return "v";
4972 case ALPHA_FPTM_SU:
4973 case ALPHA_FPTM_SUI:
4974 return "sv";
4975 default:
4976 gcc_unreachable ();
4979 case TRAP_SUFFIX_V_SV_SVI:
4980 switch (alpha_fptm)
4982 case ALPHA_FPTM_N:
4983 return NULL;
4984 case ALPHA_FPTM_U:
4985 return "v";
4986 case ALPHA_FPTM_SU:
4987 return "sv";
4988 case ALPHA_FPTM_SUI:
4989 return "svi";
4990 default:
4991 gcc_unreachable ();
4993 break;
4995 case TRAP_SUFFIX_U_SU_SUI:
4996 switch (alpha_fptm)
4998 case ALPHA_FPTM_N:
4999 return NULL;
5000 case ALPHA_FPTM_U:
5001 return "u";
5002 case ALPHA_FPTM_SU:
5003 return "su";
5004 case ALPHA_FPTM_SUI:
5005 return "sui";
5006 default:
5007 gcc_unreachable ();
5009 break;
5011 default:
5012 gcc_unreachable ();
5014 gcc_unreachable ();
5017 /* Return the rounding mode suffix applicable to the current
5018 instruction, or NULL. */
5020 static const char *
5021 get_round_mode_suffix (void)
5023 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5025 switch (s)
5027 case ROUND_SUFFIX_NONE:
5028 return NULL;
5029 case ROUND_SUFFIX_NORMAL:
5030 switch (alpha_fprm)
5032 case ALPHA_FPRM_NORM:
5033 return NULL;
5034 case ALPHA_FPRM_MINF:
5035 return "m";
5036 case ALPHA_FPRM_CHOP:
5037 return "c";
5038 case ALPHA_FPRM_DYN:
5039 return "d";
5040 default:
5041 gcc_unreachable ();
5043 break;
5045 case ROUND_SUFFIX_C:
5046 return "c";
5048 default:
5049 gcc_unreachable ();
5051 gcc_unreachable ();
5054 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5056 static bool
5057 alpha_print_operand_punct_valid_p (unsigned char code)
5059 return (code == '/' || code == ',' || code == '-' || code == '~'
5060 || code == '#' || code == '*' || code == '&');
5063 /* Implement TARGET_PRINT_OPERAND. The alpha-specific
5064 operand codes are documented below. */
5066 static void
5067 alpha_print_operand (FILE *file, rtx x, int code)
5069 int i;
5071 switch (code)
5073 case '~':
5074 /* Print the assembler name of the current function. */
5075 assemble_name (file, alpha_fnname);
5076 break;
5078 case '&':
5079 if (const char *name = get_some_local_dynamic_name ())
5080 assemble_name (file, name);
5081 else
5082 output_operand_lossage ("'%%&' used without any "
5083 "local dynamic TLS references");
5084 break;
5086 case '/':
5087 /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX
5088 attributes are examined to determine what is appropriate. */
5090 const char *trap = get_trap_mode_suffix ();
5091 const char *round = get_round_mode_suffix ();
5093 if (trap || round)
5094 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5095 break;
5098 case ',':
5099 /* Generates single precision suffix for floating point
5100 instructions (s for IEEE, f for VAX). */
5101 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5102 break;
5104 case '-':
5105 /* Generates double precision suffix for floating point
5106 instructions (t for IEEE, g for VAX). */
5107 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5108 break;
5110 case '#':
5111 if (alpha_this_literal_sequence_number == 0)
5112 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5113 fprintf (file, "%d", alpha_this_literal_sequence_number);
5114 break;
5116 case '*':
5117 if (alpha_this_gpdisp_sequence_number == 0)
5118 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5119 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5120 break;
5122 case 'J':
5124 const char *lituse;
5126 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5128 x = XVECEXP (x, 0, 0);
5129 lituse = "lituse_tlsgd";
5131 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5133 x = XVECEXP (x, 0, 0);
5134 lituse = "lituse_tlsldm";
5136 else if (CONST_INT_P (x))
5137 lituse = "lituse_jsr";
5138 else
5140 output_operand_lossage ("invalid %%J value");
5141 break;
5144 if (x != const0_rtx)
5145 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5147 break;
5149 case 'j':
5151 const char *lituse;
5153 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5154 lituse = "lituse_jsrdirect";
5155 #else
5156 lituse = "lituse_jsr";
5157 #endif
5159 gcc_assert (INTVAL (x) != 0);
5160 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5162 break;
5163 case 'r':
5164 /* If this operand is the constant zero, write it as "$31". */
5165 if (REG_P (x))
5166 fprintf (file, "%s", reg_names[REGNO (x)]);
5167 else if (x == CONST0_RTX (GET_MODE (x)))
5168 fprintf (file, "$31");
5169 else
5170 output_operand_lossage ("invalid %%r value");
5171 break;
5173 case 'R':
5174 /* Similar, but for floating-point. */
5175 if (REG_P (x))
5176 fprintf (file, "%s", reg_names[REGNO (x)]);
5177 else if (x == CONST0_RTX (GET_MODE (x)))
5178 fprintf (file, "$f31");
5179 else
5180 output_operand_lossage ("invalid %%R value");
5181 break;
5183 case 'N':
5184 /* Write the 1's complement of a constant. */
5185 if (!CONST_INT_P (x))
5186 output_operand_lossage ("invalid %%N value");
5188 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5189 break;
5191 case 'P':
5192 /* Write 1 << C, for a constant C. */
5193 if (!CONST_INT_P (x))
5194 output_operand_lossage ("invalid %%P value");
5196 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5197 break;
5199 case 'h':
5200 /* Write the high-order 16 bits of a constant, sign-extended. */
5201 if (!CONST_INT_P (x))
5202 output_operand_lossage ("invalid %%h value");
5204 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5205 break;
5207 case 'L':
5208 /* Write the low-order 16 bits of a constant, sign-extended. */
5209 if (!CONST_INT_P (x))
5210 output_operand_lossage ("invalid %%L value");
5212 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5213 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5214 break;
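/* For example: with x == 0x1234abcd the low 16 bits are 0xabcd; bit 15
   is set, so 0x10000 is subtracted and -21555 is printed, i.e. the
   sign-extended 16-bit value an lda displacement can materialize.  */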
5216 case 'm':
5217 /* Write mask for ZAP insn. */
5218 if (CONST_INT_P (x))
5220 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5222 for (i = 0; i < 8; i++, value >>= 8)
5223 if (value & 0xff)
5224 mask |= (1 << i);
5226 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5228 else
5229 output_operand_lossage ("invalid %%m value");
5230 break;
5232 case 'M':
5233 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5234 if (!mode_width_operand (x, VOIDmode))
5235 output_operand_lossage ("invalid %%M value");
5237 fprintf (file, "%s",
5238 (INTVAL (x) == 8 ? "b"
5239 : INTVAL (x) == 16 ? "w"
5240 : INTVAL (x) == 32 ? "l"
5241 : "q"));
5242 break;
5244 case 'U':
5245 /* Similar, except do it from the mask. */
5246 if (CONST_INT_P (x))
5248 HOST_WIDE_INT value = INTVAL (x);
5250 if (value == 0xff)
5252 fputc ('b', file);
5253 break;
5255 if (value == 0xffff)
5257 fputc ('w', file);
5258 break;
5260 if (value == 0xffffffff)
5262 fputc ('l', file);
5263 break;
5265 if (value == -1)
5267 fputc ('q', file);
5268 break;
5272 output_operand_lossage ("invalid %%U value");
5273 break;
5275 case 's':
5276 /* Write the constant value divided by 8. */
5277 if (!CONST_INT_P (x)
5278 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5279 || (INTVAL (x) & 7) != 0)
5280 output_operand_lossage ("invalid %%s value");
5282 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5283 break;
5285 case 'C': case 'D': case 'c': case 'd':
5286 /* Write out comparison name. */
5288 enum rtx_code c = GET_CODE (x);
5290 if (!COMPARISON_P (x))
5291 output_operand_lossage ("invalid %%C value");
5293 else if (code == 'D')
5294 c = reverse_condition (c);
5295 else if (code == 'c')
5296 c = swap_condition (c);
5297 else if (code == 'd')
5298 c = swap_condition (reverse_condition (c));
5300 if (c == LEU)
5301 fprintf (file, "ule");
5302 else if (c == LTU)
5303 fprintf (file, "ult");
5304 else if (c == UNORDERED)
5305 fprintf (file, "un");
5306 else
5307 fprintf (file, "%s", GET_RTX_NAME (c));
5309 break;
5311 case 'E':
5312 /* Write the divide or modulus operator. */
5313 switch (GET_CODE (x))
5315 case DIV:
5316 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5317 break;
5318 case UDIV:
5319 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5320 break;
5321 case MOD:
5322 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5323 break;
5324 case UMOD:
5325 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5326 break;
5327 default:
5328 output_operand_lossage ("invalid %%E value");
5329 break;
5331 break;
5333 case 'A':
5334 /* Write "_u" for unaligned access. */
5335 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5336 fprintf (file, "_u");
5337 break;
5339 case 0:
5340 if (REG_P (x))
5341 fprintf (file, "%s", reg_names[REGNO (x)]);
5342 else if (MEM_P (x))
5343 output_address (GET_MODE (x), XEXP (x, 0));
5344 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5346 switch (XINT (XEXP (x, 0), 1))
5348 case UNSPEC_DTPREL:
5349 case UNSPEC_TPREL:
5350 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5351 break;
5352 default:
5353 output_operand_lossage ("unknown relocation unspec");
5354 break;
5357 else
5358 output_addr_const (file, x);
5359 break;
5361 default:
5362 output_operand_lossage ("invalid %%xn code");
5366 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5368 static void
5369 alpha_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
5371 int basereg = 31;
5372 HOST_WIDE_INT offset = 0;
5374 if (GET_CODE (addr) == AND)
5375 addr = XEXP (addr, 0);
5377 if (GET_CODE (addr) == PLUS
5378 && CONST_INT_P (XEXP (addr, 1)))
5380 offset = INTVAL (XEXP (addr, 1));
5381 addr = XEXP (addr, 0);
5384 if (GET_CODE (addr) == LO_SUM)
5386 const char *reloc16, *reloclo;
5387 rtx op1 = XEXP (addr, 1);
5389 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5391 op1 = XEXP (op1, 0);
5392 switch (XINT (op1, 1))
5394 case UNSPEC_DTPREL:
5395 reloc16 = NULL;
5396 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5397 break;
5398 case UNSPEC_TPREL:
5399 reloc16 = NULL;
5400 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5401 break;
5402 default:
5403 output_operand_lossage ("unknown relocation unspec");
5404 return;
5407 output_addr_const (file, XVECEXP (op1, 0, 0));
5409 else
5411 reloc16 = "gprel";
5412 reloclo = "gprellow";
5413 output_addr_const (file, op1);
5416 if (offset)
5417 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5419 addr = XEXP (addr, 0);
5420 switch (GET_CODE (addr))
5422 case REG:
5423 basereg = REGNO (addr);
5424 break;
5426 case SUBREG:
5427 basereg = subreg_regno (addr);
5428 break;
5430 default:
5431 gcc_unreachable ();
5434 fprintf (file, "($%d)\t\t!%s", basereg,
5435 (basereg == 29 ? reloc16 : reloclo));
5436 return;
5439 switch (GET_CODE (addr))
5441 case REG:
5442 basereg = REGNO (addr);
5443 break;
5445 case SUBREG:
5446 basereg = subreg_regno (addr);
5447 break;
5449 case CONST_INT:
5450 offset = INTVAL (addr);
5451 break;
5453 case SYMBOL_REF:
5454 gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5455 fprintf (file, "%s", XSTR (addr, 0));
5456 return;
5458 case CONST:
5459 gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5460 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5461 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5462 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5463 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5464 INTVAL (XEXP (XEXP (addr, 0), 1)));
5465 return;
5467 default:
5468 output_operand_lossage ("invalid operand address");
5469 return;
5472 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5475 /* Emit RTL insns to initialize the variable parts of a trampoline at
5476 M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx
5477 for the static chain value for the function. */
5479 static void
5480 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5482 rtx fnaddr, mem, word1, word2;
5484 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5486 #ifdef POINTERS_EXTEND_UNSIGNED
5487 fnaddr = convert_memory_address (Pmode, fnaddr);
5488 chain_value = convert_memory_address (Pmode, chain_value);
5489 #endif
5491 if (TARGET_ABI_OPEN_VMS)
5493 const char *fnname;
5494 char *trname;
5496 /* Construct the name of the trampoline entry point. */
5497 fnname = XSTR (fnaddr, 0);
5498 trname = (char *) alloca (strlen (fnname) + 5);
5499 strcpy (trname, fnname);
5500 strcat (trname, "..tr");
5501 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5502 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5504 /* Trampoline (or "bounded") procedure descriptor is constructed from
5505 the function's procedure descriptor with certain fields zeroed IAW
5506 the VMS calling standard. This is stored in the first quadword. */
5507 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5508 word1 = expand_and (DImode, word1,
5509 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5510 NULL);
5512 else
5514 /* These 4 instructions are:
5515 ldq $1,24($27)
5516 ldq $27,16($27)
5517 jmp $31,($27),0
5518 nop
5519 We don't bother setting the HINT field of the jump; the nop
5520 is merely there for padding. */
5521 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5522 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5525 /* Store the first two words, as computed above. */
5526 mem = adjust_address (m_tramp, DImode, 0);
5527 emit_move_insn (mem, word1);
5528 mem = adjust_address (m_tramp, DImode, 8);
5529 emit_move_insn (mem, word2);
5531 /* Store function address and static chain value. */
5532 mem = adjust_address (m_tramp, Pmode, 16);
5533 emit_move_insn (mem, fnaddr);
5534 mem = adjust_address (m_tramp, Pmode, 24);
5535 emit_move_insn (mem, chain_value);
5537 if (TARGET_ABI_OSF)
5539 emit_insn (gen_imb ());
5540 #ifdef HAVE_ENABLE_EXECUTE_STACK
5541 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5542 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
5543 #endif
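/* Sketch of the resulting OSF trampoline layout (offsets follow the
   stores above; the two code quadwords come from word1 and word2):
     +0   ldq $1,24($27)          load the static chain into $1
          ldq $27,16($27)         load the real function address
     +8   jmp $31,($27),0         tail-jump to it; the nop only pads
          nop                     the quadword
     +16  function address
     +24  static chain value
*/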
5547 /* Determine where to put an argument to a function.
5548 Value is zero to push the argument on the stack,
5549 or a hard register in which to store the argument.
5551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5552 the preceding args and about the function being called.
5553 ARG is a description of the argument.
5555 On Alpha the first 6 words of args are normally in registers
5556 and the rest are pushed. */
5558 static rtx
5559 alpha_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5561 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5562 int basereg;
5563 int num_args;
5565 /* Don't get confused and pass small structures in FP registers. */
5566 if (arg.aggregate_type_p ())
5567 basereg = 16;
5568 else
5570 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5571 values here. */
5572 gcc_checking_assert (!COMPLEX_MODE_P (arg.mode));
5574 /* Set up defaults for FP operands passed in FP registers, and
5575 integral operands passed in integer registers. */
5576 if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT)
5577 basereg = 32 + 16;
5578 else
5579 basereg = 16;
5582 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5583 the two platforms, so we can't avoid conditional compilation. */
5584 #if TARGET_ABI_OPEN_VMS
5586 if (arg.end_marker_p ())
5587 return alpha_arg_info_reg_val (*cum);
5589 num_args = cum->num_args;
5590 if (num_args >= 6
5591 || targetm.calls.must_pass_in_stack (arg))
5592 return NULL_RTX;
5594 #elif TARGET_ABI_OSF
5596 if (*cum >= 6)
5597 return NULL_RTX;
5598 num_args = *cum;
5600 if (arg.end_marker_p ())
5601 basereg = 16;
5602 else if (targetm.calls.must_pass_in_stack (arg))
5603 return NULL_RTX;
5605 #else
5606 #error Unhandled ABI
5607 #endif
5609 return gen_rtx_REG (arg.mode, num_args + basereg);
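/* For example, under the OSF ABI an argument list (int a, double b)
   puts A in $16 (slot 0, integer base 16) and B in $f17 (slot 1,
   FP base 32 + 16); once six argument words are used, NULL_RTX is
   returned above and the remaining arguments go on the stack.  */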
5612 /* Update the data in CUM to advance over argument ARG. */
5614 static void
5615 alpha_function_arg_advance (cumulative_args_t cum_v,
5616 const function_arg_info &arg)
5618 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5619 bool onstack = targetm.calls.must_pass_in_stack (arg);
5620 int increment = onstack ? 6 : ALPHA_ARG_SIZE (arg.mode, arg.type);
5622 #if TARGET_ABI_OSF
5623 *cum += increment;
5624 #else
5625 if (!onstack && cum->num_args < 6)
5626 cum->atypes[cum->num_args] = alpha_arg_type (arg.mode);
5627 cum->num_args += increment;
5628 #endif
5631 static int
5632 alpha_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5634 int words = 0;
5635 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5637 #if TARGET_ABI_OPEN_VMS
5638 if (cum->num_args < 6
5639 && 6 < cum->num_args + ALPHA_ARG_SIZE (arg.mode, arg.type))
5640 words = 6 - cum->num_args;
5641 #elif TARGET_ABI_OSF
5642 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (arg.mode, arg.type))
5643 words = 6 - *cum;
5644 #else
5645 #error Unhandled ABI
5646 #endif
5648 return words * UNITS_PER_WORD;
5652 /* Return true if TYPE must be returned in memory, instead of in registers. */
5654 static bool
5655 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5657 machine_mode mode = VOIDmode;
5658 int size;
5660 if (type)
5662 mode = TYPE_MODE (type);
5664 /* All aggregates are returned in memory, except on OpenVMS where
5665 records that fit 64 bits should be returned by immediate value
5666 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5667 if (TARGET_ABI_OPEN_VMS
5668 && TREE_CODE (type) != ARRAY_TYPE
5669 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
5670 return false;
5672 if (AGGREGATE_TYPE_P (type))
5673 return true;
5676 size = GET_MODE_SIZE (mode);
5677 switch (GET_MODE_CLASS (mode))
5679 case MODE_VECTOR_FLOAT:
5680 /* Pass all float vectors in memory, like an aggregate. */
5681 return true;
5683 case MODE_COMPLEX_FLOAT:
5684 /* We judge complex floats on the size of their element,
5685 not the size of the whole type. */
5686 size = GET_MODE_UNIT_SIZE (mode);
5687 break;
5689 case MODE_INT:
5690 case MODE_FLOAT:
5691 case MODE_COMPLEX_INT:
5692 case MODE_VECTOR_INT:
5693 break;
5695 default:
5696 /* ??? We get called on all sorts of random stuff from
5697 aggregate_value_p. We must return something, but it's not
5698 clear what's safe to return. Pretend it's a struct I
5699 guess. */
5700 return true;
5703 /* Otherwise types must fit in one register. */
5704 return size > UNITS_PER_WORD;
5707 /* Return true if ARG should be passed by invisible reference. */
5709 static bool
5710 alpha_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
5712 /* Pass float and _Complex float variable arguments by reference.
5713 This avoids 64-bit store from a FP register to a pretend args save area
5714 and subsequent 32-bit load from the saved location to a FP register.
5716 Note that 32-bit loads and stores to/from a FP register on alpha reorder
5717 bits to form a canonical 64-bit value in the FP register. This fact
5718 invalidates compiler assumption that 32-bit FP value lives in the lower
5719 32-bits of the passed 64-bit FP value, so loading the 32-bit value from
5720 the stored 64-bit location using 32-bit FP load is invalid on alpha.
5722 This introduces a sort of ABI incompatibility, but until _Float32 was
5723 introduced, C-family languages promoted a 32-bit float variable arg to
5724 a 64-bit double, and it was not allowed to pass float as a variable
5725 argument. Passing _Complex float as a variable argument never
5726 worked on alpha. Thus, we have no backward compatibility issues
5727 to worry about, and passing unpromoted _Float32 and _Complex float
5728 as a variable argument will actually work in the future. */
5730 if (arg.mode == SFmode || arg.mode == SCmode)
5731 return !arg.named;
5733 return arg.mode == TFmode || arg.mode == TCmode;
5736 /* Define how to find the value returned by a function. VALTYPE is the
5737 data type of the value (as a tree). If the precise function being
5738 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5739 MODE is set instead of VALTYPE for libcalls.
5741 On Alpha the value is found in $0 for integer functions and
5742 $f0 for floating-point functions. */
5744 static rtx
5745 alpha_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5746 machine_mode mode)
5748 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5749 enum mode_class mclass;
5751 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5753 if (valtype)
5754 mode = TYPE_MODE (valtype);
5756 mclass = GET_MODE_CLASS (mode);
5757 switch (mclass)
5759 case MODE_INT:
5760 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5761 where we have them returning both SImode and DImode. */
5762 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5763 PROMOTE_MODE (mode, dummy, valtype);
5764 /* FALLTHRU */
5766 case MODE_COMPLEX_INT:
5767 case MODE_VECTOR_INT:
5768 regnum = 0;
5769 break;
5771 case MODE_FLOAT:
5772 regnum = 32;
5773 break;
5775 case MODE_COMPLEX_FLOAT:
5777 machine_mode cmode = GET_MODE_INNER (mode);
5779 return gen_rtx_PARALLEL
5780 (VOIDmode,
5781 gen_rtvec (2,
5782 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5783 const0_rtx),
5784 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5785 GEN_INT (GET_MODE_SIZE (cmode)))));
5788 case MODE_RANDOM:
5789 /* We should only reach here for BLKmode on VMS. */
5790 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5791 regnum = 0;
5792 break;
5794 default:
5795 gcc_unreachable ();
5798 return gen_rtx_REG (mode, regnum);
5801 /* Implement TARGET_FUNCTION_VALUE. */
5803 static rtx
5804 alpha_function_value (const_tree valtype, const_tree fn_decl_or_type,
5805 bool /*outgoing*/)
5807 return alpha_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
5810 /* Implement TARGET_LIBCALL_VALUE. */
5812 static rtx
5813 alpha_libcall_value (machine_mode mode, const_rtx /*fun*/)
5815 return alpha_function_value_1 (NULL_TREE, NULL_TREE, mode);
5818 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
5820 On the Alpha, $0, $1 and $f0, $f1 are the only registers thus used.
5822 static bool
5823 alpha_function_value_regno_p (const unsigned int regno)
5825 return (regno == 0 || regno == 1 || regno == 32 || regno == 33);
5828 /* TCmode complex values are passed by invisible reference. We
5829 should not split these values. */
5831 static bool
5832 alpha_split_complex_arg (const_tree type)
5834 return TYPE_MODE (type) != TCmode;
5837 static tree
5838 alpha_build_builtin_va_list (void)
5840 tree base, ofs, space, record, type_decl;
5842 if (TARGET_ABI_OPEN_VMS)
5843 return ptr_type_node;
5845 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5846 type_decl = build_decl (BUILTINS_LOCATION,
5847 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5848 TYPE_STUB_DECL (record) = type_decl;
5849 TYPE_NAME (record) = type_decl;
5851 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5853 /* Dummy field to prevent alignment warnings. */
5854 space = build_decl (BUILTINS_LOCATION,
5855 FIELD_DECL, NULL_TREE, integer_type_node);
5856 DECL_FIELD_CONTEXT (space) = record;
5857 DECL_ARTIFICIAL (space) = 1;
5858 DECL_IGNORED_P (space) = 1;
5860 ofs = build_decl (BUILTINS_LOCATION,
5861 FIELD_DECL, get_identifier ("__offset"),
5862 integer_type_node);
5863 DECL_FIELD_CONTEXT (ofs) = record;
5864 DECL_CHAIN (ofs) = space;
5866 base = build_decl (BUILTINS_LOCATION,
5867 FIELD_DECL, get_identifier ("__base"),
5868 ptr_type_node);
5869 DECL_FIELD_CONTEXT (base) = record;
5870 DECL_CHAIN (base) = ofs;
5872 TYPE_FIELDS (record) = base;
5873 layout_type (record);
5875 va_list_gpr_counter_field = ofs;
5876 return record;
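/* The record built above corresponds roughly to this C declaration
   (sketch; the padding field is unnamed in the actual tree, the name
   below is only illustrative):
     struct __va_list_tag {
       void *__base;        start of the saved-argument area
       int   __offset;      byte offset of the next argument; scalar FP
                            arguments are fetched at __offset - 48
       int   __pad;         dummy field to quiet alignment warnings
     };
*/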
5879 #if TARGET_ABI_OSF
5880 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5881 and constant additions. */
5883 static gimple *
5884 va_list_skip_additions (tree lhs)
5886 gimple *stmt;
5888 for (;;)
5890 enum tree_code code;
5892 stmt = SSA_NAME_DEF_STMT (lhs);
5894 if (gimple_code (stmt) == GIMPLE_PHI)
5895 return stmt;
5897 if (!is_gimple_assign (stmt)
5898 || gimple_assign_lhs (stmt) != lhs)
5899 return NULL;
5901 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5902 return stmt;
5903 code = gimple_assign_rhs_code (stmt);
5904 if (!CONVERT_EXPR_CODE_P (code)
5905 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5906 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5907 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5908 return stmt;
5910 lhs = gimple_assign_rhs1 (stmt);
5914 /* Check if LHS = RHS statement is
5915 LHS = *(ap.__base + ap.__offset + cst)
5917 LHS = *(ap.__base
5918 + ((ap.__offset + cst <= 47)
5919 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5920 If the former, indicate that GPR registers are needed,
5921 if the latter, indicate that FPR registers are needed.
5923 Also look for LHS = (*ptr).field, where ptr is one of the forms
5924 listed above.
5926 On alpha, cfun->va_list_gpr_size is used as the size of the needed
5927 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5928 registers are needed and bit 1 set if FPR registers are needed.
5929 Return true if va_list references should not be scanned for the
5930 current statement. */
5932 static bool
5933 alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
5935 tree base, offset, rhs;
5936 int offset_arg = 1;
5937 gimple *base_stmt;
5939 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5940 != GIMPLE_SINGLE_RHS)
5941 return false;
5943 rhs = gimple_assign_rhs1 (stmt);
5944 while (handled_component_p (rhs))
5945 rhs = TREE_OPERAND (rhs, 0);
5946 if (TREE_CODE (rhs) != MEM_REF
5947 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5948 return false;
5950 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5951 if (stmt == NULL
5952 || !is_gimple_assign (stmt)
5953 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5954 return false;
5956 base = gimple_assign_rhs1 (stmt);
5957 if (TREE_CODE (base) == SSA_NAME)
5959 base_stmt = va_list_skip_additions (base);
5960 if (base_stmt
5961 && is_gimple_assign (base_stmt)
5962 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5963 base = gimple_assign_rhs1 (base_stmt);
5966 if (TREE_CODE (base) != COMPONENT_REF
5967 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5969 base = gimple_assign_rhs2 (stmt);
5970 if (TREE_CODE (base) == SSA_NAME)
5972 base_stmt = va_list_skip_additions (base);
5973 if (base_stmt
5974 && is_gimple_assign (base_stmt)
5975 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5976 base = gimple_assign_rhs1 (base_stmt);
5979 if (TREE_CODE (base) != COMPONENT_REF
5980 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5981 return false;
5983 offset_arg = 0;
5986 base = get_base_address (base);
5987 if (TREE_CODE (base) != VAR_DECL
5988 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
5989 return false;
5991 offset = gimple_op (stmt, 1 + offset_arg);
5992 if (TREE_CODE (offset) == SSA_NAME)
5994 gimple *offset_stmt = va_list_skip_additions (offset);
5996 if (offset_stmt
5997 && gimple_code (offset_stmt) == GIMPLE_PHI)
5999 HOST_WIDE_INT sub;
6000 gimple *arg1_stmt, *arg2_stmt;
6001 tree arg1, arg2;
6002 enum tree_code code1, code2;
6004 if (gimple_phi_num_args (offset_stmt) != 2)
6005 goto escapes;
6007 arg1_stmt
6008 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6009 arg2_stmt
6010 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6011 if (arg1_stmt == NULL
6012 || !is_gimple_assign (arg1_stmt)
6013 || arg2_stmt == NULL
6014 || !is_gimple_assign (arg2_stmt))
6015 goto escapes;
6017 code1 = gimple_assign_rhs_code (arg1_stmt);
6018 code2 = gimple_assign_rhs_code (arg2_stmt);
6019 if (code1 == COMPONENT_REF
6020 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6021 /* Do nothing. */;
6022 else if (code2 == COMPONENT_REF
6023 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6025 std::swap (arg1_stmt, arg2_stmt);
6026 code2 = code1;
6028 else
6029 goto escapes;
6031 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6032 goto escapes;
6034 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6035 if (code2 == MINUS_EXPR)
6036 sub = -sub;
6037 if (sub < -48 || sub > -32)
6038 goto escapes;
6040 arg1 = gimple_assign_rhs1 (arg1_stmt);
6041 arg2 = gimple_assign_rhs1 (arg2_stmt);
6042 if (TREE_CODE (arg2) == SSA_NAME)
6044 arg2_stmt = va_list_skip_additions (arg2);
6045 if (arg2_stmt == NULL
6046 || !is_gimple_assign (arg2_stmt)
6047 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6048 goto escapes;
6049 arg2 = gimple_assign_rhs1 (arg2_stmt);
6051 if (arg1 != arg2)
6052 goto escapes;
6054 if (TREE_CODE (arg1) != COMPONENT_REF
6055 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6056 || get_base_address (arg1) != base)
6057 goto escapes;
6059 /* Need floating point regs. */
6060 cfun->va_list_fpr_size |= 2;
6061 return false;
6063 if (offset_stmt
6064 && is_gimple_assign (offset_stmt)
6065 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6066 offset = gimple_assign_rhs1 (offset_stmt);
6068 if (TREE_CODE (offset) != COMPONENT_REF
6069 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6070 || get_base_address (offset) != base)
6071 goto escapes;
6072 else
6073 /* Need general regs. */
6074 cfun->va_list_fpr_size |= 1;
6075 return false;
6077 escapes:
6078 si->va_list_escapes = true;
6079 return false;
6081 #endif
6083 /* Perform any needed actions needed for a function that is receiving a
6084 variable number of arguments. */
6086 static void
6087 alpha_setup_incoming_varargs (cumulative_args_t pcum,
6088 const function_arg_info &arg,
6089 int *pretend_size, int no_rtl)
6091 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6093 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
6094 /* Skip the current argument. */
6095 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg);
6097 #if TARGET_ABI_OPEN_VMS
6098 /* For VMS, we allocate space for all 6 arg registers plus a count.
6100 However, if NO registers need to be saved, don't allocate any space.
6101 This is not only because we won't need the space, but because AP
6102 includes the current_pretend_args_size and we don't want to mess up
6103 any ap-relative addresses already made. */
6104 if (cum.num_args < 6)
6106 if (!no_rtl)
6108 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6109 emit_insn (gen_arg_home ());
6111 *pretend_size = 7 * UNITS_PER_WORD;
6113 #else
6114 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6115 only push those that are remaining. However, if NO registers need to
6116 be saved, don't allocate any space. This is not only because we won't
6117 need the space, but because AP includes the current_pretend_args_size
6118 and we don't want to mess up any ap-relative addresses already made.
6120 If we are not to use the floating-point registers, save the integer
6121 registers where we would put the floating-point registers. This is
6122 not the most efficient way to implement varargs with just one register
6123 class, but it isn't worth doing anything more efficient in this rare
6124 case. */
6125 if (cum >= 6)
6126 return;
6128 if (!no_rtl)
6130 int count;
6131 alias_set_type set = get_varargs_alias_set ();
6132 rtx tmp;
6134 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6135 if (count > 6 - cum)
6136 count = 6 - cum;
6138 /* Detect whether integer registers or floating-point registers
6139 are needed by the detected va_arg statements. See above for
6140 how these values are computed. Note that the "escape" value
6141 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6142 these bits set. */
6143 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6145 if (cfun->va_list_fpr_size & 1)
6147 tmp = gen_rtx_MEM (BLKmode,
6148 plus_constant (Pmode, virtual_incoming_args_rtx,
6149 (cum + 6) * UNITS_PER_WORD));
6150 MEM_NOTRAP_P (tmp) = 1;
6151 set_mem_alias_set (tmp, set);
6152 move_block_from_reg (16 + cum, tmp, count);
6155 if (cfun->va_list_fpr_size & 2)
6157 tmp = gen_rtx_MEM (BLKmode,
6158 plus_constant (Pmode, virtual_incoming_args_rtx,
6159 cum * UNITS_PER_WORD));
6160 MEM_NOTRAP_P (tmp) = 1;
6161 set_mem_alias_set (tmp, set);
6162 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6165 *pretend_size = 12 * UNITS_PER_WORD;
6166 #endif
6169 static void
6170 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6172 HOST_WIDE_INT offset;
6173 tree t, offset_field, base_field;
6175 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6176 return;
6178 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6179 up by 48, storing fp arg registers in the first 48 bytes, and the
6180 integer arg registers in the next 48 bytes. This is only done,
6181 however, if any integer registers need to be stored.
6183 If no integer registers need be stored, then we must subtract 48
6184 in order to account for the integer arg registers which are counted
6185 in argsize above, but which are not actually stored on the stack.
6186 Must further be careful here about structures straddling the last
6187 integer argument register; that futzes with pretend_args_size,
6188 which changes the meaning of AP. */
6190 if (NUM_ARGS < 6)
6191 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6192 else
6193 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6195 if (TARGET_ABI_OPEN_VMS)
6197 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6198 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6199 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6200 TREE_SIDE_EFFECTS (t) = 1;
6201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6203 else
6205 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6206 offset_field = DECL_CHAIN (base_field);
6208 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6209 valist, base_field, NULL_TREE);
6210 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6211 valist, offset_field, NULL_TREE);
6213 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6214 t = fold_build_pointer_plus_hwi (t, offset);
6215 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6216 TREE_SIDE_EFFECTS (t) = 1;
6217 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6219 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6220 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6221 TREE_SIDE_EFFECTS (t) = 1;
6222 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6226 static tree
6227 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6228 gimple_seq *pre_p)
6230 tree type_size, ptr_type, addend, t, addr;
6231 gimple_seq internal_post;
6233 /* If the type could not be passed in registers, skip the block
6234 reserved for the registers. */
6235 if (must_pass_va_arg_in_stack (type))
6237 t = build_int_cst (TREE_TYPE (offset), 6*8);
6238 gimplify_assign (offset,
6239 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6240 pre_p);
6243 addend = offset;
6244 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6246 if (TREE_CODE (type) == COMPLEX_TYPE)
6248 tree real_part, imag_part, real_temp;
6250 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6251 offset, pre_p);
6253 /* Copy the value into a new temporary, lest the formal temporary
6254 be reused out from under us. */
6255 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6257 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6258 offset, pre_p);
6260 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6262 else if (SCALAR_FLOAT_TYPE_P (type))
6264 tree fpaddend, cond, fourtyeight;
6266 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6267 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6268 addend, fourtyeight);
6269 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6270 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6271 fpaddend, addend);
6274 /* Build the final address and force that value into a temporary. */
6275 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6276 internal_post = NULL;
6277 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6278 gimple_seq_add_seq (pre_p, internal_post);
6280 /* Update the offset field. */
6281 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6282 if (type_size == NULL || TREE_OVERFLOW (type_size))
6283 t = size_zero_node;
6284 else
6286 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6287 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6288 t = size_binop (MULT_EXPR, t, size_int (8));
6290 t = fold_convert (TREE_TYPE (offset), t);
6291 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6292 pre_p);
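/* E.g. a 12-byte argument type advances OFFSET by 16 and a plain
   double advances it by 8, keeping OFFSET a multiple of 8.  */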
6294 return build_va_arg_indirect_ref (addr);
6297 static tree
6298 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6299 gimple_seq *post_p)
6301 tree offset_field, base_field, offset, base, t, r;
6302 bool indirect;
6304 if (TARGET_ABI_OPEN_VMS)
6305 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6307 base_field = TYPE_FIELDS (va_list_type_node);
6308 offset_field = DECL_CHAIN (base_field);
6309 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6310 valist, base_field, NULL_TREE);
6311 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6312 valist, offset_field, NULL_TREE);
6314 /* Pull the fields of the structure out into temporaries. Since we never
6315 modify the base field, we can use a formal temporary. Sign-extend the
6316 offset field so that it's the proper width for pointer arithmetic. */
6317 base = get_formal_tmp_var (base_field, pre_p);
6319 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6320 offset = get_initialized_tmp_var (t, pre_p, NULL);
6322 indirect = pass_va_arg_by_reference (type);
6324 if (indirect)
6326 if (TREE_CODE (type) == COMPLEX_TYPE
6327 && targetm.calls.split_complex_arg (type))
6329 tree real_part, imag_part, real_temp;
6331 tree ptr_type = build_pointer_type_for_mode (TREE_TYPE (type),
6332 ptr_mode, true);
6334 real_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6335 offset, pre_p);
6336 real_part = build_va_arg_indirect_ref (real_part);
6338 /* Copy the value into a new temporary, lest the formal temporary
6339 be reused out from under us. */
6340 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6342 imag_part = alpha_gimplify_va_arg_1 (ptr_type, base,
6343 offset, pre_p);
6344 imag_part = build_va_arg_indirect_ref (imag_part);
6346 r = build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6348 /* Stuff the offset temporary back into its field. */
6349 gimplify_assign (unshare_expr (offset_field),
6350 fold_convert (TREE_TYPE (offset_field), offset),
6351 pre_p);
6352 return r;
6354 else
6355 type = build_pointer_type_for_mode (type, ptr_mode, true);
6358 /* Find the value. Note that this will be a stable indirection, or
6359 a composite of stable indirections in the case of complex. */
6360 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6362 /* Stuff the offset temporary back into its field. */
6363 gimplify_assign (unshare_expr (offset_field),
6364 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6366 if (indirect)
6367 r = build_va_arg_indirect_ref (r);
6369 return r;
6372 /* Builtins. */
6374 enum alpha_builtin
6376 ALPHA_BUILTIN_CMPBGE,
6377 ALPHA_BUILTIN_EXTBL,
6378 ALPHA_BUILTIN_EXTWL,
6379 ALPHA_BUILTIN_EXTLL,
6380 ALPHA_BUILTIN_EXTQL,
6381 ALPHA_BUILTIN_EXTWH,
6382 ALPHA_BUILTIN_EXTLH,
6383 ALPHA_BUILTIN_EXTQH,
6384 ALPHA_BUILTIN_INSBL,
6385 ALPHA_BUILTIN_INSWL,
6386 ALPHA_BUILTIN_INSLL,
6387 ALPHA_BUILTIN_INSQL,
6388 ALPHA_BUILTIN_INSWH,
6389 ALPHA_BUILTIN_INSLH,
6390 ALPHA_BUILTIN_INSQH,
6391 ALPHA_BUILTIN_MSKBL,
6392 ALPHA_BUILTIN_MSKWL,
6393 ALPHA_BUILTIN_MSKLL,
6394 ALPHA_BUILTIN_MSKQL,
6395 ALPHA_BUILTIN_MSKWH,
6396 ALPHA_BUILTIN_MSKLH,
6397 ALPHA_BUILTIN_MSKQH,
6398 ALPHA_BUILTIN_UMULH,
6399 ALPHA_BUILTIN_ZAP,
6400 ALPHA_BUILTIN_ZAPNOT,
6401 ALPHA_BUILTIN_AMASK,
6402 ALPHA_BUILTIN_IMPLVER,
6403 ALPHA_BUILTIN_RPCC,
6404 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6405 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6407 /* TARGET_MAX */
6408 ALPHA_BUILTIN_MINUB8,
6409 ALPHA_BUILTIN_MINSB8,
6410 ALPHA_BUILTIN_MINUW4,
6411 ALPHA_BUILTIN_MINSW4,
6412 ALPHA_BUILTIN_MAXUB8,
6413 ALPHA_BUILTIN_MAXSB8,
6414 ALPHA_BUILTIN_MAXUW4,
6415 ALPHA_BUILTIN_MAXSW4,
6416 ALPHA_BUILTIN_PERR,
6417 ALPHA_BUILTIN_PKLB,
6418 ALPHA_BUILTIN_PKWB,
6419 ALPHA_BUILTIN_UNPKBL,
6420 ALPHA_BUILTIN_UNPKBW,
6422 /* TARGET_CIX */
6423 ALPHA_BUILTIN_CTTZ,
6424 ALPHA_BUILTIN_CTLZ,
6425 ALPHA_BUILTIN_CTPOP,
6427 ALPHA_BUILTIN_max
6430 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6431 CODE_FOR_builtin_cmpbge,
6432 CODE_FOR_extbl,
6433 CODE_FOR_extwl,
6434 CODE_FOR_extll,
6435 CODE_FOR_extql,
6436 CODE_FOR_extwh,
6437 CODE_FOR_extlh,
6438 CODE_FOR_extqh,
6439 CODE_FOR_builtin_insbl,
6440 CODE_FOR_builtin_inswl,
6441 CODE_FOR_builtin_insll,
6442 CODE_FOR_insql,
6443 CODE_FOR_inswh,
6444 CODE_FOR_inslh,
6445 CODE_FOR_insqh,
6446 CODE_FOR_mskbl,
6447 CODE_FOR_mskwl,
6448 CODE_FOR_mskll,
6449 CODE_FOR_mskql,
6450 CODE_FOR_mskwh,
6451 CODE_FOR_msklh,
6452 CODE_FOR_mskqh,
6453 CODE_FOR_umuldi3_highpart,
6454 CODE_FOR_builtin_zap,
6455 CODE_FOR_builtin_zapnot,
6456 CODE_FOR_builtin_amask,
6457 CODE_FOR_builtin_implver,
6458 CODE_FOR_builtin_rpcc,
6459 CODE_FOR_builtin_establish_vms_condition_handler,
6460 CODE_FOR_builtin_revert_vms_condition_handler,
6462 /* TARGET_MAX */
6463 CODE_FOR_builtin_minub8,
6464 CODE_FOR_builtin_minsb8,
6465 CODE_FOR_builtin_minuw4,
6466 CODE_FOR_builtin_minsw4,
6467 CODE_FOR_builtin_maxub8,
6468 CODE_FOR_builtin_maxsb8,
6469 CODE_FOR_builtin_maxuw4,
6470 CODE_FOR_builtin_maxsw4,
6471 CODE_FOR_builtin_perr,
6472 CODE_FOR_builtin_pklb,
6473 CODE_FOR_builtin_pkwb,
6474 CODE_FOR_builtin_unpkbl,
6475 CODE_FOR_builtin_unpkbw,
6477 /* TARGET_CIX */
6478 CODE_FOR_ctzdi2,
6479 CODE_FOR_clzdi2,
6480 CODE_FOR_popcountdi2
6483 struct alpha_builtin_def
6485 const char *name;
6486 enum alpha_builtin code;
6487 unsigned int target_mask;
6488 bool is_const;
6491 static struct alpha_builtin_def const zero_arg_builtins[] = {
6492 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6493 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6496 static struct alpha_builtin_def const one_arg_builtins[] = {
6497 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6498 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6499 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6500 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6501 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6502 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6503 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6504 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6507 static struct alpha_builtin_def const two_arg_builtins[] = {
6508 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6509 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6510 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6511 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6512 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6513 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6514 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6515 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6516 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6517 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6518 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6519 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6520 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6521 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6522 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6523 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6524 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6525 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6526 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6527 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6528 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6529 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6530 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6531 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6532 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6533 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6534 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6535 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6536 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6537 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6538 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6539 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6540 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6541 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6544 static GTY(()) tree alpha_dimode_u;
6545 static GTY(()) tree alpha_v8qi_u;
6546 static GTY(()) tree alpha_v8qi_s;
6547 static GTY(()) tree alpha_v4hi_u;
6548 static GTY(()) tree alpha_v4hi_s;
6550 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6552 /* Return the alpha builtin for CODE. */
6554 static tree
6555 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6557 if (code >= ALPHA_BUILTIN_max)
6558 return error_mark_node;
6559 return alpha_builtins[code];
6562 /* Helper function of alpha_init_builtins. Add the built-in specified
6563 by NAME, FTYPE, CODE, and ECF. */
6565 static void
6566 alpha_builtin_function (const char *name, tree ftype,
6567 enum alpha_builtin code, unsigned ecf)
6569 tree decl = add_builtin_function (name, ftype, (int) code,
6570 BUILT_IN_MD, NULL, NULL_TREE);
6572 if (ecf & ECF_CONST)
6573 TREE_READONLY (decl) = 1;
6574 if (ecf & ECF_NOTHROW)
6575 TREE_NOTHROW (decl) = 1;
6577 alpha_builtins [(int) code] = decl;
6580 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6581 functions pointed to by P, with function type FTYPE. */
6583 static void
6584 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6585 tree ftype)
6587 size_t i;
6589 for (i = 0; i < count; ++i, ++p)
6590 if ((target_flags & p->target_mask) == p->target_mask)
6591 alpha_builtin_function (p->name, ftype, p->code,
6592 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6595 static void
6596 alpha_init_builtins (void)
6598 tree ftype;
6600 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6601 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6602 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6603 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6604 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6606 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6607 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6609 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6610 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6612 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6613 alpha_dimode_u, NULL_TREE);
6614 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6616 if (TARGET_ABI_OPEN_VMS)
6618 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6619 NULL_TREE);
6620 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6621 ftype,
6622 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6623 0);
6625 ftype = build_function_type_list (ptr_type_node, void_type_node,
6626 NULL_TREE);
6627 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6628 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6630 vms_patch_builtins ();
6634 /* Expand an expression EXP that calls a built-in function,
6635 with result going to TARGET if that's convenient
6636 (and in mode MODE if that's convenient).
6637 SUBTARGET may be used as the target for computing one of EXP's operands.
6638 IGNORE is nonzero if the value is to be ignored. */
6640 static rtx
6641 alpha_expand_builtin (tree exp, rtx target,
6642 rtx subtarget ATTRIBUTE_UNUSED,
6643 machine_mode mode ATTRIBUTE_UNUSED,
6644 int ignore ATTRIBUTE_UNUSED)
6646 #define MAX_ARGS 2
6648 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6649 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
6650 tree arg;
6651 call_expr_arg_iterator iter;
6652 enum insn_code icode;
6653 rtx op[MAX_ARGS], pat;
6654 int arity;
6655 bool nonvoid;
6657 if (fcode >= ALPHA_BUILTIN_max)
6658 internal_error ("bad builtin fcode");
6659 icode = code_for_builtin[fcode];
6660 if (icode == 0)
6661 internal_error ("bad builtin fcode");
6663 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6665 arity = 0;
6666 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6668 const struct insn_operand_data *insn_op;
6670 if (arg == error_mark_node)
6671 return NULL_RTX;
6672 if (arity >= MAX_ARGS)
6673 return NULL_RTX;
6675 insn_op = &insn_data[icode].operand[arity + nonvoid];
6677 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6679 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6680 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6681 arity++;
6684 if (nonvoid)
6686 machine_mode tmode = insn_data[icode].operand[0].mode;
6687 if (!target
6688 || GET_MODE (target) != tmode
6689 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6690 target = gen_reg_rtx (tmode);
6693 switch (arity)
6695 case 0:
6696 pat = GEN_FCN (icode) (target);
6697 break;
6698 case 1:
6699 if (nonvoid)
6700 pat = GEN_FCN (icode) (target, op[0]);
6701 else
6702 pat = GEN_FCN (icode) (op[0]);
6703 break;
6704 case 2:
6705 pat = GEN_FCN (icode) (target, op[0], op[1]);
6706 break;
6707 default:
6708 gcc_unreachable ();
6710 if (!pat)
6711 return NULL_RTX;
6712 emit_insn (pat);
6714 if (nonvoid)
6715 return target;
6716 else
6717 return const0_rtx;
6720 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6721 with an 8-bit output vector. OPINT contains the integer operands; bit N
6722 of OP_CONST is set if OPINT[N] is valid. */
6724 static tree
6725 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6727 if (op_const == 3)
6729 int i, val;
6730 for (i = 0, val = 0; i < 8; ++i)
6732 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6733 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6734 if (c0 >= c1)
6735 val |= 1 << i;
6737 return build_int_cst (alpha_dimode_u, val);
6739 else if (op_const == 2 && opint[1] == 0)
6740 return build_int_cst (alpha_dimode_u, 0xff);
6741 return NULL;
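/* For example, when both operands are constant,
   __builtin_alpha_cmpbge (0x0102030405060708, 0x0807060504030201)
   folds to 0x0f: byte I of the first operand is compared unsigned
   against byte I of the second, and only the four low-order bytes
   compare >=.  With just the second operand constant and zero, the
   result folds to 0xff, since every unsigned byte is >= 0.  */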
6744 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6745 specialized form of an AND operation. Other byte manipulation instructions
6746 are defined in terms of this instruction, so this is also used as a
6747 subroutine for other builtins.
6749 OP contains the tree operands; OPINT contains the extracted integer values.
6750 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6751 OPINT is to be considered. */
6753 static tree
6754 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6755 long op_const)
6757 if (op_const & 2)
6759 unsigned HOST_WIDE_INT mask = 0;
6760 int i;
6762 for (i = 0; i < 8; ++i)
6763 if ((opint[1] >> i) & 1)
6764 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6766 if (op_const & 1)
6767 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6769 if (op)
6770 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6771 build_int_cst (alpha_dimode_u, mask));
6773 else if ((op_const & 1) && opint[0] == 0)
6774 return build_int_cst (alpha_dimode_u, 0);
6775 return NULL;
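/* Worked example of the mask expansion above: a constant byte
   selector of 0x0f yields MASK = 0x00000000ffffffff, so
   zapnot (x, 0x0f) folds to the constant X & 0xffffffff when X is
   also constant, and to the equivalent BIT_AND_EXPR otherwise.  */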
6778 /* Fold the builtins for the EXT family of instructions. */
6780 static tree
6781 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6782 long op_const, unsigned HOST_WIDE_INT bytemask,
6783 bool is_high)
6785 long zap_const = 2;
6786 tree *zap_op = NULL;
6788 if (op_const & 2)
6790 unsigned HOST_WIDE_INT loc;
6792 loc = opint[1] & 7;
6793 loc *= BITS_PER_UNIT;
6795 if (loc != 0)
6797 if (op_const & 1)
6799 unsigned HOST_WIDE_INT temp = opint[0];
6800 if (is_high)
6801 temp <<= loc;
6802 else
6803 temp >>= loc;
6804 opint[0] = temp;
6805 zap_const = 3;
6808 else
6809 zap_op = op;
6812 opint[1] = bytemask;
6813 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
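/* Concretely: when both operands of __builtin_alpha_extwl (x, n) are
   constant and (n & 7) == 2, X is shifted right by 16 and masked via
   the ZAPNOT folder with byte selector 0x03, yielding the extracted
   16-bit word.  When only N is constant and its low three bits are
   zero, the call reduces to X & 0xffff; in the remaining cases no
   folding is done here.  */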
6816 /* Fold the builtins for the INS family of instructions. */
6818 static tree
6819 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6820 long op_const, unsigned HOST_WIDE_INT bytemask,
6821 bool is_high)
6823 if ((op_const & 1) && opint[0] == 0)
6824 return build_int_cst (alpha_dimode_u, 0);
6826 if (op_const & 2)
6828 unsigned HOST_WIDE_INT temp, loc, byteloc;
6829 tree *zap_op = NULL;
6831 loc = opint[1] & 7;
6832 bytemask <<= loc;
6834 temp = opint[0];
6835 if (is_high)
6837 byteloc = (64 - (loc * 8)) & 0x3f;
6838 if (byteloc == 0)
6839 zap_op = op;
6840 else
6841 temp >>= byteloc;
6842 bytemask >>= 8;
6844 else
6846 byteloc = loc * 8;
6847 if (byteloc == 0)
6848 zap_op = op;
6849 else
6850 temp <<= byteloc;
6853 opint[0] = temp;
6854 opint[1] = bytemask;
6855 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6858 return NULL;
6861 static tree
6862 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6863 long op_const, unsigned HOST_WIDE_INT bytemask,
6864 bool is_high)
6866 if (op_const & 2)
6868 unsigned HOST_WIDE_INT loc;
6870 loc = opint[1] & 7;
6871 bytemask <<= loc;
6873 if (is_high)
6874 bytemask >>= 8;
6876 opint[1] = bytemask ^ 0xff;
6879 return alpha_fold_builtin_zapnot (op, opint, op_const);
6882 static tree
6883 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6885 tree op0 = fold_convert (vtype, op[0]);
6886 tree op1 = fold_convert (vtype, op[1]);
6887 tree val = fold_build2 (code, vtype, op0, op1);
6888 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6891 static tree
6892 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6894 unsigned HOST_WIDE_INT temp = 0;
6895 int i;
6897 if (op_const != 3)
6898 return NULL;
6900 for (i = 0; i < 8; ++i)
6902 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6903 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6904 if (a >= b)
6905 temp += a - b;
6906 else
6907 temp += b - a;
6910 return build_int_cst (alpha_dimode_u, temp);
6913 static tree
6914 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6916 unsigned HOST_WIDE_INT temp;
6918 if (op_const == 0)
6919 return NULL;
6921 temp = opint[0] & 0xff;
6922 temp |= (opint[0] >> 24) & 0xff00;
6924 return build_int_cst (alpha_dimode_u, temp);
6927 static tree
6928 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6930 unsigned HOST_WIDE_INT temp;
6932 if (op_const == 0)
6933 return NULL;
6935 temp = opint[0] & 0xff;
6936 temp |= (opint[0] >> 8) & 0xff00;
6937 temp |= (opint[0] >> 16) & 0xff0000;
6938 temp |= (opint[0] >> 24) & 0xff000000;
6940 return build_int_cst (alpha_dimode_u, temp);
6943 static tree
6944 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6946 unsigned HOST_WIDE_INT temp;
6948 if (op_const == 0)
6949 return NULL;
6951 temp = opint[0] & 0xff;
6952 temp |= (opint[0] & 0xff00) << 24;
6954 return build_int_cst (alpha_dimode_u, temp);
6957 static tree
6958 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6960 unsigned HOST_WIDE_INT temp;
6962 if (op_const == 0)
6963 return NULL;
6965 temp = opint[0] & 0xff;
6966 temp |= (opint[0] & 0x0000ff00) << 8;
6967 temp |= (opint[0] & 0x00ff0000) << 16;
6968 temp |= (opint[0] & 0xff000000) << 24;
6970 return build_int_cst (alpha_dimode_u, temp);
6973 static tree
6974 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6976 unsigned HOST_WIDE_INT temp;
6978 if (op_const == 0)
6979 return NULL;
6981 if (opint[0] == 0)
6982 temp = 64;
6983 else
6984 temp = exact_log2 (opint[0] & -opint[0]);
6986 return build_int_cst (alpha_dimode_u, temp);
6989 static tree
6990 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6992 unsigned HOST_WIDE_INT temp;
6994 if (op_const == 0)
6995 return NULL;
6997 if (opint[0] == 0)
6998 temp = 64;
6999 else
7000 temp = 64 - floor_log2 (opint[0]) - 1;
7002 return build_int_cst (alpha_dimode_u, temp);
7005 static tree
7006 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
7008 unsigned HOST_WIDE_INT temp, op;
7010 if (op_const == 0)
7011 return NULL;
7013 op = opint[0];
7014 temp = 0;
7015 while (op)
7016 temp++, op &= op - 1;
7018 return build_int_cst (alpha_dimode_u, temp);
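/* The loop above clears the lowest set bit each iteration
   (op &= op - 1); e.g. a constant operand of 0xb (binary 1011) takes
   three iterations and folds to 3.  */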
7021 /* Fold one of our builtin functions. */
7023 static tree
7024 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
7025 bool ignore ATTRIBUTE_UNUSED)
7027 unsigned HOST_WIDE_INT opint[MAX_ARGS];
7028 long op_const = 0;
7029 int i;
7031 if (n_args > MAX_ARGS)
7032 return NULL;
7034 for (i = 0; i < n_args; i++)
7036 tree arg = op[i];
7037 if (arg == error_mark_node)
7038 return NULL;
7040 opint[i] = 0;
7041 if (TREE_CODE (arg) == INTEGER_CST)
7043 op_const |= 1L << i;
7044 opint[i] = int_cst_value (arg);
7048 switch (DECL_MD_FUNCTION_CODE (fndecl))
7050 case ALPHA_BUILTIN_CMPBGE:
7051 return alpha_fold_builtin_cmpbge (opint, op_const);
7053 case ALPHA_BUILTIN_EXTBL:
7054 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7055 case ALPHA_BUILTIN_EXTWL:
7056 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7057 case ALPHA_BUILTIN_EXTLL:
7058 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7059 case ALPHA_BUILTIN_EXTQL:
7060 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7061 case ALPHA_BUILTIN_EXTWH:
7062 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7063 case ALPHA_BUILTIN_EXTLH:
7064 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7065 case ALPHA_BUILTIN_EXTQH:
7066 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7068 case ALPHA_BUILTIN_INSBL:
7069 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7070 case ALPHA_BUILTIN_INSWL:
7071 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7072 case ALPHA_BUILTIN_INSLL:
7073 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7074 case ALPHA_BUILTIN_INSQL:
7075 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7076 case ALPHA_BUILTIN_INSWH:
7077 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7078 case ALPHA_BUILTIN_INSLH:
7079 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7080 case ALPHA_BUILTIN_INSQH:
7081 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7083 case ALPHA_BUILTIN_MSKBL:
7084 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7085 case ALPHA_BUILTIN_MSKWL:
7086 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7087 case ALPHA_BUILTIN_MSKLL:
7088 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7089 case ALPHA_BUILTIN_MSKQL:
7090 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7091 case ALPHA_BUILTIN_MSKWH:
7092 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7093 case ALPHA_BUILTIN_MSKLH:
7094 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7095 case ALPHA_BUILTIN_MSKQH:
7096 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7098 case ALPHA_BUILTIN_ZAP:
7099 opint[1] ^= 0xff;
7100 /* FALLTHRU */
7101 case ALPHA_BUILTIN_ZAPNOT:
7102 return alpha_fold_builtin_zapnot (op, opint, op_const);
7104 case ALPHA_BUILTIN_MINUB8:
7105 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7106 case ALPHA_BUILTIN_MINSB8:
7107 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7108 case ALPHA_BUILTIN_MINUW4:
7109 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7110 case ALPHA_BUILTIN_MINSW4:
7111 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7112 case ALPHA_BUILTIN_MAXUB8:
7113 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7114 case ALPHA_BUILTIN_MAXSB8:
7115 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7116 case ALPHA_BUILTIN_MAXUW4:
7117 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7118 case ALPHA_BUILTIN_MAXSW4:
7119 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7121 case ALPHA_BUILTIN_PERR:
7122 return alpha_fold_builtin_perr (opint, op_const);
7123 case ALPHA_BUILTIN_PKLB:
7124 return alpha_fold_builtin_pklb (opint, op_const);
7125 case ALPHA_BUILTIN_PKWB:
7126 return alpha_fold_builtin_pkwb (opint, op_const);
7127 case ALPHA_BUILTIN_UNPKBL:
7128 return alpha_fold_builtin_unpkbl (opint, op_const);
7129 case ALPHA_BUILTIN_UNPKBW:
7130 return alpha_fold_builtin_unpkbw (opint, op_const);
7132 case ALPHA_BUILTIN_CTTZ:
7133 return alpha_fold_builtin_cttz (opint, op_const);
7134 case ALPHA_BUILTIN_CTLZ:
7135 return alpha_fold_builtin_ctlz (opint, op_const);
7136 case ALPHA_BUILTIN_CTPOP:
7137 return alpha_fold_builtin_ctpop (opint, op_const);
7139 case ALPHA_BUILTIN_AMASK:
7140 case ALPHA_BUILTIN_IMPLVER:
7141 case ALPHA_BUILTIN_RPCC:
7142 /* None of these are foldable at compile-time. */
7143 default:
7144 return NULL;
7148 bool
7149 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7151 bool changed = false;
7152 gimple *stmt = gsi_stmt (*gsi);
7153 tree call = gimple_call_fn (stmt);
7154 gimple *new_stmt = NULL;
7156 if (call)
7158 tree fndecl = gimple_call_fndecl (stmt);
7160 if (fndecl)
7162 tree arg0, arg1;
7164 switch (DECL_MD_FUNCTION_CODE (fndecl))
7166 case ALPHA_BUILTIN_UMULH:
7167 arg0 = gimple_call_arg (stmt, 0);
7168 arg1 = gimple_call_arg (stmt, 1);
7170 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7171 MULT_HIGHPART_EXPR, arg0, arg1);
7172 break;
7173 default:
7174 break;
7179 if (new_stmt)
7181 gsi_replace (gsi, new_stmt, true);
7182 changed = true;
7185 return changed;
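/* In effect a call such as __builtin_alpha_umulh (a, b) is rewritten
   as lhs = a h* b (MULT_HIGHPART_EXPR), i.e. the upper 64 bits of the
   128-bit unsigned product, which the expander implements with the
   umulh instruction.  */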
7188 /* This page contains routines that are used to determine what the function
7189 prologue and epilogue code will do and write them out. */
7191 /* Compute the size of the save area in the stack. */
7193 /* These variables are used for communication between the following functions.
7194 They indicate various things about the current function being compiled
7195 that are used to tell what kind of prologue, epilogue and procedure
7196 descriptor to generate. */
7198 /* The kind of procedure (null, register frame, or stack frame) to generate. */
7199 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7200 static enum alpha_procedure_types alpha_procedure_type;
7202 /* Register number (either FP or SP) that is used to unwind the frame. */
7203 static int vms_unwind_regno;
7205 /* Register number used to save FP. We need not have one for RA since
7206 we don't modify it for register procedures. This is only defined
7207 for register frame procedures. */
7208 static int vms_save_fp_regno;
7210 /* Register number used to reference objects off our PV. */
7211 static int vms_base_regno;
7213 /* Compute register masks for saved registers, register save area size,
7214 and total frame size. */
7215 static void
7216 alpha_compute_frame_layout (void)
7218 unsigned HOST_WIDE_INT sa_mask = 0;
7219 HOST_WIDE_INT frame_size;
7220 int sa_size;
7222 /* When outputting a thunk, we don't have valid register life info,
7223 but assemble_start_function wants to output .frame and .mask
7224 directives. */
7225 if (!cfun->is_thunk)
7227 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7228 sa_mask |= HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM;
7230 /* One for every register we have to save. */
7231 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7232 if (! call_used_or_fixed_reg_p (i)
7233 && df_regs_ever_live_p (i) && i != REG_RA)
7234 sa_mask |= HOST_WIDE_INT_1U << i;
7236 /* We need to restore these for the handler. */
7237 if (crtl->calls_eh_return)
7239 for (unsigned i = 0; ; ++i)
7241 unsigned regno = EH_RETURN_DATA_REGNO (i);
7242 if (regno == INVALID_REGNUM)
7243 break;
7244 sa_mask |= HOST_WIDE_INT_1U << regno;
7248 /* If any register spilled, then spill the return address also. */
7249 /* ??? This is required by the Digital stack unwind specification
7250 and isn't needed if we're doing Dwarf2 unwinding. */
7251 if (sa_mask || alpha_ra_ever_killed ())
7252 sa_mask |= HOST_WIDE_INT_1U << REG_RA;
7255 sa_size = popcount_hwi (sa_mask);
7256 frame_size = get_frame_size ();
7258 if (TARGET_ABI_OPEN_VMS)
7260 /* Start with a stack procedure if we make any calls (REG_RA used), or
7261 need a frame pointer, with a register procedure if we otherwise need
7262 at least a slot, and with a null procedure in other cases. */
7263 if ((sa_mask >> REG_RA) & 1 || frame_pointer_needed)
7264 alpha_procedure_type = PT_STACK;
7265 else if (frame_size != 0)
7266 alpha_procedure_type = PT_REGISTER;
7267 else
7268 alpha_procedure_type = PT_NULL;
7270 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7271 made the final decision on stack procedure vs register procedure. */
7272 if (alpha_procedure_type == PT_STACK)
7273 sa_size -= 2;
7275 /* Decide whether to refer to objects off our PV via FP or PV.
7276 If we need FP for something else or if we receive a nonlocal
7277 goto (which expects PV to contain the value), we must use PV.
7278 Otherwise, start by assuming we can use FP. */
7280 vms_base_regno
7281 = (frame_pointer_needed
7282 || cfun->has_nonlocal_label
7283 || alpha_procedure_type == PT_STACK
7284 || crtl->outgoing_args_size)
7285 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7287 /* If we want to copy PV into FP, we need to find some register
7288 in which to save FP. */
7289 vms_save_fp_regno = -1;
7290 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7291 for (unsigned i = 0; i < 32; i++)
7292 if (! fixed_regs[i] && call_used_or_fixed_reg_p (i)
7293 && ! df_regs_ever_live_p (i))
7295 vms_save_fp_regno = i;
7296 break;
7299 /* A VMS condition handler requires a stack procedure in our
7300 implementation (not required by the calling standard). */
7301 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7302 || cfun->machine->uses_condition_handler)
7303 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7304 else if (alpha_procedure_type == PT_NULL)
7305 vms_base_regno = REG_PV;
7307 /* Stack unwinding should be done via FP unless we use it for PV. */
7308 vms_unwind_regno = (vms_base_regno == REG_PV
7309 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7311 /* If this is a stack procedure, allow space for saving FP, RA and
7312 a condition handler slot if needed. */
7313 if (alpha_procedure_type == PT_STACK)
7314 sa_size += 2 + cfun->machine->uses_condition_handler;
7316 else
7318 /* Our size must be even (multiple of 16 bytes). */
7319 if (sa_size & 1)
7320 sa_size++;
7322 sa_size *= 8;
7324 if (TARGET_ABI_OPEN_VMS)
7325 frame_size = ALPHA_ROUND (sa_size
7326 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7327 + frame_size
7328 + crtl->args.pretend_args_size);
7329 else
7330 frame_size = (ALPHA_ROUND (crtl->outgoing_args_size)
7331 + sa_size
7332 + ALPHA_ROUND (frame_size + crtl->args.pretend_args_size));
7334 cfun->machine->sa_mask = sa_mask;
7335 cfun->machine->sa_size = sa_size;
7336 cfun->machine->frame_size = frame_size;
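/* Worked example for a hypothetical OSF/1 function: saving $26, $9
   and $10 gives a count of 3, rounded up to 4 slots for 16-byte
   alignment, so sa_size = 32; with 40 bytes of locals and no pretend
   or outgoing args, frame_size = 0 + 32 + ALPHA_ROUND (40) = 80.  */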
7339 #undef TARGET_COMPUTE_FRAME_LAYOUT
7340 #define TARGET_COMPUTE_FRAME_LAYOUT alpha_compute_frame_layout
7342 /* Return 1 if this function can directly return via $26. */
7344 bool
7345 direct_return (void)
7347 return (TARGET_ABI_OSF
7348 && reload_completed
7349 && cfun->machine->frame_size == 0);
7352 /* Define the offset between two registers, one to be eliminated,
7353 and the other its replacement, at the start of a routine. */
7355 HOST_WIDE_INT
7356 alpha_initial_elimination_offset (unsigned int from,
7357 unsigned int to ATTRIBUTE_UNUSED)
7359 HOST_WIDE_INT ret;
7361 ret = cfun->machine->sa_size;
7362 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7364 switch (from)
7366 case FRAME_POINTER_REGNUM:
7367 break;
7369 case ARG_POINTER_REGNUM:
7370 ret += (ALPHA_ROUND (get_frame_size ()
7371 + crtl->args.pretend_args_size)
7372 - crtl->args.pretend_args_size);
7373 break;
7375 default:
7376 gcc_unreachable ();
7379 return ret;
7382 #if TARGET_ABI_OPEN_VMS
7384 /* Worker function for TARGET_CAN_ELIMINATE. */
7386 static bool
7387 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7389 switch (alpha_procedure_type)
7391 case PT_NULL:
7392 /* NULL procedures have no frame of their own and we only
7393 know how to resolve from the current stack pointer. */
7394 return to == STACK_POINTER_REGNUM;
7396 case PT_REGISTER:
7397 case PT_STACK:
7398 /* We always eliminate except to the stack pointer if there is no
7399 usable frame pointer at hand. */
7400 return (to != STACK_POINTER_REGNUM
7401 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7404 gcc_unreachable ();
7407 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7408 designates the same location as FROM. */
7410 HOST_WIDE_INT
7411 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7413 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7414 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7415 on the proper computations and will need the register save area size
7416 in most cases. */
7418 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7420 /* PT_NULL procedures have no frame of their own and we only allow
7421 elimination to the stack pointer. This is the argument pointer and we
7422 resolve the soft frame pointer to that as well. */
7424 if (alpha_procedure_type == PT_NULL)
7425 return 0;
7427 /* For a PT_STACK procedure the frame layout looks as follows
7429 -----> decreasing addresses
7431 < size rounded up to 16 | likewise >
7432 --------------#------------------------------+++--------------+++-------#
7433 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7434 --------------#---------------------------------------------------------#
7435 ^ ^ ^ ^
7436 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
7439 PT_REGISTER procedures are similar in that they may have a frame of their
7440 own. They have no regs-sa/pv/outgoing-args area.
7442 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7443 to STACK_PTR if need be. */
7446 HOST_WIDE_INT offset;
7447 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7449 switch (from)
7451 case FRAME_POINTER_REGNUM:
7452 offset = ALPHA_ROUND (sa_size + pv_save_size);
7453 break;
7454 case ARG_POINTER_REGNUM:
7455 offset = (ALPHA_ROUND (sa_size + pv_save_size
7456 + get_frame_size ()
7457 + crtl->args.pretend_args_size)
7458 - crtl->args.pretend_args_size);
7459 break;
7460 default:
7461 gcc_unreachable ();
7464 if (to == STACK_POINTER_REGNUM)
7465 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7467 return offset;
7471 #define COMMON_OBJECT "common_object"
7473 static tree
7474 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7475 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7476 bool *no_add_attrs ATTRIBUTE_UNUSED)
7478 tree decl = *node;
7479 gcc_assert (DECL_P (decl));
7481 DECL_COMMON (decl) = 1;
7482 return NULL_TREE;
7485 TARGET_GNU_ATTRIBUTES (vms_attribute_table,
7487 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7488 affects_type_identity, handler, exclude } */
7489 { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler,
7490 NULL }
7491 });
7493 void
7494 vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
7495 unsigned HOST_WIDE_INT size,
7496 unsigned int align)
7498 tree attr = DECL_ATTRIBUTES (decl);
7499 fprintf (file, "%s", COMMON_ASM_OP);
7500 assemble_name (file, name);
7501 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7502 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7503 fprintf (file, ",%u", align / BITS_PER_UNIT);
7504 if (attr)
7506 attr = lookup_attribute (COMMON_OBJECT, attr);
7507 if (attr)
7508 fprintf (file, ",%s",
7509 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7511 fputc ('\n', file);
7514 #undef COMMON_OBJECT
7516 #endif
7518 bool
7519 alpha_find_lo_sum_using_gp (rtx insn)
7521 subrtx_iterator::array_type array;
7522 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7524 const_rtx x = *iter;
7525 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7526 return true;
7528 return false;
7531 static int
7532 alpha_does_function_need_gp (void)
7534 rtx_insn *insn;
7536 /* The GP being variable is an OSF abi thing. */
7537 if (! TARGET_ABI_OSF)
7538 return 0;
7540 /* We need the gp to load the address of __mcount. */
7541 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7542 return 1;
7544 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7545 if (cfun->is_thunk)
7546 return 1;
7548 /* The nonlocal receiver pattern assumes that the gp is valid for
7549 the nested function. Reasonable because it's almost always set
7550 correctly already. For the cases where that's wrong, make sure
7551 the nested function loads its gp on entry. */
7552 if (crtl->has_nonlocal_goto)
7553 return 1;
7555 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7556 Even if we are a static function, we still need to do this in case
7557 our address is taken and passed to something like qsort. */
7559 push_topmost_sequence ();
7560 insn = get_insns ();
7561 pop_topmost_sequence ();
7563 for (; insn; insn = NEXT_INSN (insn))
7564 if (NONDEBUG_INSN_P (insn)
7565 && GET_CODE (PATTERN (insn)) != USE
7566 && GET_CODE (PATTERN (insn)) != CLOBBER
7567 && get_attr_usegp (insn))
7568 return 1;
7570 return 0;
7573 /* Helper function for alpha_store_data_bypass_p, handle just a single SET
7574 IN_SET. */
7576 static bool
7577 alpha_store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set)
7579 if (!MEM_P (SET_DEST (in_set)))
7580 return false;
7582 rtx out_set = single_set (out_insn);
7583 if (out_set)
7584 return !reg_mentioned_p (SET_DEST (out_set), SET_DEST (in_set));
7586 rtx out_pat = PATTERN (out_insn);
7587 if (GET_CODE (out_pat) != PARALLEL)
7588 return false;
7590 for (int i = 0; i < XVECLEN (out_pat, 0); i++)
7592 rtx out_exp = XVECEXP (out_pat, 0, i);
7594 if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE
7595 || GET_CODE (out_exp) == TRAP_IF)
7596 continue;
7598 gcc_assert (GET_CODE (out_exp) == SET);
7600 if (reg_mentioned_p (SET_DEST (out_exp), SET_DEST (in_set)))
7601 return false;
7604 return true;
7607 /* True if the dependency between OUT_INSN and IN_INSN is on the store
7608 data not the address operand(s) of the store. IN_INSN and OUT_INSN
7609 must be either a single_set or a PARALLEL with SETs inside.
7611 This alpha-specific version of store_data_bypass_p ignores TRAP_IF
7612 that would result in assertion failure (and internal compiler error)
7613 in the generic store_data_bypass_p function. */
7615 bool
7616 alpha_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
7618 rtx in_set = single_set (in_insn);
7619 if (in_set)
7620 return alpha_store_data_bypass_p_1 (out_insn, in_set);
7622 rtx in_pat = PATTERN (in_insn);
7623 if (GET_CODE (in_pat) != PARALLEL)
7624 return false;
7626 for (int i = 0; i < XVECLEN (in_pat, 0); i++)
7628 rtx in_exp = XVECEXP (in_pat, 0, i);
7630 if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE
7631 || GET_CODE (in_exp) == TRAP_IF)
7632 continue;
7634 gcc_assert (GET_CODE (in_exp) == SET);
7636 if (!alpha_store_data_bypass_p_1 (out_insn, in_exp))
7637 return false;
7640 return true;
7643 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7644 sequences. */
7646 static rtx_insn *
7647 set_frame_related_p (void)
7649 rtx_insn *seq = get_insns ();
7650 rtx_insn *insn;
7652 end_sequence ();
7654 if (!seq)
7655 return NULL;
7657 if (INSN_P (seq))
7659 insn = seq;
7660 while (insn != NULL_RTX)
7662 RTX_FRAME_RELATED_P (insn) = 1;
7663 insn = NEXT_INSN (insn);
7665 seq = emit_insn (seq);
7667 else
7669 seq = emit_insn (seq);
7670 RTX_FRAME_RELATED_P (seq) = 1;
7672 return seq;
7675 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7677 /* Generates a store with the proper unwind info attached. VALUE is
7678 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7679 contains SP+FRAME_BIAS, and that is the unwind info that should be
7680 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7681 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7683 static void
7684 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7685 HOST_WIDE_INT base_ofs, rtx frame_reg)
7687 rtx addr, mem;
7688 rtx_insn *insn;
7690 addr = plus_constant (Pmode, base_reg, base_ofs);
7691 mem = gen_frame_mem (DImode, addr);
7693 insn = emit_move_insn (mem, value);
7694 RTX_FRAME_RELATED_P (insn) = 1;
7696 if (frame_bias || value != frame_reg)
7698 if (frame_bias)
7700 addr = plus_constant (Pmode, stack_pointer_rtx,
7701 frame_bias + base_ofs);
7702 mem = gen_rtx_MEM (DImode, addr);
7705 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7706 gen_rtx_SET (mem, frame_reg));
7710 static void
7711 emit_frame_store (unsigned int regno, rtx base_reg,
7712 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7714 rtx reg = gen_rtx_REG (DImode, regno);
7715 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7718 /* Write function prologue. */
7720 /* On VMS we have two kinds of functions:
7722 - stack frame (PROC_STACK)
7723 these are 'normal' functions with local variables and which
7724 call other functions
7725 - register frame (PROC_REGISTER)
7726 keeps all data in registers, needs no stack
7728 We must pass this to the assembler so it can generate the
7729 proper pdsc (procedure descriptor).
7730 This is done with the '.pdesc' command.
7732 On non-VMS targets we don't really differentiate between the two,
7733 as we can simply allocate stack without saving registers. */
7735 void
7736 alpha_expand_prologue (void)
7738 /* Registers to save. */
7739 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
7740 /* Stack space needed for pushing registers clobbered by us. */
7741 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
7742 /* Complete stack size needed. */
7743 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7744 /* Probed stack size; it additionally includes the size of
7745 the "reserve region" if any. */
7746 HOST_WIDE_INT probed_size, sa_bias;
7747 /* Offset from base reg to register save area. */
7748 HOST_WIDE_INT reg_offset;
7749 rtx sa_reg;
7751 if (flag_stack_usage_info)
7752 current_function_static_stack_size = frame_size;
7754 if (TARGET_ABI_OPEN_VMS)
7755 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7756 else
7757 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7759 /* Emit an insn to reload GP, if needed. */
7760 if (TARGET_ABI_OSF)
7762 alpha_function_needs_gp = alpha_does_function_need_gp ();
7763 if (alpha_function_needs_gp)
7764 emit_insn (gen_prologue_ldgp ());
7767 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7768 the call to mcount ourselves, rather than having the linker do it
7769 magically in response to -pg. Since _mcount has special linkage,
7770 don't represent the call as a call. */
7771 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7772 emit_insn (gen_prologue_mcount ());
7774 /* Adjust the stack by the frame size. If the frame size is > 4096
7775 bytes, we need to be sure we probe somewhere in the first and last
7776 4096 bytes (we can probably get away without the latter test) and
7777 every 8192 bytes in between. If the frame size is > 32768, we
7778 do this in a loop. Otherwise, we generate the explicit probe
7779 instructions.
7781 Note that we are only allowed to adjust sp once in the prologue. */
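/* For example, with stack checking disabled and a 20000-byte frame,
   the code below emits probes at SP-4096 and SP-12288 and, when no
   registers are being saved, one final probe at SP-20000, before the
   single adjustment of SP by the full frame size.  */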
7783 probed_size = frame_size;
7784 if (flag_stack_check || flag_stack_clash_protection)
7785 probed_size += get_stack_check_protect ();
7787 if (probed_size <= 32768)
7789 if (probed_size > 4096)
7791 int probed;
7793 for (probed = 4096; probed < probed_size; probed += 8192)
7794 emit_insn (gen_stack_probe_internal (GEN_INT (-probed)));
7796 /* We only have to do this probe if we aren't saving registers or
7797 if we are probing beyond the frame because of -fstack-check. */
7798 if ((sa_size == 0 && probed_size > probed - 4096)
7799 || flag_stack_check || flag_stack_clash_protection)
7800 emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size)));
7803 if (frame_size != 0)
7804 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7805 GEN_INT (-frame_size))));
7807 else
7809 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7810 number of 8192 byte blocks to probe. We then probe each block
7811 in the loop and then set SP to the proper location. If the
7812 amount remaining is > 4096, we have to do one more probe if we
7813 are not saving any registers or if we are probing beyond the
7814 frame because of -fstack-check. */
7816 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7817 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7818 rtx ptr = gen_rtx_REG (DImode, 22);
7819 rtx count = gen_rtx_REG (DImode, 23);
7820 rtx seq;
7822 emit_move_insn (count, GEN_INT (blocks));
7823 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7825 /* Because of the difficulty in emitting a new basic block this
7826 late in the compilation, generate the loop as a single insn. */
7827 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7829 if ((leftover > 4096 && sa_size == 0)
7830 || flag_stack_check || flag_stack_clash_protection)
7832 rtx last = gen_rtx_MEM (DImode,
7833 plus_constant (Pmode, ptr, -leftover));
7834 MEM_VOLATILE_P (last) = 1;
7835 emit_move_insn (last, const0_rtx);
7838 if (flag_stack_check || flag_stack_clash_protection)
7840 /* If -fstack-check is specified we have to load the entire
7841 constant into a register and subtract from the sp in one go,
7842 because the probed stack size is not equal to the frame size. */
7843 HOST_WIDE_INT lo, hi;
7844 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7845 hi = frame_size - lo;
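/* This splits FRAME_SIZE so that LO fits a signed 16-bit addend:
   e.g. for frame_size == 0x19000 the low 16 bits are 0x9000, so LO
   becomes -0x7000 and HI becomes 0x20000, and HI + LO == 0x19000.  */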
7847 emit_move_insn (ptr, GEN_INT (hi));
7848 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7849 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7850 ptr));
7852 else
7854 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7855 GEN_INT (-leftover)));
7858 /* This alternative is special, because the DWARF code cannot
7859 possibly intuit through the loop above. So we invent this
7860 note for it to look at instead. */
7861 RTX_FRAME_RELATED_P (seq) = 1;
7862 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7863 gen_rtx_SET (stack_pointer_rtx,
7864 plus_constant (Pmode, stack_pointer_rtx,
7865 -frame_size)));
7868 /* Cope with very large offsets to the register save area. */
7869 sa_bias = 0;
7870 sa_reg = stack_pointer_rtx;
7871 if (reg_offset + sa_size > 0x8000)
7873 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7874 rtx sa_bias_rtx;
7876 if (low + sa_size <= 0x8000)
7877 sa_bias = reg_offset - low, reg_offset = low;
7878 else
7879 sa_bias = reg_offset, reg_offset = 0;
7881 sa_reg = gen_rtx_REG (DImode, 24);
7882 sa_bias_rtx = GEN_INT (sa_bias);
7884 if (add_operand (sa_bias_rtx, DImode))
7885 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7886 else
7888 emit_move_insn (sa_reg, sa_bias_rtx);
7889 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7893 /* Save regs in stack order. Beginning with VMS PV. */
7894 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7895 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7897 /* Save register RA next, followed by any other registers
7898 that need to be saved. */
7899 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask))
7901 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7902 reg_offset += 8;
7903 sa_mask &= ~(HOST_WIDE_INT_1U << i);
7906 if (TARGET_ABI_OPEN_VMS)
7908 /* Register frame procedures save the fp. */
7909 if (alpha_procedure_type == PT_REGISTER)
7911 rtx_insn *insn =
7912 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7913 hard_frame_pointer_rtx);
7914 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7915 RTX_FRAME_RELATED_P (insn) = 1;
7918 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7919 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7920 gen_rtx_REG (DImode, REG_PV)));
7922 if (alpha_procedure_type != PT_NULL
7923 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7924 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7926 /* If we have to allocate space for outgoing args, do it now. */
7927 if (crtl->outgoing_args_size != 0)
7929 rtx_insn *seq
7930 = emit_move_insn (stack_pointer_rtx,
7931 plus_constant
7932 (Pmode, hard_frame_pointer_rtx,
7933 - (ALPHA_ROUND
7934 (crtl->outgoing_args_size))));
7936 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7937 if ! frame_pointer_needed. Setting the bit will change the CFA
7938 computation rule to use sp again, which would be wrong if we had
7939 frame_pointer_needed, as this means sp might move unpredictably
7940 later on.
7942 Also, note that
7943 frame_pointer_needed
7944 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7946 crtl->outgoing_args_size != 0
7947 => alpha_procedure_type != PT_NULL,
7949 so when we are not setting the bit here, we are guaranteed to
7950 have emitted an FRP frame pointer update just before. */
7951 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7954 else
7956 /* If we need a frame pointer, set it from the stack pointer. */
7957 if (frame_pointer_needed)
7959 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7960 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7961 else
7962 /* This must always be the last instruction in the
7963 prologue, thus we emit a special move + clobber. */
7964 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7965 stack_pointer_rtx, sa_reg)));
7969 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7970 the prologue, for exception handling reasons, we cannot do this for
7971 any insn that might fault. We could prevent this for mems with a
7972 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7973 have to prevent all such scheduling with a blockage.
7975 Linux, on the other hand, never bothered to implement OSF/1's
7976 exception handling, and so doesn't care about such things. Anyone
7977 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7979 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7980 emit_insn (gen_blockage ());
7983 /* Count the number of .file directives, so that .loc is up to date. */
7984 int num_source_filenames = 0;
7986 /* Output the textual info surrounding the prologue. */
7988 void
7989 alpha_start_function (FILE *file, const char *fnname, tree decl)
7991 unsigned long imask, fmask;
7992 /* Complete stack size needed. */
7993 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
7994 /* The maximum debuggable frame size. */
7995 const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31;
7996 /* Offset from base reg to register save area. */
7997 HOST_WIDE_INT reg_offset;
7998 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7999 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
8000 int i;
8002 #if TARGET_ABI_OPEN_VMS
8003 vms_start_function (fnname);
8004 #endif
8006 alpha_fnname = fnname;
8008 if (TARGET_ABI_OPEN_VMS)
8009 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8010 else
8011 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8013 imask = cfun->machine->sa_mask & 0xffffffffu;
8014 fmask = cfun->machine->sa_mask >> 32;
8016 /* Issue function start and label. */
8017 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
8019 fputs ("\t.ent ", file);
8020 assemble_name (file, fnname);
8021 putc ('\n', file);
8023 /* If the function needs GP, we'll write the "..ng" label there.
8024 Otherwise, do it here. */
8025 if (TARGET_ABI_OSF
8026 && ! alpha_function_needs_gp
8027 && ! cfun->is_thunk)
8029 putc ('$', file);
8030 assemble_name (file, fnname);
8031 fputs ("..ng:\n", file);
8034 /* Nested functions on VMS that are potentially called via trampoline
8035 get a special transfer entry point that loads the called function's
8036 procedure descriptor and static chain. */
8037 if (TARGET_ABI_OPEN_VMS
8038 && !TREE_PUBLIC (decl)
8039 && DECL_CONTEXT (decl)
8040 && !TYPE_P (DECL_CONTEXT (decl))
8041 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
8043 strcpy (tramp_label, fnname);
8044 strcat (tramp_label, "..tr");
8045 ASM_OUTPUT_LABEL (file, tramp_label);
8046 fprintf (file, "\tldq $1,24($27)\n");
8047 fprintf (file, "\tldq $27,16($27)\n");
8050 strcpy (entry_label, fnname);
8051 if (TARGET_ABI_OPEN_VMS)
8052 strcat (entry_label, "..en");
8054 ASM_OUTPUT_FUNCTION_LABEL (file, entry_label, decl);
8055 inside_function = TRUE;
8057 if (TARGET_ABI_OPEN_VMS)
8058 fprintf (file, "\t.base $%d\n", vms_base_regno);
8060 if (TARGET_ABI_OSF
8061 && TARGET_IEEE_CONFORMANT
8062 && !flag_inhibit_size_directive)
8064 /* Set flags in procedure descriptor to request IEEE-conformant
8065 math-library routines. The value we set it to is PDSC_EXC_IEEE
8066 (/usr/include/pdsc.h). */
8067 fputs ("\t.eflag 48\n", file);
8070 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8071 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8072 alpha_arg_offset = -frame_size + 48;
8074 /* Describe our frame. If the frame size is larger than an integer,
8075 print it as zero to avoid an assembler error. We won't be
8076 properly describing such a frame, but that's the best we can do. */
8077 if (TARGET_ABI_OPEN_VMS)
8078 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8079 HOST_WIDE_INT_PRINT_DEC "\n",
8080 vms_unwind_regno,
8081 frame_size >= max_frame_size ? 0 : frame_size,
8082 reg_offset);
8083 else if (!flag_inhibit_size_directive)
8084 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8085 (frame_pointer_needed
8086 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8087 frame_size >= max_frame_size ? 0 : frame_size,
8088 crtl->args.pretend_args_size);
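/* As a hypothetical illustration: an OSF/1 function with a 32-byte
   frame, no frame pointer and no pretend args gets
   ".frame $30,32,$26,0" from the directive above, describing
   unwinding relative to $sp.  */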
8090 /* Describe which registers were spilled. */
8091 if (TARGET_ABI_OPEN_VMS)
8093 if (imask)
8094 /* ??? Does VMS care if mask contains ra? The old code didn't
8095 set it, so I don't here. */
8096 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8097 if (fmask)
8098 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8099 if (alpha_procedure_type == PT_REGISTER)
8100 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8102 else if (!flag_inhibit_size_directive)
8104 if (imask)
8106 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8107 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8109 for (i = 0; i < 32; ++i)
8110 if (imask & (1UL << i))
8111 reg_offset += 8;
8114 if (fmask)
8115 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8116 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8119 #if TARGET_ABI_OPEN_VMS
8120 /* If a user condition handler has been installed at some point, emit
8121 the procedure descriptor bits to point the Condition Handling Facility
8122 at the indirection wrapper, and state the fp offset at which the user
8123 handler may be found. */
8124 if (cfun->machine->uses_condition_handler)
8126 fprintf (file, "\t.handler __gcc_shell_handler\n");
8127 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8130 #ifdef TARGET_VMS_CRASH_DEBUG
8131 /* Support of minimal traceback info. */
8132 switch_to_section (readonly_data_section);
8133 fprintf (file, "\t.align 3\n");
8134 assemble_name (file, fnname); fputs ("..na:\n", file);
8135 fputs ("\t.ascii \"", file);
8136 assemble_name (file, fnname);
8137 fputs ("\\0\"\n", file);
8138 switch_to_section (text_section);
8139 #endif
8140 #endif /* TARGET_ABI_OPEN_VMS */
8143 /* Emit the .prologue note at the scheduled end of the prologue. */
8145 static void
8146 alpha_output_function_end_prologue (FILE *file)
8148 if (TARGET_ABI_OPEN_VMS)
8149 fputs ("\t.prologue\n", file);
8150 else if (!flag_inhibit_size_directive)
8151 fprintf (file, "\t.prologue %d\n",
8152 alpha_function_needs_gp || cfun->is_thunk);
8155 /* Write function epilogue. */
8157 void
8158 alpha_expand_epilogue (void)
8160 /* Registers to save. */
8161 unsigned HOST_WIDE_INT sa_mask = cfun->machine->sa_mask;
8162 /* Stack space needed for pushing registers clobbered by us. */
8163 HOST_WIDE_INT sa_size = cfun->machine->sa_size;
8164 /* Complete stack size needed. */
8165 HOST_WIDE_INT frame_size = cfun->machine->frame_size;
8166 /* Offset from base reg to register save area. */
8167 HOST_WIDE_INT reg_offset;
8168 int fp_is_frame_pointer, fp_offset;
8169 rtx sa_reg, sa_reg_exp = NULL;
8170 rtx sp_adj1, sp_adj2, mem, reg, insn;
8171 rtx eh_ofs;
8172 rtx cfa_restores = NULL_RTX;
8174 if (TARGET_ABI_OPEN_VMS)
8176 if (alpha_procedure_type == PT_STACK)
8177 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8178 else
8179 reg_offset = 0;
8181 else
8182 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8184 fp_is_frame_pointer
8185 = (TARGET_ABI_OPEN_VMS
8186 ? alpha_procedure_type == PT_STACK
8187 : frame_pointer_needed);
8188 fp_offset = 0;
8189 sa_reg = stack_pointer_rtx;
8191 if (crtl->calls_eh_return)
8192 eh_ofs = EH_RETURN_STACKADJ_RTX;
8193 else
8194 eh_ofs = NULL_RTX;
8196 if (sa_size)
8198 /* If we have a frame pointer, restore SP from it. */
8199 if (TARGET_ABI_OPEN_VMS
8200 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8201 : frame_pointer_needed)
8202 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8204 /* Cope with very large offsets to the register save area. */
8205 if (reg_offset + sa_size > 0x8000)
8207 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8208 HOST_WIDE_INT bias;
8210 if (low + sa_size <= 0x8000)
8211 bias = reg_offset - low, reg_offset = low;
8212 else
8213 bias = reg_offset, reg_offset = 0;
8215 sa_reg = gen_rtx_REG (DImode, 22);
8216 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8218 emit_move_insn (sa_reg, sa_reg_exp);
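/* For example, with reg_offset == 0x12340 and a small save area, LOW is
   0x2340, so $22 is set to sp + 0x10000 and the saves below are reached
   as 0x2340($22), keeping every displacement within the signed 16-bit
   field of ldq.  */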
8221 /* Restore registers in order, excepting a true frame pointer. */
8222 for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi(sa_mask))
8224 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8225 fp_offset = reg_offset;
8226 else
8228 mem = gen_frame_mem (DImode,
8229 plus_constant (Pmode, sa_reg,
8230 reg_offset));
8231 reg = gen_rtx_REG (DImode, i);
8232 emit_move_insn (reg, mem);
8233 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8234 cfa_restores);
8236 reg_offset += 8;
8237 sa_mask &= ~(HOST_WIDE_INT_1U << i);
8241 if (frame_size || eh_ofs)
8243 sp_adj1 = stack_pointer_rtx;
8245 if (eh_ofs)
8247 sp_adj1 = gen_rtx_REG (DImode, 23);
8248 emit_move_insn (sp_adj1,
8249 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8252 /* If the stack size is large, begin computation into a temporary
8253 register so as not to interfere with a potential fp restore,
8254 which must be consecutive with an SP restore. */
8255 if (frame_size < 32768 && !cfun->calls_alloca)
8256 sp_adj2 = GEN_INT (frame_size);
8257 else if (frame_size < 0x40007fffL)
8259 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8261 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8262 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8263 sp_adj1 = sa_reg;
8264 else
8266 sp_adj1 = gen_rtx_REG (DImode, 23);
8267 emit_move_insn (sp_adj1, sp_adj2);
8269 sp_adj2 = GEN_INT (low);
8271 else
8273 rtx tmp = gen_rtx_REG (DImode, 23);
8274 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8275 if (!sp_adj2)
8277 /* We can't drop new things to memory this late, afaik,
8278 so build it up by pieces. */
8279 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8280 gcc_assert (sp_adj2);
8284 /* From now on, things must be in order. So emit blockages. */
8286 /* Restore the frame pointer. */
8287 if (fp_is_frame_pointer)
8289 emit_insn (gen_blockage ());
8290 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8291 fp_offset));
8292 emit_move_insn (hard_frame_pointer_rtx, mem);
8293 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8294 hard_frame_pointer_rtx, cfa_restores);
8296 else if (TARGET_ABI_OPEN_VMS)
8298 emit_insn (gen_blockage ());
8299 emit_move_insn (hard_frame_pointer_rtx,
8300 gen_rtx_REG (DImode, vms_save_fp_regno));
8301 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8302 hard_frame_pointer_rtx, cfa_restores);
8305 /* Restore the stack pointer. */
8306 emit_insn (gen_blockage ());
8307 if (sp_adj2 == const0_rtx)
8308 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8309 else
8310 insn = emit_move_insn (stack_pointer_rtx,
8311 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8312 REG_NOTES (insn) = cfa_restores;
8313 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8314 RTX_FRAME_RELATED_P (insn) = 1;
8316 else
8318 gcc_assert (cfa_restores == NULL);
8320 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8322 emit_insn (gen_blockage ());
8323 insn = emit_move_insn (hard_frame_pointer_rtx,
8324 gen_rtx_REG (DImode, vms_save_fp_regno));
8325 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8326 RTX_FRAME_RELATED_P (insn) = 1;
8331 /* Output the rest of the textual info surrounding the epilogue. */
8333 void
8334 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8336 rtx_insn *insn;
8338 /* We output a nop after noreturn calls at the very end of the function to
8339 ensure that the return address always remains in the caller's code range,
8340 as not doing so might confuse unwinding engines. */
8341 insn = get_last_insn ();
8342 if (!INSN_P (insn))
8343 insn = prev_active_insn (insn);
8344 if (insn && CALL_P (insn))
8345 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8347 #if TARGET_ABI_OPEN_VMS
8348 /* Write the linkage entries. */
8349 alpha_write_linkage (file, fnname);
8350 #endif
8352 /* End the function. */
8353 if (TARGET_ABI_OPEN_VMS
8354 || !flag_inhibit_size_directive)
8356 fputs ("\t.end ", file);
8357 assemble_name (file, fnname);
8358 putc ('\n', file);
8360 inside_function = FALSE;
8363 #if TARGET_ABI_OSF
8364 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8366 In order to avoid the hordes of differences between generated code
8367 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8368 lots of code loading up large constants, generate rtl and emit it
8369 instead of going straight to text.
8371 Not sure why this idea hasn't been explored before... */
8373 static void
8374 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8375 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8376 tree function)
8378 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8379 HOST_WIDE_INT hi, lo;
8380 rtx this_rtx, funexp;
8381 rtx_insn *insn;
8383 /* We always require a valid GP. */
8384 emit_insn (gen_prologue_ldgp ());
8385 emit_note (NOTE_INSN_PROLOGUE_END);
8387 /* Find the "this" pointer. If the function returns a structure,
8388 the structure return pointer is in $16. */
8389 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8390 this_rtx = gen_rtx_REG (Pmode, 17);
8391 else
8392 this_rtx = gen_rtx_REG (Pmode, 16);
8394 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8395 entire constant for the add. */
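/* For instance, DELTA == 0x9000 splits into HI == 0x10000 and
   LO == -0x7000, which come back together as an ldah of 1 followed by
   an lda of -28672 on the `this' register.  */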
8396 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8397 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8398 if (hi + lo == delta)
8400 if (hi)
8401 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8402 if (lo)
8403 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8405 else
8407 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8408 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8411 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8412 if (vcall_offset)
8414 rtx tmp, tmp2;
8416 tmp = gen_rtx_REG (Pmode, 0);
8417 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8419 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8420 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8421 if (hi + lo == vcall_offset)
8423 if (hi)
8424 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8426 else
8428 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8429 vcall_offset);
8430 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8431 lo = 0;
8433 if (lo)
8434 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8435 else
8436 tmp2 = tmp;
8437 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8439 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8442 /* Generate a tail call to the target function. */
8443 if (! TREE_USED (function))
8445 assemble_external (function);
8446 TREE_USED (function) = 1;
8448 funexp = XEXP (DECL_RTL (function), 0);
8449 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8450 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8451 SIBLING_CALL_P (insn) = 1;
8453 /* Run just enough of rest_of_compilation to get the insns emitted.
8454 There's not really enough bulk here to make other passes such as
8455 instruction scheduling worthwhile. */
8456 insn = get_insns ();
8457 shorten_branches (insn);
8458 assemble_start_function (thunk_fndecl, fnname);
8459 final_start_function (insn, file, 1);
8460 final (insn, file, 1);
8461 final_end_function ();
8462 assemble_end_function (thunk_fndecl, fnname);
8464 #endif /* TARGET_ABI_OSF */
8466 /* Name of the file containing the current function. */
8468 static const char *current_function_file = "";
8470 /* Offsets to alpha virtual arg/local debugging pointers. */
8472 long alpha_arg_offset;
8473 long alpha_auto_offset;
8475 /* Emit a new filename to a stream. */
8477 void
8478 alpha_output_filename (FILE *stream, const char *name)
8480 static int first_time = TRUE;
8482 if (first_time)
8484 first_time = FALSE;
8485 ++num_source_filenames;
8486 current_function_file = name;
8487 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8488 output_quoted_string (stream, name);
8489 fprintf (stream, "\n");
8492 else if (name != current_function_file
8493 && strcmp (name, current_function_file) != 0)
8495 ++num_source_filenames;
8496 current_function_file = name;
8497 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8499 output_quoted_string (stream, name);
8500 fprintf (stream, "\n");
8504 /* Structure to show the current status of registers and memory. */
8506 struct shadow_summary
8508 struct {
8509 unsigned int i : 31; /* Mask of int regs */
8510 unsigned int fp : 31; /* Mask of fp regs */
8511 unsigned int mem : 1; /* mem == imem | fpmem */
8512 } used, defd;
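/* The masks are only 31 bits wide because $31 and $f31 always read as
   zero and are never tracked; see the REG case in summarize_insn.  */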
8515 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8516 to the summary structure. SET is nonzero if the insn is setting the
8517 object, otherwise zero. */
8519 static void
8520 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8522 const char *format_ptr;
8523 int i, j;
8525 if (x == 0)
8526 return;
8528 switch (GET_CODE (x))
8530 /* ??? Note that this case would be incorrect if the Alpha had a
8531 ZERO_EXTRACT in SET_DEST. */
8532 case SET:
8533 summarize_insn (SET_SRC (x), sum, 0);
8534 summarize_insn (SET_DEST (x), sum, 1);
8535 break;
8537 case CLOBBER:
8538 summarize_insn (XEXP (x, 0), sum, 1);
8539 break;
8541 case USE:
8542 summarize_insn (XEXP (x, 0), sum, 0);
8543 break;
8545 case ASM_OPERANDS:
8546 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8547 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8548 break;
8550 case PARALLEL:
8551 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8552 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8553 break;
8555 case SUBREG:
8556 summarize_insn (SUBREG_REG (x), sum, 0);
8557 break;
8559 case REG:
8561 int regno = REGNO (x);
8562 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8564 if (regno == 31 || regno == 63)
8565 break;
8567 if (set)
8569 if (regno < 32)
8570 sum->defd.i |= mask;
8571 else
8572 sum->defd.fp |= mask;
8574 else
8576 if (regno < 32)
8577 sum->used.i |= mask;
8578 else
8579 sum->used.fp |= mask;
8582 break;
8584 case MEM:
8585 if (set)
8586 sum->defd.mem = 1;
8587 else
8588 sum->used.mem = 1;
8590 /* Find the regs used in memory address computation: */
8591 summarize_insn (XEXP (x, 0), sum, 0);
8592 break;
8594 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE:
8595 case SYMBOL_REF: case LABEL_REF: case CONST:
8596 case SCRATCH: case ASM_INPUT:
8597 break;
8599 /* Handle common unary and binary ops for efficiency. */
8600 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8601 case MOD: case UDIV: case UMOD: case AND: case IOR:
8602 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8603 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8604 case NE: case EQ: case GE: case GT: case LE:
8605 case LT: case GEU: case GTU: case LEU: case LTU:
8606 summarize_insn (XEXP (x, 0), sum, 0);
8607 summarize_insn (XEXP (x, 1), sum, 0);
8608 break;
8610 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8611 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8612 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8613 case SQRT: case FFS:
8614 summarize_insn (XEXP (x, 0), sum, 0);
8615 break;
8617 default:
8618 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8619 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8620 switch (format_ptr[i])
8622 case 'e':
8623 summarize_insn (XEXP (x, i), sum, 0);
8624 break;
8626 case 'E':
8627 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8628 summarize_insn (XVECEXP (x, i, j), sum, 0);
8629 break;
8631 case 'i':
8632 break;
8634 default:
8635 gcc_unreachable ();
8640 /* Ensure a sufficient number of `trapb' insns are in the code when
8641 the user requests code with a trap precision of functions or
8642 instructions.
8644 In naive mode, when the user requests a trap-precision of
8645 "instruction", a trapb is needed after every instruction that may
8646 generate a trap. This ensures that the code is resumption safe but
8647 it is also slow.
8649 When optimizations are turned on, we delay issuing a trapb as long
8650 as possible. In this context, a trap shadow is the sequence of
8651 instructions that starts with a (potentially) trap generating
8652 instruction and extends to the next trapb or call_pal instruction
8653 (but GCC never generates call_pal by itself). We can delay (and
8654 therefore sometimes omit) a trapb subject to the following
8655 conditions:
8657 (a) On entry to the trap shadow, if any Alpha register or memory
8658 location contains a value that is used as an operand value by some
8659 instruction in the trap shadow (live on entry), then no instruction
8660 in the trap shadow may modify the register or memory location.
8662 (b) Within the trap shadow, the computation of the base register
8663 for a memory load or store instruction may not involve using the
8664 result of an instruction that might generate an UNPREDICTABLE
8665 result.
8667 (c) Within the trap shadow, no register may be used more than once
8668 as a destination register. (This is to make life easier for the
8669 trap-handler.)
8671 (d) The trap shadow may not include any branch instructions. */
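/* For example, once a "mull/v $1,$2,$3" opens a shadow, no later insn in
   that shadow may overwrite $1 or $2 (rule (a)), compute a load or store
   address from $3 (rule (b)), or write $3 again (rule (c)); the shadow
   is closed by the next trapb, call_pal or branch.  */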
8673 static void
8674 alpha_handle_trap_shadows (void)
8676 struct shadow_summary shadow;
8677 int trap_pending, exception_nesting;
8678 rtx_insn *i, *n;
8680 trap_pending = 0;
8681 exception_nesting = 0;
8682 shadow.used.i = 0;
8683 shadow.used.fp = 0;
8684 shadow.used.mem = 0;
8685 shadow.defd = shadow.used;
8687 for (i = get_insns (); i ; i = NEXT_INSN (i))
8689 if (NOTE_P (i))
8691 switch (NOTE_KIND (i))
8693 case NOTE_INSN_EH_REGION_BEG:
8694 exception_nesting++;
8695 if (trap_pending)
8696 goto close_shadow;
8697 break;
8699 case NOTE_INSN_EH_REGION_END:
8700 exception_nesting--;
8701 if (trap_pending)
8702 goto close_shadow;
8703 break;
8705 case NOTE_INSN_EPILOGUE_BEG:
8706 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8707 goto close_shadow;
8708 break;
8711 else if (trap_pending)
8713 if (alpha_tp == ALPHA_TP_FUNC)
8715 if (JUMP_P (i)
8716 && GET_CODE (PATTERN (i)) == RETURN)
8717 goto close_shadow;
8719 else if (alpha_tp == ALPHA_TP_INSN)
8721 if (optimize > 0)
8723 struct shadow_summary sum;
8725 sum.used.i = 0;
8726 sum.used.fp = 0;
8727 sum.used.mem = 0;
8728 sum.defd = sum.used;
8730 switch (GET_CODE (i))
8732 case INSN:
8733 /* Annoyingly, get_attr_trap will die on these. */
8734 if (GET_CODE (PATTERN (i)) == USE
8735 || GET_CODE (PATTERN (i)) == CLOBBER)
8736 break;
8738 summarize_insn (PATTERN (i), &sum, 0);
8740 if ((sum.defd.i & shadow.defd.i)
8741 || (sum.defd.fp & shadow.defd.fp))
8743 /* (c) would be violated */
8744 goto close_shadow;
8747 /* Combine shadow with summary of current insn: */
8748 shadow.used.i |= sum.used.i;
8749 shadow.used.fp |= sum.used.fp;
8750 shadow.used.mem |= sum.used.mem;
8751 shadow.defd.i |= sum.defd.i;
8752 shadow.defd.fp |= sum.defd.fp;
8753 shadow.defd.mem |= sum.defd.mem;
8755 if ((sum.defd.i & shadow.used.i)
8756 || (sum.defd.fp & shadow.used.fp)
8757 || (sum.defd.mem & shadow.used.mem))
8759 /* (a) would be violated (also takes care of (b)) */
8760 gcc_assert (get_attr_trap (i) != TRAP_YES
8761 || (!(sum.defd.i & sum.used.i)
8762 && !(sum.defd.fp & sum.used.fp)));
8764 goto close_shadow;
8766 break;
8768 case BARRIER:
8769 /* __builtin_unreachable can expand to no code at all,
8770 leaving (barrier) RTXes in the instruction stream. */
8771 goto close_shadow_notrapb;
8773 case JUMP_INSN:
8774 case CALL_INSN:
8775 case CODE_LABEL:
8776 goto close_shadow;
8778 case DEBUG_INSN:
8779 break;
8781 default:
8782 gcc_unreachable ();
8785 else
8787 close_shadow:
8788 n = emit_insn_before (gen_trapb (), i);
8789 PUT_MODE (n, TImode);
8790 PUT_MODE (i, TImode);
8791 close_shadow_notrapb:
8792 trap_pending = 0;
8793 shadow.used.i = 0;
8794 shadow.used.fp = 0;
8795 shadow.used.mem = 0;
8796 shadow.defd = shadow.used;
8801 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8802 && NONJUMP_INSN_P (i)
8803 && GET_CODE (PATTERN (i)) != USE
8804 && GET_CODE (PATTERN (i)) != CLOBBER
8805 && get_attr_trap (i) == TRAP_YES)
8807 if (optimize && !trap_pending)
8808 summarize_insn (PATTERN (i), &shadow, 0);
8809 trap_pending = 1;
8814 /* Alpha can only issue instruction groups simultaneously if they are
8815 suitably aligned. This is very processor-specific. */
8816 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8817 that are marked "fake". These instructions do not exist on that target,
8818 but it is possible to see these insns with deranged combinations of
8819 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8820 choose a result at random. */
8822 enum alphaev4_pipe {
8823 EV4_STOP = 0,
8824 EV4_IB0 = 1,
8825 EV4_IB1 = 2,
8826 EV4_IBX = 4
8829 enum alphaev5_pipe {
8830 EV5_STOP = 0,
8831 EV5_NONE = 1,
8832 EV5_E01 = 2,
8833 EV5_E0 = 4,
8834 EV5_E1 = 8,
8835 EV5_FAM = 16,
8836 EV5_FA = 32,
8837 EV5_FM = 64
8840 static enum alphaev4_pipe
8841 alphaev4_insn_pipe (rtx_insn *insn)
8843 if (recog_memoized (insn) < 0)
8844 return EV4_STOP;
8845 if (get_attr_length (insn) != 4)
8846 return EV4_STOP;
8848 switch (get_attr_type (insn))
8850 case TYPE_ILD:
8851 case TYPE_LDSYM:
8852 case TYPE_FLD:
8853 case TYPE_LD_L:
8854 return EV4_IBX;
8856 case TYPE_IADD:
8857 case TYPE_ILOG:
8858 case TYPE_ICMOV:
8859 case TYPE_ICMP:
8860 case TYPE_FST:
8861 case TYPE_SHIFT:
8862 case TYPE_IMUL:
8863 case TYPE_FBR:
8864 case TYPE_MVI: /* fake */
8865 return EV4_IB0;
8867 case TYPE_IST:
8868 case TYPE_MISC:
8869 case TYPE_IBR:
8870 case TYPE_JSR:
8871 case TYPE_CALLPAL:
8872 case TYPE_FCPYS:
8873 case TYPE_FCMOV:
8874 case TYPE_FADD:
8875 case TYPE_FDIV:
8876 case TYPE_FMUL:
8877 case TYPE_ST_C:
8878 case TYPE_MB:
8879 case TYPE_FSQRT: /* fake */
8880 case TYPE_FTOI: /* fake */
8881 case TYPE_ITOF: /* fake */
8882 return EV4_IB1;
8884 default:
8885 gcc_unreachable ();
8889 static enum alphaev5_pipe
8890 alphaev5_insn_pipe (rtx_insn *insn)
8892 if (recog_memoized (insn) < 0)
8893 return EV5_STOP;
8894 if (get_attr_length (insn) != 4)
8895 return EV5_STOP;
8897 switch (get_attr_type (insn))
8899 case TYPE_ILD:
8900 case TYPE_FLD:
8901 case TYPE_LDSYM:
8902 case TYPE_IADD:
8903 case TYPE_ILOG:
8904 case TYPE_ICMOV:
8905 case TYPE_ICMP:
8906 return EV5_E01;
8908 case TYPE_IST:
8909 case TYPE_FST:
8910 case TYPE_SHIFT:
8911 case TYPE_IMUL:
8912 case TYPE_MISC:
8913 case TYPE_MVI:
8914 case TYPE_LD_L:
8915 case TYPE_ST_C:
8916 case TYPE_MB:
8917 case TYPE_FTOI: /* fake */
8918 case TYPE_ITOF: /* fake */
8919 return EV5_E0;
8921 case TYPE_IBR:
8922 case TYPE_JSR:
8923 case TYPE_CALLPAL:
8924 return EV5_E1;
8926 case TYPE_FCPYS:
8927 return EV5_FAM;
8929 case TYPE_FBR:
8930 case TYPE_FCMOV:
8931 case TYPE_FADD:
8932 case TYPE_FDIV:
8933 case TYPE_FSQRT: /* fake */
8934 return EV5_FA;
8936 case TYPE_FMUL:
8937 return EV5_FM;
8939 default:
8940 gcc_unreachable ();
8944 /* IN_USE is a mask of the slots currently filled within the insn group.
8945 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8946 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8948 LEN is, of course, the length of the group in bytes. */
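/* So, for example, a load (EV4_IBX) followed by an add (EV4_IB0) still
   forms one group: the load first claims IB0 and records IBX, and when
   the add arrives the load is assumed to slide over into IB1.  */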
8950 static rtx_insn *
8951 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8953 int len, in_use;
8955 len = in_use = 0;
8957 if (! INSN_P (insn)
8958 || GET_CODE (PATTERN (insn)) == CLOBBER
8959 || GET_CODE (PATTERN (insn)) == USE)
8960 goto next_and_done;
8962 while (1)
8964 enum alphaev4_pipe pipe;
8966 pipe = alphaev4_insn_pipe (insn);
8967 switch (pipe)
8969 case EV4_STOP:
8970 /* Force complex instructions to start new groups. */
8971 if (in_use)
8972 goto done;
8974 /* If this is a completely unrecognized insn, it's an asm.
8975 We don't know how long it is, so record length as -1 to
8976 signal a needed realignment. */
8977 if (recog_memoized (insn) < 0)
8978 len = -1;
8979 else
8980 len = get_attr_length (insn);
8981 goto next_and_done;
8983 case EV4_IBX:
8984 if (in_use & EV4_IB0)
8986 if (in_use & EV4_IB1)
8987 goto done;
8988 in_use |= EV4_IB1;
8990 else
8991 in_use |= EV4_IB0 | EV4_IBX;
8992 break;
8994 case EV4_IB0:
8995 if (in_use & EV4_IB0)
8997 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8998 goto done;
8999 in_use |= EV4_IB1;
9001 in_use |= EV4_IB0;
9002 break;
9004 case EV4_IB1:
9005 if (in_use & EV4_IB1)
9006 goto done;
9007 in_use |= EV4_IB1;
9008 break;
9010 default:
9011 gcc_unreachable ();
9013 len += 4;
9015 /* Haifa doesn't do well scheduling branches. */
9016 if (JUMP_P (insn))
9017 goto next_and_done;
9019 next:
9020 insn = next_nonnote_insn (insn);
9022 if (!insn || ! INSN_P (insn))
9023 goto done;
9025 /* Let Haifa tell us where it thinks insn group boundaries are. */
9026 if (GET_MODE (insn) == TImode)
9027 goto done;
9029 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9030 goto next;
9033 next_and_done:
9034 insn = next_nonnote_insn (insn);
9036 done:
9037 *plen = len;
9038 *pin_use = in_use;
9039 return insn;
9042 /* IN_USE is a mask of the slots currently filled within the insn group.
9043 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9044 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9046 LEN is, of course, the length of the group in bytes. */
9048 static rtx_insn *
9049 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9051 int len, in_use;
9053 len = in_use = 0;
9055 if (! INSN_P (insn)
9056 || GET_CODE (PATTERN (insn)) == CLOBBER
9057 || GET_CODE (PATTERN (insn)) == USE)
9058 goto next_and_done;
9060 while (1)
9062 enum alphaev5_pipe pipe;
9064 pipe = alphaev5_insn_pipe (insn);
9065 switch (pipe)
9067 case EV5_STOP:
9068 /* Force complex instructions to start new groups. */
9069 if (in_use)
9070 goto done;
9072 /* If this is a completely unrecognized insn, it's an asm.
9073 We don't know how long it is, so record length as -1 to
9074 signal a needed realignment. */
9075 if (recog_memoized (insn) < 0)
9076 len = -1;
9077 else
9078 len = get_attr_length (insn);
9079 goto next_and_done;
9081 /* ??? Most of the places below, we would like to assert never
9082 happen, as it would indicate an error either in Haifa, or
9083 in the scheduling description. Unfortunately, Haifa never
9084 schedules the last instruction of the BB, so we don't have
9085 an accurate TI bit to go off. */
9086 case EV5_E01:
9087 if (in_use & EV5_E0)
9089 if (in_use & EV5_E1)
9090 goto done;
9091 in_use |= EV5_E1;
9093 else
9094 in_use |= EV5_E0 | EV5_E01;
9095 break;
9097 case EV5_E0:
9098 if (in_use & EV5_E0)
9100 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9101 goto done;
9102 in_use |= EV5_E1;
9104 in_use |= EV5_E0;
9105 break;
9107 case EV5_E1:
9108 if (in_use & EV5_E1)
9109 goto done;
9110 in_use |= EV5_E1;
9111 break;
9113 case EV5_FAM:
9114 if (in_use & EV5_FA)
9116 if (in_use & EV5_FM)
9117 goto done;
9118 in_use |= EV5_FM;
9120 else
9121 in_use |= EV5_FA | EV5_FAM;
9122 break;
9124 case EV5_FA:
9125 if (in_use & EV5_FA)
9126 goto done;
9127 in_use |= EV5_FA;
9128 break;
9130 case EV5_FM:
9131 if (in_use & EV5_FM)
9132 goto done;
9133 in_use |= EV5_FM;
9134 break;
9136 case EV5_NONE:
9137 break;
9139 default:
9140 gcc_unreachable ();
9142 len += 4;
9144 /* Haifa doesn't do well scheduling branches. */
9145 /* ??? If this is predicted not-taken, slotting continues, except
9146 that no more IBR, FBR, or JSR insns may be slotted. */
9147 if (JUMP_P (insn))
9148 goto next_and_done;
9150 next:
9151 insn = next_nonnote_insn (insn);
9153 if (!insn || ! INSN_P (insn))
9154 goto done;
9156 /* Let Haifa tell us where it thinks insn group boundaries are. */
9157 if (GET_MODE (insn) == TImode)
9158 goto done;
9160 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9161 goto next;
9164 next_and_done:
9165 insn = next_nonnote_insn (insn);
9167 done:
9168 *plen = len;
9169 *pin_use = in_use;
9170 return insn;
9173 static rtx
9174 alphaev4_next_nop (int *pin_use)
9176 int in_use = *pin_use;
9177 rtx nop;
9179 if (!(in_use & EV4_IB0))
9181 in_use |= EV4_IB0;
9182 nop = gen_nop ();
9184 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9186 in_use |= EV4_IB1;
9187 nop = gen_nop ();
9189 else if (TARGET_FP && !(in_use & EV4_IB1))
9191 in_use |= EV4_IB1;
9192 nop = gen_fnop ();
9194 else
9195 nop = gen_unop ();
9197 *pin_use = in_use;
9198 return nop;
9201 static rtx
9202 alphaev5_next_nop (int *pin_use)
9204 int in_use = *pin_use;
9205 rtx nop;
9207 if (!(in_use & EV5_E1))
9209 in_use |= EV5_E1;
9210 nop = gen_nop ();
9212 else if (TARGET_FP && !(in_use & EV5_FA))
9214 in_use |= EV5_FA;
9215 nop = gen_fnop ();
9217 else if (TARGET_FP && !(in_use & EV5_FM))
9219 in_use |= EV5_FM;
9220 nop = gen_fnop ();
9222 else
9223 nop = gen_unop ();
9225 *pin_use = in_use;
9226 return nop;
9229 /* The instruction group alignment main loop. */
9231 static void
9232 alpha_align_insns_1 (unsigned int max_align,
9233 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9234 rtx (*next_nop) (int *))
9236 /* ALIGN is the known alignment for the insn group. */
9237 unsigned int align;
9238 /* OFS is the offset of the current insn in the insn group. */
9239 int ofs;
9240 int prev_in_use, in_use, len, ldgp;
9241 rtx_insn *i, *next;
9243 /* Let shorten_branches take care of assigning alignments to code labels. */
9244 shorten_branches (get_insns ());
9246 unsigned int option_alignment = align_functions.levels[0].get_value ();
9247 if (option_alignment < 4)
9248 align = 4;
9249 else if ((unsigned int) option_alignment < max_align)
9250 align = option_alignment;
9251 else
9252 align = max_align;
9254 ofs = prev_in_use = 0;
9255 i = get_insns ();
9256 if (NOTE_P (i))
9257 i = next_nonnote_insn (i);
9259 ldgp = alpha_function_needs_gp ? 8 : 0;
9261 while (i)
9263 next = (*next_group) (i, &in_use, &len);
9265 /* When we see a label, resync alignment etc. */
9266 if (LABEL_P (i))
9268 unsigned int new_align
9269 = label_to_alignment (i).levels[0].get_value ();
9271 if (new_align >= align)
9273 align = new_align < max_align ? new_align : max_align;
9274 ofs = 0;
9277 else if (ofs & (new_align-1))
9278 ofs = (ofs | (new_align-1)) + 1;
9279 gcc_assert (!len);
9282 /* Handle complex instructions specially. */
9283 else if (in_use == 0)
9285 /* Asms will have length < 0. This is a signal that we have
9286 lost alignment knowledge. Assume, however, that the asm
9287 will not mis-align instructions. */
9288 if (len < 0)
9290 ofs = 0;
9291 align = 4;
9292 len = 0;
9296 /* If the known alignment is smaller than the recognized insn group,
9297 realign the output. */
9298 else if ((int) align < len)
9300 unsigned int new_log_align = len > 8 ? 4 : 3;
9301 rtx_insn *prev, *where;
9303 where = prev = prev_nonnote_insn (i);
9304 if (!where || !LABEL_P (where))
9305 where = i;
9307 /* Can't realign between a call and its gp reload. */
9308 if (! (TARGET_EXPLICIT_RELOCS
9309 && prev && CALL_P (prev)))
9311 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9312 align = 1 << new_log_align;
9313 ofs = 0;
9317 /* We may not insert padding inside the initial ldgp sequence. */
9318 else if (ldgp > 0)
9319 ldgp -= len;
9321 /* If the group won't fit in the same INT16 as the previous,
9322 we need to add padding to keep the group together. Rather
9323 than simply leaving the insn filling to the assembler, we
9324 can make use of the knowledge of what sorts of instructions
9325 were issued in the previous group to make sure that all of
9326 the added nops are really free. */
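/* E.g. with ALIGN == 16 and OFS == 8, an incoming 12-byte group does not
   fit, so (16 - 8) / 4 == 2 nops are emitted and the group starts at the
   next 16-byte boundary.  */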
9327 else if (ofs + len > (int) align)
9329 int nop_count = (align - ofs) / 4;
9330 rtx_insn *where;
9332 /* Insert nops before labels, branches, and calls to truly merge
9333 the execution of the nops with the previous instruction group. */
9334 where = prev_nonnote_insn (i);
9335 if (where)
9337 if (LABEL_P (where))
9339 rtx_insn *where2 = prev_nonnote_insn (where);
9340 if (where2 && JUMP_P (where2))
9341 where = where2;
9343 else if (NONJUMP_INSN_P (where))
9344 where = i;
9346 else
9347 where = i;
9350 emit_insn_before ((*next_nop)(&prev_in_use), where);
9351 while (--nop_count);
9352 ofs = 0;
9355 ofs = (ofs + len) & (align - 1);
9356 prev_in_use = in_use;
9357 i = next;
9361 static void
9362 alpha_align_insns (void)
9364 if (alpha_tune == PROCESSOR_EV4)
9365 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9366 else if (alpha_tune == PROCESSOR_EV5)
9367 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9368 else
9369 gcc_unreachable ();
9372 /* Insert an unop between sibcall or noreturn function call and GP load. */
9374 static void
9375 alpha_pad_function_end (void)
9377 rtx_insn *insn, *next;
9379 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9381 if (!CALL_P (insn)
9382 || !(SIBLING_CALL_P (insn)
9383 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9384 continue;
9386 next = next_active_insn (insn);
9387 if (next)
9389 rtx pat = PATTERN (next);
9391 if (GET_CODE (pat) == SET
9392 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9393 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9394 emit_insn_after (gen_unop (), insn);
9399 /* Machine dependent reorg pass. */
9401 static void
9402 alpha_reorg (void)
9404 /* Workaround for a linker error that triggers when an exception
9405 handler immediately follows a sibcall or a noreturn function.
9407 In the sibcall case:
9409 The instruction stream from an object file:
9411 1d8: 00 00 fb 6b jmp (t12)
9412 1dc: 00 00 ba 27 ldah gp,0(ra)
9413 1e0: 00 00 bd 23 lda gp,0(gp)
9414 1e4: 00 00 7d a7 ldq t12,0(gp)
9415 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9417 was converted in the final link pass to:
9419 12003aa88: 67 fa ff c3 br 120039428 <...>
9420 12003aa8c: 00 00 fe 2f unop
9421 12003aa90: 00 00 fe 2f unop
9422 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9423 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9425 And in the noreturn case:
9427 The instruction stream from an object file:
9429 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9430 58: 00 00 ba 27 ldah gp,0(ra)
9431 5c: 00 00 bd 23 lda gp,0(gp)
9432 60: 00 00 7d a7 ldq t12,0(gp)
9433 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9435 was converted in the final link pass to:
9437 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9438 fdb28: 00 00 fe 2f unop
9439 fdb2c: 00 00 fe 2f unop
9440 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9441 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9443 GP load instructions were wrongly cleared by the linker relaxation
9444 pass. This workaround prevents removal of GP loads by inserting
9445 an unop instruction between a sibcall or noreturn function call and
9446 exception handler prologue. */
9448 if (current_function_has_exception_handlers ())
9449 alpha_pad_function_end ();
9451 /* The CALL_PAL that implements a trap insn updates the program counter to
9452 point past the insn. In case the trap is the last insn in the function,
9453 emit a NOP to guarantee that the PC remains inside the function boundaries.
9454 This workaround is needed to get reliable backtraces. */
9456 rtx_insn *insn = prev_active_insn (get_last_insn ());
9458 if (insn && NONJUMP_INSN_P (insn))
9460 rtx pat = PATTERN (insn);
9461 if (GET_CODE (pat) == PARALLEL)
9463 rtx vec = XVECEXP (pat, 0, 0);
9464 if (GET_CODE (vec) == TRAP_IF
9465 && XEXP (vec, 0) == const1_rtx)
9466 emit_insn_after (gen_unop (), insn);
9471 static void
9472 alpha_file_start (void)
9474 default_file_start ();
9476 fputs ("\t.set noreorder\n", asm_out_file);
9477 fputs ("\t.set volatile\n", asm_out_file);
9478 if (TARGET_ABI_OSF)
9479 fputs ("\t.set noat\n", asm_out_file);
9480 if (TARGET_EXPLICIT_RELOCS)
9481 fputs ("\t.set nomacro\n", asm_out_file);
9482 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9484 const char *arch;
9486 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9487 arch = "ev6";
9488 else if (TARGET_MAX)
9489 arch = "pca56";
9490 else if (TARGET_BWX)
9491 arch = "ev56";
9492 else if (alpha_cpu == PROCESSOR_EV5)
9493 arch = "ev5";
9494 else
9495 arch = "ev4";
9497 fprintf (asm_out_file, "\t.arch %s\n", arch);
9501 /* Since we don't have a .dynbss section, we should not allow global
9502 relocations in the .rodata section. */
9504 static int
9505 alpha_elf_reloc_rw_mask (void)
9507 return flag_pic ? 3 : 2;
9510 /* Return a section for X. The only special thing we do here is to
9511 honor small data. */
9513 static section *
9514 alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9515 unsigned HOST_WIDE_INT align)
9517 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9518 /* ??? Consider using mergeable sdata sections. */
9519 return sdata_section;
9520 else
9521 return default_elf_select_rtx_section (mode, x, align);
9524 static unsigned int
9525 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9527 unsigned int flags = 0;
9529 if (strcmp (name, ".sdata") == 0
9530 || startswith (name, ".sdata.")
9531 || startswith (name, ".gnu.linkonce.s.")
9532 || strcmp (name, ".sbss") == 0
9533 || startswith (name, ".sbss.")
9534 || startswith (name, ".gnu.linkonce.sb."))
9535 flags = SECTION_SMALL;
9537 flags |= default_section_type_flags (decl, name, reloc);
9538 return flags;
9541 /* Structure to collect function names for final output in link section. */
9542 /* Note that items marked with GTY can't be ifdef'ed out. */
9544 enum reloc_kind
9546 KIND_LINKAGE,
9547 KIND_CODEADDR
9550 struct GTY(()) alpha_links
9552 rtx func;
9553 rtx linkage;
9554 enum reloc_kind rkind;
9557 #if TARGET_ABI_OPEN_VMS
9559 /* Return the VMS argument type corresponding to MODE. */
9561 enum avms_arg_type
9562 alpha_arg_type (machine_mode mode)
9564 switch (mode)
9566 case E_SFmode:
9567 return TARGET_FLOAT_VAX ? FF : FS;
9568 case E_DFmode:
9569 return TARGET_FLOAT_VAX ? FD : FT;
9570 default:
9571 return I64;
9575 /* Return an rtx for an integer representing the VMS Argument Information
9576 register value. */
9579 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9581 unsigned HOST_WIDE_INT regval = cum.num_args;
9582 int i;
9584 for (i = 0; i < 6; i++)
9585 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9587 return GEN_INT (regval);
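/* The resulting AI value thus holds the argument count in its low eight
   bits and the 3-bit VMS type code of argument I in bits I*3+8 through
   I*3+10, so all six register arguments are described in one value.  */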
9591 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9592 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9593 this is the reference to the linkage pointer value, 0 if this is the
9594 reference to the function entry value. RFLAG is 1 if this a reduced
9595 reference (code address only), 0 if this is a full reference. */
9598 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9600 struct alpha_links *al = NULL;
9601 const char *name = XSTR (func, 0);
9603 if (cfun->machine->links)
9605 /* Is this name already defined? */
9606 alpha_links **slot = cfun->machine->links->get (name);
9607 if (slot)
9608 al = *slot;
9610 else
9611 cfun->machine->links
9612 = hash_map<nofree_string_hash, alpha_links *>::create_ggc (64);
9614 if (al == NULL)
9616 size_t buf_len;
9617 char *linksym;
9618 tree id;
9620 if (name[0] == '*')
9621 name++;
9623 /* Follow transparent alias, as this is used for CRTL translations. */
9624 id = maybe_get_identifier (name);
9625 if (id)
9627 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9628 id = TREE_CHAIN (id);
9629 name = IDENTIFIER_POINTER (id);
9632 buf_len = strlen (name) + 8 + 9;
9633 linksym = (char *) alloca (buf_len);
9634 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
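/* E.g. a function whose funcdef_no is 0 that calls FOO gets a linkage
   symbol named "$0..FOO..lk".  */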
9636 al = ggc_alloc<alpha_links> ();
9637 al->func = func;
9638 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9640 cfun->machine->links->put (ggc_strdup (name), al);
9643 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9645 if (lflag)
9646 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9647 else
9648 return al->linkage;
9651 static int
9652 alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9654 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9655 if (link->rkind == KIND_CODEADDR)
9657 /* External and used, request code address. */
9658 fprintf (stream, "\t.code_address ");
9660 else
9662 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9663 && SYMBOL_REF_LOCAL_P (link->func))
9665 /* Locally defined, build linkage pair. */
9666 fprintf (stream, "\t.quad %s..en\n", name);
9667 fprintf (stream, "\t.quad ");
9669 else
9671 /* External, request linkage pair. */
9672 fprintf (stream, "\t.linkage ");
9675 assemble_name (stream, name);
9676 fputs ("\n", stream);
9678 return 0;
9681 static void
9682 alpha_write_linkage (FILE *stream, const char *funname)
9684 fprintf (stream, "\t.link\n");
9685 fprintf (stream, "\t.align 3\n");
9686 in_section = NULL;
9688 #ifdef TARGET_VMS_CRASH_DEBUG
9689 fputs ("\t.name ", stream);
9690 assemble_name (stream, funname);
9691 fputs ("..na\n", stream);
9692 #endif
9694 ASM_OUTPUT_LABEL (stream, funname);
9695 fprintf (stream, "\t.pdesc ");
9696 assemble_name (stream, funname);
9697 fprintf (stream, "..en,%s\n",
9698 alpha_procedure_type == PT_STACK ? "stack"
9699 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9701 if (cfun->machine->links)
9703 hash_map<nofree_string_hash, alpha_links *>::iterator iter
9704 = cfun->machine->links->begin ();
9705 for (; iter != cfun->machine->links->end (); ++iter)
9706 alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9710 /* Switch to an arbitrary section NAME with attributes as specified
9711 by FLAGS. ALIGN specifies any known alignment requirements for
9712 the section; 0 if the default should be used. */
9714 static void
9715 vms_asm_named_section (const char *name, unsigned int flags,
9716 tree decl ATTRIBUTE_UNUSED)
9718 fputc ('\n', asm_out_file);
9719 fprintf (asm_out_file, ".section\t%s", name);
9721 if (flags & SECTION_DEBUG)
9722 fprintf (asm_out_file, ",NOWRT");
9724 fputc ('\n', asm_out_file);
9727 /* Record an element in the table of global constructors. SYMBOL is
9728 a SYMBOL_REF of the function to be called; PRIORITY is a number
9729 between 0 and MAX_INIT_PRIORITY.
9731 Differs from default_ctors_section_asm_out_constructor in that the
9732 width of the .ctors entry is always 64 bits, rather than the 32 bits
9733 used by a normal pointer. */
9735 static void
9736 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9738 switch_to_section (ctors_section);
9739 assemble_align (BITS_PER_WORD);
9740 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9743 static void
9744 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9746 switch_to_section (dtors_section);
9747 assemble_align (BITS_PER_WORD);
9748 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9750 #else
9752 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9753 bool lflag ATTRIBUTE_UNUSED,
9754 bool rflag ATTRIBUTE_UNUSED)
9756 return NULL_RTX;
9759 #endif /* TARGET_ABI_OPEN_VMS */
9761 static void
9762 alpha_init_libfuncs (void)
9764 if (TARGET_ABI_OPEN_VMS)
9766 /* Use the VMS runtime library functions for division and
9767 remainder. */
9768 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9769 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9770 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9771 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9772 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9773 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9774 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9775 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9776 #ifdef MEM_LIBFUNCS_INIT
9777 MEM_LIBFUNCS_INIT;
9778 #endif
9782 /* On the Alpha, we use this to disable the floating-point registers
9783 when they don't exist. */
9785 static void
9786 alpha_conditional_register_usage (void)
9788 int i;
9789 if (! TARGET_FPREGS)
9790 for (i = 32; i < 63; i++)
9791 fixed_regs[i] = call_used_regs[i] = 1;
9794 /* Canonicalize a comparison from one we don't have to one we do have. */
9796 static void
9797 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9798 bool op0_preserve_value)
9800 if (!op0_preserve_value
9801 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9802 && (REG_P (*op1) || *op1 == const0_rtx))
9804 std::swap (*op0, *op1);
9805 *code = (int)swap_condition ((enum rtx_code)*code);
9808 if ((*code == LT || *code == LTU)
9809 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9811 *code = *code == LT ? LE : LEU;
9812 *op1 = GEN_INT (255);
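/* That is, "x < 256" becomes "x <= 255"; 255 fits in the 8-bit literal
   field of the compare instructions, whereas 256 would have to be
   loaded into a register first.  */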
9816 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9818 static void
9819 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9821 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9823 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9824 tree new_fenv_var, reload_fenv, restore_fnenv;
9825 tree update_call, atomic_feraiseexcept, hold_fnclex;
9827 /* Assume OSF/1 compatible interfaces. */
9828 if (!TARGET_ABI_OSF)
9829 return;
9831 /* Generate the equivalent of :
9832 unsigned long fenv_var;
9833 fenv_var = __ieee_get_fp_control ();
9835 unsigned long masked_fenv;
9836 masked_fenv = fenv_var & mask;
9838 __ieee_set_fp_control (masked_fenv); */
9840 fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9841 get_fpscr
9842 = build_fn_decl ("__ieee_get_fp_control",
9843 build_function_type_list (long_unsigned_type_node, NULL));
9844 set_fpscr
9845 = build_fn_decl ("__ieee_set_fp_control",
9846 build_function_type_list (void_type_node, NULL));
9847 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9848 ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var,
9849 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9850 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9851 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9852 *hold = build2 (COMPOUND_EXPR, void_type_node,
9853 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9854 hold_fnclex);
9856 /* Store the value of masked_fenv to clear the exceptions:
9857 __ieee_set_fp_control (masked_fenv); */
9859 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9861 /* Generate the equivalent of :
9862 unsigned long new_fenv_var;
9863 new_fenv_var = __ieee_get_fp_control ();
9865 __ieee_set_fp_control (fenv_var);
9867 __atomic_feraiseexcept (new_fenv_var); */
9869 new_fenv_var = create_tmp_var_raw (long_unsigned_type_node);
9870 reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var,
9871 build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE);
9872 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9873 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9874 update_call
9875 = build_call_expr (atomic_feraiseexcept, 1,
9876 fold_convert (integer_type_node, new_fenv_var));
9877 *update = build2 (COMPOUND_EXPR, void_type_node,
9878 build2 (COMPOUND_EXPR, void_type_node,
9879 reload_fenv, restore_fnenv), update_call);
9882 /* Implement TARGET_HARD_REGNO_MODE_OK. On Alpha, the integer registers
9883 can hold any mode. The floating-point registers can hold 64-bit
9884 integers as well, but not smaller values. */
9886 static bool
9887 alpha_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9889 if (IN_RANGE (regno, 32, 62))
9890 return (mode == SFmode
9891 || mode == DFmode
9892 || mode == DImode
9893 || mode == SCmode
9894 || mode == DCmode);
9895 return true;
9898 /* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when
9899 MODE1 could be put in an FP register but MODE2 could not. */
9901 static bool
9902 alpha_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9904 return (alpha_hard_regno_mode_ok (32, mode1)
9905 ? alpha_hard_regno_mode_ok (32, mode2)
9906 : true);
9909 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9911 static bool
9912 alpha_can_change_mode_class (machine_mode from, machine_mode to,
9913 reg_class_t rclass)
9915 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9916 || !reg_classes_intersect_p (FLOAT_REGS, rclass));
9919 /* Initialize the GCC target structure. */
9920 #if TARGET_ABI_OPEN_VMS
9921 # undef TARGET_ATTRIBUTE_TABLE
9922 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9923 # undef TARGET_CAN_ELIMINATE
9924 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9925 #endif
9927 #undef TARGET_IN_SMALL_DATA_P
9928 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9930 #undef TARGET_ASM_ALIGNED_HI_OP
9931 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9932 #undef TARGET_ASM_ALIGNED_DI_OP
9933 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9935 /* Default unaligned ops are provided for ELF systems. To get unaligned
9936 data for non-ELF systems, we have to turn off auto alignment. */
9937 #if TARGET_ABI_OPEN_VMS
9938 #undef TARGET_ASM_UNALIGNED_HI_OP
9939 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9940 #undef TARGET_ASM_UNALIGNED_SI_OP
9941 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9942 #undef TARGET_ASM_UNALIGNED_DI_OP
9943 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9944 #endif
9946 #undef TARGET_ASM_RELOC_RW_MASK
9947 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9948 #undef TARGET_ASM_SELECT_RTX_SECTION
9949 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9950 #undef TARGET_SECTION_TYPE_FLAGS
9951 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9953 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9954 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9956 #undef TARGET_INIT_LIBFUNCS
9957 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9959 #undef TARGET_LEGITIMIZE_ADDRESS
9960 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9961 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
9962 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9964 #undef TARGET_ASM_FILE_START
9965 #define TARGET_ASM_FILE_START alpha_file_start
9967 #undef TARGET_SCHED_ADJUST_COST
9968 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9969 #undef TARGET_SCHED_ISSUE_RATE
9970 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9971 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9972 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9973 alpha_multipass_dfa_lookahead
9975 #undef TARGET_HAVE_TLS
9976 #define TARGET_HAVE_TLS HAVE_AS_TLS
9978 #undef TARGET_BUILTIN_DECL
9979 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9980 #undef TARGET_INIT_BUILTINS
9981 #define TARGET_INIT_BUILTINS alpha_init_builtins
9982 #undef TARGET_EXPAND_BUILTIN
9983 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9984 #undef TARGET_FOLD_BUILTIN
9985 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9986 #undef TARGET_GIMPLE_FOLD_BUILTIN
9987 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9989 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9990 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9991 #undef TARGET_CANNOT_COPY_INSN_P
9992 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9993 #undef TARGET_LEGITIMATE_CONSTANT_P
9994 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9995 #undef TARGET_CANNOT_FORCE_CONST_MEM
9996 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9998 #if TARGET_ABI_OSF
9999 #undef TARGET_ASM_OUTPUT_MI_THUNK
10000 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
10001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
10003 #undef TARGET_STDARG_OPTIMIZE_HOOK
10004 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
10005 #endif
10007 #undef TARGET_PRINT_OPERAND
10008 #define TARGET_PRINT_OPERAND alpha_print_operand
10009 #undef TARGET_PRINT_OPERAND_ADDRESS
10010 #define TARGET_PRINT_OPERAND_ADDRESS alpha_print_operand_address
10011 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
10012 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P alpha_print_operand_punct_valid_p
10014 /* Use 16-bit anchors. */
10015 #undef TARGET_MIN_ANCHOR_OFFSET
10016 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
10017 #undef TARGET_MAX_ANCHOR_OFFSET
10018 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
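/* I.e. anchored addresses stay within the signed 16-bit displacement of
   the lda/ldq memory format.  */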
10019 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10020 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
10022 #undef TARGET_REGISTER_MOVE_COST
10023 #define TARGET_REGISTER_MOVE_COST alpha_register_move_cost
10024 #undef TARGET_MEMORY_MOVE_COST
10025 #define TARGET_MEMORY_MOVE_COST alpha_memory_move_cost
10026 #undef TARGET_RTX_COSTS
10027 #define TARGET_RTX_COSTS alpha_rtx_costs
10028 #undef TARGET_ADDRESS_COST
10029 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
10031 #undef TARGET_MACHINE_DEPENDENT_REORG
10032 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
10034 #undef TARGET_PROMOTE_FUNCTION_MODE
10035 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
10036 #undef TARGET_PROMOTE_PROTOTYPES
10037 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
10039 #undef TARGET_FUNCTION_VALUE
10040 #define TARGET_FUNCTION_VALUE alpha_function_value
10041 #undef TARGET_LIBCALL_VALUE
10042 #define TARGET_LIBCALL_VALUE alpha_libcall_value
10043 #undef TARGET_FUNCTION_VALUE_REGNO_P
10044 #define TARGET_FUNCTION_VALUE_REGNO_P alpha_function_value_regno_p
10045 #undef TARGET_RETURN_IN_MEMORY
10046 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
10047 #undef TARGET_PASS_BY_REFERENCE
10048 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
10049 #undef TARGET_SETUP_INCOMING_VARARGS
10050 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
10051 #undef TARGET_STRICT_ARGUMENT_NAMING
10052 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
10053 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
10054 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
10055 #undef TARGET_SPLIT_COMPLEX_ARG
10056 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
10057 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
10058 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
10059 #undef TARGET_ARG_PARTIAL_BYTES
10060 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
10061 #undef TARGET_FUNCTION_ARG
10062 #define TARGET_FUNCTION_ARG alpha_function_arg
10063 #undef TARGET_FUNCTION_ARG_ADVANCE
10064 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
10065 #undef TARGET_TRAMPOLINE_INIT
10066 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
10068 #undef TARGET_INSTANTIATE_DECLS
10069 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
10071 #undef TARGET_SECONDARY_RELOAD
10072 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
10073 #undef TARGET_SECONDARY_MEMORY_NEEDED
10074 #define TARGET_SECONDARY_MEMORY_NEEDED alpha_secondary_memory_needed
10075 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
10076 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE alpha_secondary_memory_needed_mode
10078 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10079 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
10080 #undef TARGET_VECTOR_MODE_SUPPORTED_P
10081 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
10083 #undef TARGET_BUILD_BUILTIN_VA_LIST
10084 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
10086 #undef TARGET_EXPAND_BUILTIN_VA_START
10087 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
10089 #undef TARGET_OPTION_OVERRIDE
10090 #define TARGET_OPTION_OVERRIDE alpha_option_override
10092 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10093 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10094 alpha_override_options_after_change
10096 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
10097 #undef TARGET_MANGLE_TYPE
10098 #define TARGET_MANGLE_TYPE alpha_mangle_type
10099 #endif
10101 #undef TARGET_LRA_P
10102 #define TARGET_LRA_P hook_bool_void_false
10104 #undef TARGET_LEGITIMATE_ADDRESS_P
10105 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
10107 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10108 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10110 #undef TARGET_CANONICALIZE_COMPARISON
10111 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10113 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10114 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10116 #undef TARGET_HARD_REGNO_MODE_OK
10117 #define TARGET_HARD_REGNO_MODE_OK alpha_hard_regno_mode_ok
10119 #undef TARGET_MODES_TIEABLE_P
10120 #define TARGET_MODES_TIEABLE_P alpha_modes_tieable_p
10122 #undef TARGET_CAN_CHANGE_MODE_CLASS
10123 #define TARGET_CAN_CHANGE_MODE_CLASS alpha_can_change_mode_class
10125 struct gcc_target targetm = TARGET_INITIALIZER;
10128 #include "gt-alpha.h"