gcc/config/sparc/sparc.c

   1 /* Subroutines for insn-output.c for Sun SPARC.
   2    Copyright (C) 1987, 88, 89, 92, 93, 94, 1995 Free Software Foundation, Inc.
   3    Contributed by Michael Tiemann (tiemann@cygnus.com)
   4    64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   5    at Cygnus Support.
   6
   7 This file is part of GNU CC.
   8
   9 GNU CC is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU General Public License as published by
  11 the Free Software Foundation; either version 2, or (at your option)
  12 any later version.
  13
  14 GNU CC is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GNU CC; see the file COPYING.  If not, write to
  21 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  22
  23 #include <stdio.h>
  24 #include "config.h"
  25 #include "tree.h"
  26 #include "rtl.h"
  27 #include "regs.h"
  28 #include "hard-reg-set.h"
  29 #include "real.h"
  30 #include "insn-config.h"
  31 #include "conditions.h"
  32 #include "insn-flags.h"
  33 #include "output.h"
  34 #include "insn-attr.h"
  35 #include "flags.h"
  36 #include "expr.h"
  37 #include "recog.h"
  38
  39 /* 1 if the caller has placed an "unimp" insn immediately after the call.
  40    This is used in v8 code when calling a function that returns a structure.
  41    v9 doesn't have this.  */
  42
  43 #define SKIP_CALLERS_UNIMP_P (!TARGET_V9 && current_function_returns_struct)
  44
  45 /* Global variables for machine-dependent things.  */
  46
  47 /* Says what architecture we're compiling for.  */
  48 enum arch_type sparc_arch_type;
  49
  50 /* Size of frame.  Need to know this to emit return insns from leaf procedures.
  51    ACTUAL_FSIZE is set by compute_frame_size() which is called during the
  52    reload pass.  This is important as the value is later used in insn
  53    scheduling (to see what can go in a delay slot).
  54    APPARENT_FSIZE is the size of the stack less the register save area and less
  55    the outgoing argument area.  It is used when saving call preserved regs.  */
  56 static int apparent_fsize;
  57 static int actual_fsize;
  58
  59 /* Save the operands last given to a compare for use when we
  60    generate a scc or bcc insn.  */
  61
  62 rtx sparc_compare_op0, sparc_compare_op1;
  63
  64 /* Count of named arguments (v9 only).
  65    ??? INIT_CUMULATIVE_ARGS initializes these, and FUNCTION_ARG_ADVANCE
  66    increments SPARC_ARG_COUNT. They are then used by
  67    FUNCTION_ARG_CALLEE_COPIES to determine if the argument is really a named
  68    argument or not.  This hack is necessary because the NAMED argument to the
  69    FUNCTION_ARG_XXX macros is not what it says it is: it does not include the
  70    last named argument.  */
  71
  72 int sparc_arg_count;
  73 int sparc_n_named_args;
  74
  75 /* We may need an epilogue if we spill too many registers.
  76    If this is non-zero, then we branch here for the epilogue.  */
  77 static rtx leaf_label;
  78
  79 #ifdef LEAF_REGISTERS
  80
  81 /* Vector to say how input registers are mapped to output
  82    registers.  FRAME_POINTER_REGNUM cannot be remapped by
  83    this function to eliminate it.  You must use -fomit-frame-pointer
  84    to get that.  */
  85 char leaf_reg_remap[] =
  86 { 0, 1, 2, 3, 4, 5, 6, 7,
  87   -1, -1, -1, -1, -1, -1, 14, -1,
  88   -1, -1, -1, -1, -1, -1, -1, -1,
  89   8, 9, 10, 11, 12, 13, -1, 15,
  90
  91   32, 33, 34, 35, 36, 37, 38, 39,
  92   40, 41, 42, 43, 44, 45, 46, 47,
  93   48, 49, 50, 51, 52, 53, 54, 55,
  94   56, 57, 58, 59, 60, 61, 62, 63,
  95   64, 65, 66, 67, 68, 69, 70, 71,
  96   72, 73, 74, 75, 76, 77, 78, 79,
  97   80, 81, 82, 83, 84, 85, 86, 87,
  98   88, 89, 90, 91, 92, 93, 94, 95,
  99   96, 97, 98, 99};
 100
 101 #endif
 102
 103 /* Name of where we pretend to think the frame pointer points.
 104    Normally, this is "%fp", but if we are in a leaf procedure,
 105    this is "%sp+something".  We record "something" separately as it may be
 106    too big for reg+constant addressing.  */
 107
 108 static char *frame_base_name;
 109 static int frame_base_offset;
 110
 111 static rtx find_addr_reg ();
 112 static void sparc_init_modes ();
 113 \f
 114 /* Option handling.  */
 115
 116 /* Validate and override various options, and do some machine dependent
 117    initialization.  */
 118
 119 void
 120 sparc_override_options ()
 121 {
 122   /* Check for any conflicts in the choice of options.  */
 123   /* ??? This stuff isn't really usable yet.  */
 124
 125   if (! TARGET_V9)
 126     {
 127       if (target_flags & MASK_CODE_MODEL)
 128         error ("code model support is only available with -mv9");
 129       if (TARGET_INT64)
 130         error ("-mint64 is only available with -mv9");
 131       if (TARGET_LONG64)
 132         error ("-mlong64 is only available with -mv9");
 133       if (TARGET_PTR64)
 134         error ("-mptr64 is only available with -mv9");
 135       if (TARGET_ENV32)
 136         error ("-menv32 is only available with -mv9");
 137       if (TARGET_STACK_BIAS)
 138         error ("-mstack-bias is only available with -mv9");
 139     }
 140   else
 141     {
 142       /* ??? Are there any options that aren't usable with v9.
 143          -munaligned-doubles?  */
 144     }
 145
 146   /* Check for conflicts in cpu specification.
 147      If we use -mcpu=xxx, this can be removed.  */
 148
 149   if ((TARGET_V8 != 0) + (TARGET_SPARCLITE != 0) + (TARGET_V9 != 0) > 1)
 150     error ("conflicting architectures defined");
 151
 152   /* Do various machine dependent initializations.  */
 153   sparc_init_modes ();
 154 }
 155 \f
/* Float conversions (v9 only).

   The floating point registers cannot hold DImode values because SUBREG's
   on them get the wrong register.   "(subreg:SI (reg:DI M int-reg) 0)" is the
   same as "(subreg:SI (reg:DI N float-reg) 1)", but gcc doesn't know how to
   turn the "0" to a "1".  Therefore, we must explicitly do the conversions
   to/from int/fp regs.  `sparc64_fpconv_stack_temp' returns the
   8 byte stack slot used during the transfer.
   ??? I could have used [%fp-16] but I didn't want to add yet another
   dependence on this.  */
 166 /* ??? Can we use assign_stack_temp here?  */
 167
 168 static rtx fpconv_stack_temp;
 169
 170 /* Called once for each function.  */
 171
 172 void
 173 sparc64_init_expanders ()
 174 {
 175   fpconv_stack_temp = NULL_RTX;
 176 }
 177
 178 /* Assign a stack temp for fp/int DImode conversions.  */
 179
 180 rtx
 181 sparc64_fpconv_stack_temp ()
 182 {
 183   if (fpconv_stack_temp == NULL_RTX)
 184       fpconv_stack_temp =
 185         assign_stack_local (DImode, GET_MODE_SIZE (DImode), 0);
 186
 187     return fpconv_stack_temp;
 188 }
 189 \f
 190 /* Return non-zero only if OP is a register of mode MODE,
 191    or const0_rtx.  */
 192 int
 193 reg_or_0_operand (op, mode)
 194      rtx op;
 195      enum machine_mode mode;
 196 {
 197   if (op == const0_rtx || register_operand (op, mode))
 198     return 1;
 199   if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
 200       && CONST_DOUBLE_HIGH (op) == 0
 201       && CONST_DOUBLE_LOW (op) == 0)
 202     return 1;
 203   if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
 204       && GET_CODE (op) == CONST_DOUBLE
 205       && fp_zero_operand (op))
 206     return 1;
 207   return 0;
 208 }
 209
 210 /* Nonzero if OP is a floating point value with value 0.0.  */
 211 int
 212 fp_zero_operand (op)
 213      rtx op;
 214 {
 215   REAL_VALUE_TYPE r;
 216
 217   REAL_VALUE_FROM_CONST_DOUBLE (r, op);
 218   return REAL_VALUES_EQUAL (r, dconst0);
 219 }
 220
 221 /* Nonzero if OP is an integer register.  */
 222
 223 int
 224 intreg_operand (op, mode)
 225      rtx op;
 226      enum machine_mode mode;
 227 {
 228   return (register_operand (op, SImode)
 229           || (TARGET_V9 && register_operand (op, DImode)));
 230 }
 231
 232 /* Nonzero if OP is a floating point condition code register.  */
 233
 234 int
 235 ccfp_reg_operand (op, mode)
 236      rtx op;
 237      enum machine_mode mode;
 238 {
 239   /* This can happen when recog is called from combine.  Op may be a MEM.
 240      Fail instead of calling abort in this case.  */
 241   if (GET_CODE (op) != REG || REGNO (op) == 0)
 242     return 0;
 243   if (GET_MODE (op) != mode)
 244     return 0;
 245
 246 #if 0   /* ??? ==> 1 when %fcc1-3 are pseudos first.  See gen_compare_reg().  */
 247   if (reg_renumber == 0)
 248     return REGNO (op) >= FIRST_PSEUDO_REGISTER;
 249   return REGNO_OK_FOR_CCFP_P (REGNO (op));
 250 #else
 251   return (unsigned) REGNO (op) - 96 < 4;
 252 #endif
 253 }
 254
 255 /* Nonzero if OP can appear as the dest of a RESTORE insn.  */
 256 int
 257 restore_operand (op, mode)
 258      rtx op;
 259      enum machine_mode mode;
 260 {
 261   return (GET_CODE (op) == REG && GET_MODE (op) == mode
 262           && (REGNO (op) < 8 || (REGNO (op) >= 24 && REGNO (op) < 32)));
 263 }
 264
 265 /* Call insn on SPARC can take a PC-relative constant address, or any regular
 266    memory address.  */
 267
 268 int
 269 call_operand (op, mode)
 270      rtx op;
 271      enum machine_mode mode;
 272 {
 273   if (GET_CODE (op) != MEM)
 274     abort ();
 275   op = XEXP (op, 0);
 276   return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
 277 }
 278
 279 int
 280 call_operand_address (op, mode)
 281      rtx op;
 282      enum machine_mode mode;
 283 {
 284   return (symbolic_operand (op, mode) || memory_address_p (Pmode, op));
 285 }
 286
 287 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
 288    reference and a constant.  */
 289
 290 int
 291 symbolic_operand (op, mode)
 292      register rtx op;
 293      enum machine_mode mode;
 294 {
 295   switch (GET_CODE (op))
 296     {
 297     case SYMBOL_REF:
 298     case LABEL_REF:
 299       return 1;
 300
 301     case CONST:
 302       op = XEXP (op, 0);
 303       return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
 304                || GET_CODE (XEXP (op, 0)) == LABEL_REF)
 305               && GET_CODE (XEXP (op, 1)) == CONST_INT);
 306
 307       /* ??? This clause seems to be irrelevant.  */
 308     case CONST_DOUBLE:
 309       return GET_MODE (op) == mode;
 310
 311     default:
 312       return 0;
 313     }
 314 }
 315
 316 /* Return truth value of statement that OP is a symbolic memory
 317    operand of mode MODE.  */
 318
 319 int
 320 symbolic_memory_operand (op, mode)
 321      rtx op;
 322      enum machine_mode mode;
 323 {
 324   if (GET_CODE (op) == SUBREG)
 325     op = SUBREG_REG (op);
 326   if (GET_CODE (op) != MEM)
 327     return 0;
 328   op = XEXP (op, 0);
 329   return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
 330           || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
 331 }
 332
 333 /* Return 1 if the operand is a data segment reference.  This includes
 334    the readonly data segment, or in other words anything but the text segment.
 335    This is needed in the medium/anywhere code model on v9.  These values
 336    are accessed with MEDANY_BASE_REG.  */
 337
 338 int
 339 data_segment_operand (op, mode)
 340      rtx op;
 341      enum machine_mode mode;
 342 {
 343   switch (GET_CODE (op))
 344     {
 345     case SYMBOL_REF :
 346       return ! SYMBOL_REF_FLAG (op);
 347     case PLUS :
 348       /* Assume canonical format of symbol + constant.  */
 349     case CONST :
 350       return data_segment_operand (XEXP (op, 0));
 351     default :
 352       return 0;
 353     }
 354 }
 355
 356 /* Return 1 if the operand is a text segment reference.
 357    This is needed in the medium/anywhere code model on v9.  */
 358
 359 int
 360 text_segment_operand (op, mode)
 361      rtx op;
 362      enum machine_mode mode;
 363 {
 364   switch (GET_CODE (op))
 365     {
 366     case LABEL_REF :
 367       return 1;
 368     case SYMBOL_REF :
 369       return SYMBOL_REF_FLAG (op);
 370     case PLUS :
 371       /* Assume canonical format of symbol + constant.  */
 372     case CONST :
 373       return text_segment_operand (XEXP (op, 0));
 374     default :
 375       return 0;
 376     }
 377 }
 378
 379 /* Return 1 if the operand is either a register or a memory operand that is
 380    not symbolic.  */
 381
 382 int
 383 reg_or_nonsymb_mem_operand (op, mode)
 384     register rtx op;
 385     enum machine_mode mode;
 386 {
 387   if (register_operand (op, mode))
 388     return 1;
 389
 390   if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
 391     return 1;
 392
 393   return 0;
 394 }
 395
 396 int
 397 sparc_operand (op, mode)
 398      rtx op;
 399      enum machine_mode mode;
 400 {
 401   if (register_operand (op, mode))
 402     return 1;
 403   if (GET_CODE (op) == CONST_INT)
 404     return SMALL_INT (op);
 405   if (GET_MODE (op) != mode)
 406     return 0;
 407   if (GET_CODE (op) == SUBREG)
 408     op = SUBREG_REG (op);
 409   if (GET_CODE (op) != MEM)
 410     return 0;
 411
 412   op = XEXP (op, 0);
 413   if (GET_CODE (op) == LO_SUM)
 414     return (GET_CODE (XEXP (op, 0)) == REG
 415             && symbolic_operand (XEXP (op, 1), Pmode));
 416   return memory_address_p (mode, op);
 417 }
 418
 419 int
 420 move_operand (op, mode)
 421      rtx op;
 422      enum machine_mode mode;
 423 {
 424   if (mode == DImode && arith_double_operand (op, mode))
 425     return 1;
 426   if (register_operand (op, mode))
 427     return 1;
 428   if (GET_CODE (op) == CONST_INT)
 429     return (SMALL_INT (op) || (INTVAL (op) & 0x3ff) == 0);
 430
 431   if (GET_MODE (op) != mode)
 432     return 0;
 433   if (GET_CODE (op) == SUBREG)
 434     op = SUBREG_REG (op);
 435   if (GET_CODE (op) != MEM)
 436     return 0;
 437   op = XEXP (op, 0);
 438   if (GET_CODE (op) == LO_SUM)
 439     return (register_operand (XEXP (op, 0), Pmode)
 440             && CONSTANT_P (XEXP (op, 1)));
 441   return memory_address_p (mode, op);
 442 }
 443
 444 int
 445 move_pic_label (op, mode)
 446      rtx op;
 447      enum machine_mode mode;
 448 {
 449   /* Special case for PIC.  */
 450   if (flag_pic && GET_CODE (op) == LABEL_REF)
 451     return 1;
 452   return 0;
 453 }
 454
 455 int
 456 memop (op, mode)
 457      rtx op;
 458      enum machine_mode mode;
 459 {
 460   if (GET_CODE (op) == MEM)
 461     return (mode == VOIDmode || mode == GET_MODE (op));
 462   return 0;
 463 }
 464
 465 /* Return truth value of whether OP is EQ or NE.  */
 466
 467 int
 468 eq_or_neq (op, mode)
 469      rtx op;
 470      enum machine_mode mode;
 471 {
 472   return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
 473 }
 474
 475 /* Return 1 if this is a comparison operator, but not an EQ, NE, GEU,
 476    or LTU for non-floating-point.  We handle those specially.  */
 477
 478 int
 479 normal_comp_operator (op, mode)
 480      rtx op;
 481      enum machine_mode mode;
 482 {
 483   enum rtx_code code = GET_CODE (op);
 484
 485   if (GET_RTX_CLASS (code) != '<')
 486     return 0;
 487
 488   if (GET_MODE (XEXP (op, 0)) == CCFPmode
 489       || GET_MODE (XEXP (op, 0)) == CCFPEmode)
 490     return 1;
 491
 492   return (code != NE && code != EQ && code != GEU && code != LTU);
 493 }
 494
 495 /* Return 1 if this is a comparison operator.  This allows the use of
 496    MATCH_OPERATOR to recognize all the branch insns.  */
 497
 498 int
 499 noov_compare_op (op, mode)
 500     register rtx op;
 501     enum machine_mode mode;
 502 {
 503   enum rtx_code code = GET_CODE (op);
 504
 505   if (GET_RTX_CLASS (code) != '<')
 506     return 0;
 507
 508   if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
 509     /* These are the only branches which work with CC_NOOVmode.  */
 510     return (code == EQ || code == NE || code == GE || code == LT);
 511   return 1;
 512 }
 513
 514 /* Nonzero if OP is a comparison operator suitable for use in v9
 515    conditional move or branch on register contents instructions.  */
 516
 517 int
 518 v9_regcmp_op (op, mode)
 519      register rtx op;
 520      enum machine_mode mode;
 521 {
 522   enum rtx_code code = GET_CODE (op);
 523
 524   if (GET_RTX_CLASS (code) != '<')
 525     return 0;
 526
 527   return (code == EQ || code == NE || code == GE || code == LT
 528           || code == LE || code == GT);
 529 }
 530
 531 /* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation.  */
 532
 533 int
 534 extend_op (op, mode)
 535      rtx op;
 536      enum machine_mode mode;
 537 {
 538   return GET_CODE (op) == SIGN_EXTEND || GET_CODE (op) == ZERO_EXTEND;
 539 }
 540
 541 /* Return nonzero if OP is an operator of mode MODE which can set
 542    the condition codes explicitly.  We do not include PLUS and MINUS
 543    because these require CC_NOOVmode, which we handle explicitly.  */
 544
 545 int
 546 cc_arithop (op, mode)
 547      rtx op;
 548      enum machine_mode mode;
 549 {
 550   if (GET_CODE (op) == AND
 551       || GET_CODE (op) == IOR
 552       || GET_CODE (op) == XOR)
 553     return 1;
 554
 555   return 0;
 556 }
 557
 558 /* Return nonzero if OP is an operator of mode MODE which can bitwise
 559    complement its second operand and set the condition codes explicitly.  */
 560
 561 int
 562 cc_arithopn (op, mode)
 563      rtx op;
 564      enum machine_mode mode;
 565 {
 566   /* XOR is not here because combine canonicalizes (xor (not ...) ...)
 567      and (xor ... (not ...)) to (not (xor ...)).   */
 568   return (GET_CODE (op) == AND
 569           || GET_CODE (op) == IOR);
 570 }
 571 \f
 572 /* Return true if OP is a register, or is a CONST_INT that can fit in a 13
 573    bit immediate field.  This is an acceptable SImode operand for most 3
 574    address instructions.  */
 575
 576 int
 577 arith_operand (op, mode)
 578      rtx op;
 579      enum machine_mode mode;
 580 {
 581   return (register_operand (op, mode)
 582           || (GET_CODE (op) == CONST_INT && SMALL_INT (op)));
 583 }
 584
 585 /* Return true if OP is a register, or is a CONST_INT that can fit in an 11
 586    bit immediate field.  This is an acceptable SImode operand for the movcc
 587    instructions.  */
 588
 589 int
 590 arith11_operand (op, mode)
 591      rtx op;
 592      enum machine_mode mode;
 593 {
 594   return (register_operand (op, mode)
 595           || (GET_CODE (op) == CONST_INT
 596               && ((unsigned) (INTVAL (op) + 0x400) < 0x800)));
 597 }
 598
 599 /* Return true if OP is a register, or is a CONST_INT that can fit in an 10
 600    bit immediate field.  This is an acceptable SImode operand for the movrcc
 601    instructions.  */
 602
 603 int
 604 arith10_operand (op, mode)
 605      rtx op;
 606      enum machine_mode mode;
 607 {
 608   return (register_operand (op, mode)
 609           || (GET_CODE (op) == CONST_INT
 610               && ((unsigned) (INTVAL (op) + 0x200) < 0x400)));
 611 }
 612
 613 /* Return true if OP is a register, is a CONST_INT that fits in a 13 bit
 614    immediate field, or is a CONST_DOUBLE whose both parts fit in a 13 bit
 615    immediate field.
 616    v9: Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
 617    can fit in a 13 bit immediate field.  This is an acceptable DImode operand
 618    for most 3 address instructions.  */
 619
 620 int
 621 arith_double_operand (op, mode)
 622      rtx op;
 623      enum machine_mode mode;
 624 {
 625   return (register_operand (op, mode)
 626           || (GET_CODE (op) == CONST_INT && SMALL_INT (op))
 627           || (! TARGET_V9
 628               && GET_CODE (op) == CONST_DOUBLE
 629               && (unsigned) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
 630               && (unsigned) (CONST_DOUBLE_HIGH (op) + 0x1000) < 0x2000)
 631           || (TARGET_V9
 632               && GET_CODE (op) == CONST_DOUBLE
 633               && (unsigned) (CONST_DOUBLE_LOW (op) + 0x1000) < 0x2000
 634               && ((CONST_DOUBLE_HIGH (op) == -1
 635                    && (CONST_DOUBLE_LOW (op) & 0x1000) == 0x1000)
 636                   || (CONST_DOUBLE_HIGH (op) == 0
 637                       && (CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
 638 }
 639
 640 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
 641    can fit in an 11 bit immediate field.  This is an acceptable DImode
 642    operand for the movcc instructions.  */
 643 /* ??? Replace with arith11_operand?  */
 644
 645 int
 646 arith11_double_operand (op, mode)
 647      rtx op;
 648      enum machine_mode mode;
 649 {
 650   return (register_operand (op, mode)
 651           || (GET_CODE (op) == CONST_DOUBLE
 652               && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
 653               && (unsigned) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
 654               && ((CONST_DOUBLE_HIGH (op) == -1
 655                    && (CONST_DOUBLE_LOW (op) & 0x400) == 0x400)
 656                   || (CONST_DOUBLE_HIGH (op) == 0
 657                       && (CONST_DOUBLE_LOW (op) & 0x400) == 0)))
 658           || (GET_CODE (op) == CONST_INT
 659               && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
 660               && (unsigned) (INTVAL (op) + 0x400) < 0x800));
 661 }
 662
 663 /* Return true if OP is a register, or is a CONST_INT or CONST_DOUBLE that
 664    can fit in an 10 bit immediate field.  This is an acceptable DImode
 665    operand for the movrcc instructions.  */
 666 /* ??? Replace with arith10_operand?  */
 667
 668 int
 669 arith10_double_operand (op, mode)
 670      rtx op;
 671      enum machine_mode mode;
 672 {
 673   return (register_operand (op, mode)
 674           || (GET_CODE (op) == CONST_DOUBLE
 675               && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
 676               && (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
 677               && ((CONST_DOUBLE_HIGH (op) == -1
 678                    && (CONST_DOUBLE_LOW (op) & 0x200) == 0x200)
 679                   || (CONST_DOUBLE_HIGH (op) == 0
 680                       && (CONST_DOUBLE_LOW (op) & 0x200) == 0)))
 681           || (GET_CODE (op) == CONST_INT
 682               && (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
 683               && (unsigned) (INTVAL (op) + 0x200) < 0x400));
 684 }
 685
 686 /* Return truth value of whether OP is a integer which fits the
 687    range constraining immediate operands in most three-address insns,
 688    which have a 13 bit immediate field.  */
 689
 690 int
 691 small_int (op, mode)
 692      rtx op;
 693      enum machine_mode mode;
 694 {
 695   return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
 696 }
 697
 698 /* Recognize operand values for the umul instruction.  That instruction sign
 699    extends immediate values just like all other sparc instructions, but
 700    interprets the extended result as an unsigned number.  */
 701
 702 int
 703 uns_small_int (op, mode)
 704      rtx op;
 705      enum machine_mode mode;
 706 {
 707 #if HOST_BITS_PER_WIDE_INT > 32
 708   /* All allowed constants will fit a CONST_INT.  */
 709   return (GET_CODE (op) == CONST_INT
 710           && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
 711               || (INTVAL (op) >= 0xFFFFF000 && INTVAL (op) < 0x100000000L)));
 712 #else
 713   return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
 714           || (GET_CODE (op) == CONST_DOUBLE
 715               && CONST_DOUBLE_HIGH (op) == 0
 716               && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
 717 #endif
 718 }
 719
 720 int
 721 uns_arith_operand (op, mode)
 722      rtx op;
 723      enum machine_mode mode;
 724 {
 725   return register_operand (op, mode) || uns_small_int (op, mode);
 726 }
 727
 728 /* Return truth value of statement that OP is a call-clobbered register.  */
 729 int
 730 clobbered_register (op, mode)
 731      rtx op;
 732      enum machine_mode mode;
 733 {
 734   return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
 735 }
 736 \f
 737 /* X and Y are two things to compare using CODE.  Emit the compare insn and
 738    return the rtx for the cc reg in the proper mode.  */
 739
 740 rtx
 741 gen_compare_reg (code, x, y)
 742      enum rtx_code code;
 743      rtx x, y;
 744 {
 745   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
 746   rtx cc_reg;
 747
 748   /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
 749      fpcc regs (cse can't tell they're really call clobbered regs and will
 750      remove a duplicate comparison even if there is an intervening function
 751      call - it will then try to reload the cc reg via an int reg which is why
 752      we need the movcc patterns).  It is possible to provide the movcc
 753      patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
 754      registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
 755      to tell cse that CCFPE mode registers (even pseudoes) are call
 756      clobbered.  */
 757
 758   /* ??? This is an experiment.  Rather than making changes to cse which may
 759      or may not be easy/clean, we do our own cse.  This is possible because
 760      we will generate hard registers.  Cse knows they're call clobbered (it
 761      doesn't know the same thing about pseudos). If we guess wrong, no big
 762      deal, but if we win, great!  */
 763
 764   if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
 765 #if 1 /* experiment */
 766     {
 767       int reg;
 768       /* We cycle through the registers to ensure they're all exercised.  */
 769       static int next_fpcc_reg = 0;
 770       /* Previous x,y for each fpcc reg.  */
 771       static rtx prev_args[4][2];
 772
 773       /* Scan prev_args for x,y.  */
 774       for (reg = 0; reg < 4; reg++)
 775         if (prev_args[reg][0] == x && prev_args[reg][1] == y)
 776           break;
 777       if (reg == 4)
 778         {
 779           reg = next_fpcc_reg;
 780           prev_args[reg][0] = x;
 781           prev_args[reg][1] = y;
 782           next_fpcc_reg = (next_fpcc_reg + 1) & 3;
 783         }
 784       cc_reg = gen_rtx (REG, mode, reg + 96);
 785     }
 786 #else
 787     cc_reg = gen_reg_rtx (mode);
 788 #endif /* ! experiment */
 789   else
 790     cc_reg = gen_rtx (REG, mode, 0);
 791
 792   emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
 793                       gen_rtx (COMPARE, mode, x, y)));
 794
 795   return cc_reg;
 796 }
 797
 798 /* This function is used for v9 only.
 799    CODE is the code for an Scc's comparison.
 800    OPERANDS[0] is the target of the Scc insn.
 801    OPERANDS[1] is the value we compare against const0_rtx (which hasn't
 802    been generated yet).
 803
 804    This function is needed to turn
 805
 806            (set (reg:SI 110)
 807                (gt (reg:CCX 0 %g0)
 808                    (const_int 0)))
 809    into
 810            (set (reg:SI 110)
 811                (gt:DI (reg:CCX 0 %g0)
 812                    (const_int 0)))
 813
 814    IE: The instruction recognizer needs to see the mode of the comparison to
 815    find the right instruction. We could use "gt:DI" right in the
 816    define_expand, but leaving it out allows us to handle DI, SI, etc.
 817
 818    We refer to the global sparc compare operands sparc_compare_op0 and
 819    sparc_compare_op1.
 820
 821    ??? Some of this is outdated as the scc insns set the mode of the
 822    comparison now.
 823
 824    ??? We optimize for the case where op1 is 0 and the comparison allows us to
 825    use the "movrCC" insns. This reduces the generated code from three to two
 826    insns.  This way seems too brute force though.  Is there a more elegant way
 827    to achieve the same effect?
 828
 829    Currently, this function always returns 1.  ??? Can it ever fail?  */
 830
 831 int
 832 gen_v9_scc (compare_code, operands)
 833      enum rtx_code compare_code;
 834      register rtx *operands;
 835 {
 836   rtx temp;
 837
 838   if (GET_MODE_CLASS (GET_MODE (sparc_compare_op0)) == MODE_INT
 839       && sparc_compare_op1 == const0_rtx
 840       && (compare_code == EQ || compare_code == NE
 841           || compare_code == LT || compare_code == LE
 842           || compare_code == GT || compare_code == GE))
 843     {
 844       /* Special case for op0 != 0.  This can be done with one instruction if
 845          op0 can be clobbered.  We store to a temp, and then clobber the temp,
 846          but the combiner will remove the first insn.  */
 847
 848       if (compare_code == NE
 849           && GET_MODE (operands[0]) == DImode
 850           && GET_MODE (sparc_compare_op0) == DImode)
 851         {
 852           emit_insn (gen_rtx (SET, VOIDmode, operands[0], sparc_compare_op0));
 853           emit_insn (gen_rtx (SET, VOIDmode, operands[0],
 854                               gen_rtx (IF_THEN_ELSE, VOIDmode,
 855                                        gen_rtx (compare_code, DImode,
 856                                                 sparc_compare_op0, const0_rtx),
 857                                        const1_rtx,
 858                                        operands[0])));
 859           return 1;
 860         }
 861
 862       emit_insn (gen_rtx (SET, VOIDmode, operands[0], const0_rtx));
 863       if (GET_MODE (sparc_compare_op0) != DImode)
 864         {
 865           temp = gen_reg_rtx (DImode);
 866           convert_move (temp, sparc_compare_op0, 0);
 867         }
 868       else
 869         {
 870           temp = sparc_compare_op0;
 871         }
 872       emit_insn (gen_rtx (SET, VOIDmode, operands[0],
 873                           gen_rtx (IF_THEN_ELSE, VOIDmode,
 874                                    gen_rtx (compare_code, DImode,
 875                                             temp, const0_rtx),
 876                                    const1_rtx,
 877                                    operands[0])));
 878       return 1;
 879     }
 880   else
 881     {
 882       operands[1] = gen_compare_reg (compare_code,
 883                                      sparc_compare_op0, sparc_compare_op1);
 884
 885       switch (GET_MODE (operands[1]))
 886         {
 887           case CCmode :
 888           case CCXmode :
 889           case CCFPEmode :
 890           case CCFPmode :
 891             break;
 892           default :
 893             abort ();
 894         }
 895         emit_insn (gen_rtx (SET, VOIDmode, operands[0], const0_rtx));
 896         emit_insn (gen_rtx (SET, VOIDmode, operands[0],
 897                             gen_rtx (IF_THEN_ELSE, VOIDmode,
 898                                      gen_rtx (compare_code,
 899                                               GET_MODE (operands[1]),
 900                                               operands[1], const0_rtx),
 901                                               const1_rtx, operands[0])));
 902         return 1;
 903     }
 904 }
 905
 906 /* Emit a conditional jump insn for the v9 architecture using comparison code
 907    CODE and jump target LABEL.
 908    This function exists to take advantage of the v9 brxx insns.  */
 909
 910 void
 911 emit_v9_brxx_insn (code, op0, label)
 912      enum rtx_code code;
 913      rtx op0, label;
 914 {
 915   emit_jump_insn (gen_rtx (SET, VOIDmode,
 916                            pc_rtx,
 917                            gen_rtx (IF_THEN_ELSE, VOIDmode,
 918                                     gen_rtx (code, GET_MODE (op0),
 919                                              op0, const0_rtx),
 920                                     gen_rtx (LABEL_REF, VOIDmode, label),
 921                                     pc_rtx)));
 922 }
 923 \f
 924 /* Return nonzero if a return peephole merging return with
 925    setting of output register is ok.  */
 926 int
 927 leaf_return_peephole_ok ()
 928 {
 929   return (actual_fsize == 0);
 930 }
 931
 932 /* Return nonzero if TRIAL can go into the function epilogue's
 933    delay slot.  SLOT is the slot we are trying to fill.  */
 934
 935 int
 936 eligible_for_epilogue_delay (trial, slot)
 937      rtx trial;
 938      int slot;
 939 {
 940   rtx pat, src;
 941
 942   if (slot >= 1)
 943     return 0;
 944   if (GET_CODE (trial) != INSN
 945       || GET_CODE (PATTERN (trial)) != SET)
 946     return 0;
 947   if (get_attr_length (trial) != 1)
 948     return 0;
 949
 950   /* In the case of a true leaf function, anything can go into the delay slot.
 951      A delay slot only exists however if the frame size is zero, otherwise
 952      we will put an insn to adjust the stack after the return.  */
 953   if (leaf_function)
 954     {
 955       if (leaf_return_peephole_ok ())
 956         return (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE);
 957       return 0;
 958     }
 959
 960   /* Otherwise, only operations which can be done in tandem with
 961      a `restore' insn can go into the delay slot.  */
 962   pat = PATTERN (trial);
 963   if (GET_CODE (SET_DEST (pat)) != REG
 964       || REGNO (SET_DEST (pat)) == 0
 965       || REGNO (SET_DEST (pat)) >= 32
 966       || REGNO (SET_DEST (pat)) < 24)
 967     return 0;
 968
 969   src = SET_SRC (pat);
 970   if (arith_operand (src, GET_MODE (src)))
 971     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
 972   if (arith_double_operand (src, GET_MODE (src)))
 973     return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
 974   if (GET_CODE (src) == PLUS)
 975     {
 976       if (register_operand (XEXP (src, 0), SImode)
 977           && arith_operand (XEXP (src, 1), SImode))
 978         return 1;
 979       if (register_operand (XEXP (src, 1), SImode)
 980           && arith_operand (XEXP (src, 0), SImode))
 981         return 1;
 982       if (register_operand (XEXP (src, 0), DImode)
 983           && arith_double_operand (XEXP (src, 1), DImode))
 984         return 1;
 985       if (register_operand (XEXP (src, 1), DImode)
 986           && arith_double_operand (XEXP (src, 0), DImode))
 987         return 1;
 988     }
 989   if (GET_CODE (src) == MINUS
 990       && register_operand (XEXP (src, 0), SImode)
 991       && small_int (XEXP (src, 1), VOIDmode))
 992     return 1;
 993   if (GET_CODE (src) == MINUS
 994       && register_operand (XEXP (src, 0), DImode)
 995       && !register_operand (XEXP (src, 1), DImode)
 996       && arith_double_operand (XEXP (src, 1), DImode))
 997     return 1;
 998   return 0;
 999 }
1000
1001 int
1002 short_branch (uid1, uid2)
1003      int uid1, uid2;
1004 {
1005   unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
1006   if (delta + 1024 < 2048)
1007     return 1;
1008   /* warning ("long branch, distance %d", delta); */
1009   return 0;
1010 }
1011
1012 /* Return non-zero if REG is not used after INSN.
1013    We assume REG is a reload reg, and therefore does
1014    not live past labels or calls or jumps.  */
1015 int
1016 reg_unused_after (reg, insn)
1017      rtx reg;
1018      rtx insn;
1019 {
1020   enum rtx_code code, prev_code = UNKNOWN;
1021
1022   while (insn = NEXT_INSN (insn))
1023     {
1024       if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
1025         return 1;
1026
1027       code = GET_CODE (insn);
1028       if (GET_CODE (insn) == CODE_LABEL)
1029         return 1;
1030
1031       if (GET_RTX_CLASS (code) == 'i')
1032         {
1033           rtx set = single_set (insn);
1034           int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
1035           if (set && in_src)
1036             return 0;
1037           if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
1038             return 1;
1039           if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
1040             return 0;
1041         }
1042       prev_code = code;
1043     }
1044   return 1;
1045 }
1046 \f
1047 /* The rtx for the global offset table which is a special form
1048    that *is* a position independent symbolic constant.  */
1049 static rtx pic_pc_rtx;
1050
1051 /* Ensure that we are not using patterns that are not OK with PIC.  */
1052
1053 int
1054 check_pic (i)
1055      int i;
1056 {
1057   switch (flag_pic)
1058     {
1059     case 1:
1060       if (GET_CODE (recog_operand[i]) == SYMBOL_REF
1061           || (GET_CODE (recog_operand[i]) == CONST
1062               && ! rtx_equal_p (pic_pc_rtx, recog_operand[i])))
1063         abort ();
1064     case 2:
1065     default:
1066       return 1;
1067     }
1068 }
1069
1070 /* Return true if X is an address which needs a temporary register when
1071    reloaded while generating PIC code.  */
1072
1073 int
1074 pic_address_needs_scratch (x)
1075      rtx x;
1076 {
1077   /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
1078   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
1079       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
1080       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1081       && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
1082     return 1;
1083
1084   return 0;
1085 }
1086
1087 /* Legitimize PIC addresses.  If the address is already position-independent,
1088    we return ORIG.  Newly generated position-independent addresses go into a
1089    reg.  This is REG if non zero, otherwise we allocate register(s) as
1090    necessary.  */
1091
1092 rtx
1093 legitimize_pic_address (orig, mode, reg)
1094      rtx orig;
1095      enum machine_mode mode;
1096      rtx reg;
1097 {
1098   if (GET_CODE (orig) == SYMBOL_REF)
1099     {
1100       rtx pic_ref, address;
1101       rtx insn;
1102
1103       if (reg == 0)
1104         {
1105           if (reload_in_progress || reload_completed)
1106             abort ();
1107           else
1108             reg = gen_reg_rtx (Pmode);
1109         }
1110
1111       if (flag_pic == 2)
1112         {
1113           /* If not during reload, allocate another temp reg here for loading
1114              in the address, so that these instructions can be optimized
1115              properly.  */
1116           rtx temp_reg = ((reload_in_progress || reload_completed)
1117                           ? reg : gen_reg_rtx (Pmode));
1118
1119           /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
1120              won't get confused into thinking that these two instructions
1121              are loading in the true address of the symbol.  If in the
1122              future a PIC rtx exists, that should be used instead.  */
1123           emit_insn (gen_rtx (SET, VOIDmode, temp_reg,
1124                               gen_rtx (HIGH, Pmode,
1125                                        gen_rtx (UNSPEC, Pmode,
1126                                                 gen_rtvec (1, orig),
1127                                                 0))));
1128           emit_insn (gen_rtx (SET, VOIDmode, temp_reg,
1129                               gen_rtx (LO_SUM, Pmode, temp_reg,
1130                                        gen_rtx (UNSPEC, Pmode,
1131                                                 gen_rtvec (1, orig),
1132                                                 0))));
1133           address = temp_reg;
1134         }
1135       else
1136         address = orig;
1137
1138       pic_ref = gen_rtx (MEM, Pmode,
1139                          gen_rtx (PLUS, Pmode,
1140                                   pic_offset_table_rtx, address));
1141       current_function_uses_pic_offset_table = 1;
1142       RTX_UNCHANGING_P (pic_ref) = 1;
1143       insn = emit_move_insn (reg, pic_ref);
1144       /* Put a REG_EQUAL note on this insn, so that it can be optimized
1145          by loop.  */
1146       REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_EQUAL, orig,
1147                                   REG_NOTES (insn));
1148       return reg;
1149     }
1150   else if (GET_CODE (orig) == CONST)
1151     {
1152       rtx base, offset;
1153
1154       if (GET_CODE (XEXP (orig, 0)) == PLUS
1155           && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
1156         return orig;
1157
1158       if (reg == 0)
1159         {
1160           if (reload_in_progress || reload_completed)
1161             abort ();
1162           else
1163             reg = gen_reg_rtx (Pmode);
1164         }
1165
1166       if (GET_CODE (XEXP (orig, 0)) == PLUS)
1167         {
1168           base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
1169           offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
1170                                          base == reg ? 0 : reg);
1171         }
1172       else
1173         abort ();
1174
1175       if (GET_CODE (offset) == CONST_INT)
1176         {
1177           if (SMALL_INT (offset))
1178             return plus_constant_for_output (base, INTVAL (offset));
1179           else if (! reload_in_progress && ! reload_completed)
1180             offset = force_reg (Pmode, offset);
1181           else
1182             /* If we reach here, then something is seriously wrong.  */
1183             abort ();
1184         }
1185       return gen_rtx (PLUS, Pmode, base, offset);
1186     }
1187   else if (GET_CODE (orig) == LABEL_REF)
1188     current_function_uses_pic_offset_table = 1;
1189
1190   return orig;
1191 }
1192
/* Set up PIC-specific rtl.  This should not cause any insns
   to be emitted.  Currently there is nothing to set up.  */

void
initialize_pic ()
{
  /* Intentionally empty.  */
  return;
}
1200
1201 /* Emit special PIC prologues and epilogues.  */
1202
1203 void
1204 finalize_pic ()
1205 {
1206   /* The table we use to reference PIC data.  */
1207   rtx global_offset_table;
1208   /* Labels to get the PC in the prologue of this function.  */
1209   rtx l1, l2;
1210   rtx seq;
1211   int orig_flag_pic = flag_pic;
1212
1213   if (current_function_uses_pic_offset_table == 0)
1214     return;
1215
1216   if (! flag_pic)
1217     abort ();
1218
1219   flag_pic = 0;
1220   l1 = gen_label_rtx ();
1221   l2 = gen_label_rtx ();
1222
1223   start_sequence ();
1224
1225   emit_label (l1);
1226   /* Note that we pun calls and jumps here!  */
1227   emit_jump_insn (gen_rtx (PARALLEL, VOIDmode,
1228                          gen_rtvec (2,
1229                                     gen_rtx (SET, VOIDmode, pc_rtx, gen_rtx (LABEL_REF, VOIDmode, l2)),
1230                                     gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 15), gen_rtx (LABEL_REF, VOIDmode, l2)))));
1231   emit_label (l2);
1232
1233   /* Initialize every time through, since we can't easily
1234      know this to be permanent.  */
1235   global_offset_table = gen_rtx (SYMBOL_REF, Pmode, "_GLOBAL_OFFSET_TABLE_");
1236   pic_pc_rtx = gen_rtx (CONST, Pmode,
1237                         gen_rtx (MINUS, Pmode,
1238                                  global_offset_table,
1239                                  gen_rtx (CONST, Pmode,
1240                                           gen_rtx (MINUS, Pmode,
1241                                                    gen_rtx (LABEL_REF, VOIDmode, l1),
1242                                                    pc_rtx))));
1243
1244   if (Pmode == DImode)
1245     emit_insn (gen_rtx (PARALLEL, VOIDmode,
1246                         gen_rtvec (2,
1247                                    gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
1248                                             gen_rtx (HIGH, Pmode, pic_pc_rtx)),
1249                                    gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, Pmode, 1)))));
1250   else
1251     emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
1252                         gen_rtx (HIGH, Pmode, pic_pc_rtx)));
1253
1254   emit_insn (gen_rtx (SET, VOIDmode,
1255                       pic_offset_table_rtx,
1256                       gen_rtx (LO_SUM, Pmode,
1257                                pic_offset_table_rtx, pic_pc_rtx)));
1258   emit_insn (gen_rtx (SET, VOIDmode,
1259                       pic_offset_table_rtx,
1260                       gen_rtx (PLUS, Pmode,
1261                                pic_offset_table_rtx, gen_rtx (REG, Pmode, 15))));
1262   /* emit_insn (gen_rtx (ASM_INPUT, VOIDmode, "!#PROLOGUE# 1")); */
1263   LABEL_PRESERVE_P (l1) = 1;
1264   LABEL_PRESERVE_P (l2) = 1;
1265   flag_pic = orig_flag_pic;
1266
1267   seq = gen_sequence ();
1268   end_sequence ();
1269   emit_insn_after (seq, get_insns ());
1270
1271   /* Need to emit this whether or not we obey regdecls,
1272      since setjmp/longjmp can cause life info to screw up.  */
1273   emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
1274 }
1275 \f
1276 /* Emit insns to move operands[1] into operands[0].
1277
1278    Return 1 if we have written out everything that needs to be done to
1279    do the move.  Otherwise, return 0 and the caller will emit the move
1280    normally.  */
1281
1282 int
1283 emit_move_sequence (operands, mode)
1284      rtx *operands;
1285      enum machine_mode mode;
1286 {
1287   register rtx operand0 = operands[0];
1288   register rtx operand1 = operands[1];
1289
1290   if (CONSTANT_P (operand1) && flag_pic
1291       && pic_address_needs_scratch (operand1))
1292     operands[1] = operand1 = legitimize_pic_address (operand1, mode, 0);
1293
1294   /* Handle most common case first: storing into a register.  */
1295   if (register_operand (operand0, mode))
1296     {
1297       if (register_operand (operand1, mode)
1298           || (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1))
1299           || (GET_CODE (operand1) == CONST_DOUBLE
1300               && arith_double_operand (operand1, DImode))
1301           || (GET_CODE (operand1) == HIGH && GET_MODE (operand1) != DImode)
1302           /* Only `general_operands' can come here, so MEM is ok.  */
1303           || GET_CODE (operand1) == MEM)
1304         {
1305           /* Run this case quickly.  */
1306           emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1307           return 1;
1308         }
1309     }
1310   else if (GET_CODE (operand0) == MEM)
1311     {
1312       if (register_operand (operand1, mode) || operand1 == const0_rtx)
1313         {
1314           /* Run this case quickly.  */
1315           emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
1316           return 1;
1317         }
1318       if (! reload_in_progress)
1319         {
1320           operands[0] = validize_mem (operand0);
1321           operands[1] = operand1 = force_reg (mode, operand1);
1322         }
1323     }
1324
1325   /* Simplify the source if we need to.  Must handle DImode HIGH operators
1326      here because such a move needs a clobber added.  */
1327   if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1328       || (GET_CODE (operand1) == HIGH && GET_MODE (operand1) == DImode))
1329     {
1330       if (flag_pic && symbolic_operand (operand1, mode))
1331         {
1332           rtx temp_reg = reload_in_progress ? operand0 : 0;
1333
1334           operands[1] = legitimize_pic_address (operand1, mode, temp_reg);
1335         }
1336       else if (GET_CODE (operand1) == CONST_INT
1337                ? (! SMALL_INT (operand1)
1338                   && (INTVAL (operand1) & 0x3ff) != 0)
1339                : (GET_CODE (operand1) == CONST_DOUBLE
1340                   ? ! arith_double_operand (operand1, DImode)
1341                   : 1))
1342         {
1343           /* For DImode values, temp must be operand0 because of the way
1344              HI and LO_SUM work.  The LO_SUM operator only copies half of
1345              the LSW from the dest of the HI operator.  If the LO_SUM dest is
1346              not the same as the HI dest, then the MSW of the LO_SUM dest will
1347              never be set.
1348
1349              ??? The real problem here is that the ...(HI:DImode pattern emits
1350              multiple instructions, and the ...(LO_SUM:DImode pattern emits
1351              one instruction.  This fails, because the compiler assumes that
1352              LO_SUM copies all bits of the first operand to its dest.  Better
1353              would be to have the HI pattern emit one instruction and the
1354              LO_SUM pattern multiple instructions.  Even better would be
1355              to use four rtl insns.  */
1356           rtx temp = ((reload_in_progress || mode == DImode)
1357                       ? operand0 : gen_reg_rtx (mode));
1358
1359           if (TARGET_V9 && mode == DImode)
1360             {
1361               int high_operand = 0;
1362
1363               /* If the operand is already a HIGH, then remove the HIGH so
1364                  that we won't get duplicate HIGH operators in this insn.
1365                  Also, we must store the result into the original dest,
1366                  because that is where the following LO_SUM expects it.  */
1367               if (GET_CODE (operand1) == HIGH)
1368                 {
1369                   operand1 = XEXP (operand1, 0);
1370                   high_operand = 1;
1371                 }
1372
1373               emit_insn (gen_rtx (PARALLEL, VOIDmode,
1374                                   gen_rtvec (2,
1375                                              gen_rtx (SET, VOIDmode, temp,
1376                                                       gen_rtx (HIGH, mode, operand1)),
1377                                              gen_rtx (CLOBBER, VOIDmode, gen_rtx (REG, DImode, 1)))));
1378
1379               /* If this was a high operand, then we are now finished.  */
1380               if (high_operand)
1381                 return 1;
1382             }
1383           else
1384             emit_insn (gen_rtx (SET, VOIDmode, temp,
1385                                 gen_rtx (HIGH, mode, operand1)));
1386
1387           operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
1388         }
1389     }
1390
1391   if (GET_CODE (operand1) == LABEL_REF && flag_pic)
1392     {
1393       /* The procedure for doing this involves using a call instruction to
1394          get the pc into o7.  We need to indicate this explicitly because
1395          the tablejump pattern assumes that it can use this value also.  */
1396       emit_insn (gen_rtx (PARALLEL, VOIDmode,
1397                           gen_rtvec (2,
1398                                      gen_rtx (SET, VOIDmode, operand0,
1399                                               operand1),
1400                                      gen_rtx (SET, VOIDmode,
1401                                               gen_rtx (REG, mode, 15),
1402                                               pc_rtx))));
1403       return 1;
1404     }
1405
1406   /* Now have insn-emit do whatever it normally does.  */
1407   return 0;
1408 }
1409 \f
1410 /* Return the best assembler insn template
1411    for moving operands[1] into operands[0] as a fullword.  */
1412
1413 char *
1414 singlemove_string (operands)
1415      rtx *operands;
1416 {
1417   if (GET_CODE (operands[0]) == MEM)
1418     {
1419       if (GET_CODE (operands[1]) != MEM)
1420         return "st %r1,%0";
1421       else
1422         abort ();
1423     }
1424   else if (GET_CODE (operands[1]) == MEM)
1425     return "ld %1,%0";
1426   else if (GET_CODE (operands[1]) == CONST_DOUBLE)
1427     {
1428       REAL_VALUE_TYPE r;
1429       long i;
1430
1431       /* Must be SFmode, otherwise this doesn't make sense.  */
1432       if (GET_MODE (operands[1]) != SFmode)
1433         abort ();
1434
1435       REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
1436       REAL_VALUE_TO_TARGET_SINGLE (r, i);
1437       operands[1] = gen_rtx (CONST_INT, VOIDmode, i);
1438
1439       if (CONST_OK_FOR_LETTER_P (i, 'I'))
1440         return "mov %1,%0";
1441       else if ((i & 0x000003FF) != 0)
1442         return "sethi %%hi(%a1),%0\n\tor %0,%%lo(%a1),%0";
1443       else
1444         return "sethi %%hi(%a1),%0";
1445     }
1446   else if (GET_CODE (operands[1]) == CONST_INT
1447            && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I'))
1448     {
1449       int i = INTVAL (operands[1]);
1450
1451       /* If all low order 10 bits are clear, then we only need a single
1452          sethi insn to load the constant.  */
1453       if ((i & 0x000003FF) != 0)
1454         return "sethi %%hi(%a1),%0\n\tor %0,%%lo(%a1),%0";
1455       else
1456         return "sethi %%hi(%a1),%0";
1457     }
1458   /* Operand 1 must be a register, or a 'I' type CONST_INT.  */
1459   return "mov %1,%0";
1460 }
1461 \f
1462 /* Return non-zero if it is OK to assume that the given memory operand is
1463    aligned at least to a 8-byte boundary.  This should only be called
1464    for memory accesses whose size is 8 bytes or larger.  */
1465
1466 int
1467 mem_aligned_8 (mem)
1468      register rtx mem;
1469 {
1470   register rtx addr;
1471   register rtx base;
1472   register rtx offset;
1473
1474   if (GET_CODE (mem) != MEM)
1475     return 0;   /* It's gotta be a MEM! */
1476
1477   addr = XEXP (mem, 0);
1478
1479   /* Now that all misaligned double parms are copied on function entry,
1480      we can assume any 64-bit object is 64-bit aligned except those which
1481      are at unaligned offsets from the stack or frame pointer.  If the
1482      TARGET_UNALIGNED_DOUBLES switch is given, we do not make this
1483      assumption.  */
1484
1485   /* See what register we use in the address.  */
1486   base = 0;
1487   if (GET_CODE (addr) == PLUS)
1488     {
1489       if (GET_CODE (XEXP (addr, 0)) == REG
1490           && GET_CODE (XEXP (addr, 1)) == CONST_INT)
1491         {
1492           base = XEXP (addr, 0);
1493           offset = XEXP (addr, 1);
1494         }
1495     }
1496   else if (GET_CODE (addr) == REG)
1497     {
1498       base = addr;
1499       offset = const0_rtx;
1500     }
1501
1502   /* If it's the stack or frame pointer, check offset alignment.
1503      We can have improper alignment in the function entry code.  */
1504   if (base
1505       && (REGNO (base) == FRAME_POINTER_REGNUM
1506           || REGNO (base) == STACK_POINTER_REGNUM))
1507     {
1508       if (((INTVAL (offset) - SPARC_STACK_BIAS) & 0x7) == 0)
1509         return 1;
1510     }
1511   /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
1512      is true, in which case we can only assume that an access is aligned if
1513      it is to a constant address, or the address involves a LO_SUM.
1514
1515      We used to assume an address was aligned if MEM_IN_STRUCT_P was true.
1516      That assumption was deleted so that gcc generated code can be used with
1517      memory allocators that only guarantee 4 byte alignment.  */
1518   else if (! TARGET_UNALIGNED_DOUBLES || CONSTANT_P (addr)
1519            || GET_CODE (addr) == LO_SUM)
1520     return 1;
1521
1522   /* An obviously unaligned address.  */
1523   return 0;
1524 }
1525
/* Operand classes used by the multi-word move output routines.  */
enum optype
{
  REGOP,        /* A register (REG_P).  */
  OFFSOP,       /* An offsettable memory reference.  */
  MEMOP,        /* Any other MEM.  */
  PUSHOP,       /* Not assigned by the classification code in this file's
                   move routines -- presumably kept for parity with other
                   ports; confirm before removing.  */
  POPOP,        /* Likewise.  */
  CNSTOP,       /* A constant (CONSTANT_P).  */
  RNDOP         /* Anything else; the move routines abort on it.  */
};
1527
1528 /* Output assembler code to perform a doubleword move insn
1529    with operands OPERANDS.  This is very similar to the following
1530    output_move_quad function.  */
1531
1532 char *
1533 output_move_double (operands)
1534      rtx *operands;
1535 {
1536   register rtx op0 = operands[0];
1537   register rtx op1 = operands[1];
1538   register enum optype optype0;
1539   register enum optype optype1;
1540   rtx latehalf[2];
1541   rtx addreg0 = 0;
1542   rtx addreg1 = 0;
1543   int highest_first = 0;
1544   int no_addreg1_decrement = 0;
1545
1546   /* First classify both operands.  */
1547
1548   if (REG_P (op0))
1549     optype0 = REGOP;
1550   else if (offsettable_memref_p (op0))
1551     optype0 = OFFSOP;
1552   else if (GET_CODE (op0) == MEM)
1553     optype0 = MEMOP;
1554   else
1555     optype0 = RNDOP;
1556
1557   if (REG_P (op1))
1558     optype1 = REGOP;
1559   else if (CONSTANT_P (op1))
1560     optype1 = CNSTOP;
1561   else if (offsettable_memref_p (op1))
1562     optype1 = OFFSOP;
1563   else if (GET_CODE (op1) == MEM)
1564     optype1 = MEMOP;
1565   else
1566     optype1 = RNDOP;
1567
1568   /* Check for the cases that the operand constraints are not
1569      supposed to allow to happen.  Abort if we get one,
1570      because generating code for these cases is painful.  */
1571
1572   if (optype0 == RNDOP || optype1 == RNDOP
1573       || (optype0 == MEM && optype1 == MEM))
1574     abort ();
1575
1576   /* If an operand is an unoffsettable memory ref, find a register
1577      we can increment temporarily to make it refer to the second word.  */
1578
1579   if (optype0 == MEMOP)
1580     addreg0 = find_addr_reg (XEXP (op0, 0));
1581
1582   if (optype1 == MEMOP)
1583     addreg1 = find_addr_reg (XEXP (op1, 0));
1584
1585   /* Ok, we can do one word at a time.
1586      Set up in LATEHALF the operands to use for the
1587      high-numbered (least significant) word and in some cases alter the
1588      operands in OPERANDS to be suitable for the low-numbered word.  */
1589
1590   if (optype0 == REGOP)
1591     latehalf[0] = gen_rtx (REG, SImode, REGNO (op0) + 1);
1592   else if (optype0 == OFFSOP)
1593     latehalf[0] = adj_offsettable_operand (op0, 4);
1594   else
1595     latehalf[0] = op0;
1596
1597   if (optype1 == REGOP)
1598     latehalf[1] = gen_rtx (REG, SImode, REGNO (op1) + 1);
1599   else if (optype1 == OFFSOP)
1600     latehalf[1] = adj_offsettable_operand (op1, 4);
1601   else if (optype1 == CNSTOP)
1602     {
1603       if (TARGET_V9)
1604         {
1605           if (arith_double_operand (op1, DImode))
1606             {
1607               operands[1] = gen_rtx (CONST_INT, VOIDmode,
1608                                      CONST_DOUBLE_LOW (op1));
1609               return "mov %1,%0";
1610             }
1611           else
1612             {
1613               /* The only way to handle CONST_DOUBLEs or other 64 bit
1614                  constants here is to use a temporary, such as is done
1615                  for the V9 DImode sethi insn pattern.  This is not
1616                  a practical solution, so abort if we reach here.
1617                  The md file should always force such constants to
1618                  memory.  */
1619               abort ();
1620             }
1621         }
1622       else
1623         split_double (op1, &operands[1], &latehalf[1]);
1624     }
1625   else
1626     latehalf[1] = op1;
1627
1628   /* Easy case: try moving both words at once.  Check for moving between
1629      an even/odd register pair and a memory location.  */
1630   if ((optype0 == REGOP && optype1 != REGOP && optype1 != CNSTOP
1631        && (TARGET_V9 || (REGNO (op0) & 1) == 0))
1632       || (optype0 != REGOP && optype0 != CNSTOP && optype1 == REGOP
1633           && (TARGET_V9 || (REGNO (op1) & 1) == 0)))
1634     {
1635       register rtx mem,reg;
1636
1637       if (optype0 == REGOP)
1638         mem = op1, reg = op0;
1639       else
1640         mem = op0, reg = op1;
1641
1642       /* In v9, ldd can be used for word aligned addresses, so technically
1643          some of this logic is unneeded.  We still avoid ldd if the address
1644          is obviously unaligned though.  */
1645
1646       if (mem_aligned_8 (mem)
1647           /* If this is a floating point register higher than %f31,
1648              then we *must* use an aligned load, since `ld' will not accept
1649              the register number.  */
1650           || (TARGET_V9 && REGNO (reg) >= 64))
1651         {
1652           if (FP_REG_P (reg) || ! TARGET_V9)
1653             return (mem == op1 ? "ldd %1,%0" : "std %1,%0");
1654           else
1655             return (mem == op1 ? "ldx %1,%0" : "stx %1,%0");
1656         }
1657     }
1658
1659   if (TARGET_V9)
1660     {
1661       if (optype0 == REGOP && optype1 == REGOP)
1662         {
1663           if (FP_REG_P (op0))
1664             return "fmovd %1,%0";
1665           else
1666             return "mov %1,%0";
1667         }
1668     }
1669
1670   /* If the first move would clobber the source of the second one,
1671      do them in the other order.  */
1672
1673   /* Overlapping registers.  */
1674   if (optype0 == REGOP && optype1 == REGOP
1675       && REGNO (op0) == REGNO (latehalf[1]))
1676     {
1677       /* Do that word.  */
1678       output_asm_insn (singlemove_string (latehalf), latehalf);
1679       /* Do low-numbered word.  */
1680       return singlemove_string (operands);
1681     }
1682   /* Loading into a register which overlaps a register used in the address.  */
1683   else if (optype0 == REGOP && optype1 != REGOP
1684            && reg_overlap_mentioned_p (op0, op1))
1685     {
1686       /* If both halves of dest are used in the src memory address,
1687          add the two regs and put them in the low reg (op0).
1688          Then it works to load latehalf first.  */
1689       if (reg_mentioned_p (op0, XEXP (op1, 0))
1690           && reg_mentioned_p (latehalf[0], XEXP (op1, 0)))
1691         {
1692           rtx xops[2];
1693           xops[0] = latehalf[0];
1694           xops[1] = op0;
1695           output_asm_insn ("add %1,%0,%1", xops);
1696           operands[1] = gen_rtx (MEM, DImode, op0);
1697           latehalf[1] = adj_offsettable_operand (operands[1], 4);
1698           addreg1 = 0;
1699           highest_first = 1;
1700         }
1701       /* Only one register in the dest is used in the src memory address,
1702          and this is the first register of the dest, so we want to do
1703          the late half first here also.  */
1704       else if (! reg_mentioned_p (latehalf[0], XEXP (op1, 0)))
1705         highest_first = 1;
1706       /* Only one register in the dest is used in the src memory address,
1707          and this is the second register of the dest, so we want to do
1708          the late half last.  If addreg1 is set, and addreg1 is the same
1709          register as latehalf, then we must suppress the trailing decrement,
1710          because it would clobber the value just loaded.  */
1711       else if (addreg1 && reg_mentioned_p (addreg1, latehalf[0]))
1712         no_addreg1_decrement = 1;
1713     }
1714
1715   /* Normal case: do the two words, low-numbered first.
1716      Overlap case (highest_first set): do high-numbered word first.  */
1717
1718   if (! highest_first)
1719     output_asm_insn (singlemove_string (operands), operands);
1720
1721   /* Make any unoffsettable addresses point at high-numbered word.  */
1722   if (addreg0)
1723     output_asm_insn ("add %0,0x4,%0", &addreg0);
1724   if (addreg1)
1725     output_asm_insn ("add %0,0x4,%0", &addreg1);
1726
1727   /* Do that word.  */
1728   output_asm_insn (singlemove_string (latehalf), latehalf);
1729
1730   /* Undo the adds we just did.  */
1731   if (addreg0)
1732     output_asm_insn ("add %0,-0x4,%0", &addreg0);
1733   if (addreg1 && ! no_addreg1_decrement)
1734     output_asm_insn ("add %0,-0x4,%0", &addreg1);
1735
1736   if (highest_first)
1737     output_asm_insn (singlemove_string (operands), operands);
1738
1739   return "";
1740 }
1741
1742 /* Output assembler code to perform a quadword move insn
1743    with operands OPERANDS.  This is very similar to the preceding
1744    output_move_double function.  */
1745
1746 char *
1747 output_move_quad (operands)
1748      rtx *operands;
1749 {
1750   register rtx op0 = operands[0];
1751   register rtx op1 = operands[1];
1752   register enum optype optype0;
1753   register enum optype optype1;
1754   rtx wordpart[4][2];
1755   rtx addreg0 = 0;
1756   rtx addreg1 = 0;
1757
1758   /* First classify both operands.  */
1759
1760   if (REG_P (op0))
1761     optype0 = REGOP;
1762   else if (offsettable_memref_p (op0))
1763     optype0 = OFFSOP;
1764   else if (GET_CODE (op0) == MEM)
1765     optype0 = MEMOP;
1766   else
1767     optype0 = RNDOP;
1768
1769   if (REG_P (op1))
1770     optype1 = REGOP;
1771   else if (CONSTANT_P (op1))
1772     optype1 = CNSTOP;
1773   else if (offsettable_memref_p (op1))
1774     optype1 = OFFSOP;
1775   else if (GET_CODE (op1) == MEM)
1776     optype1 = MEMOP;
1777   else
1778     optype1 = RNDOP;
1779
1780   /* Check for the cases that the operand constraints are not
1781      supposed to allow to happen.  Abort if we get one,
1782      because generating code for these cases is painful.  */
1783
1784   if (optype0 == RNDOP || optype1 == RNDOP
1785       || (optype0 == MEM && optype1 == MEM))
1786     abort ();
1787
1788   /* If an operand is an unoffsettable memory ref, find a register
1789      we can increment temporarily to make it refer to the later words.  */
1790
1791   if (optype0 == MEMOP)
1792     addreg0 = find_addr_reg (XEXP (op0, 0));
1793
1794   if (optype1 == MEMOP)
1795     addreg1 = find_addr_reg (XEXP (op1, 0));
1796
1797   /* Ok, we can do one word at a time.
1798      Set up in wordpart the operands to use for each word of the arguments.  */
1799
1800   if (optype0 == REGOP)
1801     {
1802       wordpart[0][0] = gen_rtx (REG, SImode, REGNO (op0) + 0);
1803       wordpart[1][0] = gen_rtx (REG, SImode, REGNO (op0) + 1);
1804       wordpart[2][0] = gen_rtx (REG, SImode, REGNO (op0) + 2);
1805       wordpart[3][0] = gen_rtx (REG, SImode, REGNO (op0) + 3);
1806     }
1807   else if (optype0 == OFFSOP)
1808     {
1809       wordpart[0][0] = adj_offsettable_operand (op0, 0);
1810       wordpart[1][0] = adj_offsettable_operand (op0, 4);
1811       wordpart[2][0] = adj_offsettable_operand (op0, 8);
1812       wordpart[3][0] = adj_offsettable_operand (op0, 12);
1813     }
1814   else
1815     {
1816       wordpart[0][0] = op0;
1817       wordpart[1][0] = op0;
1818       wordpart[2][0] = op0;
1819       wordpart[3][0] = op0;
1820     }
1821
1822   if (optype1 == REGOP)
1823     {
1824       wordpart[0][1] = gen_rtx (REG, SImode, REGNO (op1) + 0);
1825       wordpart[1][1] = gen_rtx (REG, SImode, REGNO (op1) + 1);
1826       wordpart[2][1] = gen_rtx (REG, SImode, REGNO (op1) + 2);
1827       wordpart[3][1] = gen_rtx (REG, SImode, REGNO (op1) + 3);
1828     }
1829   else if (optype1 == OFFSOP)
1830     {
1831       wordpart[0][1] = adj_offsettable_operand (op1, 0);
1832       wordpart[1][1] = adj_offsettable_operand (op1, 4);
1833       wordpart[2][1] = adj_offsettable_operand (op1, 8);
1834       wordpart[3][1] = adj_offsettable_operand (op1, 12);
1835     }
1836   else if (optype1 == CNSTOP)
1837     {
1838       REAL_VALUE_TYPE r;
1839       long l[4];
1840
1841       /* This only works for TFmode floating point constants.  */
1842       if (GET_CODE (op1) != CONST_DOUBLE || GET_MODE (op1) != TFmode)
1843         abort ();
1844
1845       REAL_VALUE_FROM_CONST_DOUBLE (r, op1);
1846       REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
1847
1848       wordpart[0][1] = GEN_INT (l[0]);
1849       wordpart[1][1] = GEN_INT (l[1]);
1850       wordpart[2][1] = GEN_INT (l[2]);
1851       wordpart[3][1] = GEN_INT (l[3]);
1852     }
1853   else
1854     {
1855       wordpart[0][1] = op1;
1856       wordpart[1][1] = op1;
1857       wordpart[2][1] = op1;
1858       wordpart[3][1] = op1;
1859     }
1860
1861   /* Easy case: try moving the quad as two pairs.  Check for moving between
1862      an even/odd register pair and a memory location.
1863      Also handle new v9 fp regs here.  */
1864   /* ??? Should also handle the case of non-offsettable addresses here.
1865      We can at least do the first pair as a ldd/std, and then do the third
1866      and fourth words individually.  */
1867   if ((optype0 == REGOP && optype1 == OFFSOP && (REGNO (op0) & 1) == 0)
1868       || (optype0 == OFFSOP && optype1 == REGOP && (REGNO (op1) & 1) == 0))
1869     {
1870       rtx mem, reg;
1871
1872       if (optype0 == REGOP)
1873         mem = op1, reg = op0;
1874       else
1875         mem = op0, reg = op1;
1876
1877       if (mem_aligned_8 (mem)
1878           /* If this is a floating point register higher than %f31,
1879              then we *must* use an aligned load, since `ld' will not accept
1880              the register number.  */
1881           || (TARGET_V9 && REGNO (reg) >= 64))
1882         {
1883           if (TARGET_V9 && FP_REG_P (reg))
1884             {
1885               if ((REGNO (reg) & 3) != 0)
1886                 abort ();
1887               return (mem == op1 ? "ldq %1,%0" : "stq %1,%0");
1888             }
1889           operands[2] = adj_offsettable_operand (mem, 8);
1890           if (mem == op1)
1891             return TARGET_V9 ? "ldx %1,%0;ldx %2,%R0" : "ldd %1,%0;ldd %2,%S0";
1892           else
1893             return TARGET_V9 ? "stx %1,%0;stx %R1,%2" : "std %1,%0;std %S1,%2";
1894         }
1895     }
1896
1897   /* If the first move would clobber the source of the second one,
1898      do them in the other order.  */
1899
1900   /* Overlapping registers.  */
1901   if (optype0 == REGOP && optype1 == REGOP
1902       && (REGNO (op0) == REGNO (wordpart[1][3])
1903           || REGNO (op0) == REGNO (wordpart[1][2])
1904           || REGNO (op0) == REGNO (wordpart[1][1])))
1905     {
1906       /* Do fourth word.  */
1907       output_asm_insn (singlemove_string (wordpart[3]), wordpart[3]);
1908       /* Do the third word.  */
1909       output_asm_insn (singlemove_string (wordpart[2]), wordpart[2]);
1910       /* Do the second word.  */
1911       output_asm_insn (singlemove_string (wordpart[1]), wordpart[1]);
1912       /* Do lowest-numbered word.  */
1913       return singlemove_string (wordpart[0]);
1914     }
1915   /* Loading into a register which overlaps a register used in the address.  */
1916   if (optype0 == REGOP && optype1 != REGOP
1917       && reg_overlap_mentioned_p (op0, op1))
1918     {
1919       /* ??? Not implemented yet.  This is a bit complicated, because we
1920          must load which ever part overlaps the address last.  If the address
1921          is a double-reg address, then there are two parts which need to
1922          be done last, which is impossible.  We would need a scratch register
1923          in that case.  */
1924       abort ();
1925     }
1926
1927   /* Normal case: move the four words in lowest to higest address order.  */
1928
1929   output_asm_insn (singlemove_string (wordpart[0]), wordpart[0]);
1930
1931   /* Make any unoffsettable addresses point at the second word.  */
1932   if (addreg0)
1933     output_asm_insn ("add %0,0x4,%0", &addreg0);
1934   if (addreg1)
1935     output_asm_insn ("add %0,0x4,%0", &addreg1);
1936
1937   /* Do the second word.  */
1938   output_asm_insn (singlemove_string (wordpart[1]), wordpart[1]);
1939
1940   /* Make any unoffsettable addresses point at the third word.  */
1941   if (addreg0)
1942     output_asm_insn ("add %0,0x4,%0", &addreg0);
1943   if (addreg1)
1944     output_asm_insn ("add %0,0x4,%0", &addreg1);
1945
1946   /* Do the third word.  */
1947   output_asm_insn (singlemove_string (wordpart[2]), wordpart[2]);
1948
1949   /* Make any unoffsettable addresses point at the fourth word.  */
1950   if (addreg0)
1951     output_asm_insn ("add %0,0x4,%0", &addreg0);
1952   if (addreg1)
1953     output_asm_insn ("add %0,0x4,%0", &addreg1);
1954
1955   /* Do the fourth word.  */
1956   output_asm_insn (singlemove_string (wordpart[3]), wordpart[3]);
1957
1958   /* Undo the adds we just did.  */
1959   if (addreg0)
1960     output_asm_insn ("add %0,-0xc,%0", &addreg0);
1961   if (addreg1)
1962     output_asm_insn ("add %0,-0xc,%0", &addreg1);
1963
1964   return "";
1965 }
1966 \f
1967 /* Output assembler code to perform a doubleword move insn with operands
1968    OPERANDS, one of which must be a floating point register.  */
1969
1970 char *
1971 output_fp_move_double (operands)
1972      rtx *operands;
1973 {
1974   if (FP_REG_P (operands[0]))
1975     {
1976       if (FP_REG_P (operands[1]))
1977         {
1978           if (TARGET_V9)
1979             return "fmovd %1,%0";
1980           else
1981             return "fmovs %1,%0\n\tfmovs %R1,%R0";
1982         }
1983       else if (GET_CODE (operands[1]) == REG)
1984         abort ();
1985       else
1986         return output_move_double (operands);
1987     }
1988   else if (FP_REG_P (operands[1]))
1989     {
1990       if (GET_CODE (operands[0]) == REG)
1991         abort ();
1992       else
1993         return output_move_double (operands);
1994     }
1995   else abort ();
1996 }
1997
1998 /* Output assembler code to perform a quadword move insn with operands
1999    OPERANDS, one of which must be a floating point register.  */
2000
2001 char *
2002 output_fp_move_quad (operands)
2003      rtx *operands;
2004 {
2005   register rtx op0 = operands[0];
2006   register rtx op1 = operands[1];
2007
2008   if (FP_REG_P (op0))
2009     {
2010       if (FP_REG_P (op1))
2011         {
2012           if (TARGET_V9)
2013             return "fmovq %1,%0";
2014           else
2015             return "fmovs %1,%0\n\tfmovs %R1,%R0\n\tfmovs %S1,%S0\n\tfmovs %T1,%T0";
2016         }
2017       else if (GET_CODE (op1) == REG)
2018         abort ();
2019       else
2020         return output_move_quad (operands);
2021     }
2022   else if (FP_REG_P (op1))
2023     {
2024       if (GET_CODE (op0) == REG)
2025         abort ();
2026       else
2027         return output_move_quad (operands);
2028     }
2029   else
2030     abort ();
2031 }
2032 \f
2033 /* Return a REG that occurs in ADDR with coefficient 1.
2034    ADDR can be effectively incremented by incrementing REG.  */
2035
2036 static rtx
2037 find_addr_reg (addr)
2038      rtx addr;
2039 {
2040   while (GET_CODE (addr) == PLUS)
2041     {
2042       /* We absolutely can not fudge the frame pointer here, because the
2043          frame pointer must always be 8 byte aligned.  It also confuses
2044          debuggers.  */
2045       if (GET_CODE (XEXP (addr, 0)) == REG
2046           && REGNO (XEXP (addr, 0)) != FRAME_POINTER_REGNUM)
2047         addr = XEXP (addr, 0);
2048       else if (GET_CODE (XEXP (addr, 1)) == REG
2049                && REGNO (XEXP (addr, 1)) != FRAME_POINTER_REGNUM)
2050         addr = XEXP (addr, 1);
2051       else if (CONSTANT_P (XEXP (addr, 0)))
2052         addr = XEXP (addr, 1);
2053       else if (CONSTANT_P (XEXP (addr, 1)))
2054         addr = XEXP (addr, 0);
2055       else
2056         abort ();
2057     }
2058   if (GET_CODE (addr) == REG)
2059     return addr;
2060   abort ();
2061 }
2062
2063 #if 0 /* not currently used */
2064
2065 void
2066 output_sized_memop (opname, mode, signedp)
2067      char *opname;
2068      enum machine_mode mode;
2069      int signedp;
2070 {
2071   static char *ld_size_suffix_u[] = { "ub", "uh", "", "?", "d" };
2072   static char *ld_size_suffix_s[] = { "sb", "sh", "", "?", "d" };
2073   static char *st_size_suffix[] = { "b", "h", "", "?", "d" };
2074   char **opnametab, *modename;
2075
2076   if (opname[0] == 'l')
2077     if (signedp)
2078       opnametab = ld_size_suffix_s;
2079     else
2080       opnametab = ld_size_suffix_u;
2081   else
2082     opnametab = st_size_suffix;
2083   modename = opnametab[GET_MODE_SIZE (mode) >> 1];
2084
2085   fprintf (asm_out_file, "\t%s%s", opname, modename);
2086 }
2087 \f
2088 void
2089 output_move_with_extension (operands)
2090      rtx *operands;
2091 {
2092   if (GET_MODE (operands[2]) == HImode)
2093     output_asm_insn ("sll %2,0x10,%0", operands);
2094   else if (GET_MODE (operands[2]) == QImode)
2095     output_asm_insn ("sll %2,0x18,%0", operands);
2096   else
2097     abort ();
2098 }
2099 #endif /* not currently used */
2100 \f
2101 #if 0
2102 /* ??? These are only used by the movstrsi pattern, but we get better code
2103    in general without that, because emit_block_move can do just as good a
2104    job as this function does when alignment and size are known.  When they
2105    aren't known, a call to strcpy may be faster anyways, because it is
2106    likely to be carefully crafted assembly language code, and below we just
2107    do a byte-wise copy.
2108
2109    Also, emit_block_move expands into multiple read/write RTL insns, which
2110    can then be optimized, whereas our movstrsi pattern can not be optimized
2111    at all.  */
2112
2113 /* Load the address specified by OPERANDS[3] into the register
2114    specified by OPERANDS[0].
2115
2116    OPERANDS[3] may be the result of a sum, hence it could either be:
2117
2118    (1) CONST
2119    (2) REG
2120    (2) REG + CONST_INT
2121    (3) REG + REG + CONST_INT
2122    (4) REG + REG  (special case of 3).
2123
2124    Note that (3) is not a legitimate address.
2125    All cases are handled here.  */
2126
2127 void
2128 output_load_address (operands)
2129      rtx *operands;
2130 {
2131   rtx base, offset;
2132
2133   if (CONSTANT_P (operands[3]))
2134     {
2135       output_asm_insn ("set %3,%0", operands);
2136       return;
2137     }
2138
2139   if (REG_P (operands[3]))
2140     {
2141       if (REGNO (operands[0]) != REGNO (operands[3]))
2142         output_asm_insn ("mov %3,%0", operands);
2143       return;
2144     }
2145
2146   if (GET_CODE (operands[3]) != PLUS)
2147     abort ();
2148
2149   base = XEXP (operands[3], 0);
2150   offset = XEXP (operands[3], 1);
2151
2152   if (GET_CODE (base) == CONST_INT)
2153     {
2154       rtx tmp = base;
2155       base = offset;
2156       offset = tmp;
2157     }
2158
2159   if (GET_CODE (offset) != CONST_INT)
2160     {
2161       /* Operand is (PLUS (REG) (REG)).  */
2162       base = operands[3];
2163       offset = const0_rtx;
2164     }
2165
2166   if (REG_P (base))
2167     {
2168       operands[6] = base;
2169       operands[7] = offset;
2170       if (SMALL_INT (offset))
2171         output_asm_insn ("add %6,%7,%0", operands);
2172       else
2173         output_asm_insn ("set %7,%0\n\tadd %0,%6,%0", operands);
2174     }
2175   else if (GET_CODE (base) == PLUS)
2176     {
2177       operands[6] = XEXP (base, 0);
2178       operands[7] = XEXP (base, 1);
2179       operands[8] = offset;
2180
2181       if (SMALL_INT (offset))
2182         output_asm_insn ("add %6,%7,%0\n\tadd %0,%8,%0", operands);
2183       else
2184         output_asm_insn ("set %8,%0\n\tadd %0,%6,%0\n\tadd %0,%7,%0", operands);
2185     }
2186   else
2187     abort ();
2188 }
2189
2190 /* Output code to place a size count SIZE in register REG.
2191    ALIGN is the size of the unit of transfer.
2192
2193    Because block moves are pipelined, we don't include the
2194    first element in the transfer of SIZE to REG.  */
2195
2196 static void
2197 output_size_for_block_move (size, reg, align)
2198      rtx size, reg;
2199      rtx align;
2200 {
2201   rtx xoperands[3];
2202
2203   xoperands[0] = reg;
2204   xoperands[1] = size;
2205   xoperands[2] = align;
2206   if (GET_CODE (size) == REG)
2207     output_asm_insn ("sub %1,%2,%0", xoperands);
2208   else
2209     {
2210       xoperands[1]
2211         = gen_rtx (CONST_INT, VOIDmode, INTVAL (size) - INTVAL (align));
2212       output_asm_insn ("set %1,%0", xoperands);
2213     }
2214 }
2215
/* Emit code to perform a block move.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.
   OPERANDS[4] is a register we can safely clobber as a temp.

   All insns are emitted with output_asm_insn; the value returned is
   always the empty string.  The generated code moves the data through
   %g1.  */

char *
output_block_move (operands)
     rtx *operands;
{
  /* A vector for our computed operands.  Note that output_load_address
     makes use of (and can clobber) up to the 8th element of this vector.  */
  rtx xoperands[10];
  rtx zoperands[10];
  /* Counter used to number the internal "Lm" labels emitted below.  */
  static int movstrsi_label = 0;
  int i;
  rtx temp1 = operands[4];
  rtx sizertx = operands[2];
  rtx alignrtx = operands[3];
  int align = INTVAL (alignrtx);
  char label3[30], label5[30];

  xoperands[0] = operands[0];
  xoperands[1] = operands[1];
  xoperands[2] = temp1;

  /* We can't move more than this many bytes at a time because we have only
     one register, %g1, to move them through.  */
  if (align > UNITS_PER_WORD)
    {
      align = UNITS_PER_WORD;
      alignrtx = gen_rtx (CONST_INT, VOIDmode, UNITS_PER_WORD);
    }

  /* We consider 8 ld/st pairs, for a total of 16 inline insns to be
     reasonable here.  (Actually will emit a maximum of 18 inline insns for
     the case of size == 31 and align == 4).  */

  if (GET_CODE (sizertx) == CONST_INT && (INTVAL (sizertx) / align) <= 8
      && memory_address_p (QImode, plus_constant_for_output (xoperands[0],
                                                             INTVAL (sizertx)))
      && memory_address_p (QImode, plus_constant_for_output (xoperands[1],
                                                             INTVAL (sizertx))))
    {
      /* SIZE is the number of bytes still to be copied; OFFSET is the
         number of bytes already handled by a wider transfer size.  */
      int size = INTVAL (sizertx);
      int offset = 0;

      /* We will store different integers into this particular RTX.  */
      xoperands[2] = rtx_alloc (CONST_INT);
      PUT_MODE (xoperands[2], VOIDmode);

      /* This case is currently not handled.  Abort instead of generating
         bad code.  (ALIGN was already clamped to UNITS_PER_WORD above, so
         this is only a safety check.)  */
      if (align > UNITS_PER_WORD)
        abort ();

      /* Copy as many 8 byte units as possible, highest offset first.  */
      if (TARGET_V9 && align >= 8)
        {
          for (i = (size >> 3) - 1; i >= 0; i--)
            {
              INTVAL (xoperands[2]) = (i << 3) + offset;
              output_asm_insn ("ldx [%a1+%2],%%g1\n\tstx %%g1,[%a0+%2]",
                               xoperands);
            }
          offset += (size & ~0x7);
          size = size & 0x7;
          if (size == 0)
            return "";
        }

      /* Then 4 byte units.  */
      if (align >= 4)
        {
          for (i = (size >> 2) - 1; i >= 0; i--)
            {
              INTVAL (xoperands[2]) = (i << 2) + offset;
              output_asm_insn ("ld [%a1+%2],%%g1\n\tst %%g1,[%a0+%2]",
                               xoperands);
            }
          offset += (size & ~0x3);
          size = size & 0x3;
          if (size == 0)
            return "";
        }

      /* Then 2 byte units.  */
      if (align >= 2)
        {
          for (i = (size >> 1) - 1; i >= 0; i--)
            {
              INTVAL (xoperands[2]) = (i << 1) + offset;
              output_asm_insn ("lduh [%a1+%2],%%g1\n\tsth %%g1,[%a0+%2]",
                               xoperands);
            }
          offset += (size & ~0x1);
          size = size & 0x1;
          if (size == 0)
            return "";
        }

      /* Finally whatever is left, one byte at a time.  */
      if (align >= 1)
        {
          for (i = size - 1; i >= 0; i--)
            {
              INTVAL (xoperands[2]) = i + offset;
              output_asm_insn ("ldub [%a1+%2],%%g1\n\tstb %%g1,[%a0+%2]",
                               xoperands);
            }
          return "";
        }

      /* We should never reach here.  */
      abort ();
    }

  /* If the size isn't known to be a multiple of the alignment,
     we have to do it in smaller pieces.  If we could determine that
     the size was a multiple of 2 (or whatever), we could be smarter
     about this.  */
  if (GET_CODE (sizertx) != CONST_INT)
    align = 1;
  else
    {
      int size = INTVAL (sizertx);
      while (size % align)
        align >>= 1;
    }

  if (align != INTVAL (alignrtx))
    alignrtx = gen_rtx (CONST_INT, VOIDmode, align);

  /* Operands 3 and 5 hold the numbers of the two labels used by the loop;
     operand 4 is the transfer unit.  */
  xoperands[3] = gen_rtx (CONST_INT, VOIDmode, movstrsi_label++);
  xoperands[4] = gen_rtx (CONST_INT, VOIDmode, align);
  xoperands[5] = gen_rtx (CONST_INT, VOIDmode, movstrsi_label++);

  ASM_GENERATE_INTERNAL_LABEL (label3, "Lm", INTVAL (xoperands[3]));
  ASM_GENERATE_INTERNAL_LABEL (label5, "Lm", INTVAL (xoperands[5]));

  /* This is the size of the transfer.  Emit code to decrement the size
     value by ALIGN, and store the result in the temp1 register.  */
  output_size_for_block_move (sizertx, temp1, alignrtx);

  /* Must handle the case when the size is zero or negative, so the first thing
     we do is compare the size against zero, and only copy bytes if it is
     zero or greater.  Note that we have already subtracted off the alignment
     once, so we must copy 1 alignment worth of bytes if the size is zero
     here.

     The SUN assembler complains about labels in branch delay slots, so we
     do this before outputting the load address, so that there will always
     be a harmless insn between the branch here and the next label emitted
     below.  */

  {
    char pattern[100];

    /* The label text is used from its second character on.  */
    sprintf (pattern, "cmp %%2,0\n\tbl %s", &label5[1]);
    output_asm_insn (pattern, xoperands);
  }

  /* Advance the destination register by ALIGN.  */
  zoperands[0] = operands[0];
  zoperands[3] = plus_constant_for_output (operands[0], align);
  output_load_address (zoperands);

  /* ??? This might be much faster if the loops below were preconditioned
     and unrolled.

     That is, at run time, copy enough bytes one at a time to ensure that the
     target and source addresses are aligned to the largest possible
     alignment.  Then use a preconditioned unrolled loop to copy say 16
     bytes at a time.  Then copy bytes one at a time until finish the rest.  */

  /* Output the first label separately, so that it is spaced properly.  */

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "Lm", INTVAL (xoperands[3]));

  {
    char pattern[200];
    /* Pick the load/store size suffix that matches the transfer unit.  */
    register char *ld_suffix = ((align == 1) ? "ub" : (align == 2) ? "uh"
                                : (align == 8 && TARGET_V9) ? "x" : "");
    register char *st_suffix = ((align == 1) ? "b" : (align == 2) ? "h"
                                : (align == 8 && TARGET_V9) ? "x" : "");

    /* The percent signs are escaped once for sprintf here and, for %g1,
       once more for output_asm_insn.  */
    sprintf (pattern, "ld%s [%%1+%%2],%%%%g1\n\tsubcc %%2,%%4,%%2\n\tbge %s\n\tst%s %%%%g1,[%%0+%%2]\n%s:", ld_suffix, &label3[1], st_suffix, &label5[1]);
    output_asm_insn (pattern, xoperands);
  }

  return "";
}
2405 #endif
2406 \f
2407 /* Output reasonable peephole for set-on-condition-code insns.
2408    Note that these insns assume a particular way of defining
2409    labels.  Therefore, *both* sparc.h and this function must
2410    be changed if a new syntax is needed.    */
2411
2412 char *
2413 output_scc_insn (operands, insn)
2414      rtx operands[];
2415      rtx insn;
2416 {
2417   static char string[100];
2418   rtx label = 0, next = insn;
2419   int need_label = 0;
2420
2421   /* Try doing a jump optimization which jump.c can't do for us
2422      because we did not expose that setcc works by using branches.
2423
2424      If this scc insn is followed by an unconditional branch, then have
2425      the jump insn emitted here jump to that location, instead of to
2426      the end of the scc sequence as usual.  */
2427
2428   do
2429     {
2430       if (GET_CODE (next) == CODE_LABEL)
2431         label = next;
2432       next = NEXT_INSN (next);
2433       if (next == 0)
2434         break;
2435     }
2436   while (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
2437
2438   /* If we are in a sequence, and the following insn is a sequence also,
2439      then just following the current insn's next field will take us to the
2440      first insn of the next sequence, which is the wrong place.  We don't
2441      want to optimize with a branch that has had its delay slot filled.
2442      Avoid this by verifying that NEXT_INSN (PREV_INSN (next)) == next
2443      which fails only if NEXT is such a branch.  */
2444
2445   if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next)
2446       && (! final_sequence || NEXT_INSN (PREV_INSN (next)) == next))
2447     label = JUMP_LABEL (next);
2448   /* If not optimizing, jump label fields are not set.  To be safe, always
2449      check here to whether label is still zero.  */
2450   if (label == 0)
2451     {
2452       label = gen_label_rtx ();
2453       need_label = 1;
2454     }
2455
2456   LABEL_NUSES (label) += 1;
2457
2458   operands[2] = label;
2459
2460   /* If we are in a delay slot, assume it is the delay slot of an fpcc
2461      insn since our type isn't allowed anywhere else.  */
2462
2463   /* ??? Fpcc instructions no longer have delay slots, so this code is
2464      probably obsolete.  */
2465
2466   /* The fastest way to emit code for this is an annulled branch followed
2467      by two move insns.  This will take two cycles if the branch is taken,
2468      and three cycles if the branch is not taken.
2469
2470      However, if we are in the delay slot of another branch, this won't work,
2471      because we can't put a branch in the delay slot of another branch.
2472      The above sequence would effectively take 3 or 4 cycles respectively
2473      since a no op would have be inserted between the two branches.
2474      In this case, we want to emit a move, annulled branch, and then the
2475      second move.  This sequence always takes 3 cycles, and hence is faster
2476      when we are in a branch delay slot.  */
2477
2478   if (final_sequence)
2479     {
2480       strcpy (string, "mov 0,%0\n\t");
2481       strcat (string, output_cbranch (operands[1], 0, 2, 0, 1, 0));
2482       strcat (string, "\n\tmov 1,%0");
2483     }
2484   else
2485     {
2486       strcpy (string, output_cbranch (operands[1], 0, 2, 0, 1, 0));
2487       strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
2488     }
2489
2490   if (need_label)
2491     strcat (string, "\n%l2:");
2492
2493   return string;
2494 }
2495 \f
/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32 bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.

   Note that the enumerators are bit *numbers*.  Every mask below, and every
   entry of the hard register tables, must be built from
   (1 << (int) enumerator), never from a bare enumerator.  */

enum sparc_mode_class {
  C_MODE, CCFP_MODE,
  S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE
};

/* Modes for condition codes.  */
#define C_MODES ((1 << (int) C_MODE) | (1 << (int) CCFP_MODE))
#define CCFP_MODES (1 << (int) CCFP_MODE)

/* Modes for single-word and smaller quantities.  */
#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for single-float quantities.  We must allow any single word or
   smaller quantity.  This is because the fix/float conversion instructions
   take integer inputs/outputs from the float registers.  */
#define SF_MODES (S_MODES)

/* Modes for double-float and smaller quantities.  */
#define DF_MODES (S_MODES | D_MODES)

/* ??? Sparc64 fp regs cannot hold DImode values.  */
#define DF_MODES64 (SF_MODES | (1 << (int) DF_MODE) /* | (1 << (int) D_MODE) */)

/* Modes for double-float only quantities.  */
/* ??? Sparc64 fp regs cannot hold DImode values.  */
#define DF_ONLY_MODES ((1 << (int) DF_MODE) /*| (1 << (int) D_MODE)*/)

/* Modes for double-float and larger quantities.  */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)

/* Modes for quad-float only quantities.  */
#define TF_ONLY_MODES (1 << (int) TF_MODE)

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)

/* ??? Sparc64 fp regs cannot hold DImode values.  */
#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)

/* Value is 1 if register/mode pair is acceptable on sparc.
   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).  */

/* This points to either the 32 bit or the 64 bit version.  */
int *hard_regno_mode_classes;

/* Indexed by hard register number: 32 integer registers followed by
   32 fp registers.  */
static int hard_32bit_mode_classes[] = {
  C_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
  TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
};

/* Indexed by hard register number: 32 integer registers, fp registers
   f0 to f31, fp registers f32 to f63, then the four %fcc registers.  */
static int hard_64bit_mode_classes[] = {
  C_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
  TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,

  /* The remaining registers do not exist on a non-v9 sparc machine.
     FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
  DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES
};
2594
2595 int sparc_mode_class [NUM_MACHINE_MODES];
2596
2597 static void
2598 sparc_init_modes ()
2599 {
2600   int i;
2601
2602   sparc_arch_type = TARGET_V9 ? ARCH_64BIT : ARCH_32BIT;
2603
2604   for (i = 0; i < NUM_MACHINE_MODES; i++)
2605     {
2606       switch (GET_MODE_CLASS (i))
2607         {
2608         case MODE_INT:
2609         case MODE_PARTIAL_INT:
2610         case MODE_COMPLEX_INT:
2611           if (GET_MODE_SIZE (i) <= 4)
2612             sparc_mode_class[i] = 1 << (int) S_MODE;
2613           else if (GET_MODE_SIZE (i) == 8)
2614             sparc_mode_class[i] = 1 << (int) D_MODE;
2615           else if (GET_MODE_SIZE (i) == 16)
2616             sparc_mode_class[i] = 1 << (int) T_MODE;
2617           else if (GET_MODE_SIZE (i) == 32)
2618             sparc_mode_class[i] = 1 << (int) O_MODE;
2619           else
2620             sparc_mode_class[i] = 0;
2621           break;
2622         case MODE_FLOAT:
2623         case MODE_COMPLEX_FLOAT:
2624           if (GET_MODE_SIZE (i) <= 4)
2625             sparc_mode_class[i] = 1 << (int) SF_MODE;
2626           else if (GET_MODE_SIZE (i) == 8)
2627             sparc_mode_class[i] = 1 << (int) DF_MODE;
2628           else if (GET_MODE_SIZE (i) == 16)
2629             sparc_mode_class[i] = 1 << (int) TF_MODE;
2630           else if (GET_MODE_SIZE (i) == 32)
2631             sparc_mode_class[i] = 1 << (int) OF_MODE;
2632           else
2633             sparc_mode_class[i] = 0;
2634           break;
2635         case MODE_CC:
2636         default:
2637           /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
2638              we must explicitly check for them here.  */
2639           if (i == (int) CCFPmode || i == (int) CCFPEmode)
2640             sparc_mode_class[i] = 1 << (int) CCFP_MODE;
2641           else if (i == (int) CCmode || i == (int) CC_NOOVmode
2642 #ifdef SPARCV9
2643                    || i == (int) CCXmode
2644                    || i == (int) CCX_NOOVmode
2645 #endif
2646                    )
2647             sparc_mode_class[i] = 1 << (int) C_MODE;
2648           else
2649             sparc_mode_class[i] = 0;
2650           break;
2651         }
2652     }
2653
2654   if (TARGET_V9)
2655     hard_regno_mode_classes = hard_64bit_mode_classes;
2656   else
2657     hard_regno_mode_classes = hard_32bit_mode_classes;
2658 }
2659 \f
2660 /* Save non call used registers from LOW to HIGH at BASE+OFFSET.
2661    N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2662    v9 int regs as it simplifies the code.  */
2663
2664 #ifdef __GNUC__
2665 __inline__
2666 #endif
2667 static int
2668 save_regs (file, low, high, base, offset, n_regs)
2669      FILE *file;
2670      int low, high;
2671      char *base;
2672      int offset;
2673      int n_regs;
2674 {
2675   int i;
2676
2677   if (TARGET_V9 && high <= 32)
2678     {
2679       for (i = low; i < high; i++)
2680         {
2681           if (regs_ever_live[i] && ! call_used_regs[i])
2682             fprintf (file, "\tstx %s,[%s+%d]\n",
2683               reg_names[i], base, offset + 4 * n_regs),
2684             n_regs += 2;
2685         }
2686     }
2687   else
2688     {
2689       for (i = low; i < high; i += 2)
2690         {
2691           if (regs_ever_live[i] && ! call_used_regs[i])
2692             if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2693               fprintf (file, "\tstd %s,[%s+%d]\n",
2694                        reg_names[i], base, offset + 4 * n_regs),
2695               n_regs += 2;
2696             else
2697               fprintf (file, "\tst %s,[%s+%d]\n",
2698                        reg_names[i], base, offset + 4 * n_regs),
2699               n_regs += 2;
2700           else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2701             fprintf (file, "\tst %s,[%s+%d]\n",
2702                      reg_names[i+1], base, offset + 4 * n_regs + 4),
2703             n_regs += 2;
2704         }
2705     }
2706   return n_regs;
2707 }
2708
2709 /* Restore non call used registers from LOW to HIGH at BASE+OFFSET.
2710
2711    N_REGS is the number of 4-byte regs saved thus far.  This applies even to
2712    v9 int regs as it simplifies the code.  */
2713
2714 #ifdef __GNUC__
2715 __inline__
2716 #endif
2717 static int
2718 restore_regs (file, low, high, base, offset, n_regs)
2719      FILE *file;
2720      int low, high;
2721      char *base;
2722      int offset;
2723      int n_regs;
2724 {
2725   int i;
2726
2727   if (TARGET_V9 && high <= 32)
2728     {
2729       for (i = low; i < high; i++)
2730         {
2731           if (regs_ever_live[i] && ! call_used_regs[i])
2732             fprintf (file, "\tldx [%s+%d], %s\n",
2733               base, offset + 4 * n_regs, reg_names[i]),
2734             n_regs += 2;
2735         }
2736     }
2737   else
2738     {
2739       for (i = low; i < high; i += 2)
2740         {
2741           if (regs_ever_live[i] && ! call_used_regs[i])
2742             if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2743               fprintf (file, "\tldd [%s+%d], %s\n",
2744                        base, offset + 4 * n_regs, reg_names[i]),
2745               n_regs += 2;
2746             else
2747               fprintf (file, "\tld [%s+%d],%s\n",
2748                        base, offset + 4 * n_regs, reg_names[i]),
2749               n_regs += 2;
2750           else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
2751             fprintf (file, "\tld [%s+%d],%s\n",
2752                      base, offset + 4 * n_regs + 4, reg_names[i+1]),
2753             n_regs += 2;
2754         }
2755     }
2756   return n_regs;
2757 }
2758
2759 /* Static variables we want to share between prologue and epilogue.  */
2760
2761 /* Number of live general or floating point registers needed to be saved
2762    (as 4-byte quantities).  This is only done if TARGET_EPILOGUE.  */
2763 static int num_gfregs;
2764
2765 /* Compute the frame size required by the function.  This function is called
2766    during the reload pass and also by output_function_prologue().  */
2767
2768 int
2769 compute_frame_size (size, leaf_function)
2770      int size;
2771      int leaf_function;
2772 {
2773   int n_regs = 0, i;
2774   int outgoing_args_size = (current_function_outgoing_args_size
2775 #ifndef SPARCV9
2776                             + REG_PARM_STACK_SPACE (current_function_decl)
2777 #endif
2778                             );
2779
2780   if (TARGET_EPILOGUE)
2781     {
2782       /* N_REGS is the number of 4-byte regs saved thus far.  This applies
2783          even to v9 int regs to be consistent with save_regs/restore_regs.  */
2784
2785       if (TARGET_V9)
2786         {
2787           for (i = 0; i < 8; i++)
2788             if (regs_ever_live[i] && ! call_used_regs[i])
2789               n_regs += 2;
2790         }
2791       else
2792         {
2793           for (i = 0; i < 8; i += 2)
2794             if ((regs_ever_live[i] && ! call_used_regs[i])
2795                 || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
2796               n_regs += 2;
2797         }
2798
2799       for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
2800         if ((regs_ever_live[i] && ! call_used_regs[i])
2801             || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
2802           n_regs += 2;
2803     }
2804
2805   /* Set up values for use in `function_epilogue'.  */
2806   num_gfregs = n_regs;
2807
2808   if (leaf_function && n_regs == 0
2809       && size == 0 && current_function_outgoing_args_size == 0)
2810     {
2811       actual_fsize = apparent_fsize = 0;
2812     }
2813   else
2814     {
2815       /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
2816          The stack bias (if any) is taken out to undo its effects.  */
2817       apparent_fsize = (size - STARTING_FRAME_OFFSET + SPARC_STACK_BIAS + 7) & -8;
2818       apparent_fsize += n_regs * 4;
2819       actual_fsize = apparent_fsize + ((outgoing_args_size + 7) & -8);
2820     }
2821
2822   /* Make sure nothing can clobber our register windows.
2823      If a SAVE must be done, or there is a stack-local variable,
2824      the register window area must be allocated.
2825      ??? For v9 we need an additional 8 bytes of reserved space, apparently
2826      it's needed by v8 as well.  */
2827   if (leaf_function == 0 || size > 0)
2828     actual_fsize += (16 * UNITS_PER_WORD) + 8;
2829
2830   return SPARC_STACK_ALIGN (actual_fsize);
2831 }
2832
2833 /* Build a (32 bit) big number in a register.  */
2834 /* ??? We may be able to use the set macro here too.  */
2835
2836 static void
2837 build_big_number (file, num, reg)
2838      FILE *file;
2839      int num;
2840      char *reg;
2841 {
2842   if (num >= 0 || ! TARGET_V9)
2843     {
2844       fprintf (file, "\tsethi %%hi(%d),%s\n", num, reg);
2845       if ((num & 0x3ff) != 0)
2846         fprintf (file, "\tor %s,%%lo(%d),%s\n", reg, num, reg);
2847     }
2848   else /* num < 0 && TARGET_V9 */
2849     {
2850       /* Sethi does not sign extend, so we must use a little trickery
2851          to use it for negative numbers.  Invert the constant before
2852          loading it in, then use xor immediate to invert the loaded bits
2853          (along with the upper 32 bits) to the desired constant.  This
2854          works because the sethi and immediate fields overlap.  */
2855       int asize = num;
2856       int inv = ~asize;
2857       int low = -0x400 + (asize & 0x3FF);
2858
2859       fprintf (file, "\tsethi %%hi(%d),%s\n\txor %s,%d,%s\n",
2860                inv, reg, reg, low, reg);
2861     }
2862 }
2863
2864 /* Output code for the function prologue.  */
2865
2866 void
2867 output_function_prologue (file, size, leaf_function)
2868      FILE *file;
2869      int size;
2870      int leaf_function;
2871 {
2872   /* Need to use actual_fsize, since we are also allocating
2873      space for our callee (and our own register save area).  */
2874   actual_fsize = compute_frame_size (size, leaf_function);
2875
2876   if (leaf_function)
2877     {
2878       frame_base_name = "%sp";
2879       frame_base_offset = actual_fsize + SPARC_STACK_BIAS;
2880     }
2881   else
2882     {
2883       frame_base_name = "%fp";
2884       frame_base_offset = SPARC_STACK_BIAS;
2885     }
2886
2887   /* This is only for the human reader.  */
2888   fprintf (file, "\t!#PROLOGUE# 0\n");
2889
2890   if (actual_fsize == 0)
2891     /* do nothing.  */ ;
2892   else if (actual_fsize <= 4096)
2893     {
2894       if (! leaf_function)
2895         fprintf (file, "\tsave %%sp,-%d,%%sp\n", actual_fsize);
2896       else
2897         fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize);
2898     }
2899   else if (actual_fsize <= 8192)
2900     {
2901       /* For frames in the range 4097..8192, we can use just two insns.  */
2902       if (! leaf_function)
2903         {
2904           fprintf (file, "\tsave %%sp,-4096,%%sp\n");
2905           fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize - 4096);
2906         }
2907       else
2908         {
2909           fprintf (file, "\tadd %%sp,-4096,%%sp\n");
2910           fprintf (file, "\tadd %%sp,-%d,%%sp\n", actual_fsize - 4096);
2911         }
2912     }
2913   else
2914     {
2915       build_big_number (file, -actual_fsize, "%g1");
2916       if (! leaf_function)
2917         fprintf (file, "\tsave %%sp,%%g1,%%sp\n");
2918       else
2919         fprintf (file, "\tadd %%sp,%%g1,%%sp\n");
2920     }
2921
2922   /* If doing anything with PIC, do it now.  */
2923   if (! flag_pic)
2924     fprintf (file, "\t!#PROLOGUE# 1\n");
2925
2926   /* Call saved registers are saved just above the outgoing argument area.  */
2927   if (num_gfregs)
2928     {
2929       int offset, n_regs;
2930       char *base;
2931
2932       offset = -apparent_fsize + frame_base_offset;
2933       if (offset < -4096 || offset + num_gfregs * 4 > 4096)
2934         {
2935           /* ??? This might be optimized a little as %g1 might already have a
2936              value close enough that a single add insn will do.  */
2937           /* ??? Although, all of this is probably only a temporary fix
2938              because if %g1 can hold a function result, then
2939              output_function_epilogue will lose (the result will get
2940              clobbered).  */
2941           build_big_number (file, offset, "%g1");
2942           fprintf (file, "\tadd %s,%%g1,%%g1\n", frame_base_name);
2943           base = "%g1";
2944           offset = 0;
2945         }
2946       else
2947         {
2948           base = frame_base_name;
2949         }
2950
2951       if (TARGET_EPILOGUE && ! leaf_function)
2952         /* ??? Originally saved regs 0-15 here.  */
2953         n_regs = save_regs (file, 0, 8, base, offset, 0);
2954       else if (leaf_function)
2955         /* ??? Originally saved regs 0-31 here.  */
2956         n_regs = save_regs (file, 0, 8, base, offset, 0);
2957       if (TARGET_EPILOGUE)
2958         save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
2959     }
2960
2961   leaf_label = 0;
2962   if (leaf_function && actual_fsize != 0)
2963     {
2964       /* warning ("leaf procedure with frame size %d", actual_fsize); */
2965       if (! TARGET_EPILOGUE)
2966         leaf_label = gen_label_rtx ();
2967     }
2968 }
2969
2970 /* Output code for the function epilogue.  */
2971
2972 void
2973 output_function_epilogue (file, size, leaf_function)
2974      FILE *file;
2975      int size;
2976      int leaf_function;
2977 {
2978   char *ret;
2979
2980   if (leaf_label)
2981     {
2982       emit_label_after (leaf_label, get_last_insn ());
2983       final_scan_insn (get_last_insn (), file, 0, 0, 1);
2984     }
2985
2986   /* Restore any call saved registers.  */
2987   if (num_gfregs)
2988     {
2989       int offset, n_regs;
2990       char *base;
2991
2992       offset = -apparent_fsize + frame_base_offset;
2993       if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
2994         {
2995           build_big_number (file, offset, "%g1");
2996           fprintf (file, "\tadd %s,%%g1,%%g1\n", frame_base_name);
2997           base = "%g1";
2998           offset = 0;
2999         }
3000       else
3001         {
3002           base = frame_base_name;
3003         }
3004
3005       if (TARGET_EPILOGUE && ! leaf_function)
3006         /* ??? Originally saved regs 0-15 here.  */
3007         n_regs = restore_regs (file, 0, 8, base, offset, 0);
3008       else if (leaf_function)
3009         /* ??? Originally saved regs 0-31 here.  */
3010         n_regs = restore_regs (file, 0, 8, base, offset, 0);
3011       if (TARGET_EPILOGUE)
3012         restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
3013     }
3014
3015   /* Work out how to skip the caller's unimp instruction if required.  */
3016   if (leaf_function)
3017     ret = (SKIP_CALLERS_UNIMP_P ? "jmp %o7+12" : "retl");
3018   else
3019     ret = (SKIP_CALLERS_UNIMP_P ? "jmp %i7+12" : "ret");
3020
3021   if (TARGET_EPILOGUE || leaf_label)
3022     {
3023       int old_target_epilogue = TARGET_EPILOGUE;
3024       target_flags &= ~old_target_epilogue;
3025
3026       if (! leaf_function)
3027         {
3028           /* If we wound up with things in our delay slot, flush them here.  */
3029           if (current_function_epilogue_delay_list)
3030             {
3031               rtx insn = emit_jump_insn_after (gen_rtx (RETURN, VOIDmode),
3032                                                get_last_insn ());
3033               PATTERN (insn) = gen_rtx (PARALLEL, VOIDmode,
3034                                         gen_rtvec (2,
3035                                                    PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
3036                                                    PATTERN (insn)));
3037               final_scan_insn (insn, file, 1, 0, 1);
3038             }
3039           else
3040             fprintf (file, "\t%s\n\trestore\n", ret);
3041         }
3042       /* All of the following cases are for leaf functions.  */
3043       else if (current_function_epilogue_delay_list)
3044         {
3045           /* eligible_for_epilogue_delay_slot ensures that if this is a
3046              leaf function, then we will only have insn in the delay slot
3047              if the frame size is zero, thus no adjust for the stack is
3048              needed here.  */
3049           if (actual_fsize != 0)
3050             abort ();
3051           fprintf (file, "\t%s\n", ret);
3052           final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
3053                            file, 1, 0, 1);
3054         }
3055       /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
3056          avoid generating confusing assembly language output.  */
3057       else if (actual_fsize == 0)
3058         fprintf (file, "\t%s\n\tnop\n", ret);
3059       else if (actual_fsize <= 4096)
3060         fprintf (file, "\t%s\n\tsub %%sp,-%d,%%sp\n", ret, actual_fsize);
3061       else if (actual_fsize <= 8192)
3062         fprintf (file, "\tsub %%sp,-4096,%%sp\n\t%s\n\tsub %%sp,-%d,%%sp\n",
3063                  ret, actual_fsize - 4096);
3064       else if ((actual_fsize & 0x3ff) == 0)
3065         fprintf (file, "\tsethi %%hi(%d),%%g1\n\t%s\n\tadd %%sp,%%g1,%%sp\n",
3066                  actual_fsize, ret);
3067       else
3068         fprintf (file, "\tsethi %%hi(%d),%%g1\n\tor %%g1,%%lo(%d),%%g1\n\t%s\n\tadd %%sp,%%g1,%%sp\n",
3069                  actual_fsize, actual_fsize, ret);
3070       target_flags |= old_target_epilogue;
3071     }
3072 }
3073
3074 /* Do what is necessary for `va_start'.  The argument is ignored.
3075    !v9: We look at the current function to determine if stdarg or varargs
3076    is used and return the address of the first unnamed parameter.
3077    v9: We save the argument integer and floating point regs in a buffer, and
3078    return the address of this buffer.  The rest is handled in va-sparc.h.  */
3079 /* ??? This is currently conditioned on #ifdef SPARCV9 because
3080    current_function_args_info is different in each compiler.  */
3081
3082 #ifdef SPARCV9
3083
3084 rtx
3085 sparc_builtin_saveregs (arglist)
3086      tree arglist;
3087 {
3088   tree fntype = TREE_TYPE (current_function_decl);
3089   /* First unnamed integer register.  */
3090   int first_intreg = current_function_args_info.arg_count[(int) SPARC_ARG_INT];
3091   /* Number of integer registers we need to save.  */
3092   int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
3093   /* First unnamed SFmode float reg (no, you can't pass SFmode floats as
3094      unnamed arguments, we just number them that way).  We must round up to
3095      the next double word float reg - that is the first one to save.  */
3096   int first_floatreg = current_function_args_info.arg_count[(int) SPARC_ARG_FLOAT] + 1 & ~1;
3097   /* Number of SFmode float regs to save.  */
3098   int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
3099   int ptrsize = GET_MODE_SIZE (Pmode);
3100   rtx valist, regbuf, fpregs;
3101   int bufsize, adjust, regno;
3102
3103   /* Allocate block of memory for the regs.
3104      We only allocate as much as we need, but we must ensure quadword float
3105      regs are stored with the appropriate alignment.  */
3106   /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
3107      Or can assign_stack_local accept a 0 SIZE argument?  */
3108
3109   bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * (UNITS_PER_WORD / 2));
3110   /* Add space in front of the int regs to ensure proper alignment of quadword
3111      fp regs.  We must add the space in front because va_start assumes this.  */
3112   if (n_floatregs >= 4)
3113     adjust = ((n_intregs + first_floatreg / 2) % 2) * UNITS_PER_WORD;
3114   else
3115     adjust = 0;
3116
3117   regbuf = assign_stack_local (BLKmode, bufsize + adjust,
3118                                GET_MODE_BITSIZE (TFmode));
3119   regbuf = gen_rtx (MEM, BLKmode, plus_constant (XEXP (regbuf, 0), adjust));
3120   MEM_IN_STRUCT_P (regbuf) = 1;
3121
3122   /* Save int args.
3123      This is optimized to only save the regs that are necessary.  Explicitly
3124      named args need not be saved.  */
3125
3126   if (n_intregs > 0)
3127     move_block_from_reg (BASE_INCOMING_ARG_REG (SImode) + first_intreg,
3128                          regbuf, n_intregs, n_intregs * UNITS_PER_WORD);
3129
3130   /* Save float args.
3131      This is optimized to only save the regs that are necessary.  Explicitly
3132      named args need not be saved.
3133      We explicitly build a pointer to the buffer because it halves the insn
3134      count when not optimizing (otherwise the pointer is built for each reg
3135      saved).  */
3136
3137   fpregs = gen_reg_rtx (Pmode);
3138   emit_move_insn (fpregs, plus_constant (XEXP (regbuf, 0),
3139                                          n_intregs * UNITS_PER_WORD));
3140   for (regno = first_floatreg; regno < NPARM_REGS (SFmode); regno += 2)
3141     emit_move_insn (gen_rtx (MEM, DFmode,
3142                              plus_constant (fpregs,
3143                                             GET_MODE_SIZE (SFmode)
3144                                             * (regno - first_floatreg))),
3145                     gen_rtx (REG, DFmode,
3146                              BASE_INCOMING_ARG_REG (DFmode) + regno));
3147
3148   /* Return the address of the regbuf.  */
3149
3150   return XEXP (regbuf, 0);
3151 }
3152
3153 #else /* ! SPARCV9 */
3154
3155 rtx
3156 sparc_builtin_saveregs (arglist)
3157      tree arglist;
3158 {
3159   tree fntype = TREE_TYPE (current_function_decl);
3160   int stdarg = (TYPE_ARG_TYPES (fntype) != 0
3161                 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3162                     != void_type_node));
3163   int first_reg = current_function_args_info;
3164   rtx address;
3165   int regno;
3166
3167 #if 0 /* This code seemed to have no effect except to make
3168          varargs not work right when va_list wasn't the first arg.  */
3169   if (! stdarg)
3170     first_reg = 0;
3171 #endif
3172
3173   for (regno = first_reg; regno < NPARM_REGS (SImode); regno++)
3174     emit_move_insn (gen_rtx (MEM, word_mode,
3175                              gen_rtx (PLUS, Pmode,
3176                                       frame_pointer_rtx,
3177                                       GEN_INT (STACK_POINTER_OFFSET
3178                                                + UNITS_PER_WORD * regno))),
3179                     gen_rtx (REG, word_mode, BASE_INCOMING_ARG_REG (word_mode)
3180                              + regno));
3181
3182   address = gen_rtx (PLUS, Pmode,
3183                      frame_pointer_rtx,
3184                      GEN_INT (STACK_POINTER_OFFSET
3185                               + UNITS_PER_WORD * first_reg));
3186
3187   return address;
3188 }
3189
3190 #endif /* ! SPARCV9 */
3191 \f
3192 /* Return the string to output a conditional branch to LABEL, which is
3193    the operand number of the label.  OP is the conditional expression.  The
3194    mode of register 0 says what kind of comparison we made.
3195
3196    FP_COND_REG indicates which fp condition code register to use if this is
3197    a floating point branch.
3198
3199    REVERSED is non-zero if we should reverse the sense of the comparison.
3200
3201    ANNUL is non-zero if we should generate an annulling branch.
3202
3203    NOOP is non-zero if we have to follow this branch by a noop.  */
3204
3205 char *
3206 output_cbranch (op, fp_cond_reg, label, reversed, annul, noop)
3207      rtx op, fp_cond_reg;
3208      int label;
3209      int reversed, annul, noop;
3210 {
3211   static char string[20];
3212   enum rtx_code code = GET_CODE (op);
3213   enum machine_mode mode = GET_MODE (XEXP (op, 0));
3214   static char v8_labelno[] = " %lX";
3215   static char v9_icc_labelno[] = " %%icc,%lX";
3216   static char v9_xcc_labelno[] = " %%xcc,%lX";
3217   static char v9_fcc_labelno[] = " %%fccX,%lY";
3218   char *labelno;
3219   int labeloff;
3220
3221   /* ??? !v9: FP branches cannot be preceded by another floating point insn.
3222      Because there is currently no concept of pre-delay slots, we can fix
3223      this only by always emitting a nop before a floating point branch.  */
3224
3225   if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
3226     strcpy (string, "nop\n\t");
3227   else
3228     string[0] = '\0';
3229
3230   /* If not floating-point or if EQ or NE, we can just reverse the code.  */
3231   if (reversed
3232       && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
3233     code = reverse_condition (code), reversed = 0;
3234
3235   /* Start by writing the branch condition.  */
3236   switch (code)
3237     {
3238     case NE:
3239       if (mode == CCFPmode || mode == CCFPEmode)
3240         strcat (string, "fbne");
3241       else
3242         strcpy (string, "bne");
3243       break;
3244
3245     case EQ:
3246       if (mode == CCFPmode || mode == CCFPEmode)
3247         strcat (string, "fbe");
3248       else
3249         strcpy (string, "be");
3250       break;
3251
3252     case GE:
3253       if (mode == CCFPmode || mode == CCFPEmode)
3254         {
3255           if (reversed)
3256             strcat (string, "fbul");
3257           else
3258             strcat (string, "fbge");
3259         }
3260       else if (mode == CC_NOOVmode)
3261         strcpy (string, "bpos");
3262       else
3263         strcpy (string, "bge");
3264       break;
3265
3266     case GT:
3267       if (mode == CCFPmode || mode == CCFPEmode)
3268         {
3269           if (reversed)
3270             strcat (string, "fbule");
3271           else
3272             strcat (string, "fbg");
3273         }
3274       else
3275         strcpy (string, "bg");
3276       break;
3277
3278     case LE:
3279       if (mode == CCFPmode || mode == CCFPEmode)
3280         {
3281           if (reversed)
3282             strcat (string, "fbug");
3283           else
3284             strcat (string, "fble");
3285         }
3286       else
3287         strcpy (string, "ble");
3288       break;
3289
3290     case LT:
3291       if (mode == CCFPmode || mode == CCFPEmode)
3292         {
3293           if (reversed)
3294             strcat (string, "fbuge");
3295           else
3296             strcat (string, "fbl");
3297         }
3298       else if (mode == CC_NOOVmode)
3299         strcpy (string, "bneg");
3300       else
3301         strcpy (string, "bl");
3302       break;
3303
3304     case GEU:
3305       strcpy (string, "bgeu");
3306       break;
3307
3308     case GTU:
3309       strcpy (string, "bgu");
3310       break;
3311
3312     case LEU:
3313       strcpy (string, "bleu");
3314       break;
3315
3316     case LTU:
3317       strcpy (string, "blu");
3318       break;
3319     }
3320
3321   /* Now add the annulling, the label, and a possible noop.  */
3322   if (annul)
3323     strcat (string, ",a");
3324
3325   /* ??? If v9, optional prediction bit ",pt" or ",pf" goes here.  */
3326
3327   if (! TARGET_V9)
3328     {
3329       labeloff = 3;
3330       labelno = v8_labelno;
3331     }
3332   else
3333     {
3334       labeloff = 9;
3335       if (mode == CCFPmode || mode == CCFPEmode)
3336         {
3337           labeloff = 10;
3338           labelno = v9_fcc_labelno;
3339           /* Set the char indicating the number of the fcc reg to use.  */
3340           labelno[6] = REGNO (fp_cond_reg) - 96 + '0';
3341         }
3342       else if (mode == CCXmode || mode == CCX_NOOVmode)
3343         labelno = v9_xcc_labelno;
3344       else
3345         labelno = v9_icc_labelno;
3346     }
3347   /* Set the char indicating the number of the operand containing the
3348      label_ref.  */
3349   labelno[labeloff] = label + '0';
3350   strcat (string, labelno);
3351
3352   if (noop)
3353     strcat (string, "\n\tnop");
3354
3355   return string;
3356 }
3357
3358 /* Return the string to output a conditional branch to LABEL, testing
3359    register REG.  LABEL is the operand number of the label; REG is the
3360    operand number of the reg.  OP is the conditional expression.  The mode
3361    of REG says what kind of comparison we made.
3362
3363    REVERSED is non-zero if we should reverse the sense of the comparison.
3364
3365    ANNUL is non-zero if we should generate an annulling branch.
3366
3367    NOOP is non-zero if we have to follow this branch by a noop.  */
3368
3369 char *
3370 output_v9branch (op, reg, label, reversed, annul, noop)
3371      rtx op;
3372      int reg, label;
3373      int reversed, annul, noop;
3374 {
3375   static char string[20];
3376   enum rtx_code code = GET_CODE (op);
3377   enum machine_mode mode = GET_MODE (XEXP (op, 0));
3378   static char labelno[] = " %X,%lX";
3379
3380   /* If not floating-point or if EQ or NE, we can just reverse the code.  */
3381   if (reversed)
3382     code = reverse_condition (code), reversed = 0;
3383
3384   /* Only 64 bit versions of these instructions exist.  */
3385   if (mode != DImode)
3386     abort ();
3387
3388   /* Start by writing the branch condition.  */
3389
3390   switch (code)
3391     {
3392     case NE:
3393       strcpy (string, "brnz");
3394       break;
3395
3396     case EQ:
3397       strcpy (string, "brz");
3398       break;
3399
3400     case GE:
3401       strcpy (string, "brgez");
3402       break;
3403
3404     case LT:
3405       strcpy (string, "brlz");
3406       break;
3407
3408     case LE:
3409       strcpy (string, "brlez");
3410       break;
3411
3412     case GT:
3413       strcpy (string, "brgz");
3414       break;
3415
3416     default:
3417       abort ();
3418     }
3419
3420   /* Now add the annulling, reg, label, and nop.  */
3421   if (annul)
3422     strcat (string, ",a");
3423
3424   /* ??? Optional prediction bit ",pt" or ",pf" goes here.  */
3425
3426   labelno[2] = reg + '0';
3427   labelno[6] = label + '0';
3428   strcat (string, labelno);
3429
3430   if (noop)
3431     strcat (string, "\n\tnop");
3432
3433   return string;
3434 }
3435
3436 /* Output assembler code to return from a function.  */
3437
3438 /* ??? v9: Update to use the new `return' instruction.  Also, add patterns to
3439    md file for the `return' instruction.  */
3440
3441 char *
3442 output_return (operands)
3443      rtx *operands;
3444 {
3445   if (leaf_label)
3446     {
3447       operands[0] = leaf_label;
3448       return "b,a %l0";
3449     }
3450   else if (leaf_function)
3451     {
3452       /* If we didn't allocate a frame pointer for the current function,
3453          the stack pointer might have been adjusted.  Output code to
3454          restore it now.  */
3455
3456       operands[0] = gen_rtx (CONST_INT, VOIDmode, actual_fsize);
3457
3458       /* Use sub of negated value in first two cases instead of add to
3459          allow actual_fsize == 4096.  */
3460
3461       if (actual_fsize <= 4096)
3462         {
3463           if (SKIP_CALLERS_UNIMP_P)
3464             return "jmp %%o7+12\n\tsub %%sp,-%0,%%sp";
3465           else
3466             return "retl\n\tsub %%sp,-%0,%%sp";
3467         }
3468       else if (actual_fsize <= 8192)
3469         {
3470           operands[0] = gen_rtx (CONST_INT, VOIDmode, actual_fsize - 4096);
3471           if (SKIP_CALLERS_UNIMP_P)
3472             return "sub %%sp,-4096,%%sp\n\tjmp %%o7+12\n\tsub %%sp,-%0,%%sp";
3473           else
3474             return "sub %%sp,-4096,%%sp\n\tretl\n\tsub %%sp,-%0,%%sp";
3475         }
3476       else if (SKIP_CALLERS_UNIMP_P)
3477         {
3478           if ((actual_fsize & 0x3ff) != 0)
3479             return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tjmp %%o7+12\n\tadd %%sp,%%g1,%%sp";
3480           else
3481             return "sethi %%hi(%a0),%%g1\n\tjmp %%o7+12\n\tadd %%sp,%%g1,%%sp";
3482         }
3483       else
3484         {
3485           if ((actual_fsize & 0x3ff) != 0)
3486             return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
3487           else
3488             return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
3489         }
3490     }
3491   else
3492     {
3493       if (SKIP_CALLERS_UNIMP_P)
3494         return "jmp %%i7+12\n\trestore";
3495       else
3496         return "ret\n\trestore";
3497     }
3498 }
3499 \f
3500 /* Leaf functions and non-leaf functions have different needs.  */
3501
3502 static int
3503 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
3504
3505 static int
3506 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
3507
3508 static int *reg_alloc_orders[] = {
3509   reg_leaf_alloc_order,
3510   reg_nonleaf_alloc_order};
3511
3512 void
3513 order_regs_for_local_alloc ()
3514 {
3515   static int last_order_nonleaf = 1;
3516
3517   if (regs_ever_live[15] != last_order_nonleaf)
3518     {
3519       last_order_nonleaf = !last_order_nonleaf;
3520       bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
3521              (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
3522     }
3523 }
3524 \f
3525 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
3526    This makes them candidates for using ldd and std insns.
3527
3528    Note reg1 and reg2 *must* be hard registers.  To be sure we will
3529    abort if we are passed pseudo registers.  */
3530
3531 int
3532 registers_ok_for_ldd_peep (reg1, reg2)
3533      rtx reg1, reg2;
3534 {
3535   /* We might have been passed a SUBREG.  */
3536   if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
3537     return 0;
3538
3539   if (REGNO (reg1) % 2 != 0)
3540     return 0;
3541
3542   return (REGNO (reg1) == REGNO (reg2) - 1);
3543 }
3544
3545 /* Return 1 if addr1 and addr2 are suitable for use in an ldd or
3546    std insn.
3547
3548    This can only happen when addr1 and addr2 are consecutive memory
3549    locations (addr1 + 4 == addr2).  addr1 must also be aligned on a
3550    64 bit boundary (addr1 % 8 == 0).
3551
3552    We know %sp and %fp are kept aligned on a 64 bit boundary.  Other
3553    registers are assumed to *never* be properly aligned and are
3554    rejected.
3555
3556    Knowing %sp and %fp are kept aligned on a 64 bit boundary, we
3557    need only check that the offset for addr1 % 8 == 0.  */
3558
3559 int
3560 addrs_ok_for_ldd_peep (addr1, addr2)
3561       rtx addr1, addr2;
3562 {
3563   int reg1, offset1;
3564
3565   /* Extract a register number and offset (if used) from the first addr.  */
3566   if (GET_CODE (addr1) == PLUS)
3567     {
3568       /* If not a REG, return zero.  */
3569       if (GET_CODE (XEXP (addr1, 0)) != REG)
3570         return 0;
3571       else
3572         {
3573           reg1 = REGNO (XEXP (addr1, 0));
3574           /* The offset must be constant!  */
3575           if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
3576             return 0;
3577           offset1 = INTVAL (XEXP (addr1, 1));
3578         }
3579     }
3580   else if (GET_CODE (addr1) != REG)
3581     return 0;
3582   else
3583     {
3584       reg1 = REGNO (addr1);
3585       /* This was a simple (mem (reg)) expression.  Offset is 0.  */
3586       offset1 = 0;
3587     }
3588
3589   /* Make sure the second address is a (mem (plus (reg) (const_int).  */
3590   if (GET_CODE (addr2) != PLUS)
3591     return 0;
3592
3593   if (GET_CODE (XEXP (addr2, 0)) != REG
3594       || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
3595     return 0;
3596
3597   /* Only %fp and %sp are allowed.  Additionally both addresses must
3598      use the same register.  */
3599   if (reg1 != FRAME_POINTER_REGNUM && reg1 != STACK_POINTER_REGNUM)
3600     return 0;
3601
3602   if (reg1 != REGNO (XEXP (addr2, 0)))
3603     return 0;
3604
3605   /* The first offset must be evenly divisible by 8 to ensure the
3606      address is 64 bit aligned.  */
3607   if (offset1 % 8 != 0)
3608     return 0;
3609
3610   /* The offset for the second addr must be 4 more than the first addr.  */
3611   if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
3612     return 0;
3613
3614   /* All the tests passed.  addr1 and addr2 are valid for ldd and std
3615      instructions.  */
3616   return 1;
3617 }
3618
3619 /* Return 1 if reg is a pseudo, or is the first register in
3620    a hard register pair.  This makes it a candidate for use in
3621    ldd and std insns.  */
3622
3623 int
3624 register_ok_for_ldd (reg)
3625      rtx reg;
3626 {
3627   /* We might have been passed a SUBREG.  */
3628   if (GET_CODE (reg) != REG)
3629     return 0;
3630
3631   if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
3632     return (REGNO (reg) % 2 == 0);
3633   else
3634     return 1;
3635 }
3636 \f
3637 /* Print operand X (an rtx) in assembler syntax to file FILE.
3638    CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
3639    For `%' followed by punctuation, CODE is the punctuation and X is null.  */
3640
3641 void
3642 print_operand (file, x, code)
3643      FILE *file;
3644      rtx x;
3645      int code;
3646 {
3647   switch (code)
3648     {
3649     case '#':
3650       /* Output a 'nop' if there's nothing for the delay slot.  */
3651       if (dbr_sequence_length () == 0)
3652         fputs ("\n\tnop", file);
3653       return;
3654     case '*':
3655       /* Output an annul flag if there's nothing for the delay slot and we
3656          are optimizing.  This is always used with '(' below.  */
3657       /* Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
3658          this is a dbx bug.  So, we only do this when optimizing.  */
3659       if (dbr_sequence_length () == 0 && optimize)
3660         fputs (",a", file);
3661       return;
3662     case '(':
3663       /* Output a 'nop' if there's nothing for the delay slot and we are
3664          not optimizing.  This is always used with '*' above.  */
3665       if (dbr_sequence_length () == 0 && ! optimize)
3666         fputs ("\n\tnop", file);
3667       return;
3668     case '_':
3669       /* Output the Medium/Anywhere code model base register.  */
3670       fputs (MEDANY_BASE_REG, file);
3671       return;
3672     case '@':
3673       /* Print out what we are using as the frame pointer.  This might
3674          be %fp, or might be %sp+offset.  */
3675       /* ??? What if offset is too big? Perhaps the caller knows it isn't? */
3676       fprintf (file, "%s+%d", frame_base_name, frame_base_offset);
3677       return;
3678     case 'Y':
3679       /* Adjust the operand to take into account a RESTORE operation.  */
3680       if (GET_CODE (x) != REG)
3681         output_operand_lossage ("Invalid %%Y operand");
3682       else if (REGNO (x) < 8)
3683         fputs (reg_names[REGNO (x)], file);
3684       else if (REGNO (x) >= 24 && REGNO (x) < 32)
3685         fputs (reg_names[REGNO (x)-16], file);
3686       else
3687         output_operand_lossage ("Invalid %%Y operand");
3688       return;
3689     case 'R':
3690       /* Print out the second register name of a register pair or quad.
3691          I.e., R (%o0) => %o1.  */
3692       fputs (reg_names[REGNO (x)+1], file);
3693       return;
3694     case 'S':
3695       /* Print out the third register name of a register quad.
3696          I.e., S (%o0) => %o2.  */
3697       fputs (reg_names[REGNO (x)+2], file);
3698       return;
3699     case 'T':
3700       /* Print out the fourth register name of a register quad.
3701          I.e., T (%o0) => %o3.  */
3702       fputs (reg_names[REGNO (x)+3], file);
3703       return;
3704     case 'm':
3705       /* Print the operand's address only.  */
3706       output_address (XEXP (x, 0));
3707       return;
3708     case 'r':
3709       /* In this case we need a register.  Use %g0 if the
3710          operand is const0_rtx.  */
3711       if (x == const0_rtx
3712           || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
3713         {
3714           fputs ("%g0", file);
3715           return;
3716         }
3717       else
3718         break;
3719
3720     case 'A':
3721       switch (GET_CODE (x))
3722         {
3723         case IOR: fputs ("or", file); break;
3724         case AND: fputs ("and", file); break;
3725         case XOR: fputs ("xor", file); break;
3726         default: output_operand_lossage ("Invalid %%A operand");
3727         }
3728       return;
3729
3730     case 'B':
3731       switch (GET_CODE (x))
3732         {
3733         case IOR: fputs ("orn", file); break;
3734         case AND: fputs ("andn", file); break;
3735         case XOR: fputs ("xnor", file); break;
3736         default: output_operand_lossage ("Invalid %%B operand");
3737         }
3738       return;
3739
3740       /* This is used by the conditional move instructions.  */
3741     case 'C':
3742       switch (GET_CODE (x))
3743         {
3744         case NE: fputs ("ne", file); break;
3745         case EQ: fputs ("e", file); break;
3746         case GE: fputs ("ge", file); break;
3747         case GT: fputs ("g", file); break;
3748         case LE: fputs ("le", file); break;
3749         case LT: fputs ("l", file); break;
3750         case GEU: fputs ("geu", file); break;
3751         case GTU: fputs ("gu", file); break;
3752         case LEU: fputs ("leu", file); break;
3753         case LTU: fputs ("lu", file); break;
3754         default: output_operand_lossage ("Invalid %%C operand");
3755         }
3756       return;
3757
3758       /* This is used by the movr instruction pattern.  */
3759     case 'D':
3760       switch (GET_CODE (x))
3761         {
3762         case NE: fputs ("ne", file); break;
3763         case EQ: fputs ("e", file); break;
3764         case GE: fputs ("gez", file); break;
3765         case LT: fputs ("lz", file); break;
3766         case LE: fputs ("lez", file); break;
3767         case GT: fputs ("gz", file); break;
3768         default: output_operand_lossage ("Invalid %%D operand");
3769         }
3770       return;
3771
3772     case 'b':
3773       {
3774         /* Print a sign-extended character.  */
3775         int i = INTVAL (x) & 0xff;
3776         if (i & 0x80)
3777           i |= 0xffffff00;
3778         fprintf (file, "%d", i);
3779         return;
3780       }
3781
3782     case 'f':
3783       /* Operand must be a MEM; write its address.  */
3784       if (GET_CODE (x) != MEM)
3785         output_operand_lossage ("Invalid %%f operand");
3786       output_address (XEXP (x, 0));
3787       return;
3788
3789     case 0:
3790       /* Do nothing special.  */
3791       break;
3792
3793     default:
3794       /* Undocumented flag.  */
3795       output_operand_lossage ("invalid operand output code");
3796     }
3797
3798   if (GET_CODE (x) == REG)
3799     fputs (reg_names[REGNO (x)], file);
3800   else if (GET_CODE (x) == MEM)
3801     {
3802       fputc ('[', file);
3803       if (CONSTANT_P (XEXP (x, 0)))
3804         /* Poor Sun assembler doesn't understand absolute addressing.  */
3805         fputs ("%g0+", file);
3806       output_address (XEXP (x, 0));
3807       fputc (']', file);
3808     }
3809   else if (GET_CODE (x) == HIGH)
3810     {
3811       fputs ("%hi(", file);
3812       output_addr_const (file, XEXP (x, 0));
3813       fputc (')', file);
3814     }
3815   else if (GET_CODE (x) == LO_SUM)
3816     {
3817       print_operand (file, XEXP (x, 0), 0);
3818       fputs ("+%lo(", file);
3819       output_addr_const (file, XEXP (x, 1));
3820       fputc (')', file);
3821     }
3822   else if (GET_CODE (x) == CONST_DOUBLE
3823            && (GET_MODE (x) == VOIDmode
3824                || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
3825     {
3826       if (CONST_DOUBLE_HIGH (x) == 0)
3827         fprintf (file, "%u", CONST_DOUBLE_LOW (x));
3828       else if (CONST_DOUBLE_HIGH (x) == -1
3829                && CONST_DOUBLE_LOW (x) < 0)
3830         fprintf (file, "%d", CONST_DOUBLE_LOW (x));
3831       else
3832         output_operand_lossage ("long long constant not a valid immediate operand");
3833     }
3834   else if (GET_CODE (x) == CONST_DOUBLE)
3835     output_operand_lossage ("floating point constant not a valid immediate operand");
3836   else { output_addr_const (file, x); }
3837 }
3838 \f
3839 /* This function outputs assembler code for VALUE to FILE, where VALUE is
3840    a 64 bit (DImode) value.  */
3841
3842 /* ??? If there is a 64 bit counterpart to .word that the assembler
3843    understands, then using that would simply this code greatly.  */
3844 /* ??? We only output .xword's for symbols and only then in environments
3845    where the assembler can handle them.  */
3846
3847 void
3848 output_double_int (file, value)
3849      FILE *file;
3850      rtx value;
3851 {
3852   if (GET_CODE (value) == CONST_INT)
3853     {
3854       if (INTVAL (value) < 0)
3855         ASM_OUTPUT_INT (file, constm1_rtx);
3856       else
3857         ASM_OUTPUT_INT (file, const0_rtx);
3858       ASM_OUTPUT_INT (file, value);
3859     }
3860   else if (GET_CODE (value) == CONST_DOUBLE)
3861     {
3862       ASM_OUTPUT_INT (file, gen_rtx (CONST_INT, VOIDmode,
3863                                      CONST_DOUBLE_HIGH (value)));
3864       ASM_OUTPUT_INT (file, gen_rtx (CONST_INT, VOIDmode,
3865                                      CONST_DOUBLE_LOW (value)));
3866     }
3867   else if (GET_CODE (value) == SYMBOL_REF
3868            || GET_CODE (value) == CONST
3869            || GET_CODE (value) == PLUS
3870            || (TARGET_V9 &&
3871                (GET_CODE (value) == LABEL_REF
3872                 || GET_CODE (value) == MINUS)))
3873     {
3874       if (!TARGET_V9 || TARGET_ENV32)
3875         {
3876           ASM_OUTPUT_INT (file, const0_rtx);
3877           ASM_OUTPUT_INT (file, value);
3878         }
3879       else
3880         {
3881           fprintf (file, "\t%s\t", ASM_LONGLONG);
3882           output_addr_const (file, value);
3883           fprintf (file, "\n");
3884         }
3885     }
3886   else
3887     abort ();
3888 }
3889 \f
3890 /* Return the value of a code used in the .proc pseudo-op that says
3891    what kind of result this function returns.  For non-C types, we pick
3892    the closest C type.  */
3893
/* Default widths, in bits, of the standard C types, used only when the
   configuration has not already defined them.  Each default is
   expressed in terms of BITS_PER_UNIT or BITS_PER_WORD.  */

#if !defined (CHAR_TYPE_SIZE)
#define CHAR_TYPE_SIZE BITS_PER_UNIT
#endif

#if !defined (SHORT_TYPE_SIZE)
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif

#if !defined (INT_TYPE_SIZE)
#define INT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_TYPE_SIZE)
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (LONG_LONG_TYPE_SIZE)
#define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#if !defined (FLOAT_TYPE_SIZE)
#define FLOAT_TYPE_SIZE BITS_PER_WORD
#endif

#if !defined (DOUBLE_TYPE_SIZE)
#define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif

#if !defined (LONG_DOUBLE_TYPE_SIZE)
#define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
#endif
3925
3926 unsigned long
3927 sparc_type_code (type)
3928      register tree type;
3929 {
3930   register unsigned long qualifiers = 0;
3931   register unsigned shift = 6;
3932
3933   /* Only the first 30 bits of the qualifer are valid.  We must refrain from
3934      setting more, since some assemblers will give an error for this.  Also,
3935      we must be careful to avoid shifts of 32 bits or more to avoid getting
3936      unpredictable results.  */
3937
3938   for (;;)
3939     {
3940       switch (TREE_CODE (type))
3941         {
3942         case ERROR_MARK:
3943           return qualifiers;
3944
3945         case ARRAY_TYPE:
3946           if (shift < 30)
3947             qualifiers |= (3 << shift);
3948           shift += 2;
3949           type = TREE_TYPE (type);
3950           break;
3951
3952         case FUNCTION_TYPE:
3953         case METHOD_TYPE:
3954           if (shift < 30)
3955             qualifiers |= (2 << shift);
3956           shift += 2;
3957           type = TREE_TYPE (type);
3958           break;
3959
3960         case POINTER_TYPE:
3961         case REFERENCE_TYPE:
3962         case OFFSET_TYPE:
3963           if (shift < 30)
3964             qualifiers |= (1 << shift);
3965           shift += 2;
3966           type = TREE_TYPE (type);
3967           break;
3968
3969         case RECORD_TYPE:
3970           return (qualifiers | 8);
3971
3972         case UNION_TYPE:
3973         case QUAL_UNION_TYPE:
3974           return (qualifiers | 9);
3975
3976         case ENUMERAL_TYPE:
3977           return (qualifiers | 10);
3978
3979         case VOID_TYPE:
3980           return (qualifiers | 16);
3981
3982         case INTEGER_TYPE:
3983           /* If this is a range type, consider it to be the underlying
3984              type.  */
3985           if (TREE_TYPE (type) != 0)
3986             {
3987               type = TREE_TYPE (type);
3988               break;
3989             }
3990
3991           /* Carefully distinguish all the standard types of C,
3992              without messing up if the language is not C.  We do this by
3993              testing TYPE_PRECISION and TREE_UNSIGNED.  The old code used to
3994              look at both the names and the above fields, but that's redundant.
3995              Any type whose size is between two C types will be considered
3996              to be the wider of the two types.  Also, we do not have a
3997              special code to use for "long long", so anything wider than
3998              long is treated the same.  Note that we can't distinguish
3999              between "int" and "long" in this code if they are the same
4000              size, but that's fine, since neither can the assembler.  */
4001
4002           if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
4003             return (qualifiers | (TREE_UNSIGNED (type) ? 12 : 2));
4004
4005           else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
4006             return (qualifiers | (TREE_UNSIGNED (type) ? 13 : 3));
4007
4008           else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
4009             return (qualifiers | (TREE_UNSIGNED (type) ? 14 : 4));
4010
4011           else
4012             return (qualifiers | (TREE_UNSIGNED (type) ? 15 : 5));
4013
4014         case REAL_TYPE:
4015           /* Carefully distinguish all the standard types of C,
4016              without messing up if the language is not C.  */
4017
4018           if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
4019             return (qualifiers | 6);
4020
4021           else
4022             return (qualifiers | 7);
4023
4024         case COMPLEX_TYPE:      /* GNU Fortran COMPLEX type.  */
4025           /* ??? We need to distinguish between double and float complex types,
4026              but I don't know how yet because I can't reach this code from
4027              existing front-ends.  */
4028           return (qualifiers | 7);      /* Who knows? */
4029
4030         case CHAR_TYPE:         /* GNU Pascal CHAR type.  Not used in C.  */
4031         case BOOLEAN_TYPE:      /* GNU Fortran BOOLEAN type.  */
4032         case FILE_TYPE:         /* GNU Pascal FILE type.  */
4033         case SET_TYPE:          /* GNU Pascal SET type.  */
4034         case LANG_TYPE:         /* ? */
4035           return qualifiers;
4036
4037         default:
4038           abort ();             /* Not a type! */
4039         }
4040     }
4041 }
4042 \f
4043 /* Nested function support.  */
4044
4045 /* Emit RTL insns to initialize the variable parts of a trampoline.
4046    FNADDR is an RTX for the address of the function's pure code.
4047    CXT is an RTX for the static chain value for the function.
4048
4049    This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
4050    (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
4051    (to store insns).  This is a bit excessive.  Perhaps a different
4052    mechanism would be better here.
4053
4054    Emit 3 FLUSH instructions to synchonize the data and instruction caches.
4055
4056    ??? v9: We assume the top 32 bits of function addresses are 0.  */
4057
4058 void
4059 sparc_initialize_trampoline (tramp, fnaddr, cxt)
4060      rtx tramp, fnaddr, cxt;
4061 {
4062   rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxt,
4063                               size_int (10), 0, 1);
4064   rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddr,
4065                              size_int (10), 0, 1);
4066   rtx low_cxt = expand_and (cxt, gen_rtx (CONST_INT, VOIDmode, 0x3ff), 0);
4067   rtx low_fn = expand_and (fnaddr, gen_rtx (CONST_INT, VOIDmode, 0x3ff), 0);
4068   rtx g1_sethi = gen_rtx (HIGH, SImode,
4069                           gen_rtx (CONST_INT, VOIDmode, 0x03000000));
4070   rtx g2_sethi = gen_rtx (HIGH, SImode,
4071                           gen_rtx (CONST_INT, VOIDmode, 0x05000000));
4072   rtx g1_ori = gen_rtx (HIGH, SImode,
4073                         gen_rtx (CONST_INT, VOIDmode, 0x82106000));
4074   rtx g2_ori = gen_rtx (HIGH, SImode,
4075                         gen_rtx (CONST_INT, VOIDmode, 0x8410A000));
4076   rtx tem = gen_reg_rtx (SImode);
4077   emit_move_insn (tem, g1_sethi);
4078   emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
4079   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 0)), high_fn);
4080   emit_move_insn (tem, g1_ori);
4081   emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
4082   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 4)), low_fn);
4083   emit_move_insn (tem, g2_sethi);
4084   emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
4085   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 8)), high_cxt);
4086   emit_move_insn (tem, g2_ori);
4087   emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
4088   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 16)), low_cxt);
4089   emit_insn (gen_flush (validize_mem (gen_rtx (MEM, SImode, tramp))));
4090   emit_insn (gen_flush (validize_mem (gen_rtx (MEM, SImode,
4091                                                plus_constant (tramp, 8)))));
4092   emit_insn (gen_flush (validize_mem (gen_rtx (MEM, SImode,
4093                                                plus_constant (tramp, 16)))));
4094 }
4095
4096 void
4097 sparc64_initialize_trampoline (tramp, fnaddr, cxt)
4098      rtx tramp, fnaddr, cxt;
4099 {
4100   rtx fnaddrdi = gen_reg_rtx (Pmode);
4101   rtx fnaddrsi = (emit_move_insn (fnaddrdi, fnaddr),
4102                 gen_rtx (SUBREG, SImode, fnaddrdi, 0));
4103   rtx cxtdi = gen_reg_rtx (Pmode);
4104   rtx cxtsi = (emit_move_insn (cxtdi, cxt),
4105                 gen_rtx (SUBREG, SImode, cxtdi, 0));
4106   rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxtsi,
4107                               size_int (10), 0, 1);
4108   rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddrsi,
4109                              size_int (10), 0, 1);
4110   rtx low_cxt = expand_and (cxtsi, gen_rtx (CONST_INT, VOIDmode, 0x3ff), 0);
4111   rtx low_fn = expand_and (fnaddrsi, gen_rtx (CONST_INT, VOIDmode, 0x3ff), 0);
4112   rtx g1_sethi = gen_rtx (HIGH, SImode,
4113                           gen_rtx (CONST_INT, VOIDmode, 0x03000000));
4114   rtx g2_sethi = gen_rtx (HIGH, SImode,
4115                           gen_rtx (CONST_INT, VOIDmode, 0x05000000));
4116   rtx g1_ori = gen_rtx (HIGH, SImode,
4117                         gen_rtx (CONST_INT, VOIDmode, 0x82106000));
4118   rtx g2_ori = gen_rtx (HIGH, SImode,
4119                         gen_rtx (CONST_INT, VOIDmode, 0x8410A000));
4120   rtx tem = gen_reg_rtx (SImode);
4121   emit_move_insn (tem, g2_sethi);
4122   emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
4123   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 0)), high_fn);
4124   emit_move_insn (tem, g2_ori);
4125   emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
4126   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 4)), low_fn);
4127   emit_move_insn (tem, g1_sethi);
4128   emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
4129   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 8)), high_cxt);
4130   emit_move_insn (tem, g1_ori);
4131   emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
4132   emit_move_insn (gen_rtx (MEM, SImode, plus_constant (tramp, 16)), low_cxt);
4133   emit_insn (gen_rtx (UNSPEC_VOLATILE, VOIDmode,
4134                       gen_rtvec (1, plus_constant (tramp, 0)),
4135                       3));
4136   emit_insn (gen_rtx (UNSPEC_VOLATILE, VOIDmode,
4137                       gen_rtvec (1, plus_constant (tramp, 8)),
4138                       3));
4139   emit_insn (gen_rtx (UNSPEC_VOLATILE, VOIDmode,
4140                       gen_rtvec (1, plus_constant (tramp, 16)),
4141                       3));
4142 }
4143 \f
4144 /* Subroutines to support a flat (single) register window calling
4145    convention.  */
4146
4147 /* Single-register window sparc stack frames look like:
4148
4149              Before call                        After call
4150         +-----------------------+       +-----------------------+
4151    high |                       |       |                       |
4152    mem  |  caller's temps.      |       |  caller's temps.      |
4153         |                       |       |                       |
4154         +-----------------------+       +-----------------------+
4155         |                       |       |                       |
4156         |  arguments on stack.  |       |  arguments on stack.  |
4157         |                       |       |                       |
4158         +-----------------------+FP+92->+-----------------------+
4159         |  6 words to save      |       |  6 words to save      |
4160         |  arguments passed     |       |  arguments passed     |
4161         |  in registers, even   |       |  in registers, even   |
4162         |  if not passed.       |       |  if not passed.       |
4163  SP+68->+-----------------------+FP+68->+-----------------------+
4164         | 1 word struct addr    |       | 1 word struct addr    |
4165         +-----------------------+FP+64->+-----------------------+
4166         |                       |       |                       |
4167         | 16 word reg save area |       | 16 word reg save area |
4168         |                       |       |                       |
4169     SP->+-----------------------+   FP->+-----------------------+
4170                                         | 4 word area for       |
4171                                         | fp/alu reg moves      |
4172                                  FP-16->+-----------------------+
4173                                         |                       |
4174                                         |  local variables      |
4175                                         |                       |
4176                                         +-----------------------+
4177                                         |                       |
4178                                         |  fp register save     |
4179                                         |                       |
4180                                         +-----------------------+
4181                                         |                       |
4182                                         |  gp register save     |
4183                                         |                       |
4184                                         +-----------------------+
4185                                         |                       |
4186                                         |  alloca allocations   |
4187                                         |                       |
4188                                         +-----------------------+
4189                                         |                       |
4190                                         |  arguments on stack   |
4191                                         |                       |
4192                                  SP+92->+-----------------------+
4193                                         |  6 words to save      |
4194                                         |  arguments passed     |
4195                                         |  in registers, even   |
4196    low                                  |  if not passed.       |
4197    memory                        SP+68->+-----------------------+
4198                                         | 1 word struct addr    |
4199                                  SP+64->+-----------------------+
4200                                         |                       |
4201                                         | 16 word reg save area |
4202                                         |                       |
4203                                     SP->+-----------------------+  */
4204
/* Frame description filled in by sparc_flat_compute_frame_size: the
   sizes of the parts of the frame, the register save masks, and the
   offset of the register save area for the current function.  */

struct sparc_frame_info
{
  /* # bytes that the entire frame takes up.  */
  unsigned long total_size;
  /* # bytes that variables take up.  */
  unsigned long var_size;
  /* # bytes that outgoing arguments take up.  */
  unsigned long args_size;
  /* # bytes of extra gunk.  */
  unsigned long extra_size;
  /* # bytes needed to store gp regs.  */
  unsigned int gp_reg_size;
  /* # bytes needed to store fp regs.  */
  unsigned int fp_reg_size;
  /* Mask of saved gp registers.  */
  unsigned long gmask;
  /* Mask of saved fp registers.  */
  unsigned long fmask;
  /* Offset from new sp to store regs.  */
  unsigned long reg_offset;
  /* Nonzero if frame size already calculated.  */
  int initialized;
};

/* Current frame information calculated by sparc_flat_compute_frame_size.  */
struct sparc_frame_info current_frame_info;

/* Zero structure to initialize current_frame_info.  */
struct sparc_frame_info zero_frame_info;
4227
/* Tell prologue and epilogue if register REGNO should be saved / restored.  */

/* Register number tested for the return address, and single bit masks
   for the frame pointer and return address registers.  */
#define RETURN_ADDR_REGNUM 15
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))

/* Nonzero if register REGNO must be saved: it is live and not
   call-used, or it is the frame pointer and a frame pointer is needed,
   or it is the return address register and that register is live.
   REGNO is parenthesized at each use so that any expression may be
   passed; it is evaluated more than once, so it must not have side
   effects.  */
#define MUST_SAVE_REGISTER(regno) \
 ((regs_ever_live[(regno)] && !call_used_regs[(regno)])                 \
  || ((regno) == FRAME_POINTER_REGNUM && frame_pointer_needed)          \
  || ((regno) == RETURN_ADDR_REGNUM && regs_ever_live[RETURN_ADDR_REGNUM]))
4238
4239 /* Return the bytes needed to compute the frame pointer from the current
4240    stack pointer.  */
4241
4242 unsigned long
4243 sparc_flat_compute_frame_size (size)
4244      int size;                  /* # of var. bytes allocated.  */
4245 {
4246   int regno;
4247   unsigned long total_size;     /* # bytes that the entire frame takes up.  */
4248   unsigned long var_size;       /* # bytes that variables take up.  */
4249   unsigned long args_size;      /* # bytes that outgoing arguments take up.  */
4250   unsigned long extra_size;     /* # extra bytes.  */
4251   unsigned int  gp_reg_size;    /* # bytes needed to store gp regs.  */
4252   unsigned int  fp_reg_size;    /* # bytes needed to store fp regs.  */
4253   unsigned long gmask;          /* Mask of saved gp registers.  */
4254   unsigned long fmask;          /* Mask of saved fp registers.  */
4255   unsigned long reg_offset;     /* Offset to register save area.  */
4256   int           need_aligned_p; /* 1 if need the save area 8 byte aligned.  */
4257
4258   /* This is the size of the 16 word reg save area, 1 word struct addr
4259      area, and 4 word fp/alu register copy area.  */
4260   extra_size     = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
4261   var_size       = size;
4262   /* Also include the size needed for the 6 parameter registers.  */
4263   args_size      = current_function_outgoing_args_size + 24;
4264   total_size     = var_size + args_size + extra_size;
4265   gp_reg_size    = 0;
4266   fp_reg_size    = 0;
4267   gmask          = 0;
4268   fmask          = 0;
4269   reg_offset     = 0;
4270   need_aligned_p = 0;
4271
4272   /* Calculate space needed for gp registers.  */
4273   for (regno = 1; regno <= 31; regno++)
4274     {
4275       if (MUST_SAVE_REGISTER (regno))
4276         {
4277           /* If we need to save two regs in a row, ensure there's room to bump
4278              up the address to align it to a doubleword boundary.  */
4279           if ((regno & 0x1) == 0 && MUST_SAVE_REGISTER (regno+1))
4280             {
4281               if (gp_reg_size % 8 != 0)
4282                 gp_reg_size += 4;
4283               gp_reg_size += 2 * UNITS_PER_WORD;
4284               gmask |= 3 << regno;
4285               regno++;
4286               need_aligned_p = 1;
4287             }
4288           else
4289             {
4290               gp_reg_size += UNITS_PER_WORD;
4291               gmask |= 1 << regno;
4292             }
4293         }
4294     }
4295
4296   /* Calculate space needed for fp registers.  */
4297   for (regno = 32; regno <= 63; regno++)
4298     {
4299       if (regs_ever_live[regno] && !call_used_regs[regno])
4300         {
4301           fp_reg_size += UNITS_PER_WORD;
4302           fmask |= 1 << (regno - 32);
4303         }
4304     }
4305
4306   if (gmask || fmask)
4307     {
4308       int n;
4309       reg_offset = FIRST_PARM_OFFSET(0) + args_size;
4310       /* Ensure save area is 8 byte aligned if we need it.  */
4311       n = reg_offset % 8;
4312       if (need_aligned_p && n != 0)
4313         {
4314           total_size += 8 - n;
4315           reg_offset += 8 - n;
4316         }
4317       total_size += gp_reg_size + fp_reg_size;
4318     }
4319
4320   /* ??? This looks a little suspicious.  Clarify.  */
4321   if (total_size == extra_size)
4322     total_size = extra_size = 0;
4323
4324   total_size = SPARC_STACK_ALIGN (total_size);
4325
4326   /* Save other computed information.  */
4327   current_frame_info.total_size  = total_size;
4328   current_frame_info.var_size    = var_size;
4329   current_frame_info.args_size   = args_size;
4330   current_frame_info.extra_size  = extra_size;
4331   current_frame_info.gp_reg_size = gp_reg_size;
4332   current_frame_info.fp_reg_size = fp_reg_size;
4333   current_frame_info.gmask       = gmask;
4334   current_frame_info.fmask       = fmask;
4335   current_frame_info.reg_offset  = reg_offset;
4336   current_frame_info.initialized = reload_completed;
4337
4338   /* Ok, we're done.  */
4339   return total_size;
4340 }
4341 \f
4342 /* Save/restore registers in GMASK and FMASK at register BASE_REG plus offset
4343    OFFSET.
4344
4345    BASE_REG must be 8 byte aligned.  This allows us to test OFFSET for
4346    appropriate alignment and use DOUBLEWORD_OP when we can.  We assume
4347    [BASE_REG+OFFSET] will always be a valid address.
4348
4349    WORD_OP is either "st" for save, "ld" for restore.
4350    DOUBLEWORD_OP is either "std" for save, "ldd" for restore.  */
4351
4352 void
4353 sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op, doubleword_op)
4354      FILE *file;
4355      char *base_reg;
4356      unsigned int offset;
4357      unsigned long gmask;
4358      unsigned long fmask;
4359      char *word_op;
4360      char *doubleword_op;
4361 {
4362   int regno;
4363
4364   if (gmask == 0 && fmask == 0)
4365     return;
4366
4367   /* Save registers starting from high to low.  We've already saved the
4368      previous frame pointer and previous return address for the debugger's
4369      sake.  The debugger allows us to not need a nop in the epilog if at least
4370      one register is reloaded in addition to return address.  */
4371
4372   if (gmask)
4373     {
4374       for (regno = 1; regno <= 31; regno++)
4375         {
4376           if ((gmask & (1L << regno)) != 0)
4377             {
4378               if ((regno & 0x1) == 0 && ((gmask & (1L << (regno+1))) != 0))
4379                 {
4380                   /* We can save two registers in a row.  If we're not at a
4381                      double word boundary, move to one.
4382                      sparc_flat_compute_frame_size ensures there's room to do
4383                      this.  */
4384                   if (offset % 8 != 0)
4385                     offset += UNITS_PER_WORD;
4386
4387                   if (word_op[0] == 's')
4388                     fprintf (file, "\t%s %s,[%s+%d]\n",
4389                              doubleword_op, reg_names[regno],
4390                              base_reg, offset);
4391                   else
4392                     fprintf (file, "\t%s [%s+%d],%s\n",
4393                              doubleword_op, base_reg, offset,
4394                              reg_names[regno]);
4395
4396                   offset += 2 * UNITS_PER_WORD;
4397                   regno++;
4398                 }
4399               else
4400                 {
4401                   if (word_op[0] == 's')
4402                     fprintf (file, "\t%s %s,[%s+%d]\n",
4403                              word_op, reg_names[regno],
4404                              base_reg, offset);
4405                   else
4406                     fprintf (file, "\t%s [%s+%d],%s\n",
4407                              word_op, base_reg, offset, reg_names[regno]);
4408
4409                   offset += UNITS_PER_WORD;
4410                 }
4411             }
4412         }
4413     }
4414
4415   if (fmask)
4416     {
4417       for (regno = 32; regno <= 63; regno++)
4418         {
4419           if ((fmask & (1L << (regno - 32))) != 0)
4420             {
4421               if (word_op[0] == 's')
4422                 fprintf (file, "\t%s %s,[%s+%d]\n",
4423                          word_op, reg_names[regno],
4424                          base_reg, offset);
4425               else
4426                 fprintf (file, "\t%s [%s+%d],%s\n",
4427                          word_op, base_reg, offset, reg_names[regno]);
4428
4429               offset += UNITS_PER_WORD;
4430             }
4431         }
4432     }
4433 }
4434 \f
4435 /* Set up the stack and frame (if desired) for the function.  */
4436
4437 void
4438 sparc_flat_output_function_prologue (file, size)
4439      FILE *file;
4440      int size;
4441 {
4442   char *sp_str = reg_names[STACK_POINTER_REGNUM];
4443   unsigned long gmask = current_frame_info.gmask;
4444
4445   /* This is only for the human reader.  */
4446   fprintf (file, "\t!#PROLOGUE# 0\n");
4447   fprintf (file, "\t!# vars= %d, regs= %d/%d, args= %d, extra= %d\n",
4448            current_frame_info.var_size,
4449            current_frame_info.gp_reg_size / 4,
4450            current_frame_info.fp_reg_size / 4,
4451            current_function_outgoing_args_size,
4452            current_frame_info.extra_size);
4453
4454   size = SPARC_STACK_ALIGN (size);
4455   size = (! current_frame_info.initialized
4456           ? sparc_flat_compute_frame_size (size)
4457           : current_frame_info.total_size);
4458
4459   /* These cases shouldn't happen.  Catch them now.  */
4460   if (size == 0 && (gmask || current_frame_info.fmask))
4461     abort ();
4462
4463   /* Allocate our stack frame by decrementing %sp.
4464      At present, the only algorithm gdb can use to determine if this is a
4465      flat frame is if we always set %i7 if we set %sp.  This can be optimized
4466      in the future by putting in some sort of debugging information that says
4467      this is a `flat' function.  However, there is still the case of debugging
4468      code without such debugging information (including cases where most fns
4469      have such info, but there is one that doesn't).  So, always do this now
4470      so we don't get a lot of code out there that gdb can't handle.
4471      If the frame pointer isn't needn't then that's ok - gdb won't be able to
4472      distinguish us from a non-flat function but there won't (and shouldn't)
4473      be any differences anyway.  The return pc is saved (if necessary) right
4474      after %i7 so gdb won't have to look too far to find it.  */
4475   if (size > 0)
4476     {
4477       unsigned int reg_offset = current_frame_info.reg_offset;
4478       char *fp_str = reg_names[FRAME_POINTER_REGNUM];
4479       char *t1_str = "%g1";
4480
4481       /* Things get a little tricky if local variables take up more than ~4096
4482          bytes and outgoing arguments take up more than ~4096 bytes.  When that
4483          happens, the register save area can't be accessed from either end of
4484          the frame.  Handle this by decrementing %sp to the start of the gp
4485          register save area, save the regs, update %i7, and then set %sp to its
4486          final value.  Given that we only have one scratch register to play
4487          with it is the cheapest solution, and it helps gdb out as it won't
4488          slow down recognition of flat functions.
4489          Don't change the order of insns emitted here without checking with
4490          the gdb folk first.  */
4491
4492       /* Is the entire register save area offsetable from %sp?  */
4493       if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
4494         {
4495           if (size <= 4096)
4496             {
4497               fprintf (file, "\tadd %s,%d,%s\n",
4498                        sp_str, -size, sp_str);
4499               if (gmask & FRAME_POINTER_MASK)
4500                 {
4501                   fprintf (file, "\tst %s,[%s+%d]\n",
4502                            fp_str, sp_str, reg_offset);
4503                   fprintf (file, "\tsub %s,%d,%s\t!# set up frame pointer\n",
4504                            sp_str, -size, fp_str);
4505                   reg_offset += 4;
4506                 }
4507             }
4508           else
4509             {
4510               fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
4511                        size, t1_str, sp_str, t1_str, sp_str);
4512               if (gmask & FRAME_POINTER_MASK)
4513                 {
4514                   fprintf (file, "\tst %s,[%s+%d]\n",
4515                            fp_str, sp_str, reg_offset);
4516                   fprintf (file, "\tadd %s,%s,%s\t!# set up frame pointer\n",
4517                            sp_str, t1_str, fp_str);
4518                   reg_offset += 4;
4519                 }
4520             }
4521           if (gmask & RETURN_ADDR_MASK)
4522             {
4523               fprintf (file, "\tst %s,[%s+%d]\n",
4524                        reg_names[RETURN_ADDR_REGNUM], sp_str, reg_offset);
4525               reg_offset += 4;
4526             }
4527           sparc_flat_save_restore (file, sp_str, reg_offset,
4528                                    gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
4529                                    current_frame_info.fmask,
4530                                    "st", "std");
4531         }
4532       else
4533         {
4534           /* Subtract %sp in two steps, but make sure there is always a
4535              64 byte register save area, and %sp is properly aligned.  */
4536           /* Amount to decrement %sp by, the first time.  */
4537           unsigned int size1 = ((size - reg_offset + 64) + 15) & -16;
4538           /* Offset to register save area from %sp.  */
4539           unsigned int offset = size1 - (size - reg_offset);
4540
4541           if (size1 <= 4096)
4542             {
4543               fprintf (file, "\tadd %s,%d,%s\n",
4544                        sp_str, -size1, sp_str);
4545               if (gmask & FRAME_POINTER_MASK)
4546                 {
4547                   fprintf (file, "\tst %s,[%s+%d]\n\tsub %s,%d,%s\t!# set up frame pointer\n",
4548                            fp_str, sp_str, offset, sp_str, -size1, fp_str);
4549                   offset += 4;
4550                 }
4551             }
4552           else
4553             {
4554               fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
4555                        size1, t1_str, sp_str, t1_str, sp_str);
4556               if (gmask & FRAME_POINTER_MASK)
4557                 {
4558                   fprintf (file, "\tst %s,[%s+%d]\n\tadd %s,%s,%s\t!# set up frame pointer\n",
4559                            fp_str, sp_str, offset, sp_str, t1_str, fp_str);
4560                   offset += 4;
4561                 }
4562             }
4563           if (gmask & RETURN_ADDR_MASK)
4564             {
4565               fprintf (file, "\tst %s,[%s+%d]\n",
4566                        reg_names[RETURN_ADDR_REGNUM], sp_str, offset);
4567               offset += 4;
4568             }
4569           sparc_flat_save_restore (file, sp_str, offset,
4570                                    gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
4571                                    current_frame_info.fmask,
4572                                    "st", "std");
4573           fprintf (file, "\tset %d,%s\n\tsub %s,%s,%s\n",
4574                    size - size1, t1_str, sp_str, t1_str, sp_str);
4575         }
4576     }
4577
4578   fprintf (file, "\t!#PROLOGUE# 1\n");
4579 }
4580 \f
4581 /* Do any necessary cleanup after a function to restore stack, frame,
4582    and regs. */
4583
4584 void
4585 sparc_flat_output_function_epilogue (file, size)
4586      FILE *file;
4587      int size;
4588 {
4589   rtx epilogue_delay = current_function_epilogue_delay_list;
4590   int noepilogue = FALSE;
4591
4592   /* This is only for the human reader.  */
4593   fprintf (file, "\t!#EPILOGUE#\n");
4594
4595   /* The epilogue does not depend on any registers, but the stack
4596      registers, so we assume that if we have 1 pending nop, it can be
4597      ignored, and 2 it must be filled (2 nops occur for integer
4598      multiply and divide).  */
4599
4600   size = SPARC_STACK_ALIGN (size);
4601   size = (!current_frame_info.initialized
4602            ? sparc_flat_compute_frame_size (size)
4603            : current_frame_info.total_size);
4604
4605   if (size == 0 && epilogue_delay == 0)
4606     {
4607       rtx insn = get_last_insn ();
4608
4609       /* If the last insn was a BARRIER, we don't have to write any code
4610          because a jump (aka return) was put there.  */
4611       if (GET_CODE (insn) == NOTE)
4612         insn = prev_nonnote_insn (insn);
4613       if (insn && GET_CODE (insn) == BARRIER)
4614         noepilogue = TRUE;
4615     }
4616
4617   if (!noepilogue)
4618     {
4619       unsigned int reg_offset = current_frame_info.reg_offset;
4620       unsigned int size1;
4621       char *sp_str = reg_names[STACK_POINTER_REGNUM];
4622       char *fp_str = reg_names[FRAME_POINTER_REGNUM];
4623       char *t1_str = "%g1";
4624
4625       /* In the reload sequence, we don't need to fill the load delay
4626          slots for most of the loads, also see if we can fill the final
4627          delay slot if not otherwise filled by the reload sequence.  */
4628
4629       if (size > 4095)
4630         fprintf (file, "\tset %d,%s\n", size, t1_str);
4631
4632       if (frame_pointer_needed)
4633         {
4634           if (size > 4095)
4635             fprintf (file,"\tsub %s,%s,%s\t\t!# sp not trusted here\n",
4636                      fp_str, t1_str, sp_str);
4637           else
4638             fprintf (file,"\tsub %s,%d,%s\t\t!# sp not trusted here\n",
4639                      fp_str, size, sp_str);
4640         }
4641
4642       /* Is the entire register save area offsetable from %sp?  */
4643       if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
4644         {
4645           size1 = 0;
4646         }
4647       else
4648         {
4649           /* Restore %sp in two steps, but make sure there is always a
4650              64 byte register save area, and %sp is properly aligned.  */
4651           /* Amount to increment %sp by, the first time.  */
4652           size1 = ((reg_offset - 64 - 16) + 15) & -16;
4653           /* Offset to register save area from %sp.  */
4654           reg_offset = size1 - reg_offset;
4655
4656           fprintf (file, "\tset %d,%s\n\tadd %s,%s,%s\n",
4657                    size1, t1_str, sp_str, t1_str, sp_str);
4658         }
4659
4660       /* We must restore the frame pointer and return address reg first
4661          because they are treated specially by the prologue output code.  */
4662       if (current_frame_info.gmask & FRAME_POINTER_MASK)
4663         {
4664           fprintf (file, "\tld [%s+%d],%s\n",
4665                    sp_str, reg_offset, fp_str);
4666           reg_offset += 4;
4667         }
4668       if (current_frame_info.gmask & RETURN_ADDR_MASK)
4669         {
4670           fprintf (file, "\tld [%s+%d],%s\n",
4671                    sp_str, reg_offset, reg_names[RETURN_ADDR_REGNUM]);
4672           reg_offset += 4;
4673         }
4674
4675       /* Restore any remaining saved registers.  */
4676       sparc_flat_save_restore (file, sp_str, reg_offset,
4677                                current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
4678                                current_frame_info.fmask,
4679                                "ld", "ldd");
4680
4681       /* If we had to increment %sp in two steps, record it so the second
4682          restoration in the epilogue finishes up.  */
4683       if (size1 > 0)
4684         {
4685           size -= size1;
4686           if (size > 4095)
4687             fprintf (file, "\tset %d,%s\n",
4688                      size, t1_str);
4689         }
4690
4691       if (current_function_returns_struct)
4692         fprintf (file, "\tjmp %%o7+12\n");
4693       else
4694         fprintf (file, "\tretl\n");
4695
4696       /* If the only register saved is the return address, we need a
4697          nop, unless we have an instruction to put into it.  Otherwise
4698          we don't since reloading multiple registers doesn't reference
4699          the register being loaded.  */
4700
4701       if (epilogue_delay)
4702         {
4703           if (size)
4704             abort ();
4705           final_scan_insn (XEXP (epilogue_delay, 0), file, 1, -2, 1);
4706         }
4707
4708       else if (size > 4095)
4709         fprintf (file, "\tadd %s,%s,%s\n", sp_str, t1_str, sp_str);
4710
4711       else if (size > 0)
4712         fprintf (file, "\tadd %s,%d,%s\n", sp_str, size, sp_str);
4713
4714       else
4715         fprintf (file, "\tnop\n");
4716     }
4717
4718   /* Reset state info for each function.  */
4719   current_frame_info = zero_frame_info;
4720 }
4721 \f
4722 /* Define the number of delay slots needed for the function epilogue.
4723
4724    On the sparc, we need a slot if either no stack has been allocated,
4725    or the only register saved is the return register.  */
4726
4727 int
4728 sparc_flat_epilogue_delay_slots ()
4729 {
4730   if (!current_frame_info.initialized)
4731     (void) sparc_flat_compute_frame_size (get_frame_size ());
4732
4733   if (current_frame_info.total_size == 0)
4734     return 1;
4735
4736   return 0;
4737 }
4738
4739 /* Return true is TRIAL is a valid insn for the epilogue delay slot.
4740    Any single length instruction which doesn't reference the stack or frame
4741    pointer is OK.  */
4742
4743 int
4744 sparc_flat_eligible_for_epilogue_delay (trial, slot)
4745      rtx trial;
4746      int slot;
4747 {
4748   if (get_attr_length (trial) == 1
4749       && ! reg_mentioned_p (stack_pointer_rtx, PATTERN (trial))
4750       && ! reg_mentioned_p (frame_pointer_rtx, PATTERN (trial)))
4751     return 1;
4752   return 0;
4753 }
4754 \f
4755 /* Adjust the cost of a scheduling dependency.  Return the new cost of
4756    a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4757
4758 int
4759 supersparc_adjust_cost (insn, link, dep_insn, cost)
4760      rtx insn;
4761      rtx link;
4762      rtx dep_insn;
4763      int cost;
4764 {
4765   enum attr_type insn_type;
4766
4767   if (! recog_memoized (insn))
4768     return 0;
4769
4770   insn_type = get_attr_type (insn);
4771
4772   if (REG_NOTE_KIND (link) == 0)
4773     {
4774       /* Data dependency; DEP_INSN writes a register that INSN reads some
4775          cycles later.  */
4776
4777       /* if a load, then the dependence must be on the memory address;
4778          add an extra 'cycle'.  Note that the cost could be two cycles
4779          if the reg was written late in an instruction group; we can't tell
4780          here.  */
4781       if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
4782         return cost + 3;
4783
4784       /* Get the delay only if the address of the store is the dependence.  */
4785       if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
4786         {
4787           rtx pat = PATTERN(insn);
4788           rtx dep_pat = PATTERN (dep_insn);
4789
4790           if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4791             return cost;  /* This shouldn't happen!  */
4792
4793           /* The dependency between the two instructions was on the data that
4794              is being stored.  Assume that this implies that the address of the
4795              store is not dependent.  */
4796           if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
4797             return cost;
4798
4799           return cost + 3;  /* An approximation.  */
4800         }
4801
4802       /* A shift instruction cannot receive its data from an instruction
4803          in the same cycle; add a one cycle penalty.  */
4804       if (insn_type == TYPE_SHIFT)
4805         return cost + 3;   /* Split before cascade into shift.  */
4806     }
4807   else
4808     {
4809       /* Anti- or output- dependency; DEP_INSN reads/writes a register that
4810          INSN writes some cycles later.  */
4811
4812       /* These are only significant for the fpu unit; writing a fp reg before
4813          the fpu has finished with it stalls the processor.  */
4814
4815       /* Reusing an integer register causes no problems.  */
4816       if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
4817         return 0;
4818     }
4819
4820   return cost;
4821 }