gcc/config/mips/10000.md

   1 ;; DFA-based pipeline description for the VR1x000.
   2 ;;   Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
   3 ;;
   4 ;; This file is part of GCC.
   5
   6 ;; GCC is free software; you can redistribute it and/or modify it
   7 ;; under the terms of the GNU General Public License as published
   8 ;; by the Free Software Foundation; either version 3, or (at your
   9 ;; option) any later version.
  10
  11 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  13 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
  14 ;; License for more details.
  15
  16 ;; You should have received a copy of the GNU General Public License
  17 ;; along with GCC; see the file COPYING3.  If not see
  18 ;; <http://www.gnu.org/licenses/>.
  19
  20
  21 ;; R12K/R14K/R16K are derivatives of R10K, so they share its description
  22 ;; until specific tuning for each is added.
  23
  24 ;; R10000 has an int queue, an fp queue, and an address queue.
  25 ;; The int queue feeds ALU1 and ALU2.
  26 ;; The fp queue feeds the fp-adder and fp-multiplier.
  27 ;; The addr queue feeds the Load/Store unit.
  28 ;;
  29 ;; However, we define the fp-adder and fp-multiplier as
  30 ;; separate automatons, because the fp-multiplier is
  31 ;; divided into fp-multiplier, fp-division, and
  32 ;; fp-squareroot units, all of which share the same
  33 ;; issue and completion logic, yet can operate in
  34 ;; parallel.
  35 ;;
  36 ;; This is based on the model described in the R10K Manual
  37 ;; and it helps to reduce the size of the automata.
     ;;
     ;; Six automata are declared: r10k_a_int (both integer ALUs),
     ;; r10k_a_fpadder (fp-adder), r10k_a_addr (load/store unit), and
     ;; r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt (one each for the
     ;; fp multiply, divide and square-root units).
  38 (define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
  39                    r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")
  40
  41 (define_cpu_unit "r10k_alu1" "r10k_a_int")        ;; Integer ALU 1.
  42 (define_cpu_unit "r10k_alu2" "r10k_a_int")        ;; Integer ALU 2 (same automaton as ALU 1).
  43 (define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")   ;; FP adder.
  44 (define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")     ;; FP multiplier.
  45 (define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")     ;; FP divider.
  46 (define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")   ;; FP square-root unit.
  47 (define_cpu_unit "r10k_loadstore" "r10k_a_addr")  ;; Load/store unit.
  48
  49
  50 ;; R10k Loads and Stores.
     ;;
     ;; Integer loads and both prefetch types: latency 2, reserving the
     ;; load/store unit for one cycle.
  51 (define_insn_reservation "r10k_load" 2
  52   (and (eq_attr "cpu" "r10000")
  53        (eq_attr "type" "load,prefetch,prefetchx"))
  54   "r10k_loadstore")
  55
  56 (define_insn_reservation "r10k_store" 0              ;; Latency 0.
  57   (and (eq_attr "cpu" "r10000")
  58        (eq_attr "type" "store,fpstore,fpidxstore"))  ;; All store types, integer and FP.
  59   "r10k_loadstore")                                  ;; One cycle on the load/store unit.
  60
  61 (define_insn_reservation "r10k_fpload" 3             ;; Latency 3 (r10k_load uses 2).
  62   (and (eq_attr "cpu" "r10000")
  63        (eq_attr "type" "fpload,fpidxload"))          ;; FP load types.
  64   "r10k_loadstore")                                  ;; One cycle on the load/store unit.
  65
  66
  67 ;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
  68 ;; Miscellaneous arith goes here too (this is a guess).
     ;;
     ;; Latency 1.  The "|" in the reservation means the insn takes
     ;; either r10k_alu1 or r10k_alu2 for one cycle.
  69 (define_insn_reservation "r10k_arith" 1
  70   (and (eq_attr "cpu" "r10000")
  71        (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
  72   "r10k_alu1 | r10k_alu2")
  73
  74 ;; We treat mfhilo differently, because we need to know when
  75 ;; it's HI and when it's LO.
     ;;
     ;; r10k_mfhi matches an mfhilo insn whose operand 1 does not satisfy
     ;; lo_operand.  Keeping it as its own reservation lets the integer
     ;; divide define_bypass entries name it as the consumer.
  76 (define_insn_reservation "r10k_mfhi" 1
  77   (and (eq_attr "cpu" "r10000")
  78        (and (eq_attr "type" "mfhilo")
  79             (not (match_operand 1 "lo_operand"))))
  80   "r10k_alu1 | r10k_alu2")
  81
  82 (define_insn_reservation "r10k_mflo" 1               ;; Latency 1.
  83   (and (eq_attr "cpu" "r10000")
  84        (and (eq_attr "type" "mfhilo")
  85             (match_operand 1 "lo_operand")))         ;; Operand 1 satisfies lo_operand.
  86   "r10k_alu1 | r10k_alu2")                           ;; Either ALU, for one cycle.
  87
  88
  89 ;; ALU1 handles shifts, branch eval, and condmove.
  90 ;;
  91 ;; The brancher is a separate part of ALU1 that can only
  92 ;; do one branch per cycle (is this even implementable?).
  93 ;;
  94 ;; Unsure if the brancher handles jumps and calls as well, but since
  95 ;; they're related, we'll add them here for now.
     ;;
     ;; No separate brancher unit is used in this reservation: shifts,
     ;; branches, jumps and calls all reserve r10k_alu1 for one cycle.
  96 (define_insn_reservation "r10k_brancher" 1
  97   (and (eq_attr "cpu" "r10000")
  98        (eq_attr "type" "shift,branch,jump,call"))
  99   "r10k_alu1")
 100
 101 (define_insn_reservation "r10k_int_cmove" 1          ;; Latency 1.
 102   (and (eq_attr "cpu" "r10000")
 103        (and (eq_attr "type" "condmove")
 104             (eq_attr "mode" "SI,DI")))               ;; Integer-mode conditional moves only.
 105   "r10k_alu1")                                       ;; ALU1 only; SF/DF go to r10k_fp_cmove.
 106
 107
 108 ;; Coprocessor Moves.
 109 ;; mtc1/dmtc1 are handled by ALU1.
 110 ;; mfc1/dmfc1 are handled by the fp-multiplier.
     ;;
     ;; Moves to the coprocessor (type mtc): latency 3, one cycle on ALU1.
 111 (define_insn_reservation "r10k_mt_xfer" 3
 112   (and (eq_attr "cpu" "r10000")
 113        (eq_attr "type" "mtc"))
 114   "r10k_alu1")
 115
 116 (define_insn_reservation "r10k_mf_xfer" 2            ;; Latency 2.
 117   (and (eq_attr "cpu" "r10000")
 118        (eq_attr "type" "mfc"))                       ;; Moves from the coprocessor.
 119   "r10k_fpmpy")                                      ;; One cycle on the fp-multiplier.
 120
 121
 122 ;; Only ALU2 does int multiplications and divisions.
 123 ;;
 124 ;; According to the Vr10000 series user manual,
 125 ;; integer mult and div insns can be issued one
 126 ;; cycle earlier if using register Lo.  We model
 127 ;; this by using the Lo value by default, as it
 128 ;; is the more common value, and use a bypass
 129 ;; for the Hi value when needed.
 130 ;;
 131 ;; Also of note, there are different latencies
 132 ;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
 133 ;; However, gcc does not have separate types
 134 ;; for these insns.  Thus to strike a balance,
 135 ;; we use the Hi latency value for imul
 136 ;; operations until the imul type can be split.
     ;;
     ;; SI-mode multiplies: latency 6, with ALU2 reserved for six
     ;; consecutive cycles ("* 6").
 137 (define_insn_reservation "r10k_imul_single" 6
 138   (and (eq_attr "cpu" "r10000")
 139        (and (eq_attr "type" "imul,imul3")
 140             (eq_attr "mode" "SI")))
 141   "r10k_alu2 * 6")
 142
 143 (define_insn_reservation "r10k_imul_double" 10       ;; Latency 10.
 144   (and (eq_attr "cpu" "r10000")
 145        (and (eq_attr "type" "imul,imul3")
 146             (eq_attr "mode" "DI")))                  ;; DI-mode multiplies.
 147   "r10k_alu2 * 10")                                  ;; ALU2 reserved for ten consecutive cycles.
 148
 149 ;; Divides keep ALU2 busy.
     ;;
     ;; SI-mode divide: the default latency is 34, and ALU2 stays
     ;; reserved for 35 cycles.  A consumer matching r10k_mfhi sees
     ;; latency 35 instead, through a define_bypass.
 150 (define_insn_reservation "r10k_idiv_single" 34
 151   (and (eq_attr "cpu" "r10000")
 152        (and (eq_attr "type" "idiv")
 153             (eq_attr "mode" "SI")))
 154   "r10k_alu2 * 35")
 155
 156 (define_insn_reservation "r10k_idiv_double" 66       ;; Default latency 66.
 157   (and (eq_attr "cpu" "r10000")
 158        (and (eq_attr "type" "idiv")
 159             (eq_attr "mode" "DI")))                  ;; DI-mode divides.
 160   "r10k_alu2 * 67")                                  ;; ALU2 reserved for 67 consecutive cycles.
 161
 162 (define_bypass 35 "r10k_idiv_single" "r10k_mfhi")  ;; SI divide feeding r10k_mfhi: latency 35, not 34.
 163 (define_bypass 67 "r10k_idiv_double" "r10k_mfhi")  ;; DI divide feeding r10k_mfhi: latency 67, not 66.
 164
 165
 166 ;; Floating point add/sub, mul, abs value, neg, comp, & moves.
     ;;
     ;; fadd, fabs, fneg and fcmp go to the fp-adder: latency 2, with the
     ;; unit reserved for one cycle.
 167 (define_insn_reservation "r10k_fp_miscadd" 2
 168   (and (eq_attr "cpu" "r10000")
 169        (eq_attr "type" "fadd,fabs,fneg,fcmp"))
 170   "r10k_fpadd")
 171
 172 (define_insn_reservation "r10k_fp_miscmul" 2         ;; Latency 2.
 173   (and (eq_attr "cpu" "r10000")
 174        (eq_attr "type" "fmul,fmove"))                ;; FP multiplies and FP moves.
 175   "r10k_fpmpy")                                      ;; One cycle on the fp-multiplier.
 176
 177 (define_insn_reservation "r10k_fp_cmove" 2           ;; Latency 2.
 178   (and (eq_attr "cpu" "r10000")
 179        (and (eq_attr "type" "condmove")
 180             (eq_attr "mode" "SF,DF")))               ;; FP-mode conditional moves only.
 181   "r10k_fpmpy")                                      ;; fp-multiplier; SI/DI go to r10k_int_cmove.
 182
 183
 184 ;; The fcvt.s.[wl] insn has latency 4, repeat 2.
 185 ;; All other fcvt insns have latency 2, repeat 1.
     ;;
     ;; The repeat rate is modelled by the number of cycles the fp-adder
     ;; is reserved: two here ("* 2").
 186 (define_insn_reservation "r10k_fcvt_single" 4
 187   (and (eq_attr "cpu" "r10000")
 188        (and (eq_attr "type" "fcvt")
 189             (eq_attr "cnv_mode" "I2S")))
 190   "r10k_fpadd * 2")
 191
 192 (define_insn_reservation "r10k_fcvt_other" 2         ;; Latency 2.
 193   (and (eq_attr "cpu" "r10000")
 194        (and (eq_attr "type" "fcvt")
 195             (eq_attr "cnv_mode" "!I2S")))            ;; Every fcvt not matched by r10k_fcvt_single.
 196   "r10k_fpadd")                                      ;; One cycle on the fp-adder (repeat 1).
 197
 198
 199 ;; Run the fmadd insn through fp-adder first, then fp-multiplier.
 200 ;;
 201 ;; The latency for fmadd is 2 cycles if the result is used
 202 ;; by another fmadd instruction.
     ;;
     ;; The default latency is 4.  The "," in the reservation means
     ;; r10k_fpmpy is reserved on the cycle after r10k_fpadd.
 203 (define_insn_reservation "r10k_fmadd" 4
 204   (and (eq_attr "cpu" "r10000")
 205        (eq_attr "type" "fmadd"))
 206   "r10k_fpadd, r10k_fpmpy")
 207
 208 (define_bypass 2 "r10k_fmadd" "r10k_fmadd")          ;; fmadd feeding fmadd: latency 2, not 4.
 209
 210
 211 ;; Floating point Divisions & square roots.
     ;;
     ;; SF-mode divide and reciprocal divide: latency 12, with the
     ;; fp-divider reserved for 14 consecutive cycles.
 212 (define_insn_reservation "r10k_fdiv_single" 12
 213   (and (eq_attr "cpu" "r10000")
 214        (and (eq_attr "type" "fdiv,frdiv")
 215             (eq_attr "mode" "SF")))
 216   "r10k_fpdiv * 14")
 217
 218 (define_insn_reservation "r10k_fdiv_double" 19       ;; Latency 19.
 219   (and (eq_attr "cpu" "r10000")
 220        (and (eq_attr "type" "fdiv,frdiv")
 221             (eq_attr "mode" "DF")))                  ;; DF-mode divides.
 222   "r10k_fpdiv * 21")                                 ;; fp-divider reserved for 21 consecutive cycles.
 223
 224 (define_insn_reservation "r10k_fsqrt_single" 18      ;; Latency 18.
 225   (and (eq_attr "cpu" "r10000")
 226        (and (eq_attr "type" "fsqrt")
 227             (eq_attr "mode" "SF")))                  ;; SF-mode square roots.
 228   "r10k_fpsqrt * 20")                                ;; fp-sqrt unit reserved for 20 consecutive cycles.
 229
 230 (define_insn_reservation "r10k_fsqrt_double" 33      ;; Latency 33.
 231   (and (eq_attr "cpu" "r10000")
 232        (and (eq_attr "type" "fsqrt")
 233             (eq_attr "mode" "DF")))                  ;; DF-mode square roots.
 234   "r10k_fpsqrt * 35")                                ;; fp-sqrt unit reserved for 35 consecutive cycles.
 235
 236 (define_insn_reservation "r10k_frsqrt_single" 30     ;; Latency 30 (r10k_fsqrt_single uses 18).
 237   (and (eq_attr "cpu" "r10000")
 238        (and (eq_attr "type" "frsqrt")
 239             (eq_attr "mode" "SF")))                  ;; SF-mode reciprocal square roots.
 240   "r10k_fpsqrt * 20")                                ;; Same 20-cycle reservation as r10k_fsqrt_single.
 241
 242 (define_insn_reservation "r10k_frsqrt_double" 52     ;; Latency 52 (r10k_fsqrt_double uses 33).
 243   (and (eq_attr "cpu" "r10000")
 244        (and (eq_attr "type" "frsqrt")
 245             (eq_attr "mode" "DF")))                  ;; DF-mode reciprocal square roots.
 246   "r10k_fpsqrt * 35")                                ;; Same 35-cycle reservation as r10k_fsqrt_double.
 247
 248
 249 ;; Handle unknown/multi insns here (this is a guess).
     ;;
     ;; Latency 1.  The "+" in the reservation means both integer ALUs
     ;; are reserved in the same cycle.
 250 (define_insn_reservation "r10k_unknown" 1
 251   (and (eq_attr "cpu" "r10000")
 252        (eq_attr "type" "unknown,multi"))
 253   "r10k_alu1 + r10k_alu2")