gcc/config/sparc/niagara7.md

   1 ;; Scheduling description for Niagara-7
   2 ;;   Copyright (C) 2016-2018 Free Software Foundation, Inc.
   3 ;;
   4 ;; This file is part of GCC.
   5 ;;
   6 ;; GCC is free software; you can redistribute it and/or modify
   7 ;; it under the terms of the GNU General Public License as published by
   8 ;; the Free Software Foundation; either version 3, or (at your option)
   9 ;; any later version.
  10 ;;
  11 ;; GCC is distributed in the hope that it will be useful,
  12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 ;; GNU General Public License for more details.
  15 ;;
  16 ;; You should have received a copy of the GNU General Public License
  17 ;; along with GCC; see the file COPYING3.  If not see
  18 ;; <http://www.gnu.org/licenses/>.
  19
  20 (define_automaton "niagara7_0")
  21
  22 ;; The S4 core has a dual-issue queue.  This queue is divided into two
  23 ;; slots.  One instruction can be issued each cycle to each slot, and
  24 ;; up to 2 instructions are committed each cycle.  Each slot serves
  25 ;; several execution units, as depicted below:
  26 ;;
  27 ;;
  28 ;;                 n7_slot0 - Integer unit.
  29 ;;                          - Load/Store unit.
  30 ;; === QUEUE ==>
  31 ;;
  32 ;;                 n7_slot1 - Integer unit.
  33 ;;                          - Branch unit.
  34 ;;                          - Floating-point and graphics unit.
  35 ;;                          - 3-cycle crypto unit.
  36
  37 (define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")
  38
  39 ;; Single-issue instructions: these stall the pipeline and prevent any
  40 ;; other instruction from being issued in the same cycle, so they occupy
  41 ;; both slots at once.  Multi-instruction insns are assumed to do the same.
  42
  43 (define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")
  44
  45 (define_insn_reservation "n7_single" 1
  46   (and (eq_attr "cpu" "niagara7")
  47        (eq_attr "type" "multi,savew,flushw,trap"))
  48   "n7_single_issue")
  49
  50 ;; Simple integer operations (ALU, shifts, conditional moves, compares)
  51 ;; have a latency of 1 and can be issued to either slot.
  52
  53 (define_insn_reservation "n7_integer" 1
  54   (and (eq_attr "cpu" "niagara7")
  55        (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
  56   "(n7_slot0 | n7_slot1)")
  57
  58 ;; An iflush takes 27 cycles and may be issued to either slot.
  59
  60 (define_insn_reservation "n7_iflush" 27
  61   (and (eq_attr "cpu" "niagara7")
  62        (eq_attr "type" "iflush"))
  63   "(n7_slot0 | n7_slot1), nothing*26")
  64
  65 ;; Integer multiplications execute in the integer unit, on either
  66 ;; slot, with a latency of 12 cycles.
  67 ;;
  68 ;; The array*, edge* and pdistn instructions behave likewise.
  69
  70 (define_insn_reservation "n7_imul" 12
  71   (and (eq_attr "cpu" "niagara7")
  72        (eq_attr "type" "imul,array,edge,edgen,pdistn"))
  73   "(n7_slot0 | n7_slot1), nothing*11")
  74
  75 ;; Integer divisions execute in the integer unit, on either slot, with
  76 ;; a latency of 35 cycles.
  77
  78 (define_insn_reservation "n7_idiv" 35
  79   (and (eq_attr "cpu" "niagara7")
  80        (eq_attr "type" "idiv"))
  81   "(n7_slot0 | n7_slot1), nothing*34")
  82
  83 ;; Both integer and floating-point load instructions have a latency of
  84 ;; 5 cycles, and execute in the load/store unit in slot0.
  85 ;;
  86 ;; The prefetch instruction also executes in the load/store unit, but
  87 ;; its latency is only 1 cycle.
  88
  89 (define_insn_reservation "n7_load" 5
  90   (and (eq_attr "cpu" "niagara7")
  91        (ior (eq_attr "type" "fpload,sload")
  92             (and (eq_attr "type" "load")
  93                  (eq_attr "subtype" "regular"))))
  94   "n7_slot0, nothing*4")
  95
  96 (define_insn_reservation "n7_prefetch" 1
  97   (and (eq_attr "cpu" "niagara7")
  98        (eq_attr "type" "load")
  99        (eq_attr "subtype" "prefetch"))
 100   "n7_slot0")
 101
 102 ;; Stores, both integer and floating-point, execute in the load/store
 103 ;; unit in slot0 and have a latency of 1 cycle.
 104
 105 (define_insn_reservation "n7_store" 1
 106   (and (eq_attr "cpu" "niagara7")
 107        (eq_attr "type" "store,fpstore"))
 108   "n7_slot0")
 109
 110 ;; Control-transfer instructions (branches, calls and returns) execute
 111 ;; in the branch unit, which is served by slot1.
 112
 113 (define_insn_reservation "n7_cti" 1
 114   (and (eq_attr "cpu" "niagara7")
 115        (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
 116   "n7_slot1")
 117
 118 ;; Many instructions executing in the floating-point and graphics unit
 119 ;; in slot1 have a latency of 11 cycles.
 120
 121 (define_insn_reservation "n7_fp" 11
 122   (and (eq_attr "cpu" "niagara7")
 123        (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
 124             (and (eq_attr "type" "fga")
 125                  (eq_attr "subtype" "fpu,maxmin"))))
 126   "n7_slot1, nothing*10")
 127
 128 ;; Floating-point divisions and square roots have high latencies: 24
 129 ;; cycles for fpdivs/fpsqrts and 37 cycles for fpdivd/fpsqrtd.  They
 130 ;; execute in the floating-point and graphics unit in slot1.
 131
 132
 133 (define_insn_reservation "n7_fpdivs" 24
 134   (and (eq_attr "cpu" "niagara7")
 135        (eq_attr "type" "fpdivs,fpsqrts"))
 136   "n7_slot1, nothing*23")
 137
 138 (define_insn_reservation "n7_fpdivd" 37
 139   (and (eq_attr "cpu" "niagara7")
 140        (eq_attr "type" "fpdivd,fpsqrtd"))
 141   "n7_slot1, nothing*36")
 142
 143 ;; SIMD VIS instructions executing in the Floating-point and graphics
 144 ;; unit (FPG) in slot1 usually have a latency of either 11 or 12
 145 ;; cycles.
 146 ;;
 147 ;; However, the latency for many instructions is only 3 cycles if the
 148 ;; consumer can also be executed in 3 cycles.  We model this with a
 149 ;; bypass between the n7_vis_* reservations.  In these cases the
 150 ;; instructions run in the 3-cycle crypto unit, which also serves slot1.
 151
 152 (define_insn_reservation "n7_vis_11cycles" 11
 153   (and (eq_attr "cpu" "niagara7")
 154        (ior (and (eq_attr "type" "fga")
 155                  (eq_attr "subtype" "addsub64,other"))
 156             (and (eq_attr "type" "vismv")
 157                  (eq_attr "subtype" "double,single"))
 158             (and (eq_attr "type" "visl")
 159                  (eq_attr "subtype" "double,single"))))
 160   "n7_slot1, nothing*10")
 161
 162 (define_insn_reservation "n7_vis_12cycles" 12
 163   (and (eq_attr "cpu" "niagara7")
 164        (ior (eq_attr "type" "bmask,viscmp")
 165             (and (eq_attr "type" "fga")
 166                  (eq_attr "subtype" "cmask"))
 167             (and (eq_attr "type" "vismv")
 168                  (eq_attr "subtype" "movstouw"))))
 169   "n7_slot1, nothing*11")
 170
 171 (define_bypass 3 "n7_vis_*" "n7_vis_*")
 172
 173 ;; Some other VIS instructions (lzd, alignaddr) have a latency of 12
 174 ;; cycles and are not executed in the 3-cycle crypto pipe.
 175
 176 (define_insn_reservation "n7_lzd" 12
 177   (and (eq_attr "cpu" "niagara7")
 178        (ior (eq_attr "type" "lzd")
 179             (and (eq_attr "type" "gsr")
 180                  (eq_attr "subtype" "alignaddr"))))
 181   "n7_slot1, nothing*11")
 182
 183 ;; A couple of VIS moves have very low latencies: movxtod 1, movdtox 2.
 184
 185 (define_insn_reservation "n7_single_vis" 1
 186   (and (eq_attr "cpu" "niagara7")
 187        (eq_attr "type" "vismv")
 188        (eq_attr "subtype" "movxtod"))
 189   "n7_slot1")
 190
 191 (define_insn_reservation "n7_double_vis" 2
 192   (and (eq_attr "cpu" "niagara7")
 193        (eq_attr "type" "vismv")
 194        (eq_attr "subtype" "movdtox"))
 195   "n7_slot1, nothing")
 196
 197 ;; Reading and writing the gsr register takes a high number of cycles
 198 ;; that is not documented in the PRM.  Let's use the same value as
 199 ;; for the M8.
 200
 201 (define_insn_reservation "n7_gsr_reg" 70
 202   (and (eq_attr "cpu" "niagara7")
 203        (eq_attr "type" "gsr")
 204        (eq_attr "subtype" "reg"))
 205   "n7_slot1, nothing*69")