CORE: suppress some warnings
[yari.git] / shared / rtl / yari-core / stage_D.v
blob6df3e01a9667b2ac82ed62082f3d2ba41ffb7a9e
1 // -----------------------------------------------------------------------
2 //
3 // Copyright 2004,2007,2008 Tommy Thorn - All Rights Reserved
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
13 `timescale 1ns/10ps
14 `include "asm.v"
17 The decoding stage is relatively simple. It takes an instruction
18 and summarizes its different aspects. The most important task in
19 this stage is to fetch the values of the operands from the register
20 file and to forward them from later stages if needed.
/*
 * stage_D -- instruction decode (DE) stage.
 *
 * On every rising clock edge the incoming instruction (i_*) is split
 * into its fields and registered on the d_* outputs together with:
 *   - the write-back register d_wbr (6 bits; bit 5 set means "valid"),
 *   - the branch/jump target d_target,
 *   - d_has_delay_slot and d_illegal_instr flags,
 *   - the sign-extended immediate d_simm.
 *
 * Operand values (d_op1_val, d_op2_val, d_rt_val) are combinational:
 * they come from the synchronously-read register file copies unless a
 * result from the x_*, m_* or (locally registered) w_* stage targets
 * the same register, in which case that result is forwarded.
 *
 * The stage requests a restart (d_restart/d_restart_pc) in two cases:
 * a control transfer whose delay slot did not arrive in the following
 * cycle, and a load-use hazard against the load currently held in DE.
 */
module stage_D(input  wire        clock

              ,input  wire        i_valid           // 0 => ignore i_instr.
              ,input  wire [31:0] i_instr           // Current instr.
              ,input  wire [31:0] i_pc              // Addr of current instr
              ,input  wire [31:0] i_npc             // Addr of next instr

              // Forwarding
              ,input  wire        x_valid
              ,input  wire [ 5:0] x_wbr
              ,input  wire [31:0] x_res

              ,input  wire [31:0] m_pc              // Only read by the simulation COMMIT trace
              ,input  wire        m_valid
              ,input  wire [ 5:0] m_wbr
              ,input  wire [31:0] m_res

              ,output reg         d_valid           = 0
              ,output reg         d_illegal_instr   = 0 // XXX Must trap on this
              ,output reg  [31:0] d_pc              = 0
              ,output reg  [31:0] d_instr           = 0
              ,output reg  [31:0] d_npc             = 0
              ,output reg  [ 5:0] d_opcode          = 0
              ,output reg  [ 5:0] d_fn              = 0
              ,output reg  [ 4:0] d_rd              = 0
              ,output reg  [ 5:0] d_rs              = 0
              ,output reg  [ 5:0] d_rt              = 0
              ,output reg  [ 4:0] d_sa              = 0
              ,output reg  [31:0] d_target          = 0
              ,output reg  [ 5:0] d_wbr             = 0
              ,output reg         d_has_delay_slot  = 0

              ,output wire [31:0] d_op1_val         // aka d_rs_val
              ,output wire [31:0] d_op2_val
              ,output wire [31:0] d_rt_val          // For stores
              ,output reg  [31:0] d_simm            = 0

              ,output reg         d_restart         = 0
              ,output reg  [31:0] d_restart_pc      = 0
              ,output reg         d_flush_X         = 0

              ,output reg         d_load_use_hazard = 0

              ,input  wire        flush_D
              ,output reg  [31:0] perf_delay_slot_bubble = 0
              ,output reg  [47:0] perf_retired_inst = 0

              // For debugging only
              ,output wire        i_valid_muxed
              ,output wire [31:0] i_pc_muxed
              ,output wire [31:0] i_instr_muxed
              );

   parameter debug = 0;

   // Name various instruction fields
   wire [ 5:0]   i_opcode;
   wire [ 5:0]   i_rs, i_rt;
   wire [ 4:0]   i__rs, i__rt, i_rd;
   wire [ 4:0]   i_sa;
   wire [ 5:0]   i_fn;

   /*
    * When we restart instructions from DE due to load-use hazards,
    * reuse the already seen instruction rather than wait for it to
    * trickle down from IF.
    */
   assign        i_valid_muxed = i_valid | d_load_use_hazard;
   assign        i_pc_muxed    = d_load_use_hazard ? d_pc    : i_pc;
   assign        i_instr_muxed = d_load_use_hazard ? d_instr : i_instr;
   wire [31:0]   i_npc_muxed   = d_load_use_hazard ? d_npc   : i_npc;

   assign        {i_opcode,i__rs,i__rt,i_rd,i_sa,i_fn} = i_instr_muxed;
   assign        i_rs = {1'b1,i__rs}; // Bit 5 means valid.
   assign        i_rt = {1'b1,i__rt};

   wire [25:0]   i_offset = i_instr_muxed[25:0];

   // Sign-extend immediate field
   wire [31:0]   i_simm = {{16{i_instr_muxed[15]}}, i_instr_muxed[15:0]};

   // Branch target: next-PC plus the sign-extended immediate scaled by 4.
   wire [31:0]   i_branch_target = i_npc_muxed + {i_simm[29:0], 2'h0};
   // Jump target: top four bits of next-PC, 26-bit offset, two zero bits.
   wire [31:0]   i_jump_target   = {i_npc_muxed[31:28],i_offset,2'h0};

   always @(posedge clock) d_simm <= i_simm;

   // Selects d_simm instead of the rt value as the second operand.
   reg           d_op2_is_imm = 0;
   always @(posedge clock)
      d_op2_is_imm <= (i_opcode[5:3] == 1 || /* Immediate instructions */
                       i_opcode[5:3] == 4 || /* Loads */
                       i_opcode[5:3] == 5);  /* Stores */

   // Register file, kept as two identically-written copies so that rs
   // and rt can each be read through their own port.
   reg [31:0]    regs_A [31:0]; // Initialization is handled below
   reg [31:0]    regs_B [31:0]; // Initialization is handled below

   reg [31:0]    rs_reg_val, rt_reg_val;

   always @(posedge clock) begin
      if (m_valid & m_wbr[5])
         regs_A[m_wbr[4:0]] <= m_res;
      rs_reg_val <= regs_A[i_rs[4:0]];
   end

   always @(posedge clock) begin
      if (m_valid & m_wbr[5])
         regs_B[m_wbr[4:0]] <= m_res;
      rt_reg_val <= regs_B[i_rt[4:0]];
   end

   // Stage WB is only present here for bypass: it holds what the m_*
   // inputs carried on the previous clock edge.
   reg           w_valid = 0;
   reg [31:0]    w_res   = 0;
   reg [ 5:0]    w_wbr   = 0;
   always @(posedge clock) begin
      w_valid <= m_valid;
      w_wbr   <= m_wbr;
      w_res   <= m_res;
   end

   // A source register (bit 5 always set) can only match a write-back
   // register whose valid bit 5 is set as well.
   wire          d_forward_x_to_s = x_valid && d_rs == x_wbr;
   wire          d_forward_x_to_t = x_valid && d_rt == x_wbr;
   wire          d_forward_m_to_s = m_valid && d_rs == m_wbr;
   wire          d_forward_m_to_t = m_valid && d_rt == m_wbr;
   wire          d_forward_w_to_s = w_valid && d_rs == w_wbr;
   wire          d_forward_w_to_t = w_valid && d_rt == w_wbr;

   // Priority: x stage first, then m, then w, then the register file.
   assign        d_op1_val = (d_forward_x_to_s ? x_res :
                              d_forward_m_to_s ? m_res :
                              d_forward_w_to_s ? w_res :
                              rs_reg_val);
   assign        d_rt_val  = (d_forward_x_to_t ? x_res :
                              d_forward_m_to_t ? m_res :
                              d_forward_w_to_t ? w_res :
                              rt_reg_val);

   assign        d_op2_val = d_op2_is_imm ? d_simm : d_rt_val;

   reg           delay_slot_bubble = 0;
   always @(posedge clock) begin
      // Defaults; the hazard handling further down may override
      // d_valid, d_restart and d_flush_X (last nonblocking assignment
      // wins), so the statement order in this block matters.
      d_valid   <= i_valid_muxed;
      d_pc      <= i_pc_muxed;
      d_npc     <= i_npc_muxed;
      d_instr   <= i_instr_muxed;
      {d_opcode,d_rs,d_rt,d_rd,d_sa,d_fn} <= {i_opcode,i_rs,i_rt,i_rd,i_sa,i_fn};
      d_restart <= 0;
      d_flush_X <= 0;

      // Determine write-back register.  We set this to 0 for
      // unrecognized instructions to avoid unintended effects.  (Valid
      // registers are remapped to 32 - 63 to avoid having to make a
      // special case for r0 in the bypass network.)
      case (i_opcode[5:3])
      0: case (i_opcode[2:0])
         `REG:    d_wbr <= {|i_rd[4:0],i_rd[4:0]};
         `REGIMM:
            if (i__rt == `SYNCI)
               d_wbr <= 0;
            else
               d_wbr <= {6{i_rt[4]}};// d_rt == `BLTZAL || d_rt == `BGEZAL ? 31 : 0;
         `JAL:    d_wbr <= 6'd32+6'd31;
         default: d_wbr <= 0; // no writeback
         endcase
      1: d_wbr <= {|i_rt[4:0],i_rt[4:0]}; // Immediate instructions
      2: if ((i_opcode == `CP0 || i_opcode == `CP2) && ~i_rs[4] && ~i_rs[2])
            // CP0/CP2 forms that write rt (rs[4] == 0 and rs[2] == 0);
            // presumably the move-from-coprocessor encodings -- TODO confirm
            d_wbr <= {|i_rt[4:0],i_rt[4:0]};
         else
            d_wbr <= 0;
      3: if (i_opcode == `RDHWR)
            d_wbr <= {|i_rt[4:0],i_rt[4:0]}; // RDHWR
         else
            // Without this branch d_wbr would silently keep the
            // previous instruction's write-back register.
            d_wbr <= 0;
      4: d_wbr <= {|i_rt[4:0],i_rt[4:0]}; // Loads
      default: d_wbr <= 0;
      endcase

      // Calculate branch targets
      case (i_opcode)
      `REGIMM, `BEQ, `BNE, `BLEZ, `BGTZ:
         d_target <= i_branch_target;
      `JAL, `J:
         d_target <= i_jump_target;
      default:
         d_target <= debug ? i_pc_muxed : 32'hxxxxxxxx;
      endcase

      // Detect control transfers
      d_has_delay_slot <= 0;
      case (i_opcode)
      `REG: case (i_fn)
            `JALR, `JR: d_has_delay_slot <= 1;
            endcase
      `REGIMM: if (i_rt[4:0] != `SYNCI) d_has_delay_slot <= 1;
      `BEQ, `BNE, `BLEZ, `BGTZ, `JAL, `J:
         d_has_delay_slot <= 1;
      endcase

      // We use d_illegal_instr to mark the instructions that we don't
      // support.  Case items are grouped but kept in their original
      // matching order; recognised-but-unsupported encodings stay listed
      // explicitly for documentation.
      case (i_opcode)
      `REG: case (i_fn)
            `SLL, `SRL, `SRA, `SLLV, `SRLV, `SRAV, `JALR, `JR:
               d_illegal_instr <= 0;
            `MFHI, `MTHI, `MFLO, `MTLO, `MULT, `MULTU, `DIV, `DIVU, `ADD:
               d_illegal_instr <= 1;
            `ADDU:
               d_illegal_instr <= 0;
            `SUB:
               d_illegal_instr <= 1;
            `SUBU, `AND, `OR, `XOR, `NOR, `SLT, `SLTU:
               d_illegal_instr <= 0;
            default:
               d_illegal_instr <= 1;
            endcase
      `REGIMM, `JAL, `J, `BEQ, `BNE, `BLEZ, `BGTZ:
         d_illegal_instr <= 0;
      `ADDI:
         d_illegal_instr <= 1;
      `ADDIU, `SLTI, `SLTIU, `ANDI, `ORI, `XORI, `LUI:
         d_illegal_instr <= 0;
      `CP0:
         d_illegal_instr <= 0; // Supported == ignored
      // `CP1:
      `CP2:
         d_illegal_instr <= 0; // Supported == ignored
      // `BBQL:
      `LB, `LBU, `LH, `LHU, `LW, `SB, `SH, `SW:
         d_illegal_instr <= 0;
      default:
         d_illegal_instr <= 1;
      endcase

      /*
       * Detect and restart upon delay-slot bubbles.  The cost of this
       * is completely hidden by the I$ fill overhead.  The guarantee
       * that delay slots always follow immediately after their
       * preceding instruction makes correct handling of delayed
       * branches/jumps much simpler.
       *
       * Note, we are making an assumption on the I$ behaviour here,
       * that is, we're assuming it will (eventually) always be
       * possible to hit the two consecutive lines without a miss.
       * All normal caches behave this way (even directly mapped),
       * but it may be possible to construct an odd cache that
       * doesn't have this property.
       */
      delay_slot_bubble <= 0;
      if (d_valid & ~flush_D & d_has_delay_slot & ~i_valid_muxed) begin
         $display("%05d DE: *** Taken-branch w/bubble delay slot, restarting branch at %8x",
                  $time, d_pc);
         d_valid           <= 0;
         d_restart         <= 1;
         d_restart_pc      <= d_pc;
         d_flush_X         <= 1;
         delay_slot_bubble <= 1;
      end

      // Delay the count one cycle to improve cycle time
      if (delay_slot_bubble)
         perf_delay_slot_bubble <= perf_delay_slot_bubble + 1;

      // Counts every cycle in which the m_* inputs are flagged valid.
      if (m_valid)
         perf_retired_inst <= perf_retired_inst + 1;

      // Load-use hazard: the instruction currently held in DE is a load
      // and the incoming instruction reads the register it will write.
      // A match on rt is ignored when the incoming instruction is itself
      // a load or store (opcode[5:4] == 2).
      d_load_use_hazard <= 0;
      if (i_valid_muxed &&
          d_valid &&
          d_opcode[5:3] == 4 &&   // Load in stage DE
          (i_rs == d_wbr ||
           (i_rt == d_wbr && i_opcode[5:4] != 2))) // load/store forward t
      begin
         d_valid           <= 0;
         d_restart         <= 1;
         d_restart_pc      <= i_npc_muxed; // Notice, we know that DE had a
                                           // load, thus IF isn't a delay slot
         d_load_use_hazard <= 1;
         $display("%05d DE: *** load-use hazard, restarting %8x", $time,
                  i_pc_muxed);
      end
   end

`ifdef SIMULATE_MAIN
   // Simulation-only tracing.  The blocks guarded by if (0) are kept as
   // ready-to-enable debug aids.
   always @(posedge clock) begin
      if (0)
         $display("%05d d_op1_val (r%1d) %8x d_op2_is_imm %1d ? d_simm %1d : d_rt_val (r%1d) %8x (non fwd %8x %8x) (d_forward_x_to_t %1d %1d %1d %1d)", $time,
                  d_rs[4:0], d_op1_val,
                  d_op2_is_imm, d_simm, d_rt[4:0], d_rt_val,
                  rs_reg_val, rt_reg_val,

                  d_forward_x_to_t,
                  x_valid, i_rt, x_wbr
                  );

      if (0) begin
         if (d_forward_x_to_s)
            $display("%05d DE %8x: rs (r%1d) <- EX (%8x)", $time,
                     d_pc, x_wbr - 32, x_res);
         else if (d_forward_m_to_s)
            $display("%05d DE %8x: rs (r%1d) <- ME (%8x)", $time,
                     d_pc, m_wbr - 32, m_res);

         if (d_forward_x_to_t)
            $display("%05d DE %8x: rt (r%1d) <- EX (%8x)", $time,
                     d_pc, x_wbr - 32, x_res);
         else if (d_forward_m_to_t)
            $display("%05d DE %8x: rt (r%1d) <- ME (%8x)", $time,
                     d_pc, m_wbr - 32, m_res);
      end

      // !!CAREFUL!! This line is being matched by the cosimulation,
      // so if anything is changed, then run_simple.c:get_rtl_commit()
      // must be adjusted accordingly.
      if (m_valid & m_wbr[5])
         $display("%05d COMMIT %8x:r%02d <- %8x",
                  $time, m_pc, m_wbr[4:0], m_res);

      if (debug) begin
         $display("%5db DE: instr %8x valid %d (m_wbr:%2x) (i_npc %8x i_offset*4 %8x target %8x)",
                  $time, i_instr_muxed, i_valid_muxed, m_wbr,
                  i_npc_muxed, i_offset << 2, i_jump_target);

         // Register dump; reads regs_A (both copies receive the same
         // writes).  No array called "regs" exists in this module.
         if (0)
            $display("%5db DE: %x %x %x %x %x %x %x %x %x %x %x %x ", $time,
                     regs_A[0], regs_A[1], regs_A[2], regs_A[3],
                     regs_A[4], regs_A[5], regs_A[6], regs_A[7],
                     regs_A[8], regs_A[9], regs_A[10], regs_A[11]);
      end
   end

   wire [31:0] debug_regs_rs = regs_A[i_rs[4:0]];
   wire [31:0] debug_regs_rt = regs_B[i_rt[4:0]];

   // Start simulation with both register-file copies cleared.
   reg [31:0] i;
   initial
      for (i = 0; i < 32; i = i + 1) begin
         regs_A[i] = 0;
         regs_B[i] = 0;
      end
`endif
endmodule