1 // -----------------------------------------------------------------------
3 // Copyright 2004,2007,2008 Tommy Thorn - All Rights Reserved
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
17 The decoding stage is relatively simple. It takes an instruction
18 and summarizes its different aspects. The most important task in
19 this stage is to fetch the values of the operands from the register
20 file and to forward them from later stages if needed.
23 module stage_D(input wire clock
25 ,input wire i_valid
// 0 => ignore i_instr.
26 ,input wire [31:0] i_instr
// Current instr.
27 ,input wire [31:0] i_pc
// Addr of current instr
28 ,input wire [31:0] i_npc
// Addr of next instr
32 ,input wire [ 5:0] x_wbr
33 ,input wire [31:0] x_res
35 ,input wire [31:0] m_pc
37 ,input wire [ 5:0] m_wbr
38 ,input wire [31:0] m_res
40 ,output reg d_valid
= 0
41 ,output reg d_illegal_instr
= 0 // XXX Must trap on this
42 ,output reg [31:0] d_pc
= 0
43 ,output reg [31:0] d_instr
= 0
44 ,output reg [31:0] d_npc
= 0
45 ,output reg [ 5:0] d_opcode
= 0
46 ,output reg [ 5:0] d_fn
= 0
47 ,output reg [ 4:0] d_rd
= 0
48 ,output reg [ 5:0] d_rs
= 0
49 ,output reg [ 5:0] d_rt
= 0
50 ,output reg [ 4:0] d_sa
= 0
51 ,output reg [31:0] d_target
= 0
52 ,output reg [ 5:0] d_wbr
= 0
53 ,output reg d_has_delay_slot
= 0
55 ,output wire [31:0] d_op1_val
// aka d_rs_val
56 ,output wire [31:0] d_op2_val
57 ,output wire [31:0] d_rt_val
// For stores
58 ,output reg [31:0] d_simm
= 0
60 ,output reg d_restart
= 0
61 ,output reg [31:0] d_restart_pc
= 0
62 ,output reg d_flush_X
= 0
64 ,output reg d_load_use_hazard
= 0
67 ,output reg [31:0] perf_delay_slot_bubble
= 0
68 ,output reg [47:0] perf_retired_inst
= 0
71 ,output wire i_valid_muxed
72 ,output wire [31:0] i_pc_muxed
73 ,output wire [31:0] i_instr_muxed
78 // Name various instruction fields
80 wire [ 5:0] i_rs
, i_rt
;
81 wire [ 4:0] i__rs
, i__rt
, i_rd
;
86 * When we restart instructions from DE due to load-use hazards,
87 * reuse the already seen instruction rather than wait for it to
88 * trickle down from IF.
90 assign i_valid_muxed
= i_valid | d_load_use_hazard
;
91 assign i_pc_muxed
= d_load_use_hazard ? d_pc
: i_pc
;
92 assign i_instr_muxed
= d_load_use_hazard ? d_instr
: i_instr
;
93 wire [31:0] i_npc_muxed
= d_load_use_hazard ? d_npc
: i_npc
;
95 assign {i_opcode
,i__rs
,i__rt
,i_rd
,i_sa
,i_fn
} = i_instr_muxed
;
96 assign i_rs
= {1'b1,i__rs
}; // Bit 5 means valid.
97 assign i_rt
= {1'b1,i__rt
};
99 wire [25:0] i_offset
= i_instr_muxed
[25:0];
101 // Sign-extend immediate field
102 wire [31:0] i_simm
= {{16{i_instr_muxed
[15]}}, i_instr_muxed
[15:0]};
104 wire [31:0] i_branch_target
= i_npc_muxed
+ {i_simm
[29:0], 2'h0
};
105 wire [31:0] i_jump_target
= {i_npc_muxed
[31:28],i_offset
,2'h0
};
107 always @(posedge clock
) d_simm
<= i_simm
;
109 reg d_op2_is_imm
= 0;
110 always @(posedge clock
)
111 d_op2_is_imm
<= (i_opcode
[5:3] == 1 ||
/* Immediate instructions */
112 i_opcode
[5:3] == 4 ||
/* Loads */
113 i_opcode
[5:3] == 5); /* Stores */
116 reg [31:0] regs_A
[31:0]; // Initialization is handled below
117 reg [31:0] regs_B
[31:0]; // Initialization is handled below
119 reg [31:0] rs_reg_val
, rt_reg_val
;
121 always @(posedge clock
) begin
122 if (m_valid
& m_wbr
[5])
123 regs_A
[m_wbr
[4:0]] <= m_res
;
124 rs_reg_val
<= regs_A
[i_rs
[4:0]];
127 always @(posedge clock
) begin
128 if (m_valid
& m_wbr
[5])
129 regs_B
[m_wbr
[4:0]] <= m_res
;
130 rt_reg_val
<= regs_B
[i_rt
[4:0]];
133 // Stage WB is only present here for bypass
135 reg [31:0] w_res
= 0;
136 reg [ 5:0] w_wbr
= 0;
137 always @(posedge clock
) begin
143 wire d_forward_x_to_s
= x_valid
&& d_rs
== x_wbr
;
144 wire d_forward_x_to_t
= x_valid
&& d_rt
== x_wbr
;
145 wire d_forward_m_to_s
= m_valid
&& d_rs
== m_wbr
;
146 wire d_forward_m_to_t
= m_valid
&& d_rt
== m_wbr
;
147 wire d_forward_w_to_s
= w_valid
&& d_rs
== w_wbr
;
148 wire d_forward_w_to_t
= w_valid
&& d_rt
== w_wbr
;
150 assign d_op1_val
= (d_forward_x_to_s ? x_res
:
151 d_forward_m_to_s ? m_res
:
152 d_forward_w_to_s ? w_res
:
154 assign d_rt_val
= (d_forward_x_to_t ? x_res
:
155 d_forward_m_to_t ? m_res
:
156 d_forward_w_to_t ? w_res
:
159 assign d_op2_val
= d_op2_is_imm ? d_simm
: d_rt_val
;
161 reg delay_slot_bubble
= 0;
162 always @(posedge clock
) begin
163 d_valid
<= i_valid_muxed
;
165 d_npc
<= i_npc_muxed
;
166 d_instr
<= i_instr_muxed
;
167 {d_opcode
,d_rs
,d_rt
,d_rd
,d_sa
,d_fn
} <= {i_opcode
,i_rs
,i_rt
,i_rd
,i_sa
,i_fn
};
171 // Determine write-back register. We set this to 0 for
172 // unrecognized instructions to avoid unintended effects. (Valid
173 // registers are remapped to 32 - 63 to avoid having to make a
174 // special case for r0 in the bypass network.)
176 0: case (i_opcode
[2:0])
177 `REG: d_wbr <= {|i_rd[4:0],i_rd[4:0]};
182 d_wbr
<= {6{i_rt
[4]}};// d_rt == `BLTZAL || d_rt == `BGEZAL ? 31 : 0;
183 `JAL: d_wbr <= 6'd32+6'd31;
184 default: d_wbr
<= 0; // no writeback
186 1: d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // Immediate instructions
187 2: if ((i_opcode
== `CP0 || i_opcode == `CP2) && ~i_rs[4] && ~i_rs[2])
188 d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // MFCP0/MFCP2 (move from coprocessor: rt is written)
191 3: if (i_opcode
== `RDHWR)
192 d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // RDHWR
193 4: d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // Loads
197 // Calculate branch targets
199 `REGIMM: d_target <= i_branch_target;
200 `BEQ: d_target <= i_branch_target;
201 `BNE: d_target <= i_branch_target;
202 `BLEZ: d_target <= i_branch_target;
203 `BGTZ: d_target <= i_branch_target;
204 `JAL: d_target <= i_jump_target;
205 `J: d_target <= i_jump_target;
206 default: d_target
<= debug ? i_pc_muxed
: 32'hxxxxxxxx
;
209 // Detect control transfers
210 d_has_delay_slot
<= 0;
213 `JALR: d_has_delay_slot <= 1;
214 `JR: d_has_delay_slot <= 1;
216 `REGIMM: if (i_rt[4:0] != `SYNCI) d_has_delay_slot <= 1;
217 `BEQ: d_has_delay_slot <= 1;
218 `BNE: d_has_delay_slot <= 1;
219 `BLEZ: d_has_delay_slot <= 1;
220 `BGTZ: d_has_delay_slot <= 1;
221 `JAL: d_has_delay_slot <= 1;
222 `J: d_has_delay_slot <= 1;
225 // We use d_illegal_instr to mark the instructions that we don't support
228 `SLL: d_illegal_instr <= 0;
229 `SRL: d_illegal_instr <= 0;
230 `SRA: d_illegal_instr <= 0;
231 `SLLV: d_illegal_instr <= 0;
232 `SRLV: d_illegal_instr <= 0;
233 `SRAV: d_illegal_instr <= 0;
234 `JALR: d_illegal_instr <= 0;
235 `JR: d_illegal_instr <= 0;
236 `MFHI: d_illegal_instr <= 1;
237 `MTHI: d_illegal_instr <= 1;
238 `MFLO: d_illegal_instr <= 1;
239 `MTLO: d_illegal_instr <= 1;
240 `MULT: d_illegal_instr <= 1;
241 `MULTU: d_illegal_instr <= 1;
242 `DIV: d_illegal_instr <= 1;
243 `DIVU: d_illegal_instr <= 1;
244 `ADD: d_illegal_instr <= 1;
245 `ADDU: d_illegal_instr <= 0;
246 `SUB: d_illegal_instr <= 1;
247 `SUBU: d_illegal_instr <= 0;
248 `AND: d_illegal_instr <= 0;
249 `OR: d_illegal_instr <= 0;
250 `XOR: d_illegal_instr <= 0;
251 `NOR: d_illegal_instr <= 0;
252 `SLT: d_illegal_instr <= 0;
253 `SLTU: d_illegal_instr <= 0;
254 default: d_illegal_instr
<= 1;
256 `REGIMM: d_illegal_instr <= 0;
257 `JAL: d_illegal_instr <= 0;
258 `J: d_illegal_instr <= 0;
259 `BEQ: d_illegal_instr <= 0;
260 `BNE: d_illegal_instr <= 0;
261 `BLEZ: d_illegal_instr <= 0;
262 `BGTZ: d_illegal_instr <= 0;
263 `ADDI: d_illegal_instr <= 1;
264 `ADDIU: d_illegal_instr <= 0;
265 `SLTI: d_illegal_instr <= 0;
266 `SLTIU: d_illegal_instr <= 0;
267 `ANDI: d_illegal_instr <= 0;
268 `ORI: d_illegal_instr <= 0;
269 `XORI: d_illegal_instr <= 0;
270 `LUI: d_illegal_instr <= 0;
271 `CP0: d_illegal_instr <= 0; // Supported == ignored
273 `CP2: d_illegal_instr <= 0; // Supported == ignored
275 `LB: d_illegal_instr <= 0;
276 `LBU: d_illegal_instr <= 0;
277 `LH: d_illegal_instr <= 0;
278 `LHU: d_illegal_instr <= 0;
279 `LW: d_illegal_instr <= 0;
280 `SB: d_illegal_instr <= 0;
281 `SH: d_illegal_instr <= 0;
282 `SW: d_illegal_instr <= 0;
283 default: d_illegal_instr
<= 1;
287 * Detect and restart upon delay-slot bubbles. The cost of this
288 * is completely hidden by the I$ fill overhead. The guarantee
289 * that delay slots always follow immediately after their
290 * preceding instruction makes correct handling of delayed
291 * branches/jumps much simpler.
293 * Note, we are making an assumption on the I$ behaviour here,
294 * that is, we're assuming it will (eventually) always be
295 * possible to hit the two consecutive lines without a miss.
296 * All normal caches behave this way (even direct-mapped),
297 * but it may be possible to construct an odd cache that
298 * doesn't have this property.
301 delay_slot_bubble
<= 0;
302 if (d_valid
& ~flush_D
& d_has_delay_slot
& ~i_valid_muxed
) begin
303 $display("%05d DE: *** Taken-branch w/bubble delay slot, restarting branch at %8x",
307 d_restart_pc
<= d_pc
;
309 delay_slot_bubble
<= 1;
312 // Delay the count one cycle to improve cycle time
313 if (delay_slot_bubble
)
314 perf_delay_slot_bubble
<= perf_delay_slot_bubble
+ 1;
317 perf_retired_inst
<= perf_retired_inst
+ 1;
319 d_load_use_hazard
<= 0;
322 d_opcode
[5:3] == 4 && // Load in stage DE
324 (i_rt
== d_wbr
&& i_opcode
[5:4] != 2))) // load/store forward t
328 d_restart_pc
<= i_npc_muxed
; // Notice, we know that DE had a
329 // load, thus IF isn't a delay slot
330 d_load_use_hazard
<= 1;
331 $display("%05d DE: *** load-use hazard, restarting %8x", $time,
340 always @(posedge clock
) begin
342 $display("%05d d_op1_val (r%1d) %8x d_op2_is_imm %1d ? d_simm %1d : d_rt_val (r%1d) %8x (non fwd %8x %8x) (d_forward_x_to_t %1d %1d %1d %1d)", $time,
343 d_rs
[4:0], d_op1_val
,
344 d_op2_is_imm
, d_simm
, d_rt
[4:0], d_rt_val
,
345 rs_reg_val
, rt_reg_val
,
352 if (d_forward_x_to_s
)
353 $display("%05d DE %8x: rs (r%1d) <- EX (%8x)", $time,
354 d_pc
, x_wbr
- 32, x_res
);
355 else if (d_forward_m_to_s
)
356 $display("%05d DE %8x: rs (r%1d) <- ME (%8x)", $time,
357 d_pc
, m_wbr
- 32, m_res
);
359 if (d_forward_x_to_t
)
360 $display("%05d DE %8x: rt (r%1d) <- EX (%8x)", $time,
361 d_pc
, x_wbr
- 32, x_res
);
362 else if (d_forward_m_to_t
)
363 $display("%05d DE %8x: rt (r%1d) <- ME (%8x)", $time,
364 d_pc
, m_wbr
- 32, m_res
);
367 // !!CAREFUL!! This line is being matched by the cosimulation,
368 // so if anything is changed, then run_simple.c:get_rtl_commit()
369 // must be adjusted accordingly.
370 if (m_valid
& m_wbr
[5])
371 $display("%05d COMMIT %8x:r%02d <- %8x",
372 $time, m_pc
, m_wbr
[4:0], m_res
);
375 $display("%5db DE: instr %8x valid %d (m_wbr:%2x) (i_npc %8x i_offset*4 %8x target %8x)",
376 $time, i_instr_muxed
, i_valid_muxed
, m_wbr
,
377 i_npc_muxed
, i_offset
<< 2, i_jump_target
);
380 $display("%5db DE: %x %x %x %x %x %x %x %x %x %x %x %x ", $time,
381 regs
[0], regs
[1], regs
[2], regs
[3],
382 regs
[4], regs
[5], regs
[6], regs
[7],
383 regs
[8], regs
[9], regs
[10], regs
[11]);
387 wire [31:0] debug_regs_rs
= regs_A
[i_rs
[4:0]];
388 wire [31:0] debug_regs_rt
= regs_B
[i_rt
[4:0]];
391 for (i
= 0; i
< 32; i
= i
+ 1) begin