1 // -----------------------------------------------------------------------
3 // Copyright 2004,2007 Tommy Thorn - All Rights Reserved
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
17 The decoding stage is relatively simple. It takes an instruction
18 and summarizes its different aspects. The most important task in
19 this stage is to fetch the values of the operands from the register
20 file and to forward them from later stages if needed.
23 module stage_D(input wire clock
25 ,input wire i_valid
// 0 => ignore i_instr.
26 ,input wire [31:0] i_instr
// Current instr.
27 ,input wire [31:0] i_pc
// Addr of current instr
28 ,input wire [31:0] i_npc
// Addr of next instr
32 ,input wire [ 5:0] x_wbr
33 ,input wire [31:0] x_res
35 ,input wire [31:0] m_pc
37 ,input wire [ 5:0] m_wbr
38 ,input wire [31:0] m_res
40 ,output reg d_valid
= 0
41 ,output reg d_illegal_instr
= 0 // XXX Must trap on this
42 ,output reg [31:0] d_pc
= 0
43 ,output reg [31:0] d_instr
= 0
44 ,output reg [31:0] d_npc
= 0
45 ,output reg [ 5:0] d_opcode
= 0
46 ,output reg [ 5:0] d_fn
= 0
47 ,output reg [ 4:0] d_rd
= 0
48 ,output reg [ 5:0] d_rs
= 0
49 ,output reg [ 5:0] d_rt
= 0
50 ,output reg [ 4:0] d_sa
= 0
51 ,output reg [31:0] d_target
= 0
52 ,output reg [ 5:0] d_wbr
= 0
53 ,output reg d_has_delay_slot
= 0
55 ,output wire [31:0] d_op1_val
// aka d_rs_val
56 ,output wire [31:0] d_op2_val
57 ,output wire [31:0] d_rt_val
// For stores
58 ,output reg [31:0] d_simm
= 0
60 ,output reg d_restart
= 0
61 ,output reg [31:0] d_restart_pc
= 0
62 ,output reg d_flush_X
= 0
65 ,output reg [31:0] perf_delay_slot_bubble
= 0
66 ,output reg [47:0] perf_retired_inst
= 0
71 // Name various instruction fields
73 wire [ 5:0] i_rs
, i_rt
;
74 wire [ 4:0] i__rs
, i__rt
, i_rd
;
78 assign {i_opcode
,i__rs
,i__rt
,i_rd
,i_sa
,i_fn
} = i_instr
;
79 assign i_rs
= {1'b1,i__rs
}; // Bit 5 means valid.
80 assign i_rt
= {1'b1,i__rt
};
82 wire [25:0] i_offset
= i_instr
[25:0];
84 // Sign-extend immediate field
85 wire [31:0] i_simm
= {{16{i_instr
[15]}}, i_instr
[15:0]};
87 wire [31:0] i_branch_target
= i_npc
+ {i_simm
[29:0], 2'h0
};
88 wire [31:0] i_jump_target
= {i_npc
[31:28],i_offset
,2'h0
};
90 always @(posedge clock
) d_simm
<= i_simm
;
93 always @(posedge clock
)
94 d_op2_is_imm
<= (i_opcode
[5:3] == 1 ||
/* Immediate instructions */
95 i_opcode
[5:3] == 4 ||
/* Loads */
96 i_opcode
[5:3] == 5); /* Stores */
99 reg [31:0] regs_A
[31:0]; // Initialization is handled below
100 reg [31:0] regs_B
[31:0]; // Initialization is handled below
102 reg [31:0] rs_reg_val
, rt_reg_val
;
104 always @(posedge clock
) begin
105 if (m_valid
& m_wbr
[5])
106 regs_A
[m_wbr
[4:0]] <= m_res
;
107 rs_reg_val
<= regs_A
[i_rs
[4:0]];
110 always @(posedge clock
) begin
111 if (m_valid
& m_wbr
[5])
112 regs_B
[m_wbr
[4:0]] <= m_res
;
113 rt_reg_val
<= regs_B
[i_rt
[4:0]];
116 // Stage WB is only present here for bypass
118 reg [31:0] w_res
= 0;
119 reg [ 5:0] w_wbr
= 0;
120 always @(posedge clock
) begin
126 wire d_forward_x_to_s
= x_valid
&& d_rs
== x_wbr
;
127 wire d_forward_x_to_t
= x_valid
&& d_rt
== x_wbr
;
128 wire d_forward_m_to_s
= m_valid
&& d_rs
== m_wbr
;
129 wire d_forward_m_to_t
= m_valid
&& d_rt
== m_wbr
;
130 wire d_forward_w_to_s
= w_valid
&& d_rs
== w_wbr
;
131 wire d_forward_w_to_t
= w_valid
&& d_rt
== w_wbr
;
133 assign d_op1_val
= (d_forward_x_to_s ? x_res
:
134 d_forward_m_to_s ? m_res
:
135 d_forward_w_to_s ? w_res
:
137 assign d_rt_val
= (d_forward_x_to_t ? x_res
:
138 d_forward_m_to_t ? m_res
:
139 d_forward_w_to_t ? w_res
:
142 // XXX PERF This is immensely stupid; all opcodes know whether they
143 // want the immediate or the register themselves, so reduce the cost
144 // of d_op2_val by not letting it cover d_simm and adjust all users
145 // of d_op2_val. The only drawback is slightly less sharing for a few
146 // instructions. (And while there, rename op1 -> rs, op2 -> rt).
147 assign d_op2_val
= d_op2_is_imm ? d_simm
: d_rt_val
;
149 always @(posedge clock
) begin
153 d_instr
<= i_instr
; // XXX Just for debugging
154 {d_opcode
,d_rs
,d_rt
,d_rd
,d_sa
,d_fn
} <= {i_opcode
,i_rs
,i_rt
,i_rd
,i_sa
,i_fn
};
158 // Determine write-back register. We set this to 0 for
159 // unrecognized instructions to avoid unintended effects. (Valid
160 // registers are remapped to 32 - 63 to avoid having to make a
161 // special case for r0 in the bypass network.)
163 0: case (i_opcode
[2:0])
164 `REG: d_wbr <= {|i_rd[4:0],i_rd[4:0]};
169 d_wbr
<= {6{i_rt
[4]}};// d_rt == `BLTZAL || d_rt == `BGEZAL ? 31 : 0;
170 `JAL: d_wbr <= 6'd32+6'd31;
171 default: d_wbr
<= 0; // no writeback
173 1: d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // Immediate instructions
174 2: if ((i_opcode
== `CP0 || i_opcode == `CP2) && ~i_rs[4] && ~i_rs[2])
175 d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // MTCP0
178 3: if (i_opcode
== `RDHWR)
179 d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // RDHWR
180 4: d_wbr
<= {|i_rt
[4:0],i_rt
[4:0]}; // Loads
184 // Calculate branch targets
186 `REGIMM: d_target <= i_branch_target;
187 `BEQ: d_target <= i_branch_target;
188 `BNE: d_target <= i_branch_target;
189 `BLEZ: d_target <= i_branch_target;
190 `BGTZ: d_target <= i_branch_target;
191 `JAL: d_target <= i_jump_target;
192 `J: d_target <= i_jump_target;
193 default: d_target
<= debug ? i_pc
: 32'hxxxxxxxx
;
196 // Detect control transfers
197 d_has_delay_slot
<= 0;
200 `JALR: d_has_delay_slot <= 1;
201 `JR: d_has_delay_slot <= 1;
203 `REGIMM: d_has_delay_slot <= 1;
204 `BEQ: d_has_delay_slot <= 1;
205 `BNE: d_has_delay_slot <= 1;
206 `BLEZ: d_has_delay_slot <= 1;
207 `BGTZ: d_has_delay_slot <= 1;
208 `JAL: d_has_delay_slot <= 1;
209 `J: d_has_delay_slot <= 1;
212 // We use d_illegal_instr to mark the instructions that we don't support
215 `SLL: d_illegal_instr <= 0;
216 `SRL: d_illegal_instr <= 0;
217 `SRA: d_illegal_instr <= 0;
218 `SLLV: d_illegal_instr <= 0;
219 `SRLV: d_illegal_instr <= 0;
220 `SRAV: d_illegal_instr <= 0;
221 `JALR: d_illegal_instr <= 0;
222 `JR: d_illegal_instr <= 0;
223 `MFHI: d_illegal_instr <= 1;
224 `MTHI: d_illegal_instr <= 1;
225 `MFLO: d_illegal_instr <= 1;
226 `MTLO: d_illegal_instr <= 1;
227 `MULT: d_illegal_instr <= 1;
228 `MULTU: d_illegal_instr <= 1;
229 `DIV: d_illegal_instr <= 1;
230 `DIVU: d_illegal_instr <= 1;
231 `ADD: d_illegal_instr <= 1;
232 `ADDU: d_illegal_instr <= 0;
233 `SUB: d_illegal_instr <= 1;
234 `SUBU: d_illegal_instr <= 0;
235 `AND: d_illegal_instr <= 0;
236 `OR: d_illegal_instr <= 0;
237 `XOR: d_illegal_instr <= 0;
238 `NOR: d_illegal_instr <= 0;
239 `SLT: d_illegal_instr <= 0;
240 `SLTU: d_illegal_instr <= 0;
241 default: d_illegal_instr
<= 1;
243 `REGIMM: d_illegal_instr <= 0;
244 `JAL: d_illegal_instr <= 0;
245 `J: d_illegal_instr <= 0;
246 `BEQ: d_illegal_instr <= 0;
247 `BNE: d_illegal_instr <= 0;
248 `BLEZ: d_illegal_instr <= 0;
249 `BGTZ: d_illegal_instr <= 0;
250 `ADDI: d_illegal_instr <= 1;
251 `ADDIU: d_illegal_instr <= 0;
252 `SLTI: d_illegal_instr <= 0;
253 `SLTIU: d_illegal_instr <= 0;
254 `ANDI: d_illegal_instr <= 0;
255 `ORI: d_illegal_instr <= 0;
256 `XORI: d_illegal_instr <= 0;
257 `LUI: d_illegal_instr <= 0;
258 `CP0: d_illegal_instr <= 0; // Supported == ignored
260 `CP2: d_illegal_instr <= 0; // Supported == ignored
262 `LB: d_illegal_instr <= 0;
263 `LBU: d_illegal_instr <= 0;
264 `LH: d_illegal_instr <= 0;
265 `LHU: d_illegal_instr <= 0;
266 `LW: d_illegal_instr <= 0;
267 `SB: d_illegal_instr <= 0;
268 `SH: d_illegal_instr <= 0;
269 `SW: d_illegal_instr <= 0;
270 default: d_illegal_instr
<= 1;
274 * Detect and restart upon delay-slot bubbles. The cost of this
275 * is completely hidden by the I$ fill overhead. The guarantee
276 * that delay slots always follow immediately after their
277 * preceding instruction makes correct handling of delayed
278 * branches/jumps much simpler.
280 * Note, we are making an assumption on the I$ behaviour here,
281 * that is, we're assuming it will (eventually) always be
282 * possible to hit the two consecutive lines without a miss.
283 * All normal caches behave this way (even direct-mapped ones),
284 * but it may be possible to construct an odd cache that
285 * doesn't have this property.
288 if (d_valid
& ~flush_D
& d_has_delay_slot
& ~i_valid
) begin
289 $display("%05d *** Taken-branch w/bubble delay slot, restarting branch at %8x",
293 d_restart_pc
<= d_pc
;
297 // Delay the count one cycle to improve cycle time
299 perf_delay_slot_bubble
<= perf_delay_slot_bubble
+ 1;
302 perf_retired_inst
<= perf_retired_inst
+ 1;
309 always @(posedge clock
) begin
311 $display("%05d d_op1_val (r%1d) %8x d_op2_is_imm %1d ? d_simm %1d : d_rt_val (r%1d) %8x (non fwd %8x %8x) (d_forward_x_to_t %1d %1d %1d %1d)", $time,
312 d_rs
[4:0], d_op1_val
,
313 d_op2_is_imm
, d_simm
, d_rt
[4:0], d_rt_val
,
314 rs_reg_val
, rt_reg_val
,
321 if (d_forward_x_to_s
)
322 $display("%05d DE %8x: rs (r%1d) <- EX (%8x)", $time,
323 d_pc
, x_wbr
- 32, x_res
);
324 else if (d_forward_m_to_s
)
325 $display("%05d DE %8x: rs (r%1d) <- ME (%8x)", $time,
326 d_pc
, m_wbr
- 32, m_res
);
328 if (d_forward_x_to_t
)
329 $display("%05d DE %8x: rt (r%1d) <- EX (%8x)", $time,
330 d_pc
, x_wbr
- 32, x_res
);
331 else if (d_forward_m_to_t
)
332 $display("%05d DE %8x: rt (r%1d) <- ME (%8x)", $time,
333 d_pc
, m_wbr
- 32, m_res
);
336 // !!CAREFUL!! This line is being matched by the cosimulation,
337 // so if anything is changed, then run_simple.c:get_rtl_commit()
338 // must be adjusted accordingly.
339 if (m_valid
& m_wbr
[5])
340 $display("%05d COMMIT %8x:r%02d <- %8x",
341 $time, m_pc
, m_wbr
[4:0], m_res
);
344 $display("%5db DE: instr %8x valid %d (m_wbr:%2x) (i_npc %8x i_offset*4 %8x target %8x)",
345 $time, d_instr
, d_valid
, m_wbr
,
346 i_npc
, i_offset
<< 2, i_jump_target
);
349 $display("%5db DE: %x %x %x %x %x %x %x %x %x %x %x %x ", $time,
350 regs
[0], regs
[1], regs
[2], regs
[3],
351 regs
[4], regs
[5], regs
[6], regs
[7],
352 regs
[8], regs
[9], regs
[10], regs
[11]);
354 $display("%5db DE: %x %x %x", $time,
355 d_target
, i_branch_target
, i_jump_target
);
359 wire [31:0] debug_regs_rs
= regs_A
[i_rs
[4:0]];
360 wire [31:0] debug_regs_rt
= regs_B
[i_rt
[4:0]];
363 for (i
= 0; i
< 32; i
= i
+ 1) begin