CORE: Restart load-use hazards from stage DE
[yari.git] / rtl / yari-core / stage_X.v
blob981058cbfb2aef63b38e07f70d7058812ae888ca
1 // -----------------------------------------------------------------------
2 //
3 // Copyright 2004,2007,2008 Tommy Thorn - All Rights Reserved
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
13 `timescale 1ns/10ps
14 `include "asm.v"
15 `include "perfcounters.v"
// stage_X: execute (EX) stage.
//
// On every rising clock edge the decode-stage signals (d_*) are registered
// into the matching x_* signals.  The ALU result x_res is produced
// combinationally from those registered values.  The stage also requests
// pipeline restarts (x_restart / x_restart_pc / x_flush_D) for jumps, taken
// branches, SYNCI, BREAK and for instructions that touch HI/LO while the
// iterative multiplier or divider is still busy.
17 module stage_X(input wire clock
// NOTE(review): `restart` is not referenced in the visible module body;
// `restart_pc` is only read in the SYNCI handling.
19 ,input wire restart // for synci
20 ,input wire [31:0] restart_pc // for synci
// ---- Inputs from the decode stage --------------------------------------
22 ,input wire d_valid
23 ,input wire [31:0] d_instr
24 ,input wire [31:0] d_pc
25 ,input wire [31:0] d_npc
26 ,input wire [ 5:0] d_opcode
27 ,input wire [ 5:0] d_fn
28 ,input wire [ 4:0] d_rd
29 ,input wire [ 5:0] d_rs
30 ,input wire [ 5:0] d_rt
31 ,input wire [ 4:0] d_sa
32 ,input wire [31:0] d_target
33 ,input wire [ 5:0] d_wbr
34 ,input wire d_has_delay_slot
36 ,input wire [31:0] d_op1_val
37 ,input wire [31:0] d_op2_val
38 ,input wire [31:0] d_rt_val
39 ,input wire [31:0] d_simm
// d_restart / d_restart_pc are only used in a $display in the visible body.
42 ,input wire d_restart
43 ,input wire [31:0] d_restart_pc
// Only drives the perf_load_use_hazard counter in this module.
45 ,input wire d_load_use_hazard
// NOTE(review): m_valid / m_wbr are not referenced in the visible body.
47 ,input wire m_valid
48 ,input wire [ 5:0] m_wbr
// ---- Registered outputs to the next stage ------------------------------
50 ,output reg x_valid = 0
51 ,output reg [31:0] x_instr = 0 // XXX for debugging only
52 ,output reg x_is_delay_slot = 0
53 ,output reg [31:0] x_pc = 0
54 ,output reg [ 5:0] x_opcode = 0
55 ,output reg [31:0] x_op1_val = 0 // XXX
56 ,output reg [ 5:0] x_rt = 0
57 ,output reg [31:0] x_rt_val = 0 // for stores only
58 ,output reg [ 5:0] x_wbr = 0
// x_res is assigned in a combinational always block (the ALU), not clocked.
59 ,output reg [31:0] x_res
// ---- SYNCI request: x_synci pulses, x_synci_a = d_op1_val + d_simm ------
61 ,output reg x_synci = 0
62 ,output reg [31:0] x_synci_a = 0
// ---- Pipeline restart request ------------------------------------------
64 ,output reg x_restart = 0
65 ,output reg [31:0] x_restart_pc = 0
66 ,output reg x_flush_D = 0
// ---- Performance counters ----------------------------------------------
// Outputs are incremented in this module; inputs are only read back through
// the CP2 performance-counter read path (x_special).
68 ,output reg [31:0] perf_branch_hazard = 0
69 ,input wire [31:0] perf_dcache_misses
70 ,input wire [31:0] perf_delay_slot_bubble
71 ,output reg [31:0] perf_div_hazard = 0
72 ,input wire [31:0] perf_icache_misses
73 ,input wire [31:0] perf_io_load_busy
74 ,input wire [31:0] perf_io_store_busy
75 ,input wire [31:0] perf_load_hit_store_hazard
76 ,output reg [31:0] perf_load_use_hazard = 0
77 ,output reg [31:0] perf_mult_hazard = 0
78 ,input wire [47:0] perf_retired_inst
79 ,input wire [31:0] perf_sb_full
// NOTE(review): the closing ");" of the port list is not present in this
// copy of the file; it appears to have been lost together with other very
// short lines.
// NOTE(review): `debug` is not referenced in the visible module body.
82 parameter debug = 0;
84 `include "config.h"
// Registered copies of decode-stage values that are used only inside this
// stage (not exported as ports).
86 reg [31:0] x_op2_val = 0;
87 reg [ 5:0] x_fn = 0;
88 reg [ 4:0] x_sa = 0;
// NOTE(review): d_rs is 6 bits wide but x_rs is 5, so "x_rs <= d_rs" drops
// the top bit.  x_rs is not read anywhere in the visible body.
89 reg [ 4:0] x_rs = 0;
90 reg [31:0] x_npc = 0;
92 /* XXX Ideally, the core frequency is a configuration variable set
93 * at the top level, but as I'm using a different platform than the
94 * one we're comparing JOP to, I hardwire it here. This isn't
95 * cheating as this is the frequency we attain on an EP1C12C6 that
96 * the JOP numbers came from, but I don't have that particular FPGA. */
// Constant handed back when software reads the PERF_FREQUENCY counter.
// NOTE(review): presumably in kHz (i.e. 75 MHz) -- confirm with the reader
// of this counter.
wire [31:0] perf_frequency;
assign      perf_frequency = 75000;

// Equality of the two decode-stage operands, used by BEQ / BNE.
wire        d_ops_eq;
assign      d_ops_eq = (d_op1_val == d_op2_val);

// x_negate_op2: registered flag telling the adder to compute op1 - op2
// instead of op1 + op2.  It is set for the set-on-less-than and subtract
// instructions, decoded one cycle early from the decode-stage fields.
reg         x_negate_op2 = 0;

always @(posedge clock) begin
   x_negate_op2 <= (d_opcode == `SLTI)  ||
                   (d_opcode == `SLTIU) ||
                   ((d_opcode == `REG) && ((d_fn == `SLT)  ||
                                           (d_fn == `SLTU) ||
                                           (d_fn == `SUB)  ||
                                           (d_fn == `SUBU)));
end

// Shared adder/subtractor.  Subtraction is two's complement: invert every
// bit of operand 2 and feed x_negate_op2 in as the carry-in.
wire [31:0] x_sum;
wire        x_carry_flag;
wire [31:0] x_op2_neg;

assign x_op2_neg            = {32{x_negate_op2}} ^ x_op2_val;
assign {x_carry_flag,x_sum} = x_op1_val + x_op2_neg + x_negate_op2;

// Condition flags derived from the adder result.
wire        x_sign_flag;
wire        x_overflow_flag;

assign x_sign_flag     = x_sum[31];
// Signed overflow: both adder inputs have the same sign, yet the sum's sign
// differs from it.
assign x_overflow_flag = (x_op1_val[31] == x_op2_neg[31]) &&
                         (x_op1_val[31] != x_sum[31]);

// Shift amount: bit 2 of the function code selects the low five bits of
// operand 1 over the instruction's shift-amount field.
wire [4:0]  x_shift_dist;
assign      x_shift_dist = x_fn[2] ? x_op1_val[4:0] : x_sa;
119 // XXX BUG These architectural registers must live in ME or later
120 // as ME can flush the pipe rendering an update of state in EX
121 // premature. Of course this leads to headaches with forwarding and
122 // hazards on instruction depending on these... Sigh.
// ---- Iterative multiplier state ----------------------------------------
// mult_busy : a multiplication is in progress
// mult_a    : multiplicand, shifted left each step
// mult_3a   : three times the multiplicand (only consumed when
//             MULT_RADIX_4 is defined)
// mult_b    : remaining multiplier bits, shifted right each step
// mult_neg  : the accumulated product must still be negated once done
// mult_hi/lo: product accumulator; also receive the divider's
//             remainder (hi) and quotient (lo)
123 reg mult_busy = 0;
124 reg [63:0] mult_a = 0, mult_3a = 0;
125 reg [31:0] mult_b = 0;
126 reg mult_neg = 0;
127 reg [31:0] mult_lo = 0;
128 reg [31:0] mult_hi = 0;
// ---- Iterative divider state -------------------------------------------
// div_busy     : a division has been started and its result not yet
//                copied into mult_hi / mult_lo
// div_neg_res  : negate the quotient when writing it back
// div_neg_rem  : negate the remainder when writing it back
// div_hi/div_lo: shifting remainder / quotient pair
// diff         : 33-bit trial subtraction; bit 32 set means "did not fit"
// div_n        : step counter; the divider iterates while bit 6 is clear
// NOTE: div_neg_res and div_neg_rem have no initial value.
130 reg div_busy = 0, div_neg_res, div_neg_rem;
131 reg [31:0] divisor = 0, div_hi = 0, div_lo = 0;
132 reg [32:0] diff = 0;
133 reg [ 6:0] div_n = 0;
// ---- Coprocessor 0 registers -------------------------------------------
// In the visible code only BREAK and the `ifdef LATER section write these.
135 reg [31:0] cp0_status = 0, // XXX -- " --
136 cp0_epc = 0,
137 cp0_errorepc = 0,
138 cp0_cause = 0;
// d_has_delay_slot & d_valid, registered; i.e. it describes the instruction
// currently in this stage.
140 reg x_has_delay_slot = 0;
// Incremented on every clock; bits [35:4] are returned for RDHWR 2.
142 reg [35:0] tsc = 0; // Free running counter
// One-cycle-delayed request to increment perf_branch_hazard.
144 reg branch_event = 0;
// x_special: a result that depends only on decode-stage inputs.  It is
// registered here, one cycle early, so that the ALU merely has to select it.
// Opcodes (and RDHWR/CP2 register numbers) not listed leave it unchanged.
reg [31:0] x_special = 0;

always @(posedge clock) begin
   case (d_opcode)
   // d_npc + 4 is what the ALU hands out for JALR (under `REG), for the
   // REGIMM group and for JAL.
   `REG, `REGIMM, `JAL:
      x_special <= d_npc + 4;

   // Hardware registers, selected by d_rd
   `RDHWR:
      case (d_rd)
      0: x_special <= 0;                       // # of processors-1
      1: x_special <= 4 << IC_WORD_INDEX_BITS;
      2: x_special <= tsc[35:4];               // @40 MHz 28 min before rollover
      3: x_special <= 1 << 4;                  // TSC scaling factor
      endcase

   // Immediate moved into the upper half-word, lower half cleared
   `LUI:
      x_special <= {d_simm[15: 0], 16'd0};

   // Performance counters, selected by d_rd
   `CP2:
      case (d_rd)
      `PERF_BRANCH_HAZARD:         x_special <= perf_branch_hazard;
      `PERF_DCACHE_MISSES:         x_special <= perf_dcache_misses;
      `PERF_DELAY_SLOT_BUBBLE:     x_special <= perf_delay_slot_bubble;
      `PERF_DIV_HAZARD:            x_special <= perf_div_hazard;
      `PERF_FREQUENCY:             x_special <= perf_frequency;
      `PERF_ICACHE_MISSES:         x_special <= perf_icache_misses;
      `PERF_IO_LOAD_BUSY:          x_special <= perf_io_load_busy;
      `PERF_IO_STORE_BUSY:         x_special <= perf_io_store_busy;
      `PERF_LOAD_HIT_STORE_HAZARD: x_special <= perf_load_hit_store_hazard;
      `PERF_LOAD_USE_HAZARD:       x_special <= perf_load_use_hazard;
      `PERF_MULT_HAZARD:           x_special <= perf_mult_hazard;
      // Reported in units of 16 retired instructions.  @40 MHz and 1 CPI
      // it takes 28 min to roll over.
      `PERF_RETIRED_INST:          x_special <= perf_retired_inst[35:4];
      `PERF_SB_FULL:               x_special <= perf_sb_full;
      endcase
   endcase
end
/*
 * The ALU
 *
 * Purely combinational: selects x_res from the operands and function codes
 * that were registered into x_* on the previous clock edge.  Opcodes and
 * function codes that produce no ALU result leave x_res at 'hX.
 *
 * Inputs : x_opcode, x_fn, x_op1_val, x_op2_val, x_shift_dist, x_sum,
 *          x_sign_flag, x_overflow_flag, x_carry_flag, x_special,
 *          mult_hi, mult_lo
 * Output : x_res
 */
always @* begin
   // Assigned first so that every path through the block drives x_res.
   x_res = 'hX;

   case (x_opcode)
   `REG:
      case (x_fn)
      // Shifts.  x_shift_dist has already picked between the shift-amount
      // field and the low bits of operand 1.
      `SLL :  x_res = x_op2_val << x_shift_dist;
      `SRL :  x_res = x_op2_val >> x_shift_dist;
      `SRA :  x_res = $signed(x_op2_val) >>> x_shift_dist;
      `SLLV:  x_res = x_op2_val << x_shift_dist;
      `SRLV:  x_res = x_op2_val >> x_shift_dist;
      `SRAV:  x_res = $signed(x_op2_val) >>> x_shift_dist;

      `JALR:  x_res = x_special;

      // XXX BUG mult_hi / mult_lo are architectural state updated in this
      // stage, which is premature if a later stage flushes the pipe.
      `MFHI:  x_res = mult_hi;
      `MFLO:  x_res = mult_lo;

      // XXX BUG Trap on overflow for ADD, ADDI and SUB
      // The adder already subtracts when x_negate_op2 is set.
      `ADD, `ADDU, `SUB, `SUBU:
              x_res = x_sum;

      `AND:   x_res = x_op1_val & x_op2_val;
      `OR:    x_res = x_op1_val | x_op2_val;
      `XOR:   x_res = x_op1_val ^ x_op2_val;
      `NOR:   x_res = ~(x_op1_val | x_op2_val);

      // Signed less-than: sign of the difference, corrected for overflow
      `SLT:   x_res = {31'd0, x_sign_flag ^ x_overflow_flag};
      // Unsigned less-than: the subtraction produced no carry-out
      `SLTU:  x_res = {31'd0, ~x_carry_flag};

      default: x_res = 'hX;
      endcase

   // BLTZ, BGEZ, BLTZAL, BGEZAL and JAL
   `REGIMM, `JAL:
      x_res = x_special;

   `ADDI, `ADDIU:
      x_res = x_sum;

   `SLTI:   x_res = {31'd0, x_sign_flag ^ x_overflow_flag};
   `SLTIU:  x_res = {31'd0, ~x_carry_flag};

   // Logical immediates operate on the low half-word only.  ANDI clears
   // the upper half; ORI and XORI pass operand 1's upper half through.
   `ANDI:   x_res = {16'b0, x_op1_val[15:0] & x_op2_val[15:0]};
   `ORI:    x_res = {x_op1_val[31:16], x_op1_val[15:0] | x_op2_val[15:0]};
   `XORI:   x_res = {x_op1_val[31:16], x_op1_val[15:0] ^ x_op2_val[15:0]};

   //`CP1:
   `LUI, `RDHWR, `CP2:
      x_res = x_special;

   default: x_res = 'hX;
   endcase
end
// Main sequential process of the execute stage.
//
// Everything below uses non-blocking assignments into the x_* registers, so
// when the same register is assigned more than once in a clock cycle the
// textually last assignment wins.  The assignments in this first section
// therefore act as defaults that the instruction-specific code further down
// may override.
230 always @(posedge clock) begin
231 tsc <= tsc + 1;
// Plain pipeline registers: decode-stage value -> execute-stage value.
232 x_valid <= d_valid;
233 x_instr <= d_instr;
234 x_pc <= d_pc;
235 x_npc <= d_npc;
236 x_opcode <= d_opcode;
237 x_fn <= d_fn;
238 x_sa <= d_sa;
239 x_rs <= d_rs;
240 x_op1_val <= d_op1_val;
241 x_op2_val <= d_op2_val;
242 x_rt <= d_rt;
243 x_rt_val <= d_rt_val;
244 x_wbr <= d_wbr;
// An invalid instruction never has a delay slot.  x_is_delay_slot is built
// from the flags of the instruction that is leaving this stage, i.e. it
// marks the instruction that follows a valid delay-slot-carrying one.
245 x_has_delay_slot <= d_has_delay_slot & d_valid;
246 x_is_delay_slot <= x_has_delay_slot & x_valid;
// Defaults: no restart, no flush, no synci.  The restart target defaults to
// the decoded target so that branches only have to set x_restart.
248 x_restart <= 0;
249 x_restart_pc <= d_target;
250 x_flush_D <= 0;
251 x_synci <= 0;
254 /* Stat counts aren't critical, so I delay them to keep them out
255 of the critical path */
// branch_event was set by the previous cycle's instruction; count it now and
// clear the request (instruction code below may set it again).
256 if (branch_event)
257 perf_branch_hazard <= perf_branch_hazard + 1;
258 branch_event <= 0;
// Iterative shift-and-add multiplier.  While mult_busy is set, each clock
// adds the appropriate multiple of mult_a into {mult_hi,mult_lo} and shifts
// mult_a left / mult_b right.  Once mult_b has been shifted down to zero the
// product is negated if mult_neg is set (taking one extra cycle, after which
// mult_neg is clear) and then mult_busy is dropped.
// NOTE(review): the two "end" lines closing each "if (mult_busy) begin"
// section are not present in this copy of the file.
260 //`define MULT_RADIX_4 1
261 `ifdef MULT_RADIX_4
262 // Radix-4 Multiplication Machine (this is not the best way to do this)
263 if (mult_busy) begin
264 $display("MULT[U] %x * %x + %x", mult_a, mult_b, {mult_hi,mult_lo});
// Two multiplier bits per step: add 0, 1x, 2x or 3x the multiplicand.
266 case (mult_b[1:0])
267 1: {mult_hi,mult_lo} <= {mult_hi,mult_lo} + mult_a;
268 2: {mult_hi,mult_lo} <= {mult_hi,mult_lo} + (mult_a << 1);
269 3: {mult_hi,mult_lo} <= {mult_hi,mult_lo} + mult_3a;
270 endcase
271 mult_a <= mult_a << 2;
272 mult_3a <= mult_3a << 2;
273 mult_b <= mult_b >> 2;
// Termination: these later assignments override the accumulate above.
274 if (mult_b == 0) begin
275 if (mult_neg) begin
276 {mult_hi,mult_lo} <= 64'd0 - {mult_hi,mult_lo};
277 mult_neg <= 0;
278 end else
279 mult_busy <= 0;
280 $display("MULT[U] = %x", mult_a + {mult_hi,mult_lo});
283 `else
284 // Radix-2 Multiplication Machine (this is not the best way to do this)
285 if (mult_busy) begin
286 $display("MULT[U] %x * %x + %x", mult_a, mult_b, {mult_hi,mult_lo});
// One multiplier bit per step: add the multiplicand when the bit is set.
288 if (mult_b[0])
289 {mult_hi,mult_lo} <= {mult_hi,mult_lo} + mult_a;
290 mult_a <= mult_a << 1;
291 mult_b <= mult_b >> 1;
// Termination: these later assignments override the accumulate above.
292 if (mult_b == 0) begin
293 if (mult_neg) begin
294 {mult_hi,mult_lo} <= 64'd0 - {mult_hi,mult_lo};
295 mult_neg <= 0;
296 end else
297 mult_busy <= 0;
298 $display("MULT[U] = %x", mult_a + {mult_hi,mult_lo});
301 `endif
// Iterative divider, one quotient bit per clock.
// NOTE(review): the update of div_hi / div_lo / diff below uses BLOCKING
// assignments (=), unlike the rest of this always block, so each statement
// sees the result of the previous one within the same clock edge.
303 // XXX the use of non-blocking assignments here is intentional
304 // (easier to read), but it has the unfortunate consequence of
305 // making the final negation more expensive than it should have
306 // been. Rework this.
// The step runs whenever bit 6 of div_n is clear, independent of div_busy.
// DIV/DIVU load div_n with 31; it counts down through 0 and the following
// decrement wraps to 7'h7f, which sets bit 6 and stops the iteration.
307 if (!div_n[6]) begin
// Shift the remainder:quotient pair left, then try to subtract the divisor
// from the remainder.  diff is 33 bits wide; bit 32 clear means it fitted.
308 {div_hi,div_lo} = {div_hi,div_lo} << 1;
309 diff = div_hi - divisor;
310 if (!diff[32]) begin
311 div_hi = diff[31:0];
312 div_lo[0] = 1;
314 div_n <= div_n - 1'd1;
// Iteration finished and a division is pending: publish the result in the
// HI/LO registers, applying the signs recorded when the division started.
315 end else if (div_busy) begin
316 div_busy <= 0;
317 mult_lo <= div_neg_res ? -div_lo : div_lo; // result
318 mult_hi <= div_neg_rem ? -div_hi : div_hi; // remainder
319 $display("DIV = hi %d lo %d",
320 div_neg_rem ? -div_hi : div_hi,
321 div_neg_res ? -div_lo : div_lo);
// Instruction-specific actions, decoded from the decode-stage opcode so that
// they land on the same clock edge that moves the instruction into x_*.
// Every action is qualified by d_valid.
324 case (d_opcode)
325 `REG:
326 case (d_fn)
// Register-indirect jumps: restart at the address held in operand 1
// (overriding the default x_restart_pc <= d_target).
327 `JALR:
328 if (d_valid) begin
329 $display("JAL: d_npc = %x", d_npc);
330 x_restart <= 1;
331 x_restart_pc <= d_op1_val;
332 branch_event <= 1;
334 `JR:
335 if (d_valid) begin
336 x_restart <= 1;
337 x_restart_pc <= d_op1_val;
338 branch_event <= 1;
341 // XXX BUG See the comment above with mult_lo and mult_hi
// HI/LO accesses while the multiplier or divider is still running are
// squashed and retried: the instruction is invalidated (x_valid <= 0), the
// decode stage is flushed and fetch restarts at this instruction.
// {x_has_delay_slot,2'd0} is 4 when the instruction currently in this stage
// has a delay slot (0 otherwise), so in that case the restart address is
// d_pc - 4 -- presumably so that the branch is re-executed together with
// its delay slot.
// The stall is attributed to the multiplier if it is busy, else the divider.
342 `MFHI:
343 if ((mult_busy | div_busy) && d_valid) begin
344 x_flush_D <= 1;
345 x_valid <= 0;
346 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
347 x_restart <= 1;
348 if (mult_busy)
349 perf_mult_hazard <= perf_mult_hazard + 1;
350 else
351 perf_div_hazard <= perf_div_hazard + 1;
353 `MFLO:
354 if ((mult_busy | div_busy) && d_valid) begin
355 x_flush_D <= 1;
356 x_valid <= 0;
357 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
358 x_restart <= 1;
359 if (mult_busy)
360 perf_mult_hazard <= perf_mult_hazard + 1;
361 else
362 perf_div_hazard <= perf_div_hazard + 1;
// Writes to HI/LO use the same retry scheme; when the units are idle the
// register is simply loaded from operand 1.
364 `MTHI:
365 if (d_valid) begin
366 if (mult_busy | div_busy) begin
367 x_flush_D <= 1;
368 x_valid <= 0;
369 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
370 x_restart <= 1;
371 if (mult_busy)
372 perf_mult_hazard <= perf_mult_hazard + 1;
373 else
374 perf_div_hazard <= perf_div_hazard + 1;
375 end else
376 mult_hi <= d_op1_val;
378 `MTLO:
379 if (d_valid) begin
380 if (mult_busy | div_busy) begin
381 x_flush_D <= 1;
382 x_valid <= 0;
383 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
384 x_restart <= 1;
385 if (mult_busy)
386 perf_mult_hazard <= perf_mult_hazard + 1;
387 else
388 perf_div_hazard <= perf_div_hazard + 1;
389 end else
390 mult_lo <= d_op1_val;
// Signed divide.  If the multiplier or divider is busy the instruction is
// squashed and retried (x_valid <= 0, flush decode, restart at d_pc, or at
// d_pc - 4 when x_has_delay_slot is set).  Otherwise the divider is started
// on the magnitudes of the operands and the result signs are recorded for
// the write-back step.
393 `DIV:
394 if (d_valid)
395 if (mult_busy | div_busy) begin
396 x_flush_D <= 1;
397 x_valid <= 0;
398 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
399 x_restart <= 1;
400 if (mult_busy)
401 perf_mult_hazard <= perf_mult_hazard + 1;
402 else
403 perf_div_hazard <= perf_div_hazard + 1;
404 end else begin
405 div_busy <= 1;
406 div_hi <= 0;
// Work on absolute values; the quotient is negative when the operand signs
// differ.
407 div_lo <= d_op1_val[31] ? -d_op1_val : d_op1_val;
408 divisor <= d_op2_val[31] ? -d_op2_val : d_op2_val;
409 div_neg_res <= d_op1_val[31] ^ d_op2_val[31];
411 // res = a/b, rem = a - b*(a/b)
412 // thus the rem sign follows a only
414 div_neg_rem <= d_op1_val[31];
// Counter start value: the divider steps for div_n = 31 down to 0.
415 div_n <= 31;
416 $display("%05dc EX: %d / %d", $time, d_op1_val, d_op2_val);
// Unsigned divide: same busy/retry handling, operands used as they are and
// no sign correction on write-back.
419 `DIVU:
420 if (d_valid)
421 if (mult_busy | div_busy) begin
422 x_flush_D <= 1;
423 x_valid <= 0;
424 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
425 x_restart <= 1;
426 if (mult_busy)
427 perf_mult_hazard <= perf_mult_hazard + 1;
428 else
429 perf_div_hazard <= perf_div_hazard + 1;
430 end else begin
431 div_busy <= 1;
432 div_hi <= 0;
433 div_lo <= d_op1_val;
434 divisor <= d_op2_val;
435 div_neg_res <= 0;
436 div_neg_rem <= 0;
437 div_n <= 31;
438 $display("%05dc EX: %d /U %d", $time, d_op1_val, d_op2_val);
// Unsigned multiply.  If the multiplier or divider is busy the instruction
// is squashed and retried (same scheme as the other HI/LO instructions).
// Otherwise the accumulator is cleared and the multiplier is started.
441 `MULTU:
442 if (d_valid)
443 if (mult_busy | div_busy) begin
444 x_flush_D <= 1;
445 x_valid <= 0;
446 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
447 x_restart <= 1;
448 if (mult_busy)
449 perf_mult_hazard <= perf_mult_hazard + 1;
450 else
451 perf_div_hazard <= perf_div_hazard + 1;
452 end else begin
453 $display("MULTU %x * %x", d_op1_val, d_op2_val);
454 mult_busy <= 1;
455 mult_hi <= 0;
456 mult_lo <= 0;
// mult_a and mult_3a are 64 bits wide, so the 32-bit operand is
// zero-extended here.
457 mult_a <= d_op1_val;
458 mult_b <= d_op2_val;
459 mult_3a <= 3 * d_op1_val;
460 mult_neg <= 0;
462 $display("%05dc EX: %dU * %dU", $time, d_op1_val, d_op2_val);
// Signed multiply: multiply the magnitudes and remember in mult_neg whether
// the product has to be negated at the end (operand signs differ).
465 `MULT:
466 if (d_valid)
467 if (mult_busy | div_busy) begin
468 x_flush_D <= 1;
469 x_valid <= 0;
470 x_restart_pc <= d_pc - {x_has_delay_slot,2'd0};
471 x_restart <= 1;
472 if (mult_busy)
473 perf_mult_hazard <= perf_mult_hazard + 1;
474 else
475 perf_div_hazard <= perf_div_hazard + 1;
476 end else begin
477 $display("MULT %x * %x", d_op1_val, d_op2_val);
478 mult_busy <= 1;
479 mult_hi <= 0;
480 mult_lo <= 0;
481 mult_neg <= d_op1_val[31] ^ d_op2_val[31];
482 mult_a <= d_op1_val[31] ? {32'd0,32'd0 - d_op1_val} : d_op1_val;
483 mult_3a <= d_op1_val[31] ? 3 * {32'd0,32'd0-d_op1_val} : 3 * d_op1_val;
484 mult_b <= d_op2_val[31] ? 32'd0 - d_op2_val : d_op2_val;
485 $display("%05dc EX: %d * %d", $time, d_op1_val, d_op2_val);
// BREAK: flush decode and restart at the fixed address 'hBFC00380, set the
// EXL bit in cp0_status, write 9 << 2 into cp0_cause and save d_pc in
// cp0_epc.
// NOTE(review): presumably 9 is the breakpoint exception code placed in the
// cause register's code field, and 'hBFC00380 the exception vector --
// confirm against the architecture manual.
488 `BREAK:
489 if (d_valid) begin
490 x_restart <= 1;
491 x_restart_pc <= 'hBFC00380;
492 x_flush_D <= 1;
493 cp0_status[`CP0_STATUS_EXL] <= 1;
494 //cp0_cause.exc_code = EXC_BP;
495 cp0_cause <= 9 << 2;
496 // cp0_cause.bd = branch_delay_slot; // XXX DELAY SLOT HANDLING!
497 cp0_epc <= d_pc; // XXX DELAY SLOT HANDLING!
// Closes the "case (d_fn)" of the `REG opcode.
499 endcase
// REGIMM group.  d_rt selects the operation: SYNCI is handled specially,
// everything else is treated as a compare-with-zero branch.
500 `REGIMM: // BLTZ, BGEZ, BLTZAL, BGEZAL
501 if (d_valid)
502 if (d_rt[4:0] == `SYNCI) begin
// SYNCI: flush decode, restart fetch and hand the effective address
// (operand 1 + immediate) out through x_synci / x_synci_a.
// NOTE(review): the restart address is chosen with x_restart / restart_pc,
// whereas the $display below prints d_restart ? d_restart_pc : d_npc --
// confirm which pair is intended.
503 x_restart <= 1;
504 x_restart_pc <= x_restart ? restart_pc : d_npc;
505 x_flush_D <= 1;
506 $display("synci restart at %x (d_restart = %d, d_restart_pc = %x, d_npc = %x)",
507 d_restart ? d_restart_pc : d_npc,
508 d_restart, d_restart_pc, d_npc);
509 x_synci <= 1;
510 x_synci_a <= d_op1_val + d_simm;
511 end else begin
// Branch is taken when bit 0 of d_rt differs from the sign bit of operand 1.
// NOTE(review): branch_event is set unconditionally here, while BEQ / BNE /
// BLEZ / BGTZ below only count taken branches -- confirm this is intended.
512 x_restart <= d_rt[0] ^ d_op1_val[31];
513 branch_event <= 1;
// Unconditional jumps: the target is the default x_restart_pc <= d_target.
515 `JAL:
516 if (d_valid) begin
517 x_restart <= 1;
518 branch_event <= 1;
// NOTE(review): unlike JAL, J does not set branch_event.
520 `J: if (d_valid) x_restart <= 1;
// Conditional branches: restart (to d_target) only when the condition
// holds, and count the event only in that case.
521 `BEQ:
522 if (d_valid) begin
523 x_restart <= d_ops_eq;
524 branch_event <= d_ops_eq;
525 $display("%05d BEQ %8x == %8x (%1d)", $time,
526 d_op1_val, d_op2_val, d_ops_eq);
528 `BNE:
529 if (d_valid) begin
530 x_restart <= ~d_ops_eq;
531 branch_event <= ~d_ops_eq;
532 $display("%05d BNE %8x == %8x (%1d)", $time,
533 d_op1_val, d_op2_val, d_ops_eq);
// Operand 1 <= 0 as a signed value: negative or exactly zero.
536 `BLEZ:
537 if (d_valid) begin
538 x_restart <= d_op1_val[31] || d_op1_val == 0;
539 branch_event <= (d_op1_val[31] || d_op1_val == 0);
// Operand 1 > 0 as a signed value: non-negative and non-zero.
542 `BGTZ:
543 // XXX Share logic
544 if (d_valid) begin
545 x_restart <= !d_op1_val[31] && d_op1_val != 0;
546 branch_event <= (!d_op1_val[31] && d_op1_val != 0);
// CP2 (performance counters): only diagnostics here; the value read is
// produced by the x_special path.  The "&& 0" permanently disables the
// simulation self-test.
549 `CP2: begin
550 `ifdef SIMULATE_MAIN
551 if (d_valid && !d_rs[4] && 0) begin
552 if (mult_lo == 32'h87654321)
553 $display("TEST SUCCEEDED!");
554 else
555 $display("%05d TEST FAILED WITH %x (%1d:%8x:%8x)", $time, mult_lo,
556 d_valid, d_pc, d_instr);
557 $finish; // XXX do something more interesting for real hw.
558 end else
559 `endif
560 if (~d_rs[4])
561 if (d_rs[2])
562 $display("MTCP2 r%d <- %x (ignored)", d_rd, d_op2_val);
563 else
564 $display("MFCP2 r%d", d_rd);
// NOTE(review): the next three lines read like the body of a block comment
// whose opening and closing delimiter lines are missing from this copy.
568 * XXX Comment out the CP0 handling for now. I want to handle
569 * that in a way that doesn't affect the performance of the
570 * regular instructions
// Everything up to the matching `endif is compiled only when LATER is
// defined.
572 `ifdef LATER
573 `CP0: if (d_valid) begin
574 /* Two possible formats */
575 if (d_rs[4]) begin
576 if (d_fn == `C0_ERET) begin
577 /* Exception Return */
// Restart at cp0_errorepc when the ERL status bit is set, otherwise at
// cp0_epc, and clear the corresponding status bit (ERL or EXL).
578 x_restart <= 1;
579 x_flush_D <= 1; // XXX BUG? Check that ERET doesn't have a delay slot!
580 if (cp0_status[`CP0_STATUS_ERL]) begin
581 x_restart_pc <= cp0_errorepc;
582 cp0_status[`CP0_STATUS_ERL] <= 0;
583 `ifdef SIMULATE_MAIN
584 $display("ERET ERROREPC %x", cp0_errorepc);
585 `endif
586 end else begin
587 x_restart_pc <= cp0_epc;
588 cp0_status[`CP0_STATUS_EXL] <= 0;
589 `ifdef SIMULATE_MAIN
590 $display("ERET EPC %x", cp0_epc);
591 `endif
// Any other function code in this format is only reported in simulation.
594 `ifdef SIMULATE_MAIN
595 else
596 /* C1 format */
597 $display("Unhandled CP0 command %s\n",
598 d_fn == `C0_TLBR ? "tlbr" :
599 d_fn == `C0_TLBWI ? "tlbwi" :
600 d_fn == `C0_TLBWR ? "tlbwr" :
601 d_fn == `C0_TLBP ? "tlbp" :
602 d_fn == `C0_ERET ? "eret" :
603 d_fn == `C0_DERET ? "deret" :
604 d_fn == `C0_WAIT ? "wait" :
605 "???");
606 `endif
// Move to / from CP0: bit 2 of d_rs set selects the "move to" direction.
607 end else begin
608 `ifdef SIMULATE_MAIN
609 if (d_rs[2])
610 $display("MTCP0 r%d <- %x", d_rd, d_op2_val);
611 else
612 $display("MFCP0 r%d", d_rd);
614 if (d_fn != 0) $display("d_fn == %x", d_fn);
615 `endif
// Move to CP0: write operand 2 into the register selected by d_rd.
616 if (d_rs[2]) begin
617 x_wbr <= 0; // XXX BUG?
618 // cp0regs[i.r.rd] = t;
619 case (d_rd)
620 `CP0_STATUS:
621 begin
622 cp0_status <= d_op2_val;
623 $display("STATUS <= %x", d_op2_val);
625 `CP0_CAUSE:
626 begin
627 cp0_cause <= d_op2_val;
628 $display("CAUSE <= %x", d_op2_val);
630 `CP0_EPC:
631 begin
632 cp0_epc <= d_op2_val;
633 $display("EPC <= %x", d_op2_val);
635 `CP0_ERROREPC:
636 begin
637 cp0_errorepc <= d_op2_val;
638 $display("ERROREPC <= %x", d_op2_val);
// NOTE(review): the following lines up to "break;" are C-style pseudo-code,
// not Verilog; they look like the body of a block comment whose delimiter
// lines are missing from this copy.
641 cp0_status.raw = t;
642 cp0_status.res1 = cp0_status.res2 = 0;
643 printf("Operating mode %s\n",
644 cp0_status.ksu == 0 ? "kernel" :
645 cp0_status.ksu == 1 ? "supervisor" :
646 cp0_status.ksu == 2 ? "user" : "??");
647 printf("Exception level %d\n", cp0_status.exl);
648 printf("Error level %d\n", cp0_status.erl);
649 printf("Interrupts %sabled\n", cp0_status.ie ? "en" : "dis");
650 break;
652 default:
653 $display("Setting an unknown CP0 register %d", d_rd);
654 //case CP0_CAUSE:
655 endcase
659 `endif
// Closes "case (d_opcode)".
660 endcase
// Count load-use hazards reported by the decode stage.  This is not
// qualified by d_valid here.
662 if (d_load_use_hazard)
663 perf_load_use_hazard <= perf_load_use_hazard + 1;
// NOTE(review): the "end" closing the always block is not present in this
// copy of the file.
665 endmodule