CORE: Restart load-use hazards from stage DE
[yari.git] / rtl / yari-core / yari.v
blob78c84948253a34a4f9af8619b273b18b0814f47c
1 // -----------------------------------------------------------------------
2 //
3 // Copyright 2008 Tommy Thorn - All Rights Reserved
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
13 `timescale 1ns/10ps
14 `include "asm.v"
15 `include "../soclib/pipeconnect.h"
18 `ifdef SIMULATE_MAIN
19 /* Conditional compilation removes a lot of annoying warnings during synthesis. */
// stallcheck: simulation-only checker. On every rising clock edge it
// compares the watched value with the value captured on the previous edge
// and prints a violation message when the two differ and the stall input
// captured on that previous edge was asserted.
//
// Parameters:
//   which - string label for the instance (declared, not used in the message)
//   id    - number printed in the violation message
//   w     - width in bits of the watched value
// Ports:
//   clock - sampling clock
//   stall - stall indication, captured on every rising edge
//   a     - watched value, captured on every rising edge
module stallcheck #(parameter which = "?",
                    parameter id    = -1,
                    parameter w     = 1)
                   (input wire         clock,
                    input wire         stall,
                    input wire [w-1:0] a);

   // Values captured on the previous rising edge.
   reg         prev_stall;
   reg [w-1:0] prev_a;

   always @(posedge clock) begin
      // Non-blocking updates: the comparison below still reads the values
      // captured one edge earlier.
      prev_a     <= a;
      prev_stall <= stall;

      if (prev_stall && (a != prev_a)) begin
         $display("** %05d STALLCHECKER violation %d: was %x != now %x (%d)",
                  $time, id, prev_a, a, prev_stall);
      end
   end
endmodule
39 `endif
// yari: top level of the core. Instantiates the pipeline stage modules
// stage_I, stage_D, stage_X and stage_M, merges their restart and flush
// requests, and arbitrates the instruction-side and data-side memory ports
// onto the single external mem_* port.
// NOTE(review): some punctuation-only lines (for example the ");" that
// closes each port or connection list) are not visible in this listing;
// confirm against the original file before compiling.
41 module yari(input wire clock // K5 PLL1 input clock (50 MHz)
// The inverse of rst is shifted into the "initialized" register below.
42 ,input wire rst
44 // Memory access
// Single external memory port shared by both sides. mem_id carries ID_DC or
// ID_IC for the request, and mem_readdataid is matched against the same IDs
// to route returned read data.
45 ,input mem_waitrequest
46 ,output [1:0] mem_id
47 ,output [29:0] mem_address
48 ,output mem_read
49 ,output mem_write
50 ,output [31:0] mem_writedata
51 ,output [3:0] mem_writedatamask
52 ,input [31:0] mem_readdata
53 ,input [1:0] mem_readdataid
// Peripheral request/response bundles (widths come from the REQ/RES macros);
// both are connected directly to the stage_M instance.
55 ,output wire `REQ peripherals_req
56 ,input wire `RES peripherals_res
// When nonzero, the SIMULATE_MAIN block at the end of the module prints its
// trace on every clock.
59 parameter debug = 1;
// Outputs of stage_I (see the stI instance).
61 wire i1_valid, i2_valid;
62 wire [31:0] i1_pc, i2_pc;
64 wire i_valid;
65 wire [31:0] i_instr;
66 wire [31:0] i_pc;
67 wire [31:0] i_npc;
// Instruction-side memory port between stage_I and the arbiter below.
69 wire imem_waitrequest;
70 wire [29:0] imem_address;
71 wire imem_read;
72 wire [31:0] imem_readdata;
73 wire imem_readdatavalid;
// Outputs of stage_D (see the stD instance).
75 wire d_valid;
76 wire [31:0] d_instr;
77 wire [31:0] d_pc;
78 wire [31:0] d_npc;
79 wire [ 5:0] d_opcode;
80 wire [ 5:0] d_fn;
81 wire [ 4:0] d_rd;
82 wire [ 5:0] d_rs;
83 wire [ 5:0] d_rt;
84 wire [ 4:0] d_sa;
85 wire [31:0] d_target;
86 wire [ 5:0] d_wbr;
87 wire d_has_delay_slot;
88 wire [31:0] d_op1_val;
89 wire [31:0] d_op2_val;
90 wire [31:0] d_rt_val;
91 wire [31:0] d_simm;
// Restart/flush requests and the load-use hazard signal connected to stage_D.
93 wire d_restart;
94 wire [31:0] d_restart_pc;
95 wire d_flush_X;
96 wire d_load_use_hazard;
// Results of stage_X (see the stX instance).
98 wire x_valid;
99 wire [31:0] x_instr;
100 wire x_is_delay_slot;
101 wire [31:0] x_pc;
102 wire [ 5:0] x_opcode;
103 wire [31:0] x_op1_val;
104 wire [ 5:0] x_rt;
105 wire [31:0] x_rt_val;
106 wire [ 5:0] x_wbr;
107 wire [31:0] x_res;
// synci request from stage_X, passed on to stage_I.
109 wire x_synci;
110 wire [31:0] x_synci_a;
// Restart/flush requests from stage_X.
112 wire x_restart;
113 wire [31:0] x_restart_pc;
114 wire x_flush_D;
// Signals connected to the m_* ports of stage_M.
116 wire m_valid;
117 wire [31:0] m_instr;
118 wire [31:0] m_pc;
119 wire [ 5:0] m_wbr;
120 wire [31:0] m_res;
122 wire m_restart;
123 wire [31:0] m_restart_pc;
// Data-side memory port between stage_M and the arbiter below.
125 wire dmem_waitrequest;
126 wire [29:0] dmem_address;
127 wire dmem_read;
128 wire dmem_write;
129 wire [31:0] dmem_writedata;
130 wire [ 3:0] dmem_writedatamask;
131 wire [31:0] dmem_readdata;
132 wire dmem_readdatavalid;
// perf_* signals wired between the stage instances (presumably performance
// counters; confirm in the stage modules).
134 wire [31:0] perf_branch_hazard;
135 wire [31:0] perf_dcache_misses;
136 wire [31:0] perf_delay_slot_bubble;
137 wire [31:0] perf_div_hazard;
138 wire [31:0] perf_icache_misses;
139 wire [31:0] perf_io_load_busy;
140 wire [31:0] perf_io_store_busy;
141 wire [31:0] perf_load_hit_store_hazard;
142 wire [31:0] perf_load_use_hazard;
143 wire [31:0] perf_mult_hazard;
144 wire [47:0] perf_retired_inst;
145 wire [31:0] perf_sb_full;
// Shift register: on every clock ~rst is shifted in at bit 0. Bit 8 gates the
// kill input of stage_I, and bits 7 and 8 together form the boot signal.
148 reg [9:0] initialized = 0;
149 always @(posedge clock) initialized <= {initialized[8:0],~rst};
151 // XXX It would be nice to make this a bit more general and merge
152 // it with the interrupt mechanism (still to come)
// boot is high while initialized[7] is set and initialized[8] is not yet set.
153 wire boot = initialized[7] & ~initialized[8];
// restart is the OR of the three per-stage restart requests.
155 wire restart = d_restart | x_restart | m_restart;
// restart_pc priority: d_restart_pc first, then m_restart_pc, otherwise
// x_restart_pc.
156 wire [31:0] restart_pc = (d_restart ? d_restart_pc :
157 m_restart ? m_restart_pc :
158 /*********/ x_restart_pc);
// Flush controls: flush_I on any restart; flush_D on m_restart or x_flush_D;
// flush_X on m_restart or d_flush_X.
159 wire flush_I = restart;
160 wire flush_D = m_restart | x_flush_D;
161 wire flush_X = m_restart | d_flush_X;
// Instruction fetch stage. kill stays asserted until initialized[8] is set;
// restart/restart_pc are the merged restart request and synci/synci_a come
// from stage_X.
163 stage_I stI(.clock(clock)
164 ,.kill(~initialized[8])
165 ,.restart(restart)
166 ,.restart_pc(restart_pc)
167 ,.synci(x_synci)
168 ,.synci_a(x_synci_a)
170 ,.imem_waitrequest(imem_waitrequest)
171 ,.imem_address(imem_address)
172 ,.imem_read(imem_read)
173 ,.imem_readdata(imem_readdata)
174 ,.imem_readdatavalid(imem_readdatavalid)
176 // Outputs
177 ,.i1_valid(i1_valid)
178 ,.i1_pc(i1_pc)
179 ,.i2_valid(i2_valid)
180 ,.i2_pc(i2_pc)
182 ,.i_valid(i_valid)
183 ,.i_instr(i_instr)
184 ,.i_pc(i_pc)
185 ,.i_npc(i_npc)
186 ,.perf_icache_misses(perf_icache_misses));
// Decode stage. i_valid is masked by flush_I and x_valid by flush_X before
// they reach it; the x_* and m_* write-back register/result pairs are fed
// back from the later stages.
188 stage_D stD(.clock(clock),
189 .i_valid(i_valid & ~flush_I),
190 .i_instr(i_instr),
191 .i_pc(i_pc),
192 .i_npc(i_npc),
194 .x_valid(x_valid & ~flush_X),
195 .x_wbr(x_wbr),
196 .x_res(x_res),
198 .m_valid(m_valid),
199 .m_pc(m_pc), // XXX for debugging only
200 .m_wbr(m_wbr),
201 .m_res(m_res),
203 // Outputs, mostly derived from d_instr
204 .d_valid(d_valid),
205 .d_instr(d_instr),
206 .d_pc(d_pc),
207 .d_npc(d_npc),
208 .d_opcode(d_opcode),
209 .d_fn(d_fn),
210 .d_rd(d_rd),
211 .d_rs(d_rs),
212 .d_rt(d_rt),
213 .d_sa(d_sa),
214 .d_target(d_target),
215 .d_wbr(d_wbr),
216 .d_has_delay_slot(d_has_delay_slot),
218 // Register lookups
219 .d_op1_val(d_op1_val),
220 .d_op2_val(d_op2_val),
221 .d_rt_val(d_rt_val),
222 .d_simm(d_simm),
223 .d_restart(d_restart),
224 .d_restart_pc(d_restart_pc),
225 .d_flush_X(d_flush_X),
226 .d_load_use_hazard(d_load_use_hazard),
228 .flush_D(flush_D),
229 .perf_delay_slot_bubble(perf_delay_slot_bubble),
230 .perf_retired_inst(perf_retired_inst)
// Execute stage. d_valid is masked by flush_D before it reaches this stage;
// the merged restart/restart_pc and the stage_D restart signals are passed
// in as well.
233 stage_X stX(.clock(clock),
235 .restart(restart),
236 .restart_pc(restart_pc),
238 .d_valid(d_valid & ~flush_D),
239 .d_instr(d_instr),
240 .d_pc(d_pc),
241 .d_npc(d_npc),
242 .d_opcode(d_opcode),
243 .d_fn(d_fn),
244 .d_rd(d_rd),
245 .d_rs(d_rs),
246 .d_rt(d_rt),
247 .d_sa(d_sa),
248 .d_target(d_target),
249 .d_wbr(d_wbr),
250 .d_has_delay_slot(d_has_delay_slot),
252 .d_op1_val(d_op1_val),
253 .d_op2_val(d_op2_val),
254 .d_rt_val(d_rt_val),
255 .d_simm(d_simm),
256 .d_restart(d_restart),
257 .d_restart_pc(d_restart_pc),
258 .d_load_use_hazard(d_load_use_hazard),
260 .m_valid(m_valid),
261 .m_wbr(m_wbr),
263 // Results from this stage
264 .x_valid(x_valid),
265 .x_instr(x_instr), // XXX for debugging only
266 .x_is_delay_slot(x_is_delay_slot),
267 .x_pc(x_pc),
268 .x_opcode(x_opcode),
269 .x_op1_val(x_op1_val),
270 .x_rt(x_rt),
271 .x_rt_val(x_rt_val),
272 .x_wbr(x_wbr),
273 .x_res(x_res),
275 .x_synci(x_synci),
276 .x_synci_a(x_synci_a),
278 .x_restart(x_restart),
279 .x_restart_pc(x_restart_pc),
280 .x_flush_D(x_flush_D),
282 .perf_branch_hazard(perf_branch_hazard),
283 .perf_dcache_misses(perf_dcache_misses),
284 .perf_delay_slot_bubble(perf_delay_slot_bubble),
285 .perf_div_hazard(perf_div_hazard),
286 .perf_icache_misses(perf_icache_misses),
287 .perf_io_load_busy(perf_io_load_busy),
288 .perf_io_store_busy(perf_io_store_busy),
289 .perf_load_hit_store_hazard(perf_load_hit_store_hazard),
290 .perf_load_use_hazard(perf_load_use_hazard),
291 .perf_mult_hazard(perf_mult_hazard),
292 .perf_retired_inst(perf_retired_inst),
293 .perf_sb_full(perf_sb_full)
// Memory stage. Receives boot and the boot_pc constant 'hBFC00000, sees
// x_valid masked by flush_X, and connects to the data-side memory port and
// the peripherals bundles.
296 stage_M stM(.clock(clock),
298 .boot(boot),
299 .boot_pc('hBFC00000),
301 .d_simm(d_simm),
302 .d_op1_val(d_op1_val),
304 .x_valid(x_valid & ~flush_X),
305 .x_instr(x_instr),
306 .x_is_delay_slot(x_is_delay_slot),
307 .x_pc(x_pc),
308 .x_opcode(x_opcode),
309 .x_op1_val(x_op1_val),
310 .x_rt(x_rt),
311 .x_rt_val(x_rt_val),
312 .x_wbr(x_wbr),
313 .x_res(x_res)
315 ,.dmem_waitrequest(dmem_waitrequest)
316 ,.dmem_address(dmem_address)
317 ,.dmem_read(dmem_read)
318 ,.dmem_write(dmem_write)
319 ,.dmem_writedata(dmem_writedata)
320 ,.dmem_writedatamask(dmem_writedatamask)
321 ,.dmem_readdata(dmem_readdata)
322 ,.dmem_readdatavalid(dmem_readdatavalid)
// NOTE(review): no comma is visible between the connection above and the one
// below in this listing; check the original file.
325 .peripherals_req(peripherals_req),
326 .peripherals_res(peripherals_res),
328 .m_valid(m_valid),
329 .m_instr(m_instr),
330 .m_pc(m_pc),
331 .m_wbr(m_wbr),
332 .m_res(m_res),
334 .m_restart(m_restart),
335 .m_restart_pc(m_restart_pc),
337 .perf_dcache_misses(perf_dcache_misses),
338 .perf_io_load_busy(perf_io_load_busy),
339 .perf_io_store_busy(perf_io_store_busy),
340 .perf_load_hit_store_hazard(perf_load_hit_store_hazard),
341 .perf_sb_full(perf_sb_full)
346 * Memory arbitration. "Hopefully so simple that I can do it all
347 * right here".
348 * Static priority - bad idea in general, but ok here.
349 * Key decision: dmem port gets priority. Why? Imagine it was
350 * the other way around and both miss in their caches. IF will
351 * keep emitting bubbles while filling, but ME will repeatedly
352 * restart the load and flush the pipe. At least with ME filling
353 * first, we get to execute the few instructions already in the
354 * pipe while waiting for IF. One of them could be a MUL!
// Request IDs: used to tag outgoing requests (mem_id) and to route returned
// read data (mem_readdataid).
356 parameter ID_DC = 2'd1;
357 parameter ID_IC = 2'd2;
// High whenever the data side is reading or writing.
358 wire dmem_strobe = dmem_read | dmem_write;
// The data side owns the external port whenever dmem_strobe is high;
// otherwise the instruction side supplies the id, address and read strobe.
360 assign mem_id = dmem_strobe ? ID_DC : ID_IC;
361 assign mem_address = dmem_strobe ? dmem_address : imem_address;
362 assign mem_read = dmem_strobe ? dmem_read : imem_read;
// Write strobe, write data and write mask only ever come from the data side.
363 assign mem_write = dmem_write;
364 assign mem_writedata = dmem_writedata;
365 assign mem_writedatamask = dmem_writedatamask;
// The data side sees the external waitrequest unchanged; returned read data
// is flagged valid for it when the returned ID equals ID_DC.
367 assign dmem_waitrequest = mem_waitrequest;
368 assign dmem_readdata = mem_readdata;
369 assign dmem_readdatavalid = mem_readdataid == ID_DC;
// The instruction side is additionally held off for as long as the data side
// is using the port; its read data is flagged valid when the ID is ID_IC.
371 assign imem_waitrequest = mem_waitrequest | dmem_strobe;
372 assign imem_readdata = mem_readdata;
373 assign imem_readdatavalid = mem_readdataid == ID_IC;
// Simulation-only pipeline trace, active when debug is nonzero.
375 `ifdef SIMULATE_MAIN
376 always @(posedge clock) if (debug) begin
// Report a restart, its target pc and which source requested it.
// NOTE(review): d_restart also feeds restart, but no branch below reports
// it; confirm whether a message for stage DE is intended.
377 if (restart) begin
378 $display("%05d restart pipe at %x", $time, restart_pc);
379 if (boot)
380 $display("%05d boot vector", $time);
381 else if (m_restart)
382 $display("%05d from stage ME", $time);
383 else if (x_restart)
384 $display("%05d from stage EX", $time);
// Trace line: the pc of each stage is printed unconditionally, while the
// value fields of an invalid or flushed stage are replaced by Z.
// NOTE(review): the format string has 17 conversions but only 16 arguments
// are visible in this listing; verify against the original file.
387 $display(
388 "%05dz %x %x/0 I %8x %8x D %8x:%8x X %8x:%8x:%8x>%2x M %8x:%8x>%2x W %8x:%8x>%2x",
390 $time,
393 {flush_X,flush_D,flush_I},
395 // IF
396 i1_valid ? i1_pc : 'hZ,
397 i2_valid ? i2_pc : 'hZ,
399 // DE
400 i_pc, i_valid & ~flush_I ? i_instr : 'hZ,
402 // EX
403 d_pc, d_valid & ~flush_D ? d_op1_val : 'hZ,
404 d_valid & ~flush_D ? d_op2_val : 'hZ,
405 d_valid & ~flush_D ? d_wbr : 8'hZ,
407 // ME
408 x_pc, x_valid & ~flush_X ? x_res : 'hZ,
409 x_valid & ~flush_X ? x_wbr : 8'hZ,
411 // WB
412 m_pc, m_valid ? m_res : 'hZ,
413 m_valid ? m_wbr : 8'hZ);
415 `endif
416 endmodule