CORE: suppress some warnings
[yari.git] / shared / rtl / yari-core / stage_I.v
blob857ef3e3bd74704b9ffd5e5b88adf5e31c2541da
1 // -----------------------------------------------------------------------
2 //
3 // Copyright 2004,2007,2008 Tommy Thorn - All Rights Reserved
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
13 // TODO: For extra credits, make the stage start fetching from the
14 // missed instruction and emit instructions as soon as they arrive,
15 // rather than waiting for the whole line to be filled. Watch out for
16 // issues as the filling line wraps around and watch out for misses
17 // that happen while already filling. The value of this goes down for
18 // shorter cache lines and longer memory latencies.
22 * The instruction fetch/cache stage generates a sequential stream of
23 * instructions until kill is asserted, leaving the pipe without valid
24 * instructions until the next restart. Restart implies kill.
26 * When a pc misses in the cache, the stage emits invalid instructions
27 * until the missed line is filled. While this happens, both kill and
28 * restart are respected (causing the missed instruction not to be
29 * emitted).
34 Calculates the fetch address
35 Looks up tags and instructions and does a late select, leaving
36 the result in {i_valid,i_instr}
39 `timescale 1ns/10ps
40 `include "../soclib/pipeconnect.h"
// Instruction fetch / instruction cache stage.
//
// Port summary (as used by the logic in this module):
//   clock         - every register in this module is updated on its rising edge.
//   kill          - clears i1_valid and i2_valid, unless restart is asserted or
//                   a synci is being handled.
//   restart       - makes restart_pc the fetch address instead of i_npc.
//   synci/synci_a - request to invalidate whichever way currently hits on
//                   address synci_a; the request is latched until serviced.
//   imem_*        - read port used to fetch a missed line from memory.
//   i_valid/i_pc/i_instr - the fetched instruction handed to the next stage.
//   perf_icache_misses   - incremented once each time a line fill is started.
42 module stage_I(input wire clock
43 ,input wire kill // Empty the pipeline
44 // until next restart
45 ,input wire restart // Target is next PC.
46 ,input wire [31:0] restart_pc
48 ,input wire synci
49 ,input wire [31:0] synci_a
51 // Memory access
52 ,input imem_waitrequest
53 ,output reg [29:0] imem_address
54 ,output reg imem_read = 0
55 ,input [31:0] imem_readdata
56 ,input imem_readdatavalid
58 // Outputs
59 ,output reg i1_valid = 0 // For debugging only
60 ,output wire [31:0] i1_pc // == fetchaddress (restart_pc on restart, else i_npc)
62 ,output reg i2_valid = 0
63 ,output wire [31:0] i2_pc // == i_pc
65 ,output reg i_valid = 0 // 0 => ignore i_instr.
66 ,output reg [31:0] i_pc = 0 // The address of the instr.
67 ,output reg [31:0] i_npc = 0 // The next instruction
68 ,output reg [31:0] i_instr
70 ,output reg [31:0] perf_icache_misses = 0
// When nonzero, enables the additional $display traces guarded by "if (debug)".
73 parameter debug = 0;
// NOTE(review): presumably supplies IC_WORD_INDEX_BITS, IC_LINE_INDEX_BITS,
// IC_SET_INDEX_BITS and CACHEABLE_BITS, which are used below but not declared
// in this file -- confirm.
75 `include "config.h"
78 * The I$ is divided into n sets of k lines of m bytes (m/4 32-bit words).
79 * 16 KiB = 4 KiW. Each set is 1024 W = 32 lines
81 * We split a physical address into
83 * | check | cache line index | byte index |
85 * for example
87 * | 20 | 5 | 7 |
89 * The cache line index bits + byte index = 12 < log2(cache size) = 14
90 * reflects the fact that more than one cache line can map to the same
91 * physical address (in this example 2^(14-12) = 4 way set associative).
94 /* Derived measures. */
96 /* Size in log2 bytes of a line. */
97 parameter LINE_BITS = IC_WORD_INDEX_BITS + 2; // 4
98 /* Size in log2 bytes of a set. */
99 parameter SET_BITS = IC_LINE_INDEX_BITS + LINE_BITS; // 11
100 /* Size in log2 bytes of the cache. */
101 parameter CACHE_BITS = IC_SET_INDEX_BITS + SET_BITS; // 14
/* Width in bits of a stored tag: the cacheable address bits above SET_BITS. */
102 parameter TAG_BITS = CACHEABLE_BITS - SET_BITS; // 20
104 // Divide instruction addresses into segments
// CHK: the "check" (tag) bits that are compared against the stored tags.
105 `define CHK [CACHEABLE_BITS-1 :SET_BITS]
// CSI: the line index within a set; used to address the tag RAMs.
106 `define CSI [SET_BITS-1 :LINE_BITS]
// WDX: the index of the 32-bit word within a line.
107 `define WDX [IC_WORD_INDEX_BITS+1:2]
109 // Stage 1 - generate address.
// A restart overrides the sequential next pc for this cycle.
110 wire [31:0] fetchaddress = restart ? restart_pc : i_npc;
111 assign i1_pc = fetchaddress;
113 // Stage 1 - look up tags and instructions.
114 assign i2_pc = i_pc;
// tagN / ic_qN: the tag and the instruction word read from way N.
115 wire [TAG_BITS-1:0] tag0, tag1, tag2, tag3;
116 wire [31:0] ic_q0, ic_q1, ic_q2, ic_q3;
// hits_2[N] is 1 when the tag read from way N equals the check bits of i_pc.
// The concatenation is always four bits wide (one bit per tag RAM), while the
// declared width follows IC_SET_INDEX_BITS.
117 wire [(1 << IC_SET_INDEX_BITS)-1:0]
118 hits_2 = {tag3 == i_pc`CHK, tag2 == i_pc`CHK,
119 tag1 == i_pc`CHK, tag0 == i_pc`CHK};
121 // Cache filling stage machinery.
122 // set_2 is constructed such that it will be fill_set during
123 // filling, and the matching tag when there is a hit (which implies
124 // that tag update must be done no sooner than the last word
125 // written to the cache line)
// Simulation only: converts the one-hot hits_2 into a way number. Within
// this file set_2 is only read by $display statements. When no listed
// pattern matches, set_2 is driven to x.
// NOTE(review): the code below derives set_2 from hits_2 alone and never
// selects fill_set, so the description above may be stale -- confirm.
127 `ifdef SIMULATE_MAIN
128 reg [IC_SET_INDEX_BITS-1:0] set_2;
129 always @* casex (hits_2)
130 'b0001: set_2 = 0;
131 'b0010: set_2 = 1;
132 'b0100: set_2 = 2;
133 'b1000: set_2 = 3;
134 default:set_2 = 2'bxx;
135 endcase
136 `endif
// Late select of the instruction word: each way contributes its word when
// its hit bit is set and zero otherwise, and the four results are OR-ed.
138 always @* i_instr = ((hits_2[0] ? ic_q0 : 0) |
139 (hits_2[1] ? ic_q1 : 0) |
140 (hits_2[2] ? ic_q2 : 0) |
141 (hits_2[3] ? ic_q3 : 0));
143 /* Yes this is one-hot. I don't know why Quartus think I need to be
144 reminded. */
// i_valid follows i2_valid when exactly one way hits; for every other
// hits_2 pattern (no hit, or more than one hit) it is forced to 0.
145 always @* casex (hits_2)
146 'b0001: i_valid = i2_valid;
147 'b0010: i_valid = i2_valid;
148 'b0100: i_valid = i2_valid;
149 'b1000: i_valid = i2_valid;
150 default:i_valid = 0;
151 endcase
// Encoding of the fetch / fill / invalidate state machine kept in "state".
153 parameter S_RUNNING = 0;
154 parameter S_FILLING = 1;
155 parameter S_PRE_RUNNING = 2;
156 parameter S_LOOKUP = 3;
157 parameter S_INVALIDATE = 4;
// fill_set: the way that a line fill writes into (data RAM and tag).
159 reg [IC_SET_INDEX_BITS-1:0] fill_set = 0;
160 reg [31:0] state = S_RUNNING;
// fill_wi: index within the line of the next word written during a fill.
161 reg [IC_WORD_INDEX_BITS-1:0] fill_wi;
// Write port shared by the four tag RAMs; tag_write_ena has one enable
// bit per way.
164 reg [IC_LINE_INDEX_BITS-1:0] tag_wraddress;
165 reg [TAG_BITS-1:0] tag_write_data;
166 reg [3:0] tag_write_ena = 0;
168 /* It is sad that you have to pull tricks like these to get
169 Verilog/Quartus to use parametrized sizing without stupid
170 warnings, but you can't write TAG_BITS'(~0) so this is the best
171 workaround I've found. */
// tag_const0 is a TAG_BITS wide zero; tag_illegal is its complement (all
// ones) and is the value written into a tag to invalidate a line.
172 wire [TAG_BITS-1:0] tag_const0 = 1'd0;
173 wire [TAG_BITS-1:0] tag_illegal = ~tag_const0;
// Tag RAMs, one per way. All four are read at the line index of
// fetchaddress and share one write address and write data; only the
// write enable differs per way.
// NOTE(review): simpledpram is defined elsewhere; its parameters are
// presumably (data width, address width, name) -- confirm.
175 simpledpram #(TAG_BITS,IC_LINE_INDEX_BITS,"icache_tag0")
176 tag0_ram(.clock(clock), .rdaddress(fetchaddress`CSI), .rddata(tag0),
177 .wraddress(tag_wraddress), .wrdata(tag_write_data),
178 .wren(tag_write_ena[0]));
180 simpledpram #(TAG_BITS,IC_LINE_INDEX_BITS,"icache_tag1")
181 tag1_ram(.clock(clock), .rdaddress(fetchaddress`CSI), .rddata(tag1),
182 .wraddress(tag_wraddress), .wrdata(tag_write_data),
183 .wren(tag_write_ena[1]));
185 simpledpram #(TAG_BITS,IC_LINE_INDEX_BITS,"icache_tag2")
186 tag2_ram(.clock(clock), .rdaddress(fetchaddress`CSI), .rddata(tag2),
187 .wraddress(tag_wraddress), .wrdata(tag_write_data),
188 .wren(tag_write_ena[2]));
190 simpledpram #(TAG_BITS,IC_LINE_INDEX_BITS,"icache_tag3")
191 tag3_ram(.clock(clock), .rdaddress(fetchaddress`CSI), .rddata(tag3),
192 .wraddress(tag_wraddress), .wrdata(tag_write_data),
193 .wren(tag_write_ena[3]));
// Instruction data RAMs, one per way, each 32 bits wide. They are read at
// {line index, word index} of fetchaddress. During a fill, the way selected
// by fill_set is written with imem_readdata at {line index of i_pc, fill_wi}
// on every cycle in which imem_readdatavalid is asserted.
// NOTE(review): the address width CACHE_BITS - 4 presumably removes two way
// bits and two byte-offset bits, which assumes IC_SET_INDEX_BITS == 2 --
// confirm.
195 simpledpram #(32,CACHE_BITS - 4,"icache_ram0")
196 icache_ram0(.clock(clock),
197 .rdaddress({fetchaddress`CSI,fetchaddress`WDX}), .rddata(ic_q0),
198 .wraddress({i_pc`CSI,fill_wi}),
199 .wrdata(imem_readdata),
200 .wren(fill_set == 0 && state == S_FILLING && imem_readdatavalid));
202 simpledpram #(32,CACHE_BITS - 4,"icache_ram1")
203 icache_ram1(.clock(clock),
204 .rdaddress({fetchaddress`CSI,fetchaddress`WDX}), .rddata(ic_q1),
205 .wraddress({i_pc`CSI,fill_wi}),
206 .wrdata(imem_readdata),
207 .wren(fill_set == 1 && state == S_FILLING && imem_readdatavalid));
209 simpledpram #(32,CACHE_BITS - 4,"icache_ram2")
210 icache_ram2(.clock(clock),
211 .rdaddress({fetchaddress`CSI,fetchaddress`WDX}), .rddata(ic_q2),
212 .wraddress({i_pc`CSI,fill_wi}),
213 .wrdata(imem_readdata),
214 .wren(fill_set == 2 && state == S_FILLING && imem_readdatavalid));
216 simpledpram #(32,CACHE_BITS - 4,"icache_ram3")
217 icache_ram3(.clock(clock),
218 .rdaddress({fetchaddress`CSI,fetchaddress`WDX}), .rddata(ic_q3),
219 .wraddress({i_pc`CSI,fill_wi}),
220 .wrdata(imem_readdata),
221 .wren(fill_set == 3 && state == S_FILLING && imem_readdatavalid));
// Free-running 33-bit shift register with XNOR feedback; its two low bits
// choose the way used by the next line fill.
223 reg [32:0] lfsr = 0;
// A synci request is latched here until the state machine has serviced it:
// the address to invalidate and the pc at which to resume fetching.
225 reg pending_synci = 0;
226 reg [31:0] pending_synci_a = 0;
227 reg [31:0] pending_synci_pc = 0;
// Main clocked process: advances the fetch pc, starts and completes line
// fills on a miss, services synci invalidations, and finally applies kill.
// Nonblocking assignments later in the block override earlier ones, which
// is why the kill handling is placed last.
229 always @(posedge clock) begin
// Advance the pseudo-random way selector every cycle.
230 lfsr <= {lfsr[31:0], ~lfsr[32] ^ lfsr[19]};
// Default: no tag write. States below override this for a single cycle.
231 tag_write_ena <= 0;
// Latch an incoming synci request so it can be serviced later.
232 if (synci) begin
233 pending_synci <= 1;
234 pending_synci_a <= synci_a;
235 pending_synci_pc <= restart_pc; // restart is coincident with synci
// The read request is dropped once waitrequest is low while it is asserted.
238 if (~imem_waitrequest & imem_read) begin
239 $display("%05d I$ done issueing", $time);
240 imem_read <= 0;
243 case (state)
244 S_RUNNING:
// A new or pending synci takes precedence: redirect the lookup to the
// address to be invalidated and squash both valid bits.
245 if (synci | pending_synci) begin
246 $display("%05d I$ flushing line @ %x (index %d)", $time,
247 synci ? synci_a : pending_synci_a,
248 synci ? synci_a`CSI : pending_synci_a`CSI);
249 i_npc <= synci ? synci_a : pending_synci_a;
250 i1_valid <= 0;
251 i2_valid <= 0;
253 state <= S_LOOKUP;
// Either some way hit, or the looked-up pc was not valid anyway.
254 end else if (|hits_2 | ~i2_valid) begin
255 if (debug)
256 $display("%05d I$ business as usual i_npc = %x", $time, i_npc);
259 * This is the normal flow (we don't care about invalid misses)
260 * Advance the pc; look up tags, word index, find hitting
261 * set; look up cache word.
263 i_pc <= fetchaddress;
264 i_npc <= fetchaddress + 4;
265 i2_valid <= i1_valid | restart;
266 if (restart)
267 i1_valid <= 1;
269 if (debug & restart)
270 $display("%05d I$ DEBUG1 restart from %x", $time, restart_pc);
271 end else begin
272 // We missed in the cache, start the filling machine
273 $display("%05d I$ %8x missed, starting to fill", $time, i_pc);
274 perf_icache_misses <= perf_icache_misses + 1;
// Point i_npc back at the missed pc, or at restart_pc if a restart
// arrives in this same cycle.
275 i_npc <= restart ? restart_pc : i_pc;
276 i2_valid <= 0;
278 fill_wi <= 0;
// Word address of the first word of the missed line.
279 imem_address <= {i_pc[CACHEABLE_BITS-1:LINE_BITS],
280 {(LINE_BITS - 2){1'd0}}};
281 imem_read <= 1;
282 $display("%05d I$ begin fetching from %8x", $time,
283 {i_pc[CACHEABLE_BITS-1:LINE_BITS],{(LINE_BITS){1'd0}}});
285 if (debug & restart)
286 $display("%05d I$ DEBUG2 restart from %x", $time, restart_pc);
287 state <= S_FILLING;
// S_LOOKUP: move the synci address into i_pc so that hits_2 is computed
// for it in the following state.
290 S_LOOKUP: begin
291 i_pc <= i_npc;
292 state <= S_INVALIDATE;
// S_INVALIDATE: overwrite the tag of every way that hits with tag_illegal,
// then resume fetching at the pc saved with the synci request.
295 S_INVALIDATE: begin
296 `ifdef SIMULATE_MAIN
297 if (|hits_2) begin
298 $display("%05d I$ flushing %x (= %x TAG) found a stale line from set %d (hits %x), index %d tags %x %x %x %x",
299 $time,
300 fetchaddress, fetchaddress`CHK, set_2, hits_2, fetchaddress`CSI,
301 tag0, tag1, tag2, tag3);
302 end else
303 $display("%05d I$ flushing %x (= %x TAG) found nothing (hits %x), index %d tags %x %x %x %x",
304 $time,
305 fetchaddress, fetchaddress`CHK, hits_2, fetchaddress`CSI,
306 tag0, tag1, tag2, tag3);
307 `endif
308 tag_wraddress <= pending_synci_a`CSI;
309 tag_write_data <= tag_illegal;
310 tag_write_ena <= hits_2;
311 // XXX We must wait for SB to drain! It happens to work as
312 // is right now as the SB gets priority but that's actually a
313 // bug.
314 i_npc <= pending_synci_pc;
315 i1_valid <= 1;
316 pending_synci <= 0;
317 state <= S_PRE_RUNNING; // To give a cycle for the tags to be written
// S_FILLING: store each returned word; once the last word of the line has
// arrived, write the tag and pick the way for the next fill.
320 S_FILLING: begin
// A restart during the fill only redirects i_npc; the fill itself continues.
321 if (restart) begin
322 if (debug)
323 $display("%05d I$ DEBUG3 restart from %x", $time, restart_pc);
324 i_npc <= restart_pc;
327 if (imem_readdatavalid) begin
328 $display("%05d I$ {%1d,%1d,%1d} <- %8x", $time,
329 fill_set, i_pc`CSI, fill_wi, imem_readdata);
331 fill_wi <= fill_wi + 1'd1;
// fill_wi all ones: this is the last word of the line.
333 if (&fill_wi) begin
334 $display("%05d IF tag%d[%d] <- %x", $time,
335 fill_set, i_pc`CSI, i_pc`CHK);
336 $display("%05d IF cache filled, back to running", $time);
338 tag_wraddress <= i_pc`CSI;
339 tag_write_data <= i_pc`CHK;
340 tag_write_ena <= 4'd1 << fill_set;
341 fill_set <= lfsr[1:0];
343 state <= S_PRE_RUNNING;
345 end else
346 if (debug)
347 $display("%05d I$ waiting for memory", $time);
350 S_PRE_RUNNING: begin
351 // This lame pause is to keep the tags interface simple (for now)
352 if (restart) begin
353 if (debug)
354 $display("%05d I$ DEBUG3 restart from %x", $time, restart_pc);
355 i_npc <= restart_pc;
358 state <= S_RUNNING;
360 endcase
362 // Keep the kill handling down here to take priority
// kill is ignored while a synci is being handled or when restart is
// asserted in the same cycle.
363 if (!synci & !pending_synci & kill & ~restart) begin
364 if (debug)
365 $display("%05d I$ killed", $time);
366 i1_valid <= 0;
367 i2_valid <= 0;
369 `ifdef SIMULATE_MAIN
370 // Keep all debugging output down here to keep the logic readable
371 if (debug) begin
372 if (state == S_RUNNING)
373 $display(
374 "%05d I$ running: PC %x (valid %d) <%x;%x;%x> -- PC %x (valid %d) HITS %x -- PC %x INST %x VALID %d | %d: %x %x %x %x",
375 $time,
376 fetchaddress, i1_valid, set_2, fetchaddress`CSI, i_pc`WDX,
377 i_pc, i_valid, hits_2,
378 i_pc, i_instr, i_valid,
379 set_2, ic_q0, ic_q1, ic_q2, ic_q3);
381 `endif
383 endmodule