1 // -----------------------------------------------------------------------
3 // Copyright 2004,2007,2008 Tommy Thorn - All Rights Reserved
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 // Boston MA 02111-1307, USA; either version 2 of the License, or
9 // (at your option) any later version; incorporated herein by reference.
11 // -----------------------------------------------------------------------
13 // TODO: For extra credits, make the stage start fetching from the
14 // missed instruction and emit instructions as soon as they arrive,
15 // rather than waiting for the whole line to be filled. Watch out for
16 // issues as the filling line wraps around and watch out for misses
17 // that happen while already filling. The value of this goes down for
18 // shorter cache lines and longer memory latencies.
22 * The instruction fetch/cache stage generates a sequential stream of
23 * instructions until kill is asserted, leaving the pipe without valid
24 * instructions until the next restart. Restart implies kill.
26 * When a pc misses in the cache, the stage emits invalid instructions
27 * until the missed line is filled. While this happens, both kill and
28 * restart are respected (causing the missed instruction not to be
34 Calculates the fetch address
35 Looks up tags and instructions and does a late select, leaving
36 the result in {i_valid,i_instr}
40 `include "../soclib/pipeconnect.h"
42 module stage_I(input wire clock
43 ,input wire kill
// Empty the pipeline
45 ,input wire restart
// Target is next PC.
46 ,input wire [31:0] restart_pc
49 ,input wire [31:0] synci_a
52 ,input imem_waitrequest
53 ,output reg [29:0] imem_address
54 ,output reg imem_read
= 0
55 ,input [31:0] imem_readdata
56 ,input imem_readdatavalid
59 ,output reg i1_valid
= 0 // For debugging only
60 ,output wire [31:0] i1_pc
// == i_npc
62 ,output reg i2_valid
= 0
63 ,output wire [31:0] i2_pc
// == i_pc
65 ,output reg i_valid
= 0 // 0 => ignore i_instr.
66 ,output reg [31:0] i_pc
= 0 // The address of the instr.
67 ,output reg [31:0] i_npc
= 0 // The next instruction
68 ,output reg [31:0] i_instr
70 ,output reg [31:0] perf_icache_misses
= 0
78 * The I$ is divided into n sets of k lines of m bytes (m/4 32-bit words).
79 * 16 KiB = 4 KiW. Each set is 1024 W = 32 lines
81 * We split a physical address into
83 * | check | cache line index | byte index |
89 * The cache line index bits + byte index = 12 < log2(cache size) = 14
90 * reflects the fact that more than one cache line can map to the same
91 * physical address (in this example 2^(14-12) = 4-way set associative).
94 /* Derived measures. */
// Derived cache geometry. Each value is the log2 of a size in bytes and is
// built from IC_WORD_INDEX_BITS, IC_LINE_INDEX_BITS, IC_SET_INDEX_BITS and
// CACHEABLE_BITS, which are defined outside this section.
// NOTE(review): the trailing numbers look like example values for one
// particular configuration; they are not checked against the actual
// parameter values here -- confirm before relying on them.
96 /* Size in log2 bytes of a line. */
// The "+ 2" accounts for the two byte-offset bits of a 32-bit word.
97 parameter LINE_BITS
= IC_WORD_INDEX_BITS
+ 2; // 4
98 /* Size in log2 bytes of a set. */
99 parameter SET_BITS
= IC_LINE_INDEX_BITS
+ LINE_BITS
; // 11
100 /* Size in log2 bytes of the cache. */
101 parameter CACHE_BITS
= IC_SET_INDEX_BITS
+ SET_BITS
; // 14
// Width of one stored tag: the cacheable address bits above the offset
// within a set. This equals the width of the CHK address slice.
102 parameter TAG_BITS
= CACHEABLE_BITS
- SET_BITS
; // 20
104 // Divide instruction addresses into segments
// Each macro expands to a bit-range suffix and is appended directly to a
// 32-bit address, e.g. i_pc`CHK.
// CHK: the "check" field, compared against the stored tags.
105 `define CHK [CACHEABLE_BITS-1 :SET_BITS]
// CSI: the cache line index; addresses the tag RAMs and forms the upper part
// of the data RAM addresses.
106 `define CSI [SET_BITS-1 :LINE_BITS]
// WDX: the word index within a line (address bits [1:0] are not included).
107 `define WDX [IC_WORD_INDEX_BITS+1:2]
109 // Stage 1 - generate address.
// Combinational choice of the address to look up: restart_pc while restart
// is asserted, otherwise the sequential next pc held in i_npc.
110 wire [31:0] fetchaddress
= restart ? restart_pc
: i_npc
;
// i1_pc exposes the stage-1 fetch address unchanged.
111 assign i1_pc
= fetchaddress
;
113 // Stage 1 - look up tags and instructions.
// tagN is the read data of tagN_ram and ic_qN the read data of icache_ramN;
// both RAM groups are addressed from fetchaddress.
115 wire [TAG_BITS
-1:0] tag0
, tag1
, tag2
, tag3
;
116 wire [31:0] ic_q0
, ic_q1
, ic_q2
, ic_q3
;
// hits_2[n] is 1 when tagN equals the check field of i_pc (bit 3 = tag3 ...
// bit 0 = tag0).
// NOTE(review): the tags are read at fetchaddress but compared against i_pc;
// presumably simpledpram returns read data one cycle after the address, when
// i_pc holds that fetch address -- confirm against simpledpram.
// NOTE(review): the concatenation always has four comparisons, so it matches
// the declared width only when IC_SET_INDEX_BITS is 2.
117 wire [(1 << IC_SET_INDEX_BITS
)-1:0]
118 hits_2
= {tag3
== i_pc
`CHK, tag2 == i_pc`CHK,
119 tag1
== i_pc
`CHK, tag0 == i_pc`CHK};
121 // Cache filling stage machinery.
122 // set_2 is constructed such that it will be fill_set during
123 // filling, and the matching tag when there is a hit (which implies
124 // that tag update must be done no sooner than the last word
125 // written to the cache line)
128 reg [IC_SET_INDEX_BITS
-1:0] set_2
;
129 always @* casex (hits_2
)
134 default:set_2
= 2'bxx
;
// Late select of the instruction word: each data RAM output is gated by its
// hit bit and the four gated words are OR-ed together. With no hit the
// result is 0. If more than one hit bit were set the words would be OR-ed
// into each other, so this relies on hits_2 having at most one bit set.
138 always @* i_instr
= ((hits_2
[0] ? ic_q0
: 0) |
139 (hits_2
[1] ? ic_q1
: 0) |
140 (hits_2
[2] ? ic_q2
: 0) |
141 (hits_2
[3] ? ic_q3
: 0));
143 /* Yes this is one-hot. I don't know why Quartus thinks I need to be
145 always @* casex (hits_2
)
146 'b0001: i_valid
= i2_valid
;
147 'b0010: i_valid
= i2_valid
;
148 'b0100: i_valid
= i2_valid
;
149 'b1000: i_valid
= i2_valid
;
// Encodings for the state register of the fetch/fill state machine.
// S_RUNNING is the initial value of state.
153 parameter S_RUNNING
= 0;
// The data RAM write enables are only active while state == S_FILLING.
154 parameter S_FILLING
= 1;
// Entered after a tag write is set up, to give the tags a cycle to be
// written.
155 parameter S_PRE_RUNNING
= 2;
// NOTE(review): no use of S_LOOKUP is visible in this section -- confirm
// whether it is still needed.
156 parameter S_LOOKUP
= 3;
157 parameter S_INVALIDATE
= 4;
// Selects which of icache_ram0..3 (and which tag RAM) receives the line being
// filled. It is compared against 0..3 in the data RAM write enables and is
// reassigned from lfsr[1:0] elsewhere in the module.
159 reg [IC_SET_INDEX_BITS
-1:0] fill_set
= 0;
// Current state of the state machine (one of the S_* encodings). Declared 32
// bits wide although the largest encoding visible here is 4.
160 reg [31:0] state
= S_RUNNING
;
// Word index within the line being filled; forms the low part of the data
// RAM write address. It has no initial value in this declaration.
161 reg [IC_WORD_INDEX_BITS
-1:0] fill_wi
;
// Shared write port of the four tag RAMs: line index and tag value to store.
164 reg [IC_LINE_INDEX_BITS
-1:0] tag_wraddress
;
165 reg [TAG_BITS
-1:0] tag_write_data
;
// One write-enable bit per tag RAM (bit n drives tagN_ram.wren).
166 reg [3:0] tag_write_ena
= 0;
168 /* It is sad that you have to pull tricks like these to get
169 Verilog/Quartus to use parametrized sizing without stupid
170 warnings, but you can't write TAG_BITS'(~0) so this is the best
171 workaround I've found. */
// tag_const0 is 1'd0 extended to TAG_BITS bits, i.e. all zeros.
172 wire [TAG_BITS
-1:0] tag_const0
= 1'd0;
// tag_illegal is the bitwise complement of tag_const0, i.e. all ones. It is
// the value stored into a tag entry when a line is flushed.
// NOTE(review): this presumably relies on no cacheable address having an
// all-ones check field -- confirm.
173 wire [TAG_BITS
-1:0] tag_illegal
= ~tag_const0
;
// Tag store number 0. Read at the line index (CSI slice) of fetchaddress,
// giving tag0; written with tag_write_data at tag_wraddress when
// tag_write_ena[0] is set.
// simpledpram is defined elsewhere; its positional parameters look like
// (data width, address bits, name) -- TODO confirm.
175 simpledpram #
(TAG_BITS
,IC_LINE_INDEX_BITS
,"icache_tag0")
176 tag0_ram(.
clock(clock
), .
rdaddress(fetchaddress
`CSI), .rddata(tag0),
177 .
wraddress(tag_wraddress
), .
wrdata(tag_write_data
),
178 .
wren(tag_write_ena
[0]));
// Tag store number 1. Read at the line index (CSI slice) of fetchaddress,
// giving tag1; written with tag_write_data at tag_wraddress when
// tag_write_ena[1] is set.
// simpledpram is defined elsewhere; its positional parameters look like
// (data width, address bits, name) -- TODO confirm.
180 simpledpram #
(TAG_BITS
,IC_LINE_INDEX_BITS
,"icache_tag1")
181 tag1_ram(.
clock(clock
), .
rdaddress(fetchaddress
`CSI), .rddata(tag1),
182 .
wraddress(tag_wraddress
), .
wrdata(tag_write_data
),
183 .
wren(tag_write_ena
[1]));
// Tag store number 2. Read at the line index (CSI slice) of fetchaddress,
// giving tag2; written with tag_write_data at tag_wraddress when
// tag_write_ena[2] is set.
// simpledpram is defined elsewhere; its positional parameters look like
// (data width, address bits, name) -- TODO confirm.
185 simpledpram #
(TAG_BITS
,IC_LINE_INDEX_BITS
,"icache_tag2")
186 tag2_ram(.
clock(clock
), .
rdaddress(fetchaddress
`CSI), .rddata(tag2),
187 .
wraddress(tag_wraddress
), .
wrdata(tag_write_data
),
188 .
wren(tag_write_ena
[2]));
// Tag store number 3. Read at the line index (CSI slice) of fetchaddress,
// giving tag3; written with tag_write_data at tag_wraddress when
// tag_write_ena[3] is set.
// simpledpram is defined elsewhere; its positional parameters look like
// (data width, address bits, name) -- TODO confirm.
190 simpledpram #
(TAG_BITS
,IC_LINE_INDEX_BITS
,"icache_tag3")
191 tag3_ram(.
clock(clock
), .
rdaddress(fetchaddress
`CSI), .rddata(tag3),
192 .
wraddress(tag_wraddress
), .
wrdata(tag_write_data
),
193 .
wren(tag_write_ena
[3]));
// Instruction data store number 0 (32-bit words). Read at {line index, word
// index} of fetchaddress, giving ic_q0. Written with imem_readdata at
// {line index of i_pc, fill_wi} when fill_set == 0, state == S_FILLING and
// imem_readdatavalid is asserted.
// NOTE(review): CACHE_BITS - 4 equals the width of the {CSI, WDX} address
// only when IC_SET_INDEX_BITS is 2 -- confirm for other configurations.
195 simpledpram #
(32,CACHE_BITS
- 4,"icache_ram0")
196 icache_ram0(.
clock(clock
),
197 .
rdaddress({fetchaddress
`CSI,fetchaddress`WDX}), .rddata(ic_q0),
198 .
wraddress({i_pc
`CSI,fill_wi}),
199 .
wrdata(imem_readdata
),
200 .
wren(fill_set
== 0 && state
== S_FILLING
&& imem_readdatavalid
));
// Instruction data store number 1 (32-bit words). Read at {line index, word
// index} of fetchaddress, giving ic_q1. Written with imem_readdata at
// {line index of i_pc, fill_wi} when fill_set == 1, state == S_FILLING and
// imem_readdatavalid is asserted.
// NOTE(review): CACHE_BITS - 4 equals the width of the {CSI, WDX} address
// only when IC_SET_INDEX_BITS is 2 -- confirm for other configurations.
202 simpledpram #
(32,CACHE_BITS
- 4,"icache_ram1")
203 icache_ram1(.
clock(clock
),
204 .
rdaddress({fetchaddress
`CSI,fetchaddress`WDX}), .rddata(ic_q1),
205 .
wraddress({i_pc
`CSI,fill_wi}),
206 .
wrdata(imem_readdata
),
207 .
wren(fill_set
== 1 && state
== S_FILLING
&& imem_readdatavalid
));
// Instruction data store number 2 (32-bit words). Read at {line index, word
// index} of fetchaddress, giving ic_q2. Written with imem_readdata at
// {line index of i_pc, fill_wi} when fill_set == 2, state == S_FILLING and
// imem_readdatavalid is asserted.
// NOTE(review): CACHE_BITS - 4 equals the width of the {CSI, WDX} address
// only when IC_SET_INDEX_BITS is 2 -- confirm for other configurations.
209 simpledpram #
(32,CACHE_BITS
- 4,"icache_ram2")
210 icache_ram2(.
clock(clock
),
211 .
rdaddress({fetchaddress
`CSI,fetchaddress`WDX}), .rddata(ic_q2),
212 .
wraddress({i_pc
`CSI,fill_wi}),
213 .
wrdata(imem_readdata
),
214 .
wren(fill_set
== 2 && state
== S_FILLING
&& imem_readdatavalid
));
// Instruction data store number 3 (32-bit words). Read at {line index, word
// index} of fetchaddress, giving ic_q3. Written with imem_readdata at
// {line index of i_pc, fill_wi} when fill_set == 3, state == S_FILLING and
// imem_readdatavalid is asserted.
// NOTE(review): CACHE_BITS - 4 equals the width of the {CSI, WDX} address
// only when IC_SET_INDEX_BITS is 2 -- confirm for other configurations.
216 simpledpram #
(32,CACHE_BITS
- 4,"icache_ram3")
217 icache_ram3(.
clock(clock
),
218 .
rdaddress({fetchaddress
`CSI,fetchaddress`WDX}), .rddata(ic_q3),
219 .
wraddress({i_pc
`CSI,fill_wi}),
220 .
wrdata(imem_readdata
),
221 .
wren(fill_set
== 3 && state
== S_FILLING
&& imem_readdatavalid
));
// Bookkeeping for a synci (cache line flush) request.
// pending_synci is tested together with the synci input by the clocked
// logic; where it is set and cleared is not visible in this section.
225 reg pending_synci
= 0;
// Loaded from synci_a: the address whose cache line is to be flushed.
226 reg [31:0] pending_synci_a
= 0;
// Loaded from restart_pc (restart is coincident with synci); later copied
// into i_npc as the pc to resume from.
227 reg [31:0] pending_synci_pc
= 0;
229 always @(posedge clock
) begin
230 lfsr
<= {lfsr
[31:0], ~lfsr
[32] ^ lfsr
[19]};
234 pending_synci_a
<= synci_a
;
235 pending_synci_pc
<= restart_pc
; // restart is coincident with synci
238 if (~imem_waitrequest
& imem_read
) begin
239 $display("%05d I$ done issueing", $time);
245 if (synci | pending_synci
) begin
246 $display("%05d I$ flushing line @ %x (index %d)", $time,
247 synci ? synci_a
: pending_synci_a
,
248 synci ? synci_a
`CSI : pending_synci_a`CSI);
249 i_npc
<= synci ? synci_a
: pending_synci_a
;
254 end else if (|hits_2 |
~i2_valid
) begin
256 $display("%05d I$ business as usual i_npc = %x", $time, i_npc
);
259 * This is the normal flow (we don't care about invalid misses)
260 * Advance the pc; look up tags, word index, find hitting
261 * set; look up cache word.
263 i_pc
<= fetchaddress
;
264 i_npc
<= fetchaddress
+ 4;
265 i2_valid
<= i1_valid | restart
;
270 $display("%05d I$ DEBUG1 restart from %x", $time, restart_pc
);
272 // We missed in the cache, start the filling machine
273 $display("%05d I$ %8x missed, starting to fill", $time, i_pc
);
274 perf_icache_misses
<= perf_icache_misses
+ 1;
275 i_npc
<= restart ? restart_pc
: i_pc
;
279 imem_address
<= {i_pc
[CACHEABLE_BITS
-1:LINE_BITS
],
280 {(LINE_BITS
- 2){1'd0}}};
282 $display("%05d I$ begin fetching from %8x", $time,
283 {i_pc
[CACHEABLE_BITS
-1:LINE_BITS
],{(LINE_BITS
){1'd0}}});
286 $display("%05d I$ DEBUG2 restart from %x", $time, restart_pc
);
292 state
<= S_INVALIDATE
;
298 $display("%05d I$ flushing %x (= %x TAG) found a stale line from set %d (hits %x), index %d tags %x %x %x %x",
300 fetchaddress
, fetchaddress
`CHK, set_2, hits_2, fetchaddress`CSI,
301 tag0
, tag1
, tag2
, tag3
);
303 $display("%05d I$ flushing %x (= %x TAG) found nothing (hits %x), index %d tags %x %x %x %x",
305 fetchaddress
, fetchaddress
`CHK, hits_2, fetchaddress`CSI,
306 tag0
, tag1
, tag2
, tag3
);
308 tag_wraddress
<= pending_synci_a
`CSI;
309 tag_write_data
<= tag_illegal
;
310 tag_write_ena
<= hits_2
;
311 // XXX We must wait for SB to drain! It happens to work as
312 // is right now as the SB gets priority but that's actually a
314 i_npc
<= pending_synci_pc
;
317 state
<= S_PRE_RUNNING
; // To give a cycle for the tags to be written
323 $display("%05d I$ DEBUG3 restart from %x", $time, restart_pc
);
327 if (imem_readdatavalid
) begin
328 $display("%05d I$ {%1d,%1d,%1d} <- %8x", $time,
329 fill_set
, i_pc
`CSI, fill_wi, imem_readdata);
331 fill_wi
<= fill_wi
+ 1'd1;
334 $display("%05d IF tag%d[%d] <- %x", $time,
335 fill_set
, i_pc
`CSI, i_pc`CHK);
336 $display("%05d IF cache filled, back to running", $time);
338 tag_wraddress
<= i_pc
`CSI;
339 tag_write_data
<= i_pc
`CHK;
340 tag_write_ena
<= 4'd1 << fill_set
;
341 fill_set
<= lfsr
[1:0];
343 state
<= S_PRE_RUNNING
;
347 $display("%05d I$ waiting for memory", $time);
351 // This lame pause is to keep the tags interface simple (for now)
354 $display("%05d I$ DEBUG3 restart from %x", $time, restart_pc
);
362 // Keep the kill handling down here to take priority
363 if (!synci
& !pending_synci
& kill
& ~restart
) begin
365 $display("%05d I$ killed", $time);
370 // Keep all debugging output down here to keep the logic readable
372 if (state
== S_RUNNING
)
374 "%05d I$ running: PC %x (valid %d) <%x;%x;%x> -- PC %x (valid %d) HITS %x -- PC %x INST %x VALID %d | %d: %x %x %x %x",
376 fetchaddress
, i1_valid
, set_2
, fetchaddress
`CSI, i_pc`WDX,
377 i_pc
, i_valid
, hits_2
,
378 i_pc
, i_instr
, i_valid
,
379 set_2
, ic_q0
, ic_q1
, ic_q2
, ic_q3
);