initial
[fpgammix.git] / rtl / core.v
blob3e44eddf813eee503becbb218ff35f2bf6a381d6
1 /*
2 * This file is part of the fpgammix package
3 * Copyright (C) Tommy Thorn 2006
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
26 * MMIX on an FPGA
28 * This is a first FPGA implementation of Dr. Knuth's MMIX
29 * architecture. The priority for this implementation is to reach a
30 * useful functional subset as quickly as possible. Performance and
31 * logic usage in particular are *NOT* a priority (but will be for a
32 * later implementation).
34 * Todo:
35 * - TRAP [DONE]
36 * - interrupts [DONE]
37 * - Interval timer exceptions [DONE]
38 * - marginal to local register promotion [Partly, needs stack_store()]
39 * - Clean up core's memory model
40 * - MOR, MXOR
41 * - DIV
42 * - SAVE, UNSAVE
43 * - LDSF, STSF
44 * - Rename I_XX -> XX (maybe)
47 // Warn about undefined nets
48 // `default_nettype none
49 // but Icarus does that with -Wimplicit also
50 module core(input wire clock
51 ,input wire reset
52 ,input wire [63:0] start
54 ,input wire [23:0] core_interrupt
56 ,output reg core_io_access = 0
57 ,output reg core_transfer_request = 0
58 ,output wire [31:0] core_address // Physical
59 ,output reg core_wren = 0
60 ,output reg [31:0] core_wrdata
61 ,output reg [ 3:0] core_wrmask
62 ,input core_wait_request
63 ,input core_read_data_valid
64 ,input [31:0] core_read_data
66 ,output reg core_ifetch // Exported for diagnostics reasons only
69 parameter V = 0; // V for verbose
71 `include "mmix_opcodes.v"
73 /* The main state machine */
74 parameter S_RESET = 0,
76 S_IFETCH1 = 10,
77 S_IFETCH2 = 11,
78 S_IFETCH3 = 12,
80 S_INTERRUPT = 19,
82 S_RF0 = 20,
83 S_RF1 = 21,
84 S_RF2 = 22,
85 S_RF3 = 23,
86 S_RF4 = 24,
88 S_EXECUTE1 = 30,
89 S_EXECUTE2 = 31,
91 S_MEM1 = 40,
92 S_MEM2 = 41,
93 S_MEM3 = 42,
95 S_WB1 = 50,
96 S_WB2 = 51,
98 S_EXTMEM1 = 60,
99 S_EXTMEM2 = 61,
100 S_EXTMEM3 = 62,
101 S_EXTMEM4 = 63,
103 S_POP1 = 70,
104 S_POP2 = 71,
106 S_MULTIPLYING = 75,
107 S_DIVIDING = 76,
108 S_MOR = 77,
109 S_MXOR = 78,
111 S_TRIPPING1 = 80,
113 S_NOT_IMPLEMENTED
114 = 91,
115 S_ILLEGAL_INST
116 = 92,
117 S_PRIVILEGED_INST
118 = 93,
119 S_HALTED = 94;
121 // 57. Bits for arithmetic exceptions (we diverge from Knuth by
122 // also defining the log2 equivalent XX_EXC). Note, Knuth left
123 // shifts them by eight to line up with the enable bits
124 parameter INTERVAL_TIMEOUT_EXC = 7, /* the timer register, rI, has reached zero */
125 X_EXC = 8,
126 Z_EXC = 9,
127 U_EXC = 10,
128 O_EXC = 11,
129 I_EXC = 12,
130 W_EXC = 13,
131 V_EXC = 14,
132 D_EXC = 15,
133 H_EXC = 16;
135 parameter RESUME_AGAIN = 0,
136 RESUME_CONT = 1,
137 RESUME_SET = 2;
139 parameter X_BIT = 1 << X_EXC,
140 Z_BIT = 1 << Z_EXC,
141 U_BIT = 1 << U_EXC,
142 O_BIT = 1 << O_EXC,
143 I_BIT = 1 << I_EXC,
144 W_BIT = 1 << W_EXC,
145 V_BIT = 1 << V_EXC,
146 D_BIT = 1 << D_EXC,
147 H_BIT = 1 << H_EXC;
149 parameter POWER_FAILURE = 1<<0, /* try to shut down calmly and quickly */
150 PARITY_ERROR = 1<<1, /* try to save the file systems */
151 NONEXISTENT_MEMORY=1<<2, /* a memory address can't be used */
152 REBOOT_SIGNAL = 1<<4; /* it's time to start over */
154 /* 65. Info flags */
155 parameter Z_is_immed_bit = 'h01,
156 Z_is_source_bit = 'h02,
157 Y_is_immed_bit = 'h04,
158 Y_is_source_bit = 'h08,
159 X_is_source_bit = 'h10,
160 X_is_dest_bit = 'h20,
161 rel_addr_bit = 'h40,
162 push_pop_bit = 'h80;
164 parameter VERSION = 1,
165 SUBVERSION = 0,
166 SUBSUBVERSION = 1,
167 ABSTIME = 1152770398;
169 reg [31:0] state = S_RESET;
171 /* XXX This is Quartus specific. Must find a way to make this more
172 portable, so that it also works with Xilinx ISE. */
173 (* ram_init_file = "../info_flags.mif" *)
174 reg [ 7:0] info_flags[0:255];
176 reg [63:0] branch_target;
178 /* 55. Knuth keeps all the special registers in the global register
179 file, but there's already way too much contention for that. We
180 keep them as separate variables. */
181 reg [63:0] rB, rD, rE, rH, rJ, rM, rR, rBB, rC, rN, rO, rS, rT,
182 rTT, rK, rQ, rU, rV, rL, rA, rF, rP, rW, rX, rY,
183 rZ, rWW, rXX, rYY, rZZ;
185 reg [64:0] rI; // We use the MSB overflow trick to save a bit of logic
186 reg [63:0] rQ_lastread;
188 reg [ 7:0] tmpbyte;
189 reg [63:0] t, aux;
190 reg [64:0] diff;
191 reg sign, z_sign;
192 reg [63:0] z_abs;
193 reg [ 7:0] n; // Random bit counter
194 reg truth;
195 reg [63:0] rX_; /* Temporary for RESUME handling only */
197 // 61.
198 reg [63:0] w, x, y, z, b, ma, mb; /* operands */
199 reg [10:0] x_ptr; /* destination */
200 reg [63:0] loc; /* location of the current instruction */
201 reg [63:0] inst_ptr; /* location of the next instruction */
202 reg [31:0] inst; /* the current instruction */
203 reg [31:0] exc; /* exceptions raised by the current instruction */
204 reg [ 1:0] rop; /* ropcode of a resumed instruction */
205 // int round_mode; /* the style of floating point rounding just used */
206 reg [ 1:0] resuming; /* are we resuming an interrupted instruction?
207 * resuming[0] yes/no
208 * resuming[1] trap/trip
210 // bool halted; /* did the program come to a halt? */
211 // bool breakpoint; /* should we pause after the current instruction? */
212 // bool tracing; /* should we trace the current instruction? */
213 // bool stack_tracing; /* should we trace details of the register stack? */
214 // bool interacting; /* are we in interactive mode? */
215 // bool interact_after_break; /* should we go into interactive mode? */
216 // bool tripping; /* are we about to go to a trip handler? */
217 // bool good; /* did the last branch instruction guess correctly? */
218 // tetra trace_threshold; /* each instruction should be traced this many times */
220 // 62.
221 reg [ 7:0] op;
222 reg [ 7:0] xx;
223 reg [ 7:0] yy;
224 reg [ 7:0] zz;
226 reg [63:0] yz;
227 reg [ 7:0] f;
228 reg [63:0] i, j;
230 // 75.
231 reg [ 7:0] G, L, O;
234 * 76.
236 * I diverge from MMIXWARE and unify the global and local
237 * registers, such that the top 256 registers are global. (For
238 * simplicity and to eliminate some muxes)
240 parameter GLOBAL = 256;
241 reg [ 8:0] regfile_rdaddress_a, regfile_rdaddress_b, regfile_rdaddress_c;
242 reg regfile_rden_a = 0;
243 wire [63:0] regfile_rddata_a;
244 reg regfile_rden_b = 0;
245 wire [63:0] regfile_rddata_b;
246 reg regfile_rden_c = 0;
247 wire [63:0] regfile_rddata_c;
249 reg regfile_wren;
250 reg [ 8:0] regfile_wraddress;
251 reg [63:0] regfile_wrdata;
253 reg [63:0] g255_readonly_cache; /* This is a read-only copy of g[255] / $255 */
255 reg [10:0] lring_size, /* the number of local registers (a power of 2) */
256 lring_mask, /* one less than lring_size */
257 S; /* congruent to rS >> 3 modulo lring_size */
259 reg [63:0] l_xx; /* local registers[(O + xx) & lring_mask] */
260 reg [63:0] l_yy; /* local registers[(O + yy) & lring_mask] */
261 reg [63:0] l_zz; /* local registers[(O + zz) & lring_mask] */
262 reg [63:0] l_q;
263 reg [63:0] g_q;
264 reg [63:0] g_xx, g_yy, g_zz;
266 reg b_sign, b_zero, b_posi, b_pari;
267 reg y_sign, y_zero, y_posi, y_pari;
268 reg [63:0] high_shift;
270 reg [63:0] datamem_addr;
271 reg [63:0] datamem_rddata = 64'h1BADDECAF4BABE;
272 reg datamem_rddata_high = 0, datamem_rddata_low = 0;
273 reg [63:0] datamem_wrdata;
274 reg [ 7:0] datamem_wrbyteena;
275 reg datamem_wren = 0;
276 reg datamem_rden = 0;
278 reg wb, wb_global;
279 reg [10:0] g_a, l_a;
281 reg error = 0;
283 reg [63:0] mul_acc, mul_aux, mul_a;
284 reg [63:0] mul_b;
286 regfile regfile_a (
287 .clock ( clock ),
289 .rden ( regfile_rden_a ),
290 .rdaddress ( regfile_rdaddress_a ),
291 .q ( regfile_rddata_a ),
293 .wren ( regfile_wren ),
294 .wraddress ( regfile_wraddress ),
295 .data ( regfile_wrdata )
298 regfile regfile_b (
299 .clock ( clock ),
301 .rden ( regfile_rden_b ),
302 .rdaddress ( regfile_rdaddress_b ),
303 .q ( regfile_rddata_b ),
305 .wren ( regfile_wren ),
306 .wraddress ( regfile_wraddress ),
307 .data ( regfile_wrdata )
310 regfile regfile_c (
311 .clock ( clock ),
313 .rden ( regfile_rden_c ),
314 .rdaddress ( regfile_rdaddress_c ),
315 .q ( regfile_rddata_c ),
317 .wren ( regfile_wren ),
318 .wraddress ( regfile_wraddress ),
319 .data ( regfile_wrdata )
322 reg [63:0] core_virt_address;
324 address_virtualization tlb(clock, core_virt_address, core_address);
326 always @(posedge clock) begin
327 /* Maintain the global read-only copy of $255 */
328 if (regfile_wren && regfile_wraddress == (GLOBAL | 255))
329 g255_readonly_cache <= regfile_wrdata;
331 rC <= rC + 1;
332 rI <= rI - 65'd1;
334 // rQ is low-pri:24 program:8 high-pri I/O:24 machine:8
335 rQ[63:40] <= rQ[63:40] | core_interrupt;
336 if (core_interrupt & ~rQ[63:40])
337 $display("%06d peripherals raising exception", $time, core_interrupt);
339 if (rI[64]) begin
340 $display("%06d Interval timer just ranout, raising exception", $time);
341 rQ[INTERVAL_TIMEOUT_EXC] <= 1;
342 rI[64] <= 0;
345 if (regfile_wren)
346 if (regfile_wraddress & GLOBAL)
347 $display("%06d g[%1d]=#%x", $time,
348 regfile_wraddress - GLOBAL, regfile_wrdata);
349 else
350 $display("%06d l[%1d]=#%x", $time,
351 regfile_wraddress, regfile_wrdata);
353 if (core_read_data_valid) begin
354 if(V)$display("%06d Core got valid data #%1x (%d%d%d)!", $time,
355 core_read_data,
356 datamem_rddata_high, datamem_rddata_low, core_ifetch);
357 if (datamem_rddata_high) begin
358 if(V)$display("%06d Core expected it for high tetra", $time);
359 datamem_rddata[63:32] <= core_read_data;
360 datamem_rddata_high <= 0;
361 end else if (datamem_rddata_low) begin
362 if(V)$display("%06d Core expected it for low tetra", $time);
363 datamem_rddata[31:0] <= core_read_data;
364 datamem_rddata_low <= 0;
365 end else if (~core_ifetch)
366 $display("%06d DROPPED DATA #%1x", $time, core_read_data);
369 regfile_wren <= 0;
370 regfile_rden_a <= 0;
371 regfile_rden_b <= 0;
372 regfile_rden_c <= 0;
374 case (state)
375 S_RESET: begin
376 $display("%06d RESET", $time);
377 core_io_access <= 0;
378 core_transfer_request <= 0;
379 core_wren <= 0;
380 datamem_rddata_low <= 0;
381 datamem_rddata_high <= 0;
382 core_ifetch <= 0;
384 datamem_wren <= 0;
385 datamem_rden <= 0;
386 inst_ptr <= start;
389 * XXX The responsibility for most of these should be moved
390 * to the firmware (to save logic resources and cycle-time). Obviously,
391 * rK, rC, rN, rO, and rS are e... rO and rS??? why. How
392 * is the OS going to set the two latter? (SAVE & UNSAVE).
395 g255_readonly_cache <= 0;
397 // 77. p(14.) 37. XXX Hmm, there should be a better way ...
398 // 14.
399 // rA = must_remain_zero:46 rounding_mode:2 enable_DVWIOUZX:8 events_DVWIOUZX:8
400 // 0 - 7
401 rB = 64'hBADDECAFDEADBABE; // bootstrap register (trip) [0]
402 rD = 64'hBADDECAFDEADBABE; // dividend register [1]
403 rE = 64'hBADDECAFDEADBABE; // epsilon register [2]
404 rH = 64'hBADDECAFDEADBABE; // himult register [3]
405 rJ = 64'hBADDECAFDEADBABE; // return-jump register [4]
406 rM = 64'hBADDECAFDEADBABE; // multiplex mask register [5]
407 rR = 64'hBADDECAFDEADBABE; // remainder register [6]
408 rBB= 64'hBADDECAFDEADBABE; // bootstrap register (trap) [7]
410 // These can't be PUT
411 rC <= 0; // cycle counter [8]
412 rN[63:32] = (VERSION << 24) + (SUBVERSION << 16) + (SUBSUBVERSION << 8);
413 rN[31: 0] = ABSTIME; // serial number [9]
414 // XXX Correct this when memory settles
415 rO = 64'h6_0000; // register stack offset [10]
416 rS = 64'h6_0000; // register stack pointer [11]
418 // These can't be PUT by the user
419 rI <= ~65'd0; // interval counter [12]
420 rT = 64'h8000000850000000; // trap address register [13]
421 rTT= 64'h8000000600000000; // dynamic trap address register [14]
422 rK = 0; // interrupt mask register [15]
423 rQ <= 0; // interrupt request register [16]
424 rU = 0; // usage counter [17]
425 rV = 64'h369c2004; // virtual translation register [18]
427 // Finally, these may cause pipeline delays
428 // global threshold register
429 G = 256 - 32;
430 rL = 0;
431 rA = {8'd255, 8'd00}; // Enable all traps, none yet, round nearest
433 rF = 0; // failure location register [22]
434 rP = 0; // prediction register [23]
435 rW = 0; // where-interrupted register (trip) [24]
436 rX = 0; // execution register (trip) [25]
437 rY = 0; // Y operand (trip) [26]
438 rZ = 0; // Z operand (trip) [27]
439 rWW= 0; // where-interrupted register (trap) [28]
440 rXX= 0; // execution register (trap) [29]
441 rYY= 0; // Y operand (trap) [30]
442 rZZ= 0; // Z operand (trap) [31]
444 lring_size = 256;
445 lring_mask = 255;
446 L = 0; // XXX I don't think there really is a need for both rL and L
447 O = 0; // XXX I don't think there really is a need for both rO and O
448 S = 0;
449 resuming = 0;
450 state <= S_IFETCH1;
453 S_IFETCH1: begin
454 $display("");
455 if (rQ & rK) begin
456 $display("%06d INTERRUPT! rQ=#%1x & rK=#%1x -> #%1x", $time,
457 rQ, rK, rQ & rK);
458 state <= S_INTERRUPT;
459 end else begin
460 state <= S_IFETCH2;
461 // 63. sort of ...
462 if(V)$display("%06d IF1 Issuing a read request to #%1x", $time, inst_ptr);
463 // $display("%06d IFETCH2", $time);
464 // CONCEPTUALLY {op, xx, yy, zz} <= progmem[inst_ptr[63:2]]
465 if (resuming) begin
466 // We shouldn't get here if resuming is true
467 $display(" Bug in resume support!");
468 state <= S_HALTED;
470 core_transfer_request <= 1;
471 core_ifetch <= 1;
472 core_virt_address <= inst_ptr;
473 core_io_access <= 0;
474 core_wren <= 0;
475 loc <= inst_ptr;
476 inst_ptr <= inst_ptr + 4;
481 * Interrupts need to wait for the pipeline to flush.
482 * In this current implementation, the only thing pipelined is
483 * the register file write back which hasn't committed yet at S_IFETCH1,
484 * thus we go to another cycle.
486 S_INTERRUPT: begin
487 $display(" $255=#%1x inst_ptr=#%1x",
488 g255_readonly_cache, inst_ptr);
489 rK = 0;
490 rBB = g255_readonly_cache;
491 rWW = inst_ptr;
492 rXX = {32'h80000000,inst}; // rop code RESUME_AGAIN, that is, retry inst
493 rYY = y;
494 rZZ = z;
495 regfile_wraddress <= GLOBAL | 255;
496 regfile_wrdata <= rJ;
497 regfile_wren <= 1;
498 inst_ptr = rTT;
499 state <= S_IFETCH1;
502 S_IFETCH2: begin
503 if(V)$display("IFETCH2");
504 if (~core_wait_request)
505 core_transfer_request <= 0;
507 if (core_read_data_valid | resuming) begin
508 core_ifetch <= 0;
509 if (!resuming)
510 inst = core_read_data;
511 {op, xx, yy, zz} = inst;
512 f <= info_flags[op];
513 regfile_rdaddress_a <= ((xx >= G) ? (GLOBAL | xx) : ((O + xx) & lring_mask));
514 regfile_rdaddress_b <= ((yy >= G) ? (GLOBAL | yy) : ((O + yy) & lring_mask));
515 regfile_rdaddress_c <= ((zz >= G) ? (GLOBAL | zz) : ((O + zz) & lring_mask));
516 regfile_rden_a <= 1;
517 regfile_rden_b <= 1;
518 regfile_rden_c <= 1;
519 $display("%06d IF2 #%1x:#%1x", $time, loc, inst);
520 state <= S_RF1;
524 S_RF1: state <= S_RF4;
526 S_RF4: begin
527 //$display("%06d zz l[%1d]=#%1x g[%1d]", $time, zz, l_zz, zz, g_zz);
528 /* 60. The main loop. */
529 state <= S_EXECUTE1;
531 yz = {48'd0, inst[15:0]};
532 x = 0;
533 y = 0;
534 z = 0;
535 b = 0;
536 exc = 0;
537 // old_L = L;
538 //$display("%06d ** yz .. #%1x", $time, yz);
540 // 70. Convert relative address to absolute address
541 if (f & rel_addr_bit) begin
542 if ((op & 8'hFE) == I_JMP) begin
543 yz = {40'd0, inst[23:0]};
544 if(V)$display("%06d ** yz .. #%1x [JMP]", $time, yz);
546 if (op & 1) begin
547 yz = yz - ((op == I_JMPB) ? 64'h1000000 : 64'h10000);
548 if(V)$display("%06d ** yz .. #%1x [op & 1]", $time, yz);
550 y = inst_ptr; z = loc + (yz << 2);
552 if(V)$display("%06d ** yz = #%1x, z = #%1x", $time, yz, z);
554 // 71. Install operand fields.
555 if (resuming && rop != RESUME_AGAIN) begin
556 $display("%06d ** resuming ...", $time);
557 /* 126. Install special operands when resuming an interrupted operation */
558 if (rop == RESUME_SET) begin
559 op = I_ORI;
560 y = resuming[1] ? rZZ : rZ;
561 z = 0;
562 exc = {resuming[1] ? rXX[47:40] : rX[47:40],8'd0};
563 f = X_is_dest_bit;
564 $display(" resume_set op=#%1x y=#%1x exc=#%1x", op, y, exc);
565 end else begin /* RESUME_CONT */
566 y = rY;
567 z = rZ;
569 end else begin
570 if (f & X_is_source_bit) begin
571 // 74. Set b from register X
572 b = regfile_rddata_a;
573 if(V)$display("%06d ** regfile a -> b = #%1x", $time, b);
575 // XXX if (info[op].third_operand) <set b from special register 79>;
576 if (f & Z_is_immed_bit) begin
577 z = zz; //$display("%06d ** z = zz", $time);
578 end else if (f & Z_is_source_bit) begin
579 // 72. Set z from register Z
580 z = regfile_rddata_c;
581 if(V)$display("%06d ** regfile c -> z = #%1x", $time, z);
582 end else if ((op & 8'hF0) == I_SETH) begin
583 // 78. Set z as an immediate wyde
584 case (op[1:0])
585 0: z = {yz[15:0],48'd0};
586 1: z = {16'd0,yz[15:0],32'd0};
587 2: z = {32'd0,yz[15:0],16'd0};
588 3: z = {48'd0,yz[15:0]};
589 endcase
590 y = b;
591 if(V)$display("%06d ** 78. z=#%1x y=#%1x", $time, z, y);
593 if (f & Y_is_immed_bit) begin y = yy; if(V)$display("%06d ** y = yy", $time); end
594 else if (f & Y_is_source_bit) begin
595 // 73. Set y from register Y
596 y = regfile_rddata_b;
597 if(V)$display("%06d ** regfile b -> y=#%1x", $time, y);
601 wb = 0;
602 // 60...
603 if (f & X_is_dest_bit) begin
604 /* 80. Install register X as the destination, adjusting the
605 register stack if necessary */
606 wb = 1;
607 wb_global = 0;
608 if (xx >= G) begin
609 x_ptr = GLOBAL | xx;
610 end else begin
611 if (xx >= L) begin
612 //$display("%06d ** 81. Increasing rL, setting l[%1d] <- 0", $time, O + L);
613 regfile_wraddress <= (O + L) & lring_mask;
614 regfile_wrdata <= 0;
615 regfile_wren <= 1;
616 L = L + 1;
617 rL = L;
618 if (((S - O - L) & lring_mask) == 0) begin
619 $display("%06d ** 83. stack_store() not implemented!", $time);
620 state <= S_NOT_IMPLEMENTED;
622 state <= S_RF4;
623 end else
624 x_ptr = (O + xx) & lring_mask;
629 /* Even though I'm not trying to optimize, making this run at 50 MHz
630 makes my life easier, thus this extra stage which hopefully
631 enables Quartus to retime some of this */
632 S_EXECUTE1: begin
633 state <= S_EXECUTE2;
634 resuming = 0;
636 w = y + z;
637 if(V)$display("%06d ** w (#%1x) = y (#%1x) + z (#%1x)", $time, w, y, z);
639 // if (loc >= 64'h20000000) goto privileged_inst;
640 branch_target <= loc + {{46{op[0]}},yy,zz,2'd0};
642 b_sign = b[63];
643 b_zero = b == 0;
644 b_pari = b[0];
645 b_posi = ~b_sign & ~b_zero;
647 y_sign = y[63];
648 y_zero = y == 0;
649 y_pari = y[0];
650 y_posi = ~y_sign & ~y_zero;
653 S_EXECUTE2: begin
654 state <= S_MEM1;
655 if(V)$display("%06d EX2 x=#%1x y=#%1x z=#%1x b=#%1x w=#%1x", $time, x, y, z, b, w);
657 case (op)
658 // 84.
659 I_ADD, I_ADDI: x = w;
661 // 85.
662 I_SUB, I_SUBI, I_NEG, I_NEGI, I_SUBU, I_SUBUI, I_NEGU, I_NEGUI:
663 x = y - z;
664 I_ADDU, I_ADDUI, I_INCH, I_INCMH, I_INCML, I_INCL:
665 x = w;
666 I_2ADDU, I_2ADDUI: x = (y << 1) + z;
667 I_4ADDU, I_4ADDUI: x = (y << 2) + z;
668 I_8ADDU, I_8ADDUI: x = (y << 3) + z;
669 I_16ADDU, I_16ADDUI: x = (y << 4) + z;
670 I_SETH, I_SETMH, I_SETML, I_SETL, I_GETA, I_GETAB: begin
671 $display(" SETx = #%1x",z);
672 x = z;
674 // 86.
675 I_OR, I_ORI, I_ORH, I_ORMH, I_ORML, I_ORL:
676 x = y | z;
677 I_ORN, I_ORNI: x = y | ~z;
678 I_NOR, I_NORI: x = ~(y | z); // === ~y & ~z
679 I_XOR, I_XORI: x = y ^ z;
680 I_AND, I_ANDI: begin
681 x = y & z;
682 $display(" AND #%1x,#%1x -> #%1x", y, z, x);
684 I_ANDN, I_ANDNI, I_ANDNH, I_ANDNMH, I_ANDNML, I_ANDNL:
685 x = y & ~z;
686 I_NAND, I_NANDI: x = ~(y & z); // === ~y | ~z
687 I_NXOR, I_NXORI: x = ~(y ^ z);
689 // 87.
690 I_SL, I_SLI, I_SLU, I_SLUI:
691 if (z >= 64) begin
692 x = 0;
693 if (|y) begin
694 exc[V_EXC] = 1;
695 $display("%06d ** SL Overflow", $time);
697 end else begin
698 x = y << z[5:0];
700 // XXX OMG. Spend 2X the cycles here
701 /* if (($signed(x) >> z[5:0]) != y) begin
702 exc[V_EXC] = 1;
703 $display("%06d ** SL Overflow", $time);
704 end*/
706 // XXX Not 100% sure of these two
707 I_SR, I_SRI: begin
708 x = $signed(y) >>> (z[63:6] ? 63 : z[5:0]);
709 $display(" SR #%1x, #%1x -> #%1x",
710 y, z, x);
712 I_SRU, I_SRUI: begin
713 x = z[63:6] ? 0 : (y >> z[5:0]);
714 $display(" SRU #%1x, #%1x -> #%1x",
715 y, z, x);
717 I_MUX, I_MUXI: x = y & rM | z & ~rM;
719 I_SADD, I_SADDI: begin
720 t = y & ~z;
721 x = t[0]+t[1]+t[2]+t[3]+t[4]+t[5]+t[6]+t[7]+t[8]+t[9]+t[10]+t[11]+t[12]+t[13]+t[14]+t[15]+t[16]+t[17]+t[18]+t[19]+t[20]+t[21]+t[22]+t[23]+t[24]+t[25]+t[26]+t[27]+t[28]+t[29]+t[30]+t[31]+t[32]+t[33]+t[34]+t[35]+t[36]+t[37]+t[38]+t[39]+t[40]+t[41]+t[42]+t[43]+t[44]+t[45]+t[46]+t[47]+t[48]+t[49]+t[50]+t[51]+t[52]+t[53]+t[54]+t[55]+t[56]+t[57]+t[58]+t[59]+t[60]+t[61]+t[62]+t[63];
724 I_MOR, I_MORI: begin
725 n <= 62;
726 t <= y;
727 state <= S_MOR;
730 I_MXOR, I_MXORI: begin
731 n <= 62;
732 t <= y;
733 state <= S_MXOR;
736 I_BDIF, I_BDIFI: begin
737 if (y[63:56] > z[63:56]) x[63:56] = y[63:56] - z[63:56];
738 if (y[55:48] > z[55:48]) x[55:48] = y[55:48] - z[55:48];
739 if (y[47:40] > z[47:40]) x[47:40] = y[47:40] - z[47:40];
740 if (y[39:32] > z[39:32]) x[39:32] = y[39:32] - z[39:32];
741 if (y[31:24] > z[31:24]) x[31:24] = y[31:24] - z[31:24];
742 if (y[23:16] > z[23:16]) x[23:16] = y[23:16] - z[23:16];
743 if (y[15: 8] > z[15: 8]) x[15: 8] = y[15: 8] - z[15: 8];
744 if (y[ 7: 0] > z[ 7: 0]) x[ 7: 0] = y[ 7: 0] - z[ 7: 0];
746 I_WDIF, I_WDIFI: begin
747 if (y[63:48] > z[63:48]) x[63:48] = y[63:48] - z[63:48];
748 if (y[47:32] > z[47:32]) x[47:32] = y[47:32] - z[47:32];
749 if (y[31:16] > z[31:16]) x[31:16] = y[31:16] - z[31:16];
750 if (y[15: 0] > z[15: 0]) x[15: 0] = y[15: 0] - z[15: 0];
752 I_TDIF, I_TDIFI: begin
753 if (y[63:32] > z[63:32]) x[63:32] = y[63:32] - z[63:32];
754 if (y[31: 0] > z[31: 0]) x[31: 0] = y[31: 0] - z[31: 0];
756 I_ODIF, I_ODIFI: if (y > z) x = y - z;
758 I_MULU, I_MULUI, I_MUL, I_MULI: begin
759 {mul_aux,mul_acc} <= 0;
760 if (y > z) begin
761 mul_a <= y; mul_b <= z;
762 end else begin
763 mul_a <= z; mul_b <= y;
765 state <= S_MULTIPLYING;
768 I_DIV, I_DIVI:
769 if (y == 64'h8000000000000000 && z == 64'hFFFFFFFFFFFFFFFF) begin
770 exc[V_EXC] = 1;
771 x <= y;
772 rR <= 0;
773 end else if (z == 0) begin
774 exc[D_EXC] = 1;
775 x <= 0;
776 rR <= y;
777 end else begin
778 n <= 62;
779 t <= 0;
780 sign <= (y[63] ^ z[63]);
781 z_sign <= z[63];
782 if (y[63])
783 y = -y;
784 if (z[63])
785 z = -z;
786 z_abs = z;
787 x <= y;
788 state <= S_DIVIDING;
791 I_DIVU, I_DIVUI: begin
792 $display("rD = %d, z = %d, rD >= z %d", rD, z, rD >= z);
793 if (rD >= z) begin
794 /* Arith 14. check that x < z; otherwise give trivial answer */
795 x <= rD;
796 rR <= y;
797 end else begin
798 n <= 62;
799 t <= rD;
800 sign <= 0;
801 x <= y;
802 y_sign <= 0; // Override signs to avoid any fixup afterwards
803 z_sign <= 0;
804 state <= S_DIVIDING;
808 // 89.
810 I_FADD, I_FSUB, I_FMUL, I_FDIV, I_FREM, I_FSQRT, I_FINT,
811 I_FIX, I_FIXU, I_FLOT, I_FLOTI, I_FLOTU, I_FLOTUI,
812 I_SFLOT, I_SFLOTUI: begin
813 $display("%06d ** floating point isn't implemented yet", $time); // XXX
814 state <= S_NOT_IMPLEMENTED;
818 // 90.
819 I_CMP, I_CMPI: begin
820 /* I use a simpler version than Knuth's
821 Signed comparisons:
822 a > b <=> (a^M) >U (b^M), M = 1 << 63
825 if (y == z)
826 x = 0;
827 else if ((y ^ 64'h8000000000000000)
828 > (z ^ 64'h8000000000000000))
829 x = 1;
830 else
831 x = -1;
832 $display("%06d (CMP) %1d, %1d -> #%1x", $time, y, z, x);
835 I_CMPU, I_CMPUI: begin
836 if (y == z)
837 x = 0;
838 else if (y > z)
839 x = 1;
840 else
841 x = -1;
842 $display("%06d (CMPU) #%1x, #%1x -> #%1x", $time, y, z, x);
846 I_FCMPE, I_FCMP, I_FUN, I_FEQL, I_FEQLE, I_FUNE: begin
847 $display("%06d ** floating point isn't implemented yet", $time); // XXX
848 state <= S_NOT_IMPLEMENTED;
853 // 91. & 92.
854 I_CSN, I_CSNI, I_CSZ, I_CSZI, I_CSP, I_CSPI,
855 I_CSOD, I_CSODI, I_CSNN, I_CSNNI, I_CSNZ, I_CSNZI,
856 I_CSNP, I_CSNPI, I_CSEV, I_CSEVI, I_ZSN, I_ZSNI,
857 I_ZSZ, I_ZSZI, I_ZSP, I_ZSPI, I_ZSOD, I_ZSODI,
858 I_ZSNN, I_ZSNNI, I_ZSNZ, I_ZSNZI, I_ZSNP, I_ZSNPI,
859 I_ZSEV, I_ZSEVI: begin
860 case (op[2:1])
861 0: truth = y_sign;
862 1: truth = y_zero;
863 2: truth = y_posi;
864 3: truth = y_pari;
865 endcase
866 if (op[3]) truth = ~truth;
867 x = truth ? z : b;
870 // 93.
871 I_BN, I_BNB, I_PBN, I_PBNB,
872 I_BZ, I_BZB, I_PBZ, I_PBZB,
873 I_BP, I_BPB, I_PBP, I_PBPB,
874 I_BOD, I_BODB, I_PBOD, I_PBODB,
875 I_BNN, I_BNNB, I_PBNN, I_PBNNB,
876 I_BNZ, I_BNZB, I_PBNZ, I_PBNZB,
877 I_BNP, I_BNPB, I_PBNP, I_PBNPB,
878 I_BNOD, I_BNODB, I_PBNOD, I_PBNODB: begin
879 $display("%06d ** Branch op[2:0] #%1x", $time, op[2:0]);
880 case (op[2:1])
881 0: truth = b_sign;
882 1: truth = b_zero;
883 2: truth = b_posi;
884 3: truth = b_pari;
885 endcase
886 if (op[3]) truth = ~truth;
887 if (truth) begin
888 $display("%06d ** Branch taken, jumping to #%1x", $time, z);
889 inst_ptr <= z;
893 // 94.
894 I_LDB, I_LDBI, I_LDBU, I_LDBUI, I_LDW, I_LDWI, I_LDWU, I_LDWUI,
895 I_LDT, I_LDTI, I_LDTU, I_LDTUI, I_LDO, I_LDOI, I_LDOU, I_LDOUI,
896 I_LDSF, I_LDSFI, I_LDHT, I_LDHTI: begin
897 datamem_addr <= w;
898 datamem_rden <= 1;
901 // 95.
902 I_STB, I_STBI, I_STBU, I_STBUI: begin
903 if (op[1])
904 $display("%06d ** STBU [#%1x] <- #%1x", $time, w, b[7:0]);
905 else
906 $display("%06d ** STB [#%1x] <- #%1x", $time, w, b);
907 datamem_addr <= w;
908 datamem_wrdata <= {b[7:0], b[7:0], b[7:0], b[7:0],
909 b[7:0], b[7:0], b[7:0], b[7:0]};
910 datamem_wrbyteena <= 8'h80 >> w[2:0];
911 datamem_wren <= 1;
913 if ({{56{b[7]}},b[7:0]} != b && ~op[1]) begin
914 $display("%06d ** STB Overflow", $time);
915 exc[V_EXC] = 1;
919 I_STW, I_STWI, I_STWU, I_STWUI: begin
920 if (op[1])
921 $display("%06d ** STWU [#%1x] <- #%1x", $time, w, b[15:0]);
922 else
923 $display("%06d ** STW [#%1x] <- #%1x", $time, w, b);
924 datamem_addr <= w;
925 datamem_wrdata <= {b[15:0], b[15:0], b[15:0], b[15:0]};
926 datamem_wrbyteena <= 8'hC0 >> (2*w[2:1]);
927 datamem_wren <= 1;
929 if ({{48{b[15]}},b[15:0]} != b && ~op[1]) begin
930 $display("%06d ** STW Overflow", $time);
931 exc[V_EXC] = 1;
935 I_STT, I_STTI, I_STTU, I_STTUI: begin
936 if (op[1])
937 $display("%06d ** STTU [#%1x] <- #%1x", $time, w, b[31:0]);
938 else
939 $display("%06d ** STT [#%1x] <- #%1x", $time, w, b);
940 datamem_addr <= w;
941 datamem_wrdata <= {b[31:0], b[31:0]};
942 datamem_wrbyteena <= 8'hF0 >> (4*w[2]);
943 datamem_wren <= 1;
945 if ({{32{b[31]}},b[31:0]} != b && ~op[1]) begin
946 $display("%06d ** STT Overflow", $time);
947 exc[V_EXC] = 1;
951 I_STO, I_STOI, I_STOU, I_STOUI, I_STUNC, I_STUNCI: begin
952 if ((op & ~1) == I_STUNC)
953 $display("%06d ** STUNC [#%1x] <- #%1x", $time, w, b);
954 else if (op[1])
955 $display("%06d ** STOU [#%1x] <- #%1x", $time, w, b);
956 else
957 $display("%06d ** STO [#%1x] <- #%1x", $time, w, b);
958 datamem_addr <= w;
959 datamem_wrdata <= b;
960 datamem_wrbyteena <= 8'hFF;
961 datamem_wren <= 1;
965 I_STSF, I_STSFI: begin
966 state <= S_NOT_IMPLEMENTED;
970 I_STHT, I_STHTI: begin // Store the high tetra b[63:32] at address w
971 $display("%06d ** STHT [#%1x] <- #%1x", $time, w, b[63:32]);
972 datamem_addr <= w;
973 datamem_wrdata <= {b[63:32], b[63:32]}; // replicated so either 32-bit lane carries the value
974 datamem_wrbyteena <= 8'hF0 >> (4*w[2]); // address bit 2 picks the tetra lane (same as the STT arm); was w[0]
975 datamem_wren <= 1;
978 I_STCO, I_STCOI: begin
979 $display("%06d ** STCO [#%1x] <- #%1x", $time, w, {56'd0, xx});
980 datamem_addr <= w;
981 datamem_wrdata <= {56'd0, xx};
982 datamem_wrbyteena <= 8'hFF;
983 datamem_wren <= 1;
986 // 96.
988 I_CSWAP, I_CSWAPI: state <= S_NOT_IMPLEMENTED;
991 // 97.
992 I_GET: begin
993 if (yy || zz >= 32) // XXX Can synthesis compile >= 32 efficiently?
994 state <= S_NOT_IMPLEMENTED; // XXX Should be a dynamic trap
995 else
996 case (zz)
997 REG_B: x = rB;
998 REG_D: x = rD;
999 REG_E: x = rE;
1000 REG_H: x = rH;
1001 REG_J: x = rJ;
1002 REG_M: x = rM;
1003 REG_R: x = rR;
1004 REG_BB: x = rBB;
1005 REG_C: x = rC;
1006 REG_N: x = rN;
1007 REG_O: x = rO;
1008 REG_S: x = rS;
1009 REG_I: x = rI[63:0];
1010 REG_T: x = rT;
1011 REG_TT: x = rTT;
1012 REG_K: x = rK;
1013 REG_Q: begin x = rQ; rQ_lastread = rQ; end
1014 REG_U: x = rU;
1015 REG_V: x = rV;
1016 REG_G: x = G;
1017 REG_L: x = rL;
1018 REG_A: x = rA;
1019 REG_F: x = rF;
1020 REG_P: x = rP;
1021 REG_W: x = rW;
1022 REG_X: x = rX;
1023 REG_Y: x = rY;
1024 REG_Z: x = rZ;
1025 REG_WW: x = rWW;
1026 REG_XX: x = rXX;
1027 REG_YY: x = rYY;
1028 REG_ZZ: x = rZZ;
1029 endcase
1032 I_PUT, I_PUTI: begin
1033 $strobe("%06d ** PUT %d, %x", $time, xx, z);
1034 // XXX INCOMPLETE
1035 if (yy)
1036 state <= S_ILLEGAL_INST;
1037 else
1038 case (xx)
1039 // These are "unencumbered" (cf. p 176)
1040 REG_B: rB = z; // bootstrap register (trip) [0]
1041 REG_D: rD = z; // dividend register [1]
1042 REG_E: rE = z; // epsilon register [2]
1043 REG_H: rH = z; // himult register [3]
1044 REG_J: rJ = z; // return-jump register [4]
1045 REG_M: rM = z; // multiplex mask register [5]
1046 REG_R: rR = z; // remainder register [6]
1047 REG_BB:rBB= z; // bootstrap register (trap) [7]
1049 // These can't be PUT
1050 REG_C: state <= S_ILLEGAL_INST; // rC = z; // cycle counter [8]
1051 REG_N: state <= S_ILLEGAL_INST; // rN = z; // serial number [9]
1052 REG_O: state <= S_ILLEGAL_INST; // rO = z; // register stack offset [10]
1053 REG_S: state <= S_ILLEGAL_INST; // rS = z; // register stack pointer [11]
1055 // These can't be PUT by the user
1056 REG_I: rI <= {1'd0,z}; // interval counter [12] (XXX PRIVILEGED)
1057 REG_T: rT = z; // trap address register [13] (XXX PRIVILEGED)
1058 REG_TT:rTT= z; // dynamic trap address register [14] (XXX PRIVILEGED)
1059 REG_K: rK = z; // interrupt mask register [15] (XXX PRIVILEGED)
1060 /* interrupt request register [16] (XXX PRIVILEGED)
1061 "Interrupt bits in rQ might be lost if they are set
1062 between a GET and a PUT. Therefore we don't allow
1063 PUT to zero out bits that have become 1 since the
1064 most recently committed GET." */
1065 REG_Q: rQ <= rQ & ~rQ_lastread | z;
1066 REG_U: rU = z; // usage counter [17] (XXX PRIVILEGED)
1067 REG_V: rV = z; // virtual translation register [18] (XXX PRIVILEGED)
1069 // Finally, these may cause pipeline delays
1070 // global threshold register
1071 REG_G: // 99.
1072 if (z > 255 || z < L || z < 32)
1073 state <= S_ILLEGAL_INST;
1074 else if (z < G) begin
1075 // XXX Interestingly using strobe instead of
1076 // display causes a crash! (Maybe the G-1 is the cause)
1077 $display("%06d ** PUT g[%d] <- 0", $time, G-1);
1078 regfile_wraddress <= GLOBAL | (G - 1);
1079 regfile_wrdata <= 0;
1080 regfile_wren <= 1;
1081 G <= G - 1;
1082 state <= S_EXECUTE2; // Loop
1083 end else
1084 G = z;
1086 // local threshold register [20]
1087 REG_L: // 98.
1088 if (z < L) begin
1089 L = z;
1090 rL = z;
1093 // arithmetic status register [21]
1094 REG_A: if (z[63:18])
1095 state <= S_ILLEGAL_INST;
1096 else
1097 rA = z;
1099 REG_F: rF = z; // failure location register [22]
1100 REG_P: rP = z; // prediction register [23]
1101 REG_W: rW = z; // where-interrupted register (trip) [24]
1102 REG_X: rX = z; // execution register (trip) [25]
1103 REG_Y: rY = z; // Y operand (trip) [26]
1104 REG_Z: rZ = z; // Z operand (trip) [27]
1105 REG_WW:rWW= z; // where-interrupted register (trap) [28]
1106 REG_XX:rXX= z; // execution register (trap) [29]
1107 REG_YY:rYY= z; // Y operand (trap) [30]
1108 REG_ZZ:rZZ= z; // Z operand (trap) [31]
1110 default:
1111 state <= S_ILLEGAL_INST;
1112 endcase
1115 // 101.
1116 I_POP: begin
1117 $display("%06d (POP)", $time);
1118 regfile_rdaddress_a <= (O + xx - 1) & lring_mask;
1119 regfile_rdaddress_b <= (O - 1) & lring_mask;
1120 regfile_rden_a <= 1;
1121 regfile_rden_b <= 1;
1122 state <= S_POP1;
1125 I_PUSHGO, I_PUSHGOI, I_PUSHJ, I_PUSHJB: begin
1126 if (op[2])
1127 inst_ptr <= w; // PUSHGO
1128 else
1129 inst_ptr <= z; // PUSHJ
1131 if (xx > G) begin
1132 xx = L;
1133 L = L + 1;
1134 if (((S - O - L) & lring_mask) == 0) begin
1135 $display("%06d ** 83. stack_store() not implemented!", $time);
1136 state <= S_NOT_IMPLEMENTED;
1139 regfile_wraddress <= (O + xx) & lring_mask;
1140 regfile_wrdata <= xx;
1141 regfile_wren <= 1;
1142 //l[(O + xx) & lring_mask] <= xx;
1143 wb = 0;
1144 $display("%06d *** PUSHx l[%1d]=#%1x", $time,
1145 (O + xx) & lring_mask, xx);
1146 x = loc + 4; // XXX Why?
1147 rJ = loc + 4;
1148 L = L - (xx + 1);
1149 O = O + xx + 1;
1150 rO = rO + ((xx + 1) << 3);
1151 b = rO; // XXX Why?
1152 rL = L;
1153 $display("%06d rL=%1d, O=%1d, rO=#%1x, rJ=#%1x", $time, rL, O, rO, rJ);
1154 state <= S_IFETCH1;
1158 // 102.
1159 I_SAVE: state <= S_NOT_IMPLEMENTED; // XXX Lots of work
1161 // 104.
1162 I_UNSAVE: state <= S_NOT_IMPLEMENTED; // XXX Lots of work
1165 // 106.
1166 I_SYNCID, I_SYNCIDI, I_PREST, I_PRESTI, I_SYNCD,
1167 I_SYNCDI, I_PREGO, I_PREGOI, I_PRELD, I_PRELDI,
1168 I_SWYM:
1169 state <= S_IFETCH1;
1171 // 107.
1172 I_GO, I_GOI: begin
1173 $display("%06d ** GO to #%1x", $time, w);
1174 x = inst_ptr; inst_ptr <= w;
1177 I_JMP, I_JMPB: begin
1178 inst_ptr <= loc + {{38{op[0]}},xx,yy,zz,2'd0};
1181 I_SYNC:
1182 if (xx != 0 || yy != 0 || zz > 7)
1183 state <= S_ILLEGAL_INST;
1184 else
1185 state <= S_WB2;
1187 I_LDVTS, I_LDVTSI:
1188 state <= S_ILLEGAL_INST; // XXX Really: priviledged
1190 // 108.
1191 I_TRIP: begin
1192 $display("%06d TRIP %d,%d,%d", $time, xx, yy, zz);
1193 exc[H_EXC] = 1;
1196 // 108.
1197 // 124.
1198 I_RESUME: if (zz[7:1] || xx || yy /* XXX || zz[0] & ~inst_ptr[63] */)
1199 state <= S_ILLEGAL_INST; // ^^^^^ Correct?, but not yet!
1200 else begin
1201 $display("%06d RESUME #%1x", $time, zz);
1202 inst_ptr = zz[0] ? rWW : rW;
1203 z = inst_ptr;
1204 rX_ = zz[0] ? rXX : rX;
1205 if (zz[0]) begin
1206 rK = g255_readonly_cache; // Restore interrupt mask
1207 regfile_wraddress <= GLOBAL | 255;
1208 regfile_wrdata <= rBB;
1209 regfile_wren <= 1;
1210 $display(" b=#%1x x=#%1x", rX_, rBB);
1212 state <= S_IFETCH1;
1213 if (~rX_[63]) begin
1214 // 125. Prepare to perform a ropcode
1215 rop = rX_[57:56];
1216 if (rop == 3) begin
1217 $display(" Can't handle ropcode 3");
1218 state <= S_ILLEGAL_INST;
1219 end else begin
1220 // if ((1 << b[31:28]) & #8f30) -- 1000_1111_0011_0000
1222 if (rop == RESUME_CONT) // 1
1223 case (rX_[31:28])
1224 15,11,10,9,8,5,4: begin
1225 $display(" Uhhh, not rX_[31:28] was %d", rX_[31:28]);
1226 state <= S_ILLEGAL_INST;
1228 endcase
1230 if (rop == RESUME_CONT || rop == RESUME_SET) begin // 1 || 2
1231 if (rX_[23:16] >= L && rX_[23:16] < G) begin
1232 $display(" Wtf, rX_[23:16] = %d", rX_[23:16]);
1233 state <= S_ILLEGAL_INST;
1237 if (rX_[31:24] == I_RESUME) begin
1238 $display(" Arrgh, rX_[31:24] = %d", rX_[31:24]);
1239 state <= S_ILLEGAL_INST;
1242 resuming = {zz[0], 1'b1};
1243 loc <= inst_ptr - 4;
1244 inst = rX_[31:0];
1245 $display("%06d resuming #%016x:#%08x", $time, inst_ptr - 4, rX_[31:0]);
1246 state <= S_IFETCH2;
1252 I_FCMP:
1253 I_FUN:
1254 I_FEQL:
1255 I_FADD:
1256 I_FIX:
1257 I_FSUB:
1258 I_FIXU:
1259 I_FLOT:
1260 I_FLOTI:
1261 I_FLOTU:
1262 I_FLOTUI:
1263 I_SFLOT:
1264 I_SFLOTI:
1265 I_SFLOTU:
1266 I_SFLOTUI: */
1268 // 10
1269 /* I_FMUL:
1270 I_FCMPE:
1271 I_FUNE:
1272 I_FEQLE:
1273 I_FDIV:
1274 I_FSQRT:
1275 I_FREM:
1276 I_FINT:
1279 /* I_TRAP: TRAP is handled exactly like an unknown instruction */
1280 default: begin
1281 // XXX This is probably not exactly correct...
1282 if (op == I_TRAP)
1283 $display("%06d TRAP %d,%d,%d inst_ptr = #%1x", $time,
1284 xx, yy, zz, inst_ptr);
1285 else
1286 $display("%06d Unknown instruction trap %d,%d,%d", $time,
1287 xx, yy, zz);
1288 rWW = inst_ptr;
1289 rK = 0;
1290 rBB = g255_readonly_cache;
1291 /* This depends on f == info_flags[op]!! */
1292 rXX = {op == I_TRAP || !(f & X_is_dest_bit)
1293 ? 32'h80000000 // normal resume from trap
1294 : 32'h02000000, // RESUME_SET
1295 inst};
1296 $display(" setting rWW=#%1x, rXX=#%1x", rWW, rXX);
1297 rYY = y;
1298 rZZ = z;
1299 $display(" setting rYY=#%1x, rZZ=#%1x", rYY, rZZ);
1300 regfile_wraddress <= GLOBAL | 255;
1301 regfile_wrdata <= rJ;
1302 regfile_wren <= 1;
1303 inst_ptr = rT;
1304 state <= S_IFETCH1;
1306 endcase
1309 // XXX Yup, don't care nothin' 'bout being fast
1310 S_MEM1: if (~datamem_wren & ~datamem_rden) begin
1311 if(V)$display("%06d ME1: No memory op detected, skipping to ME2", $time);
1312 state <= S_MEM2; // Cheating!
1313 end else begin
1314 if(V)$display("%06d *** 1st half of access #%1x & #%1x (W%d,R%d)",
1315 $time, datamem_wrdata[63:32], datamem_wrbyteena[7:4],
1316 datamem_wren, datamem_rden);
1318 {datamem_rddata_high,datamem_rddata_low} <= 2'b11;
1320 // Writing 1st half
1321 core_transfer_request <= 1;
1322 core_virt_address <= datamem_addr & ~7;
1323 core_io_access <= datamem_addr[63:48] == 1;
1324 if (datamem_rden & datamem_wren) begin
1325 $display("%06d Wow!", $time);
1326 error <= 1;
1327 state <= S_HALTED;
1329 core_wren <= datamem_wren;
1330 core_wrdata <= datamem_wrdata[63:32];
1331 core_wrmask <= datamem_wrbyteena[7:4];
1332 state <= S_EXTMEM1;
1335 // 2nd half
1336 S_EXTMEM1: if (~core_wait_request) begin
1337 if(V)$display("%06d *** 2nd half of access #%1x & #%1x", $time,
1338 datamem_wrdata[31:0],
1339 datamem_wrbyteena[3:0]);
1341 core_virt_address <= core_virt_address + 4;
1342 core_wrdata <= datamem_wrdata[31:0];
1343 core_wrmask <= datamem_wrbyteena[3:0];
1344 state <= S_EXTMEM2;
1347 S_EXTMEM2: if (~core_wait_request) begin
1348 // And we're done XXX Not handling reads very well are we?
1349 core_transfer_request <= 0;
1350 state <= S_MEM2;
1353 S_MEM2: if (datamem_rden & (datamem_rddata_high | datamem_rddata_low)) begin
1354 if(V)$display("%06d *** Core waiting for read data to arrive", $time);
1355 end else begin
1356 if(V)$display("%06d ME2 x = #%1x", $time, x);
1357 state <= S_MEM3;
1358 case (op)
1359 I_ADD, I_ADDI:
1360 /* Test for overflow which
1361 "... occurs if and only if y and z have the same sign
1362 but the sum has a different sign."
1364 if (~(y[63] ^ z[63]) & (y[63] ^ x[63])) begin
1365 $display("%06d ** ADD Overflow x[63]=%d y[63]=%d z[63]=%d", $time,
1366 x[63], y[63], z[63]);
1367 exc[V_EXC] = 1;
1370 I_SUB, I_SUBI, I_NEG, I_NEGI:
1371 /* Test for overflow which
1372 "Other cases of signed and unsigned addition and
1373 subtraction are, of course, similar. Overflow
1374 occurs in the calculation x = y - z if and only if
1375 it occurs in the calculation y = x + z."
1377 if (~(x[63] ^ z[63]) & (x[63] ^ y[63])) begin
1378 $display("%06d ** SUB Overflow x[63]=%d y[63]=%d z[63]=%d", $time,
1379 x[63], y[63], z[63]);
1380 exc[V_EXC] = 1;
1383 I_SL, I_SLI:
1384 if (z > 64 && y)
1385 // XXX. This is insufficient. See 87.
1386 exc[V_EXC] = 1;
1387 else if (high_shift != {64{y[63]}})
1388 exc[V_EXC] = 1;
1390 // 94.
1391 I_LDB, I_LDBI: begin
1392 tmpbyte = datamem_rddata >> (8*(7 - datamem_addr[2:0]));
1393 x = {{56{tmpbyte[7]}},tmpbyte};
1394 $display("%06d ** LDB [#%1x] -> #%1x", $time, datamem_addr, {{56{tmpbyte[7]}},tmpbyte});
1397 I_LDBU, I_LDBUI: begin
1398 tmpbyte = datamem_rddata >> (8*(7 - datamem_addr[2:0]));
1399 x = {56'd0,tmpbyte};
1400 $display("%06d ** LDBU [#%1x] -> #%1x", $time, datamem_addr, {56'd0,tmpbyte});
1403 I_LDW, I_LDWI: begin
1404 t[15:0] = datamem_rddata >> (16*(3 - datamem_addr[2:1]));
1405 x = {{48{t[15]}},t[15:0]};
1406 $display("%06d ** LDW [#%1x] -> #%1x", $time, datamem_addr, {{48{t[15]}},t[15:0]});
1409 I_LDWU, I_LDWUI: begin
1410 t[15:0] = datamem_rddata >> (16*(3 - datamem_addr[2:1]));
1411 x = {48'd0,t[15:0]};
1412 $display("%06d ** LDWU [#%1x] -> #%1x", $time, datamem_addr, {48'd0,t[15:0]});
1415 I_LDT, I_LDTI: begin
1416 $display("%06d *** datamem_rddata = #%1x datamem_addr[2]=%d", $time,
1417 datamem_rddata, datamem_addr[2]);
1418 t[31:0] = datamem_rddata >> (32*(1 - datamem_addr[2]));
1419 x = {{32{t[31]}},t[31:0]};
1420 $display("%06d ** LDT [#%1x] -> #%1x", $time, datamem_addr, {{32{t[31]}},t[31:0]});
1423 I_LDTU, I_LDTUI: begin
1424 t[31:0] = datamem_rddata >> (32*(1 - datamem_addr[2]));
1425 x = {32'd0,t[31:0]};
1426 $display("%06d ** LDTU [#%1x] -> #%1x", $time, datamem_addr, {32'd0,t[31:0]});
1429 I_LDO, I_LDOI: begin
1430 x = datamem_rddata;
1431 $display("%06d ** LDO [#%1x] -> #%1x", $time, datamem_addr, datamem_rddata);
1434 I_LDOU, I_LDOUI: begin
1435 x = datamem_rddata;
1436 $display("%06d ** LDOU [#%1x] -> #%1x", $time, datamem_addr, datamem_rddata);
1439 I_LDUNC, I_LDUNCI: begin
1440 x = datamem_rddata;
1441 $display("%06d ** LDUNC [#%1x] -> #%1x", $time, datamem_addr, datamem_rddata);
1444 I_LDHT, I_LDHTI: begin
1445 t[31:0] = datamem_rddata >> (32*(1 - datamem_addr[0]));
1446 x = {t[31:0],32'd0};
1447 $display("%06d ** LDHT [#%1x] -> #%1x", $time, datamem_addr, {t[31:0],32'd0});
1449 endcase
1450 end // case: S_MEM2
1452 S_MEM3: state <= S_WB2;
1454 S_WB2: begin
1455 state <= S_IFETCH1;
1456 datamem_rden <= 0;
1457 datamem_wren <= 0;
1459 /* XXX Hmm, in which cases do we _not_ commit in presence of exception? */
1460 /* XXX We should probably make sure all instructions that can
1461 * raise an exception pass through here. What a mess!
1463 regfile_wraddress <= x_ptr;
1464 regfile_wrdata <= x;
1465 regfile_wren <= wb;
1467 // 122. Check for trip interrupt
1468 if ((exc & (U_BIT + X_BIT)) == U_BIT && !(rA & U_BIT)) exc = exc & ~U_BIT;
1469 if (exc) begin
1470 $display(" *** Exception handling %x", exc);
1471 state <= S_IFETCH1;
1472 j = exc & (rA | H_BIT); /* find all exceptions that have been enabled */
1473 if (j) begin
1474 // 123. Initiate a trip interrupt
1475 // Priority encoding. Very expensive!
1476 rW = inst_ptr;
1477 if (j[H_EXC]) begin inst_ptr = 0; exc[H_EXC] = 0; end // Trip
1478 else if (j[D_EXC]) begin inst_ptr = 16; exc[D_EXC] = 0; end // Integer divide check
1479 else if (j[V_EXC]) begin inst_ptr = 32; exc[V_EXC] = 0; end // Integer overflow
1480 else if (j[W_EXC]) begin inst_ptr = 48; exc[W_EXC] = 0; end // float-to-fix overflow
1481 else if (j[I_EXC]) begin inst_ptr = 64; exc[I_EXC] = 0; end // float invalid op
1482 else if (j[O_EXC]) begin inst_ptr = 80; exc[O_EXC] = 0; end // float overflow
1483 else if (j[U_EXC]) begin inst_ptr = 96; exc[U_EXC] = 0; end // float underflow
1484 else if (j[Z_EXC]) begin inst_ptr = 112; exc[Z_EXC] = 0; end // float div by 0
1485 else if (j[X_EXC]) begin inst_ptr = 128; exc[X_EXC] = 0; end // float inexact
1487 XXX AFAICT, these can never happen??
1488 else if (j[ 7]) begin inst_ptr = 144; exc[ 7] = 0; end
1489 else if (j[ 6]) begin inst_ptr = 160; exc[ 6] = 0; end
1490 else if (j[ 5]) begin inst_ptr = 176; exc[ 5] = 0; end
1491 else if (j[ 4]) begin inst_ptr = 192; exc[ 4] = 0; end
1492 else if (j[ 3]) begin inst_ptr = 208; exc[ 3] = 0; end
1493 else if (j[ 2]) begin inst_ptr = 224; exc[ 2] = 0; end
1494 else if (j[ 1]) begin inst_ptr = 240; exc[ 1] = 0; end
1495 else if (j[ 0]) begin inst_ptr = 256; exc[ 0] = 0; end
1497 rX[63:32] = 32'h80000000;
1498 rX[31: 0] = inst;
1499 if ((op & 8'hE0) == I_STB) begin rY = w; rZ = b; end
1500 else begin rY = y; rZ = z; end
1501 rB = g255_readonly_cache;
1502 regfile_wraddress <= GLOBAL | 8'd255;
1503 regfile_wrdata <= rJ;
1504 regfile_wren <= 1;
1506 if (op == I_TRIP) begin
1507 w = rW;
1508 x = rX;
1511 rA = rA | (exc >> 8);
1515 S_POP1: state <= S_POP2; // Register file lookup :-(
1517 S_POP2: begin
1518 $display("%06d l[(O + xx - 1) & lring_mask] == l[%1d] = #%x", $time,
1519 (O + xx - 1) & lring_mask, regfile_rddata_a);
1520 $display("%06d l[(O - 1) & lring_mask] == l[%1d] = #%x", $time,
1521 (O - 1) & lring_mask, regfile_rddata_b);
1523 regfile_rdaddress_a <= (O + xx - 1) & lring_mask;
1524 regfile_rdaddress_b <= (O - 1) & lring_mask;
1526 if (xx != 0 && xx <= L) begin
1527 y = regfile_rddata_a; /* l[(O + xx - 1) & lring_mask] */
1528 $display("%06d x=%1d y=l[%1d]=#%1x", $time,
1529 xx, (O + xx - 1) & lring_mask, y);
1531 if (rS[31:0] == rO[31:0]) begin
1532 $display("%06d ** 84. stack_load() not implemented!", $time);
1533 state <= S_NOT_IMPLEMENTED;
1535 // k = regfile_rddata_b & 8'hFF; /* l[(O - 1) & lring_mask] */
1536 $display("%06d POP k=%1d, O=%1d, S=%1d", $time, regfile_rddata_b[7:0], O, S);
1537 if (O - S <= regfile_rddata_b[7:0]) begin
1538 $display("%06d ** 84. stack_load() not implemented!", $time);
1539 state <= S_NOT_IMPLEMENTED;
1541 L = regfile_rddata_b[7:0] + ((xx <= L) ? xx : (L + 1));
1542 if (L > G) begin
1543 $display("%06d L=G %1d", $time, G);
1544 L = G;
1546 if (L > regfile_rddata_b[7:0]) begin
1547 $display("%06d l[%1d]=#%1x", $time,
1548 (O - 1) & lring_mask, y);
1549 regfile_wraddress <= (O - 1) & lring_mask;
1550 regfile_wrdata <= y;
1551 regfile_wren <= 1;
1553 y = rJ;
1554 inst_ptr = rJ + (yz << 2);
1555 $display("%06d rJ = #%1x + (yz << 2) #%1x = #%1x", $time,
1556 rJ, (yz << 2), inst_ptr);
1557 O = O - (regfile_rddata_b[7:0] + 1);
1558 rO = rO - ((regfile_rddata_b[7:0] + 1) << 3);
1559 b = rO; // XXX Why?
1560 rL = L;
1561 state <= S_WB2;
1564 S_MULTIPLYING:
1565 if (mul_b) begin
1566 $display("%06d ** MUL %d + %d * %d", $time, {mul_aux,mul_acc}, mul_a, mul_b);
1567 /* 1 bit at a time
1568 if (mul_b[0])
1569 {mul_aux,mul_acc} <= {mul_aux,mul_acc} + mul_a;
1570 mul_a <= {mul_a,1'd0};
1571 mul_b <= mul_b[63:1];
1573 // 2 bits at a time, worst-case 32-cycles.
1574 // XXX This can be done cheaper
1575 {mul_aux,mul_acc} <= {mul_aux,mul_acc} + mul_a * mul_b[1:0];
1576 mul_a <= {mul_a,2'd0};
1577 mul_b <= mul_b[63:2];
1578 end else begin
1579 if ((op & ~1) == I_MULU)
1580 rH <= mul_aux;
1581 else begin
1582 if (y[63]) mul_aux = mul_aux - z;
1583 if (z[63]) mul_aux = mul_aux - y;
1584 // Orig: if (mul_aux[63:32] != mul_aux[31:0] || (mul_aux[63:32] ^ mul_aux[62:0] ^ mul_acc[63])) begin
1585 if (mul_aux != {64{mul_acc[63]}}) begin
1586 $display("%06d ** MUL Overflow", $time);
1587 exc[V_EXC] = 1;
1590 x <= mul_acc;
1591 state <= S_WB2;
1594 // Plain Radix-2 restoring division. SRT would likely be faster
1595 S_DIVIDING: begin
1596 $display("%06d ** DIV[U] t=%16x x=%16x", $time, t, x);
1597 {t,x} = {t,x} << 1;
1598 diff = t - z; // diff is 65-bit to handle overflow correctly.
1599 if (~diff[64]) begin
1600 t = diff;
1601 x[0] = 1;
1604 n <= n - 1;
1605 if (n[6]) begin
1606 /* Done. Possibly adjust for signed division. */
1607 case ({y_sign,z_sign})
1608 2+1: begin
1609 t = 0 - t;
1610 if (x[63]) $display("\n***IMPOSSIBLE***\n");
1611 $display(" 2+1: x=%d t=%d", $signed(x), $signed(t));
1614 /* 0+0: do nothing */
1616 2+0: begin
1617 if (t) begin
1618 t = z_abs - t;
1619 x = -1 - x;
1620 end else begin
1621 x = 0 - x;
1623 $display(" 2+0: x=%d t=%d", $signed(x), $signed(t));
1626 0+1: begin
1627 if (t) begin
1628 t = t - z_abs;
1629 x = -1 - x;
1630 end else begin
1631 x = 0 - x;
1633 $display(" 0+1: x=%d t=%d", $signed(x), $signed(t));
1635 endcase
1636 $display("%06d ** DIV[U] t=%16x x=%16x (after sign correction)", $time, t, x);
1637 rR <= t;
1638 state <= S_WB2;
1642 S_MOR: begin
1643 x = x >> 1;
1644 x[63] = |(z[7:0] & {t[56],t[48],t[40],t[32],t[24],t[16],t[8],t[0]});
1645 if (n[2:0] == 3'b111) begin
1646 t <= y;
1647 z <= z >> 8;
1648 end else
1649 t <= t >> 1;
1650 n <= n - 1;
1651 if (n[6])
1652 state <= S_WB2;
1655 S_MXOR: begin
1656 x = x >> 1;
1657 x[63] = ^(z[7:0] & {t[56],t[48],t[40],t[32],t[24],t[16],t[8],t[0]});
1658 if (n[2:0] == 3'b111) begin
1659 t <= y;
1660 z <= z >> 8;
1661 end else
1662 t <= t >> 1;
1663 n <= n - 1;
1664 if (n[6])
1665 state <= S_WB2;
1668 S_NOT_IMPLEMENTED: begin // XXX Will disappear eventually
1669 $display("%06d NOT IMPLEMENTED EXCEPTION", $time); // XXX Do something here
1670 error <= 3;
1671 state <= S_HALTED;
1674 S_ILLEGAL_INST: begin // XXX Will disappear eventually
1675 $display("%06d ILLEGAL INSTRUCTION EXCEPTION", $time); // XXX Do something here
1676 error <= 4;
1677 state <= S_HALTED;
1680 S_PRIVILEGED_INST: begin // XXX Will disappear eventually
1681 $display("%06d PRIVILEGED EXCEPTION", $time); // XXX Do something here
1682 error <= 5;
1683 state <= S_HALTED;
1686 S_HALTED: begin // XXX Will disappear eventually
1687 $display("%06d HALTED", $time);
1688 // 25 MHz ~ 2^25, blink the error code with roughly 1 Hz.
1689 // XXX This depends on the surrounding system having
1690 // something useful here! My system happens to have a 7 segment display.
1691 core_transfer_request <= 1;
1692 core_io_access <= 1;
1693 core_virt_address <= 32'h24;
1694 core_wren <= 1;
1695 core_wrmask <= ~0;
1696 core_wrdata <= rC[25] ? ~0 : ~error;
1697 $finish;
1699 endcase
1701 /* At the end to overwrite everything else */
1702 if (reset)
1703 state <= S_RESET;
1706 initial $readmemh("info_flags.data", info_flags);
1707 endmodule
1709 // Cheap and dirty segments
1710 // 1 MiB = #10_0000
1711 // #00..00 - #00..3_FFFF -> segment 0 -> SRAM #0_0000 - #3_FFFF
1712 // #20..00 - #20..3_FFFF -> segment 1 -> SRAM #4_0000 - #7_FFFF
1713 // #40..00 - #40..3_FFFF -> segment 2 -> SRAM #8_0000 - #B_FFFF
1714 // #60..00 - #60..3_FFFF -> segment 3 -> SRAM #C_0000 - #F_FFFF
1715 // That is sram_a = {12'd0,core_a[62:61],core_a[17:0]}
// Translate a 64-bit virtual address into a 32-bit physical address.
//   virt_a[63] == 1 : the low 32 bits are passed through unchanged.
//   virt_a[63] == 0 : virt_a[62:61] selects one of four windows and
//                     virt_a[17:0] is the offset inside that window,
//                     giving {12'd0, virt_a[62:61], virt_a[17:0]}.
// The mapping is purely combinational; clk is part of the port list but
// is not referenced inside this module.
module address_virtualization(input  wire        clk
                             ,input  wire [63:0] virt_a
                             ,output wire [31:0] phys_a);
   // Candidate result when the top address bit is set.
   wire [31:0] passthrough_a = virt_a[31:0];

   // Candidate result when the top address bit is clear.
   wire [31:0] windowed_a    = {12'd0, virt_a[62:61], virt_a[17:0]};

   assign phys_a = virt_a[63] ? passthrough_a : windowed_a;
endmodule