jtag: Apply Martin Strubel JTAG implementation for ZPU
[zpu.git] / zpu / hdl / zealot / zpu_small.vhdl
blobcf4e189b1ce758bcfa08b1c4b52a6d8f63eaebe7
1 ------------------------------------------------------------------------------
2 ----                                                                      ----
3 ----  ZPU Small                                                           ----
4 ----                                                                      ----
5 ----  http://www.opencores.org/                                           ----
6 ----                                                                      ----
7 ----  Description:                                                        ----
8 ----  ZPU is a 32 bits small stack cpu. This is the small size version.   ----
9 ----  It doesn't support external memories, needs a dual ported memory.   ----
10 ----                                                                      ----
11 ----  To Do:                                                              ----
12 ----  -                                                                   ----
13 ----                                                                      ----
14 ----  Author:                                                             ----
15 ----    - Øyvind Harboe, oyvind.harboe zylin.com                          ----
16 ----    - Salvador E. Tropea, salvador inti.gob.ar                        ----
17 ----                                                                      ----
18 ------------------------------------------------------------------------------
19 ----                                                                      ----
20 ---- Copyright (c) 2008 Øyvind Harboe <oyvind.harboe zylin.com>           ----
21 ---- Copyright (c) 2008 Salvador E. Tropea <salvador inti.gob.ar>         ----
22 ---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial       ----
23 ----                                                                      ----
24 ---- Distributed under the BSD license                                    ----
25 ----                                                                      ----
26 ------------------------------------------------------------------------------
27 ----                                                                      ----
28 ---- Design unit:      ZPUSmallCore(Behave) (Entity and architecture)     ----
29 ---- File name:        zpu_small.vhdl                                     ----
30 ---- Note:             None                                               ----
31 ---- Limitations:      None known                                         ----
32 ---- Errors:           None known                                         ----
33 ---- Library:          zpu                                                ----
34 ---- Dependencies:     IEEE.std_logic_1164                                ----
35 ----                   IEEE.numeric_std                                   ----
36 ----                   zpu.zpupkg                                         ----
37 ---- Target FPGA:      Spartan 3 (XC3S1500-4-FG456)                       ----
38 ---- Language:         VHDL                                               ----
39 ---- Wishbone:         No                                                 ----
40 ---- Synthesis tools:  Xilinx Release 9.2.03i - xst J.39                  ----
41 ---- Simulation tools: GHDL [Sokcho edition] (0.2x)                       ----
42 ---- Text editor:      SETEdit 0.5.x                                      ----
43 ----                                                                      ----
44 ------------------------------------------------------------------------------
46 library IEEE;
47 use IEEE.std_logic_1164.ALL;
48 use IEEE.numeric_std.all;
50 library zpu;
51 use zpu.zpupkg.all;
entity ZPUSmallCore is
   generic
   (
      -- Data width, 16 or 32 bits
      WORD_SIZE  : integer   := 32;
      -- Width of the whole address space, I/O region included
      ADDR_W     : integer   := 16;
      -- Width of the memory holding program, data and stack
      MEM_W      : integer   := 15;
      -- Value driven on bits/registers whose content does not matter
      D_CARE_VAL : std_logic := 'X'
   );
   port
   (
      ------------------------------------------------------------------
      -- System
      ------------------------------------------------------------------
      clk_i       : in  std_logic;  -- System clock
      reset_i     : in  std_logic;  -- Synchronous reset
      interrupt_i : in  std_logic;  -- Interrupt request
      ------------------------------------------------------------------
      -- Emulation (debug) pins
      ------------------------------------------------------------------
      emureq_i    : in std_logic;   -- Emulation request coming from the TAP
      emuexec_i   : in std_logic;   -- Execute pulse, one clock cycle wide
      emuack_o    : out std_logic;  -- Emulation acknowledge towards the TAP
      emurdy_o    : out std_logic;  -- Emulation ready
      pulse_o     : out std_logic;  -- Debug pulse for an event counter
      emuir       : in std_logic_vector(OPCODE_W-1 downto 0); -- Opcode injected while in emulation
      break_o     : out std_logic;  -- A breakpoint opcode was executed
      dbg_o       : out zpu_dbgo_t; -- Debug outputs (i.e. trace log)
      ------------------------------------------------------------------
      -- Dual ported BRAM (text, data, bss and stack)
      ------------------------------------------------------------------
      -- Port A
      a_we_o      : out std_logic;  -- Write enable
      a_addr_o    : out unsigned(MEM_W-1 downto WORD_SIZE/16) := (others => '0'); -- Address
      a_o         : out unsigned(WORD_SIZE-1 downto 0) := (others => '0');        -- Data to BRAM
      a_i         : in  unsigned(WORD_SIZE-1 downto 0);                           -- Data from BRAM
      -- Port B
      b_we_o      : out std_logic;  -- Write enable
      b_addr_o    : out unsigned(MEM_W-1 downto WORD_SIZE/16) := (others => '0'); -- Address
      b_o         : out unsigned(WORD_SIZE-1 downto 0) := (others => '0');        -- Data to BRAM
      b_i         : in  unsigned(WORD_SIZE-1 downto 0);                           -- Data from BRAM
      ------------------------------------------------------------------
      -- Memory mapped I/O
      ------------------------------------------------------------------
      mem_busy_i  : in  std_logic;                       -- I/O access still in progress
      data_i      : in  unsigned(WORD_SIZE-1 downto 0);  -- Data read from I/O
      data_o      : out unsigned(WORD_SIZE-1 downto 0);  -- Data written to I/O
      addr_o      : out unsigned(ADDR_W-1 downto 0);     -- I/O address
      write_en_o  : out std_logic;                       -- I/O write strobe
      read_en_o   : out std_logic                        -- I/O read strobe
   );
end entity ZPUSmallCore;
91 architecture Behave of ZPUSmallCore is
92    constant MAX_ADDR_BIT : integer:=ADDR_W-2;
93    constant BYTE_BITS    : integer:=WORD_SIZE/16; -- # of bits in a word that addresses bytes
94    -- Stack Pointer initial value: BRAM size-8
95    constant SP_START_1   : unsigned(ADDR_W-1 downto 0):=to_unsigned((2**MEM_W)-8,ADDR_W);
96    constant SP_START     : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=
97                            SP_START_1(MAX_ADDR_BIT downto BYTE_BITS);
98    constant IO_BIT       : integer:=ADDR_W-1; -- Address bit to determine this is an I/O
100    -- Program counter
101    signal pc_r           : unsigned(MAX_ADDR_BIT downto 0):=(others => '0');
102    -- Stack pointer
103    signal sp_r           : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=SP_START;
104    signal idim_r         : std_logic:='0';
106    signal idim_save_r    : std_logic;
108    -- BRAM (text, data, bss and stack)
109    -- a_r is a register for the top of the stack [SP]
110    -- Note: as this is a stack CPU this is a very important register.
111    signal a_we_r         : std_logic:='0';
112    signal a_addr_r       : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0');
113    signal a_r            : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
114    -- b_r is a register for the next value in the stack [SP+1]
115    -- We also use the B port to fetch instructions.
116    signal b_we_r         : std_logic:='0';
117    signal b_addr_r       : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0');
118    signal b_r            : unsigned(WORD_SIZE-1 downto 0):=(others => '0');
120    -- State machine.
121    type state_t is (st_fetch, st_write_io_done, st_execute, st_add, st_or,
122                     st_and, st_store, st_read_io, st_write_io, st_fetch_next,
123                     st_add_sp, st_decode, st_resync, st_emulation);
124    signal state          : state_t:=st_resync;
126    -- Decoded Opcode
127    type decode_t is (dec_nop, dec_im, dec_load_sp, dec_store_sp, dec_add_sp,
128                      dec_emulate, dec_break, dec_push_sp, dec_pop_pc, dec_add,
129                      dec_or, dec_and, dec_load, dec_not, dec_flip, dec_store,
130                      dec_pop_sp, dec_interrupt);
131    signal d_opcode_r     : decode_t;
132    signal d_opcode       : decode_t;
134    signal opcode         : unsigned(OPCODE_W-1 downto 0); -- Decoded
135    signal opcode_r       : unsigned(OPCODE_W-1 downto 0); -- Registered
137    -- '1' when we are in IC emulation
138    signal in_emu         : std_logic := '0';
139    signal break          : std_logic := '0'; -- emulation cause: breakpoint
140    signal ready          : std_logic := '0';
141    signal exec           : std_logic := '0'; -- Exec strobe
142    signal reset_exec     : std_logic := '0'; -- exec pulse reset
144    -- IRQ flag
145    signal in_irq_r       : std_logic:='0';
146    -- I/O space address
147    signal addr_r         : unsigned(ADDR_W-1 downto 0):=(others => '0');
148 begin
149    -- Dual ported memory interface
150    a_we_o    <= a_we_r;
151    a_addr_o  <= a_addr_r(MEM_W-1 downto BYTE_BITS);
152    a_o       <= a_r;
153    b_we_o    <= b_we_r;
154    b_addr_o  <= b_addr_r(MEM_W-1 downto BYTE_BITS);
155    b_o       <= b_r;
157    -------------------------
158    -- Instruction Decoder --
159    -------------------------
160    -- Note: We use Port B memory to fetch the opcodes.
161    decode_control:
162    process(b_i, pc_r, in_emu, exec, emuir)
163       variable topcode : unsigned(OPCODE_W-1 downto 0);
164    begin
165       -- When in emulation, get opcode from emuir
166       if in_emu = '1' and exec = '1' then
167          topcode := unsigned(emuir);
168       else
169          -- Select the addressed byte inside the fetched word
170          case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is
171               when 0 =>
172                    topcode:=b_i(31 downto 24);
173               when 1 =>
174                    topcode:=b_i(23 downto 16);
175               when 2 =>
176                    topcode:=b_i(15 downto 8);
177               when others => -- 3
178                    topcode:=b_i(7 downto 0);
179          end case;
180       end if;
182       opcode <= topcode;
184       if (topcode(7 downto 7)=OPCODE_IM) then
185          d_opcode <= dec_im;
186       elsif (topcode(7 downto 5)=OPCODE_STORESP) then
187          d_opcode <= dec_store_sp;
188       elsif (topcode(7 downto 5)=OPCODE_LOADSP) then
189          d_opcode <= dec_load_sp;
190       elsif (topcode(7 downto 5)=OPCODE_EMULATE) then
191          d_opcode <= dec_emulate;
192       elsif (topcode(7 downto 4)=OPCODE_ADDSP) then
193          d_opcode <= dec_add_sp;
194       else -- OPCODE_SHORT
195          case topcode(3 downto 0) is
196               when OPCODE_BREAK =>
197                    d_opcode <= dec_break;
198               when OPCODE_PUSHSP =>
199                    d_opcode <= dec_push_sp;
200               when OPCODE_POPPC =>
201                    d_opcode <= dec_pop_pc;
202               when OPCODE_ADD =>
203                    d_opcode <= dec_add;
204               when OPCODE_OR =>
205                    d_opcode <= dec_or;
206               when OPCODE_AND =>
207                    d_opcode <= dec_and;
208               when OPCODE_LOAD =>
209                    d_opcode <= dec_load;
210               when OPCODE_NOT =>
211                    d_opcode <= dec_not;
212               when OPCODE_FLIP =>
213                    d_opcode <= dec_flip;
214               when OPCODE_STORE =>
215                    d_opcode <= dec_store;
216               when OPCODE_POPSP =>
217                    d_opcode <= dec_pop_sp;
218               -- when OPCODE_POPINT => -- Used to return from emulation
219                    -- d_opcode <= dec_emuleave;
220               when others => -- OPCODE_NOP and others
221                    d_opcode <= dec_nop;
222          end case;
223       end if;
224    end process decode_control;
-- Exec strobe latch.
-- Stretches the one clock wide emuexec_i pulse into the exec level, which
-- stays high until the state machine asks to clear it with reset_exec.
-- A new emuexec_i pulse has priority over a pending clear request.
-- The process is fully synchronous, so clk_i is the only signal needed in
-- the sensitivity list (reset_exec is sampled on the clock edge, it is not
-- an asynchronous reset).
trigger_exec:
   process (clk_i)
   begin
      if rising_edge(clk_i) then
         if emuexec_i = '1' then
            exec <= '1';
         elsif reset_exec = '1' then
            exec <= '0';
         end if;
      end if;
   end process trigger_exec;
238    data_o <= b_i;
239    opcode_control:
240    process (clk_i)
241       variable sp_offset : unsigned(4 downto 0);
242    begin
243       if rising_edge(clk_i) then
244          write_en_o   <= '0';
245          read_en_o    <= '0';
246          dbg_o.b_inst <= '0';
247          if reset_i='1' then
248             state    <= st_resync;
249             sp_r     <= SP_START;
250             pc_r     <= (others => '0');
251             idim_r   <= '0';
252             a_addr_r <= (others => '0');
253             b_addr_r <= (others => '0');
254             a_we_r   <= '0';
255             b_we_r   <= '0';
256             a_r      <= (others => '0');
257             b_r      <= (others => '0');
258             in_irq_r <= '0';
259             addr_r   <= (others => '0');
260          else -- reset_i/='1'
261             a_we_r <= '0';
262             b_we_r <= '0';
263             -- This saves LUTs, by explicitly declaring that the
264             -- a_o can be left at whatever value if a_we_r is
265             -- not set.
266             a_r <= (others => D_CARE_VAL);
267             b_r <= (others => D_CARE_VAL);
268             sp_offset:=(others => D_CARE_VAL);
269             a_addr_r   <= (others => D_CARE_VAL);
270             b_addr_r   <= (others => D_CARE_VAL);
271             addr_r     <= a_i(ADDR_W-1 downto 0);
272             d_opcode_r <= d_opcode;
273             opcode_r   <= opcode;
274             if interrupt_i='0' then
275                in_irq_r <= '0'; -- no longer in an interrupt
276             end if;
279             reset_exec <= '0';
280    
281             case state is
282                  when st_execute =>
283                       state <= st_fetch;
284                       -- At this point:
285                       -- b_i contains opcode word
286                       -- a_i contains top of stack
287                       if in_emu ='0' then
288                          pc_r <= pc_r+1;
289                       end if;
290           
291                       -- Debug info (Trace)
292                       dbg_o.b_inst <= '1';
293                       dbg_o.pc <= (others => '0');
294                       dbg_o.pc(MAX_ADDR_BIT downto 0) <= pc_r;
295                       dbg_o.opcode <= opcode_r;
296                       dbg_o.sp <= (others => '0');
297                       dbg_o.sp(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r;
298                       dbg_o.stk_a <= a_i;
299                       dbg_o.stk_b <= b_i;
300                       dbg_o.idim <= idim_r;
301        
302                       -- During the next cycle we'll be reading the next opcode
303                       sp_offset(4):=not opcode_r(4);
304                       sp_offset(3 downto 0):=opcode_r(3 downto 0);
305           
306                       idim_r <= '0';
308                       --------------------
309                       -- Execution Unit --
310                       --------------------
311                       case d_opcode_r is
312                            when dec_interrupt =>
313                                 -- Not a real instruction, but an interrupt
314                                 -- Push(PC); PC=32
315                                 sp_r      <= sp_r-1;
316                                 a_addr_r  <= sp_r-1;
317                                 a_we_r    <= '1';
318                                 a_r       <= (others => D_CARE_VAL);
319                                 a_r(MAX_ADDR_BIT downto 0) <= pc_r;
320                                 -- Jump to ISR
321                                 pc_r <= to_unsigned(32,MAX_ADDR_BIT+1); -- interrupt address
322                                 --report "ZPU jumped to interrupt!" severity note;
323                            when dec_im =>
324                                 idim_r <= '1';
325                                 a_we_r <= '1';
326                                 if idim_r='0' then
327                                    -- First IM
328                                    -- Push the 7 bits (extending the sign)
329                                    sp_r     <= sp_r-1;
330                                    a_addr_r <= sp_r-1;
331                                    a_r <= unsigned(resize(signed(opcode_r(6 downto 0)),WORD_SIZE));
332                                 else
333                                    -- Next IMs, shift the word and put the new value in the lower
334                                    -- bits
335                                    a_addr_r <= sp_r;
336                                    a_r(WORD_SIZE-1 downto 7) <= a_i(WORD_SIZE-8 downto 0);
337                                    a_r(6 downto 0) <= opcode_r(6 downto 0);
338                                 end if;
339                            when dec_store_sp =>
340                                 -- [SP+Offset]=Pop()
341                                 b_we_r   <= '1';
342                                 b_addr_r <= sp_r+sp_offset;
343                                 b_r      <= a_i;
344                                 sp_r     <= sp_r+1;
345                                 state    <= st_resync;
346                            when dec_load_sp =>
347                                 -- Push([SP+Offset])
348                                 sp_r     <= sp_r-1;
349                                 a_addr_r <= sp_r+sp_offset;
350                            when dec_emulate =>
351                                 -- Push(PC+1), PC=Opcode[4:0]*32
352                                 sp_r     <= sp_r-1;
353                                 a_we_r   <= '1';
354                                 a_addr_r <= sp_r-1;
355                                 a_r <= (others => D_CARE_VAL);
356                                 a_r(MAX_ADDR_BIT downto 0) <= pc_r+1;
357                                 -- Jump to NUM*32
358                                 -- The emulate address is:
359                                 --        98 7654 3210
360                                 -- 0000 00aa aaa0 0000
361                                 pc_r <= (others => '0');
362                                 pc_r(9 downto 5) <= opcode_r(4 downto 0);
363                            when dec_add_sp =>
364                                 -- Push(Pop()+[SP+Offset])
365                                 a_addr_r <= sp_r;
366                                 b_addr_r <= sp_r+sp_offset;
367                                 state    <= st_add_sp;
368                            when dec_break =>
369                                 -- Hit breakpoint, enter emulation
370                                 if in_emu = '0' then
371                                    in_emu <= '1';
372                                    break <= '1';
373                                    idim_save_r <= idim_r; -- save idim flag
374                                    state <= st_emulation;
375                                 else
376                                    -- Leave emulation:
377                                    idim_r <= idim_save_r; -- restore idim flag
378                                    break <= '0';
379                                    in_emu <= '0';
380                                    b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS);
381                                    state    <= st_fetch_next;
382                                 end if;
383                            when dec_push_sp =>
384                                 -- Push(SP)
385                                 sp_r     <= sp_r-1;
386                                 a_we_r   <= '1';
387                                 a_addr_r <= sp_r-1;
388                                 a_r <= (others => D_CARE_VAL);
389                                 a_r(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r;
390                            when dec_pop_pc =>
391                                 -- Pop(PC)
392                                 pc_r  <= a_i(MAX_ADDR_BIT downto 0);
393                                 sp_r  <= sp_r+1;
394                                 state <= st_resync;
395                            when dec_add =>
396                                 -- Push(Pop()+Pop())
397                                 sp_r  <= sp_r+1;
398                                 state <= st_add;
399                            when dec_or =>
400                                 -- Push(Pop() or Pop())
401                                 sp_r  <= sp_r+1;
402                                 state <= st_or;
403                            when dec_and =>
404                                 -- Push(Pop() and Pop())
405                                 sp_r  <= sp_r+1;
406                                 state <= st_and;
407                            when dec_load =>
408                                 -- Push([Pop()])
409                                 if a_i(IO_BIT)='1' then
410                                    addr_r    <= a_i(ADDR_W-1 downto 0);
411                                    read_en_o <= '1';
412                                    state     <= st_read_io;
413                                 else
414                                    a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
415                                 end if;
416                            when dec_not =>
417                                 -- Push(not(Pop()))
418                                 a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS);
419                                 a_we_r   <= '1';
420                                 a_r      <= not a_i;
421                            when dec_flip =>
422                                 -- Push(flip(Pop()))
423                                 a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS);
424                                 a_we_r   <= '1';
425                                 for i in 0 to WORD_SIZE-1 loop
426                                    a_r(i) <= a_i(WORD_SIZE-1-i);
427                                 end loop;
428                            when dec_store =>
429                                 -- a=Pop(), b=Pop(), [a]=b
430                                 b_addr_r <= sp_r+1;
431                                 sp_r     <= sp_r+1;
432                                 if a_i(IO_BIT)='1' then
433                                    state <= st_write_io;
434                                 else
435                                    state <= st_store;
436                                 end if;
437                            when dec_pop_sp =>
438                                 -- SP=Pop()
439                                 sp_r  <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
440                                 state <= st_resync;
441                            when dec_nop =>
442                                 -- Default, keep addressing to of the stack (A)
443                                 a_addr_r <= sp_r;
444                            when others =>
445                                 null;
446                       end case;
447                  when st_read_io =>
448                       -- Wait until memory I/O isn't busy
449                       if mem_busy_i='0' then
450                          state  <= st_fetch;
451                          a_we_r <= '1';
452                          a_r    <= data_i;
453                       end if;
454                  when st_write_io =>
455                       -- [A]=B
456                       sp_r       <= sp_r+1;
457                       write_en_o <= '1';
458                       addr_r     <= a_i(ADDR_W-1 downto 0);
459                       state      <= st_write_io_done;
460                  when st_write_io_done =>
461                       -- Wait until memory I/O isn't busy
462                       if mem_busy_i='0' then
463                          state <= st_resync;
464                       end if;
465                  when st_fetch =>
466                       -- We need to resync. During the *next* cycle
467                       -- we'll fetch the opcode @ pc and thus it will
468                       -- be available for st_execute the cycle after
469                       -- next
471                       -- If we just entered emulation, save idim flag
472                       -- and mark we're in emulation.
473                       if emureq_i = '1' and in_emu = '0' then
474                          in_emu <= '1';
475                          idim_save_r <= idim_r; -- save idim flag
476                       end if;
477                       b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS);
478                       state    <= st_fetch_next;
479                  when st_fetch_next =>
480                       -- At this point a_i contains the value that is either
481                       -- from the top of stack or should be copied to the top of the stack
482                       a_we_r   <= '1';
483                       a_r      <= a_i;
484                       a_addr_r <= sp_r;
485                       b_addr_r <= sp_r+1;
486                       state    <= st_decode;
487                       reset_exec <= '1';
488                  when st_decode =>
489                       state    <= st_execute;
490                       if in_emu = '1' then
491                          state    <= st_emulation;
492                       elsif interrupt_i='1' and in_irq_r='0' and idim_r='0' then
493                          -- We got an interrupt, execute interrupt instead of next instruction
494                          in_irq_r   <= '1';
495                          d_opcode_r <= dec_interrupt;
496                       end if;
497                       -- during the st_execute cycle we'll be fetching SP+1
498                       a_addr_r <= sp_r;
499                       b_addr_r <= sp_r+1;
500                  when st_store =>
501                       sp_r     <= sp_r+1;
502                       a_we_r   <= '1';
503                       a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS);
504                       a_r      <= b_i;
505                       state    <= st_resync;
506                  when st_add_sp =>
507                       state <= st_add;
508                  when st_add =>
509                       a_addr_r <= sp_r;
510                       a_we_r   <= '1';
511                       a_r      <= a_i+b_i;
512                       state    <= st_fetch;
513                  when st_or =>
514                       a_addr_r <= sp_r;
515                       a_we_r   <= '1';
516                       a_r      <= a_i or b_i;
517                       state    <= st_fetch;
518                  when st_and =>
519                       a_addr_r <= sp_r;
520                       a_we_r   <= '1';
521                       a_r      <= a_i and b_i;
522                       state    <= st_fetch;
523                  when st_resync =>
524                       a_addr_r <= sp_r;
525                       state    <= st_fetch;
526                  when st_emulation =>
527                       a_addr_r <= sp_r;
528                       b_addr_r <= sp_r+1;
530                       if exec = '1' then
531                          state    <= st_execute;
532                       else
533                          state    <= st_emulation;
534                       end if;
535                  when others =>
536                       null;
537             end case;
538          end if; -- else reset_i/='1'
539       end if; -- rising_edge(clk_i)
540    end process opcode_control;
541    addr_o <= addr_r;
-- Emulation status exported to the outside:
   -- The core is ready whenever it idles in the emulation state
   ready    <= '0' when state /= st_emulation else '1';
   -- Acknowledge: we are in emulation
   emuack_o <= in_emu;
   -- Breakpoint indication
   break_o  <= break;
   -- Ready for a new opcode: idle in emulation and no exec pending
   emurdy_o <= (not exec) and ready;
   -- The exec strobe doubles as debug pulse
   pulse_o  <= exec;
552 end architecture Behave; -- Entity: ZPUSmallCore