1 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; and now
for something completely different
...
5 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6 ;; Z80 Assembler
: input stream lexer
7 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10 on left paren
, lexer tries
to look forward
to create a better token
:
11 (HL
) -> uo
-r8
, kind
-(hl
)
12 (I
[XY
]) -> uo
-mrx16 kind
-i
[xy
]-none
13 (I
[XY
]+ -> uo
-mrx16 kind
-i
[xy
]-add
14 (I
[XY
]- -> uo
-mrx16 kind
-i
[xy
]-sub
16 (BC
), (DE
), (SP
) -> uo
-mr16 with proper kind
18 "C" is always r8
. to convert it
to condition
, use
"token-c-to-cond".
19 to convert
"(HL)" to r8 use
"token-(hl)->r8"
23 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
24 ;; simple expression evaluator
27 ;; cannot include instrs here
28 vocab
-if-none asm
-instr
31 vocab
-if-none asm
-lexer
35 false value allow
-continuations
36 ;; for label detection
40 ;; one
-char and two
-char strings sets
"TOK-NUM" too
41 ;; number sign is not parsed
47 def
: tk
-punct
;; punctuation
; sets both
"TK-STR" and
"TK-NUM" (to char code
)
48 def
: tk
-label
;; line start
, word ends with
":"; token is without
":"
49 def
: tk
-mnemo
;; mnemonics
;; TK
-STR contains a word
(uppercased
); TOK
-MNEMO
-CFA is CFA
50 def
: tk
-resw
;; reserved word
; TK
-STR contains a word
(uppercased
); TOK
-UO is UO
-xxx
; TOK
-KIND is kind
55 0 value tok
-type
;; TK
-xxx
58 0 value tok
-kind
;; for reserved words
62 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
68 : warning
( addr count
-- )
70 0 (include
-file
-name
) xtype
." :" 0 (include
-file
-line
) 0 .r
75 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
81 : is
-digit
( ch
-- bool
)
82 [char
] 0 [char
] 9 bounds?
85 : is
-bin
-digit
( ch
-- bool
)
86 [char
] 0 [char
] 1 bounds?
89 : is
-oct
-digit
( ch
-- bool
)
90 [char
] 0 [char
] 7 bounds?
93 : is
-hex
-digit
( ch
-- bool
)
94 dup
[char
] 0 [char
] 9 bounds?
95 over
[char
] A
[char
] F bounds? or
96 swap
[char
] a
[char
] f bounds? or
99 : is
-alpha
( ch
-- bool
)
100 dup
[char
] A
[char
] Z bounds?
101 swap
[char
] a
[char
] z bounds? or
104 : is
-under
-dot
( ch
-- bool
)
109 : is
-alpha
-under
-dot
( ch
-- bool
)
110 dup is
-alpha swap is
-under
-dot or
113 : is
-alnum
( ch
-- bool
)
114 dup is
-alpha swap is
-digit or
117 : is
-id
-start
( ch
-- bool
)
118 dup is
-alpha swap is
-under
-dot or
121 : is
-id
-char
( ch
-- bool
)
122 dup is
-id
-start swap is
-digit or
126 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
131 tok
-type tk
-punct
= swap tok
-num
= and
134 : punct2?
( char
-- )
135 tok
-type tk
-punct
= swap
0xff and dup
8 ash or tok
-num
= and
139 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
140 ;; input stream tokenizer
144 ;; return current token string
145 ;; valid until
"next-token" is called
146 : token
( -- addr count
) tok
-addr tok
-len
;
149 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
156 tk
-eos of
." <EOS>\n" endof
157 tk
-id of
." <ID>; str=<" token xtype
." >\n" endof
158 tk
-num of
." <NUM>; num=" tok
-num
. ." str=<" token xtype
." >\n" endof
159 tk
-str of
." <STR>; str=<" token xtype
." >\n" endof
160 tk
-punct of
." <PUNCT>; num=" tok
-num
. ." str=<" token xtype
." >\n" endof
161 tk
-label of
." <LABEL>; str=<" token xtype
." >\n" endof
162 tk
-mnemo of
." <MNEMO>; str=<" token xtype
." > cfa=" tok
-mnemo
-cfa
0 .r cr endof
163 tk
-resw of
." <RES-WORD>; str=<" token xtype
." > uo=" tok
-uo
. ." kind=" tok
-kind
0 .r cr endof
164 otherwise
. " wutafuck?" error
169 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174 : char
-upper
( ch
-- )
175 dup
[char
] a
[char
] z bounds?
if 32 - endif
178 : (init
-tok
-addr
) ( -- ) (tib
-in
) to tok
-addr
;
179 : (init
-tok
) ( -- ) (init
-tok
-addr
) 0 to tok
-len
;
180 ;; set length according
to the current TIB position
181 ;; (init
-tok
-addr
) must be already called
182 : (tok
-fix
-length
) ( -- ) (tib
-in
) tok
-addr
- to tok
-len
;
184 : (set
-eos
) ( -- ) (init
-tok
) tk
-eos
to tok
-type
;
186 : (collect
-punct
) ( -- )
190 tib
-peekch
[char
] & = if tib
-skipch
endif
193 tib
-peekch
[char
] |
= if tib
-skipch
endif
197 [char
] = of tib
-skipch endof
198 [char
] < of tib
-skipch endof
199 [char
] > of tib
-skipch endof
204 [char
] = of tib
-skipch endof
205 [char
] > of tib
-skipch endof
209 tib
-peekch
[char
] = = if tib
-skipch
endif
212 tib
-peekch
[char
] = = if tib
-skipch
endif
216 tok
-addr tok
-len case
219 3 of dup w@ swap
2+ c@
16 lsh or endof
221 otherwise
" wut?!" error
226 : (collect
) ( checkcfa
-- )
227 >r begin tib
-peekch dup
0<> swap r@ execute and
233 : (collect
-id
) ( -- )
234 (init
-tok
) ['] is-id-char (collect)
238 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
239 ;; identifier classifiers
242 : (classify-id-mnemo?) ( -- success-bool )
243 token vocid: asm-instr find-word-in-voc
245 swap to tok-mnemo-cfa
252 tok
-kind asm
-resw
:kind
-af
= if
253 tib
-peekch
[char
] ' = if
254 tib-skipch (tok-fix-length)
255 asm-resw:uo-afx to tok-uo
260 : (classify-id-resw?) ( -- success-bool )
261 token vocid: asm-resw find-word-in-voc
262 dup if drop ( cfa: UO-xxx kind )
263 cfa->pfa dup @ to tok-uo
272 : (classify
-id
-label
-colon?
) ( -- success
-bool
)
273 line
-start dup
if drop
275 tib
-peekch
[char
] : = if tib
-skipch
277 tok
-addr c@ is
-digit
" invalid label" ?error
284 : (classify
-id
-label?
) ( -- success
-bool
)
285 line
-start dup
if drop
287 tok
-addr c@ is
-digit not
if
296 : (classify
-id
-number?
) ( -- success
-bool
)
297 tok
-addr c@ is
-digit
if
298 token false base @
(based
-number
) " invalid number" ?not
-error
306 : (classify
-id
-maybe
-number?
) ( -- success
-bool
)
307 tok
-addr c@ is
-id
-char not
if
308 token false
(based
-number
) if
319 ' (classify-id-mnemo?) ,
320 ' (classify
-id
-resw?
) ,
321 ' (classify-id-label-colon?) ,
322 ' (classify
-id
-label?
) ,
323 ' (classify-id-number?) ,
324 ' (classify
-id
-maybe
-number?
) ,
328 : (run
-classifiers
) ( arr
-addr
-- )
330 while ( addr cfa
) execute
if drop exit
endif cell
+
334 : (classify
-id
) ( -- )
335 tok
-len
127 > " token too long" ?error
336 (classifiers
) (run
-classifiers
)
339 : (classify
-id
-no
-label
) ( -- )
341 tok
-len
127 > " token too long" ?error
342 \
(classifiers
-no
-label
) (run
-classifiers
)
343 (classify
-id
-mnemo?
) drop
347 : (classify
-id
-label
) ( -- )
349 tok
-len
127 > " token too long" ?error
350 \
(classifiers
-no
-label
) (run
-classifiers
)
351 (classify
-id
-label
-colon?
) ifnot
(classify
-id
-label?
) drop
endif
355 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
356 ;; number
-like collectors
359 : (other
-pfx
-dblch
) ( ch
-- )
361 >r tib
-peekch r@
= if ;; "$$", etc
.
362 tib
-skipch tib
-peekch r
> = " invalid identifier" ?error
363 tib
-peekch is
-id
-char
if
364 +1-to tok
-addr
;; convert
"@@f" to "@f"
365 ['] is-id-char (collect)
366 else (tok-fix-length) endif
367 line-start if tk-label to tok-type endif
368 else rdrop ['] is
-id
-char
(collect
) endif
369 tok
-len
127 > " token too long" ?error
372 ;; we are at one
-char prefix
, it may be a number
373 ;; it it isn
't, colled as punctuation
374 : (try-pfx-num) ( checkcfa -- )
376 tib-peekch swap execute if
377 ['] is
-id
-char
(collect
)
378 ;; (XNUMBER
) ( addr count allowsign?
-- num TRUE
/ FALSE
)
379 token false base @
(based
-number
) ifnot
380 (classify
-id
-mnemo?
) " invalid number" ?not
-error
382 to tok
-num tk
-num
to tok
-type
384 else ;; "$" is identifier
385 tok
-addr c@
[char
] $
= if [char
] $
(other
-pfx
-dblch
) endif
390 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
394 : byte
-swap
( word
-- word
)
396 swap
-8 lsh
0xff and or
399 ;;FIXME
: escape support?
400 : (collect
-char
-string
) ( -- )
402 (init
-tok
) begin tib
-getch dup
" unterminated string" ?not
-error
404 (tok
-fix
-length
) tok
-len
0> " invalid string" ?not
-error
405 -1-to tok
-len
;; final quote fix
408 1 of tok
-addr c@
to tok
-num endof
410 tok
-addr w@ r@
[char
] " <> if byte-swap endif ;; "
413 otherwise drop
-1 to tok
-num
419 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
420 ;; parse
"(" (extended
)
423 : (skip
-chars
) ( count
-- ) for tib
-skipch endfor
;
425 : (blank
-char?
) ( ch
-- ) dup
0> swap bl
<= and
;
427 ;; skip blanks until non
-blank or EOL
428 ;; eofs is at non
-blank or EOL
429 : (lparen
-skip
-blanks
) ( stofs
-- eofs char
)
430 begin
1+ dup
1- tib
-peekch
-ofs
(blank
-char?
) not
-until
1-
434 ;; skip blanks
, check
for ")"
436 : (check
-rparen
) ( stofs
-- eofs TRUE
/ FALSE
)
438 [char
] ) = if 1+ true
else drop false
endif
441 ;; "(c"; TIB is at
"c"
442 : (try
-lparen
-c
) ( -- )
444 (skip
-chars
) (tok
-fix
-length
)
446 asm
-resw
:uo
-portc
to tok
-uo
450 ;; "(.."; TIB is at the first char
451 : (try
-lparen
-r16
) ( kind char2
-- )
452 1 tib
-peekch
-ofs char
-upper
= if
454 (skip
-chars
) (tok
-fix
-length
)
456 asm
-resw
:uo
-mr16
to tok
-uo
464 : (try
-lparen
-bc
) ( -- ) asm
-resw
:kind
-bc
[char
] C
(try
-lparen
-r16
) ;
465 : (try
-lparen
-de
) ( -- ) asm
-resw
:kind
-de
[char
] E
(try
-lparen
-r16
) ;
466 : (try
-lparen
-sp
) ( -- ) asm
-resw
:kind
-sp
[char
] P
(try
-lparen
-r16
) ;
467 : (try
-lparen
-hl
) ( -- )
468 asm
-resw
:kind
-(hl
) [char
] L
(try
-lparen
-r16
)
469 tok
-uo asm
-resw
:uo
-mr16
= if asm
-resw
:uo
-r8
to tok
-uo
endif
472 ;; most
complex checker
: "(i[xy][+-]"
474 : (try
-lparen
-ixy
) ( -- )
475 ;; parse
, put initial kind on the stack
476 1 tib
-peekch
-ofs char
-upper case
477 [char
] X of asm
-resw
:kind
-ix
-none endof
478 [char
] Y of asm
-resw
:kind
-iy
-none endof
481 ( kind
; check
if we have
+ or
- )
482 2 (lparen
-skip
-blanks
) ( base
-kind ch
-ofs char
)
484 [char
] + of asm
-resw
:kind
-ix
-add endof
485 [char
] - of asm
-resw
:kind
-ix
-sub endof
489 ( base
-kind ch
-ofs ext
-kind
)
490 rot
+ swap
1+ (skip
-chars
) (tok
-fix
-length
)
492 asm
-resw
:uo
-mrx16
to tok
-uo
497 ;; something that starts with
"(".
499 ;; uo
-mr16
-- "(r16)"
500 ;; uo
-mrx16
-- "(i[xy][+-]"
502 : (check
-lparen
) ( -- )
504 parse
-skip
-blanks
;; why not? allow
"( hl )"
505 tib
-peekch char
-upper case
506 [char
] I of
(try
-lparen
-ixy
) endof
507 [char
] C of
(try
-lparen
-c
) endof
508 [char
] B of
(try
-lparen
-bc
) endof
509 [char
] D of
(try
-lparen
-de
) endof
510 [char
] H of
(try
-lparen
-hl
) endof
511 [char
] S of
(try
-lparen
-sp
) endof
516 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
517 ;; read and classify next token from the input stream
522 \
." =====NEXT-TOKEN!" cr debug
:dump
-stack
525 0 of
(set
-eos
) true endof
526 bl
<=of tib
-skipch false endof
527 [char
] ; of
(set
-eos
) parse
-skip
-line true endof
528 [char
] % of
['] is-bin-digit (try-pfx-num) true endof
529 [char] $ of ['] is
-hex
-digit
(try
-pfx
-num
) (classify
-id
-no
-label
) true endof
530 [char
] # of
['] is-hex-digit (try-pfx-num) tok-type tk-num <> " hex number expected" ?error true endof
532 (tib-in) 1+ c@ char-upper
534 [char] H of true endof
535 [char] B of (tib-in) 2+ c@ is-bin-digit endof
536 [char] O of true endof
539 if ['] is
-hex
-digit
(try
-pfx
-num
) tok
-type tk
-num
<> " hex number expected" ?error
540 else (collect
-punct
) endif
544 (collect
-punct
) (check
-lparen
)
547 [char
] @ of
(collect
-punct
) [char
] @
(other
-pfx
-dblch
) (classify
-id
-label
) true endof
548 [char
] ' of (collect-char-string) true endof ;; '
549 [char
] " of (collect-char-string) true endof ;; "
550 [char
] ` of
(collect
-char
-string
) true endof
;; `
551 [char
] \ of
;; possible continuation
553 allow
-continuations
if
556 else refill
-nocross
" unexpected end of file in line continuation" ?not
-error false
562 is
-id
-char
if (collect
-id
) (classify
-id
) else (collect
-punct
) endif
567 \
." |" TOKEN XTYPE
." | -- " TIB
-PEEKCH XEMIT CR
572 : expect
-first
-token
( -- )
573 tib @ begin dup c@ dup
0> swap bl
<= and
while 1+ repeat
574 tok
-addr
<> " must be the first token in line" ?error
577 : token
-eol?
( -- bool
) tok
-type tk
-eos
= ;
578 : token
-colon?
( -- bool
) tok
-type tk
-punct
= tok
-num
[char
] : = and
;
579 : token
-comma?
( -- bool
) tok
-type tk
-punct
= tok
-num
[char
] , = and
;
580 : token
-lparen?
( -- bool
) tok
-type tk
-punct
= tok
-num
[char
] ( = and
; ;; )
581 : token
-rparen?
( -- bool
) tok
-type tk
-punct
= tok
-num
[char
] ) = and
;
582 : token
-=?
( -- bool
) tok
-type tk
-punct
= tok
-num
[char
] = = and
;
583 : token
-str?
( -- bool
) tok
-type tk
-str
= ;
584 : token
-id?
( -- bool
) tok
-type tk
-id
= ;
585 : token
-num?
( -- bool
) tok
-type tk
-num
= ;
587 ;; are we at
end of line
(real EOL
, or
":")?
588 : token
-colon
-eol?
( -- bool
) token
-eol? token
-colon? or
;
590 : token
-resw?
( -- bool
) tok
-type tk
-resw
= ;
591 : token
-uo?
( uo
-- bool
) tok
-uo
= token
-resw? and
;
592 : token
-uo
-kind?
( uo kind
-- bool
) tok
-kind
= swap tok
-uo
= and token
-resw? and
;
594 : token
-regA?
( -- bool
) asm
-resw
:uo
-r8 asm
-resw
:kind
-a token
-uo
-kind?
;
595 : token
-regIR?
( -- bool
) asm
-resw
:uo
-sr8 token
-uo?
;
596 : token
-r8?
( -- bool
) asm
-resw
:uo
-r8 token
-uo?
;
597 : token
-rx8?
( -- bool
) asm
-resw
:uo
-rx8 token
-uo?
;
598 : token
-r16?
( -- bool
) asm
-resw
:uo
-r16 token
-uo?
;
599 : token
-af?
( -- bool
) asm
-resw
:uo
-r16 asm
-resw
:kind
-af token
-uo
-kind?
;
600 : token
-afx?
( -- bool
) asm
-resw
:uo
-afx token
-uo?
;
601 : token
-bc?
( -- bool
) asm
-resw
:uo
-r16 asm
-resw
:kind
-bc token
-uo
-kind?
;
602 : token
-de?
( -- bool
) asm
-resw
:uo
-r16 asm
-resw
:kind
-de token
-uo
-kind?
;
603 : token
-hl?
( -- bool
) asm
-resw
:uo
-r16 asm
-resw
:kind
-hl token
-uo
-kind?
;
604 : token
-sp?
( -- bool
) asm
-resw
:uo
-r16 asm
-resw
:kind
-sp token
-uo
-kind?
;
605 : token
-ix?
( -- bool
) asm
-resw
:uo
-rx16 asm
-resw
:kind
-ix token
-uo
-kind?
;
606 : token
-iy?
( -- bool
) asm
-resw
:uo
-rx16 asm
-resw
:kind
-iy token
-uo
-kind?
;
607 : token
-ixy?
( -- bool
) asm
-resw
:uo
-rx16 token
-uo?
;
608 : token
-mr16?
( -- bool
) asm
-resw
:uo
-mr16 token
-uo?
;
609 : token
-(hl
)?
( -- bool
) asm
-resw
:uo
-r8 asm
-resw
:kind
-(hl
) token
-uo
-kind?
;
610 : token
-(bc
)?
( -- bool
) asm
-resw
:uo
-mr16 asm
-resw
:kind
-bc token
-uo
-kind?
;
611 : token
-(sp
)?
( -- bool
) asm
-resw
:uo
-mr16 asm
-resw
:kind
-sp token
-uo
-kind?
;
612 : token
-(ixy
)?
( -- bool
) asm
-resw
:uo
-mrx16 token
-uo?
;
613 : token
-(c
)?
( -- bool
) asm
-resw
:uo
-portc token
-uo?
;
614 : token
-r16
-n?
( -- bool
) asm
-resw
:uo
-r16 token
-uo? tok
-kind asm
-resw
:kind
-sp
<= and
;
615 : token
-r16
-x?
( -- bool
) asm
-resw
:uo
-r16 token
-uo? tok
-kind asm
-resw
:kind
-hl
<= and
;
616 : token
-mr16
-x?
( -- bool
) asm
-resw
:uo
-mr16 token
-uo? tok
-kind asm
-resw
:kind
-hl
<= and
;
617 : token
-reg
-c?
( -- bool
) asm
-resw
:uo
-r8 asm
-resw
:kind
-c token
-uo
-kind?
;
618 : token
-equ?
( -- bool
) asm
-resw
:uo
-equ token
-uo?
;
619 : token
-(c
)/(bc
)?
( -- bool
) token
-(c
)? token
-(bc
)? or
;
621 : token
-(hl
)->mr16
( -- )
622 token
-(hl
)?
if asm
-resw
:uo
-mr16
to tok
-uo asm
-resw
:kind
-hl
to tok
-kind
endif
625 ;; converts C register
to condition
626 : token
-c
-to-cond
( -- )
628 asm
-resw
:uo
-cond
to tok
-uo
629 asm
-resw
:kind
-cc
to tok
-kind
633 : token
-cond?
( -- bool
)
634 token
-reg
-c?
if token
-c
-to-cond true
635 else asm
-resw
:uo
-cond token
-uo?
640 : (build
-expector
) ( -- ) \ name checker msg|
641 parse
-name dup
" name expected" ?not
-error
642 compiler
:(create
-forth
-header
) [compile
] ]
644 parse
-skip
-blanks
[char
] | parse
" message expected" ?not
-error strliteral
648 : (build
-expector
-finish
) ( -- )
649 compile next
-token compile forth
:(exit
) [compile
] [
653 : build
-expector
: ( -- ) \ name checker msg|
654 (build
-expector
) (build
-expector
-finish
)
657 : build
-expector
-res
: ( -- ) \ name checker msg|
658 (build
-expector
) compile tok
-kind
(build
-expector
-finish
)
661 build
-expector
: expect
-eol token
-eol?
end of line expected|
662 build
-expector
: expect
-str token
-str? quoted string expected|
663 build
-expector
: expect
-eop token
-colon
-eol?
end of line expected|
664 build
-expector
: expect
-comma token
-comma? comma expected|
665 build
-expector
: expect
-lparen token
-lparen? `
(` expected|
666 build
-expector
: expect
-rparen token
-rparen? `
)` expected|
667 build
-expector
: expect
-regA token
-regA? `A` register expected|
668 build
-expector
: expect
-(c
) token
-(c
)? `
(C
)` expected|
669 build
-expector
: expect
-(c
)/(bc
) token
-(c
)/(bc
)? `
(C
)` expected|
671 build
-expector
-res
: expect
-cond token
-cond? condition code expected|
672 build
-expector
-res
: expect
-r8 token
-r8?
8-bit register expected|
673 build
-expector
-res
: expect
-r16 token
-r16?
16-bit register expected|