From 90019aeab553de300fe94b2114debb789c22fd08 Mon Sep 17 00:00:00 2001
From: Ketmar Dark
Date: Fri, 14 Aug 2020 10:59:21 +0300
Subject: [PATCH] started expression parser; for now, implemented numeric parser, with alot of prefixes and suffixes

---
 main.zas         |   8 +-
 parser.zas       | 407 ++++++++++++++++++++++++++++++++++++++++++++-----------
 parser_misc.zas  |  98 ++++++++++++++
 parser_stack.zas | 138 +++++++++++++++++++
 4 files changed, 573 insertions(+), 78 deletions(-)
 create mode 100644 parser_misc.zas
 create mode 100644 parser_stack.zas

diff --git a/main.zas b/main.zas
index 808f1c6..3557e94 100644
--- a/main.zas
+++ b/main.zas
@@ -56,6 +56,7 @@ doasm:
   push ix ; save code destination
   push iy ; we'll need it for listing
 doasm_nopush:
+  call EXPR_STACK_RESET
   call BZ80ASM.ASSEM
   jp c,list_and_error
 doasm_done_line:
@@ -130,10 +131,15 @@ strbuf:
   ;defm "jr label",13
   defm "ld a,42",13
   defm "ld hl,16384",13
+  defm "exx",13
+  defm "ld hl,#4001",13
   defm "ld a,(ix-2)",13
-  defm "call 1234",13
+  defm "call BEEFh",13
   defm "or c",13
   defm "and (hl)",13
+  defm "jr $",13
+  defm "ld de,$1234",13
+  defm "ld bc,0x5b02",13
   defb 0
 
 dest: defs 64,0
diff --git a/parser.zas b/parser.zas
index 1e89a29..73bbb43 100644
--- a/parser.zas
+++ b/parser.zas
@@ -1,84 +1,359 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; skip blanks
-;; returns current char in A
-;; sets zero flag on EOL
+;; math expression parser
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; this is a classical shunting-yard math expression parser
+;; search the web to find out how it works
+
+;; each evaluation stack item is a 3-byte value:
+;;   db type
+;;   dw data
+;;
+;; for numbers, data is 16-bit unsigned number (negative numbers
+;; are two's complement, and treated as positive, except for when
+;; one does "<0", ">0", "<=0", ">=0")
+;;
+;; for operators, type is precedence, and data is handler address
+
+;; set this to the address of the error routine
+;; note that you cannot return from it, you HAVE to abort everything
+;; also note that machine stack is undefined, and SP should be set
+;; to some initial value
+;; "undefined" means that machine stack can contain a lot of garbage,
+;; but will never be underflowed
+;;
+;; numeric stack state is undefined (REALLY undefined!)
+;; the only thing you can do with it is call `EXPR_STACK_RESET`
+;;
+;; this routine is called with the error code in A
+EXPR_ERROR_CB: defw 0
+
+;; error codes
+;; expression stack overflow
+EXPR_ERR_STACK_OVERFLOW equ 1
+;; expected number, but got something incomprehensible
+EXPR_ERR_NUMBER_EXPECTED equ 2
+;; expected string, but got something incomprehensible
+EXPR_ERR_STRING_EXPECTED equ 3
+;; expression stack underflow
+;; this usually means invalid expression
+;; so you can write "invalid expression" diagnostic message in this case
+EXPR_ERR_STACK_UNDERFLOW equ 4
+;; general error -- something is wrong with the expression
+;; write "invalid expression" diagnostic message in this case
+EXPR_ERR_INVALID equ 5
+
+;; number of 3-byte slots; should be more than enough for most expressions
+EXPR_STACK_SIZE equ 16
+
+;; user must set this to the stack buffer address (EXPR_STACK_SIZE*3 bytes)
+;; the stack grows to higher addresses
+EXPR_STACK_S0: defw 0
+
+;; this points to the *LAST* *USED* *SLOT* (S0-3 when the stack is empty)
+EXPR_STACK_SP: defw 0
+;; first address past the last usable slot (S0+EXPR_STACK_SIZE*3)
+;; inited with `EXPR_STACK_RESET`
+EXPR_STACK_END: defw 0
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; no user-serviceable parts beyond this point! ;-)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+EXPR_STITEM_NUMBER equ 0
+EXPR_STITEM_LPAREN equ 255
+
+  include "parser_misc.zas"
+  include "parser_stack.zas"
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; parse a number, return it in HL (nothing is pushed here)
+;; understands prefixes (# $ & % 0x 0b 0o 0d) and suffixes (h b o)
 ;; IN:
 ;;  IY: text buffer
 ;; OUT:
-;;  IY: text buffer at non-blank or EOL
-;;  A: non-blank or EOL char
-;;  zero flag is set on EOL
+;;  IY: text buffer after the number
+;;  HL: number
+;;  carry flag reset
+;; OR: (cannot parse as a number)
+;;  IY: unchanged
+;;  carry flag set
+;;  DE,AF,flags: dead
 ;;
-skipBlanks:
-  ld a,(iy)
-  or a
-  ret z
-  cp 13
+PARSE_NUMBER:
+  call skipBlanks
+  scf
   ret z
+  push iy ; we will need to rollback on error
+  ; A already contains a char, loaded by `skipBlanks`
+  cp '#'
+  jr z,.hexprefix
+  cp '$'
+  jr z,.maybe_lone_dollar
+  cp '&'
+  jr z,.hexprefix
+  cp '%'
+  jr z,.binprefix
+  ; no, leading zero doesn't mean "octal", this is stupid
+  ; but we may have prefixes like "0x" and such
+  cp '0'
+  jr z,.maybe_zero_prefix
+  ; check if we have a digit here
+  call convDigit
+  jp c,.not_a_number_carry_set
+  cp 10
+  jp nc,.must_be_hex_with_sfx
+.do_normal_decimal:
+  ; done with prefixes, try decimal number
+  ; we'll switch to suffix checking on hex digit
+  ld hl,0 ; accumulator
+.decimal_loop:
+  ld a,(iy)
+  call convDigit
+  jr c,.decimal_done
+  cp 10
+  jp nc,.must_be_hex_with_sfx
+  ; HL=HL*10
+  add hl,hl
+  ld de,hl
+  add hl,hl
+  add hl,hl
+  add hl,de
+  ; HL=HL+A
+  ld e,a
+  ld d,0
+  add hl,de
+  ; next char
   inc iy
-  cp 33
-  jr c,skipBlanks
-  dec iy
-  ; reset zero flag
+  jr .decimal_loop
+.decimal_done:
+  ; check for suffix
+  ld a,(iy)
+  and %11011111 ; cheap uppercase
+  cp 'H'
+  jp z,.must_be_hex_with_sfx ; jp: the target is too close to the `jr` range limit
+  cp 'B' ; NOTE(review): looks unreachable -- 'B' is a hex digit, so .decimal_loop already left via .must_be_hex_with_sfx
+  jp z,.bin_with_sfx
+  cp 'O'
+  jp z,.oct_with_sfx
+  ; no suffix, we're done
+.success:
+  pop de ; drop iy
+  ; reset carry flag
   or a
   ret
 
+.hexprefix:
+  ; skip prefix
+  inc iy
+  call .parse_as_hex
+.after_prefix:
+  jp c,.not_a_number_carry_set ; jp: the target is too close to the `jr` range limit
+  jr .success
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; check if A is an alpha char
-;; carry set: not alpha char
-;
-isAlpha:
-  cp 'a'
-  jr c,.notlower
-  cp 'z'+1
-  jr nc,.notlower
-  or a
-  ret
-.notlower:
-  cp 'A'
+.maybe_lone_dollar:
+  ; lone dollar means "PC"
+  inc iy
+  call .parse_as_hex
+  ; the only case we may get an error here is
+  ; when our dollar isn't followed by a digit
+  jr nc,.success
+  ; lone dollar is good too
+  ; IY points right after the dollar here
+  ld hl,(BZ80ASM.PC)
+  jr .success
+
+.binprefix:
+  ; skip prefix
+  inc iy
+  call .parse_as_bin
+  jr .after_prefix
+
+.octprefix:
+  ; skip prefix
+  inc iy
+  call .parse_as_oct
+  jr .after_prefix
+
+.maybe_zero_prefix:
+  ; check for '0x' and such
+  ; skip '0'
+  inc iy
+  ; load the prefix char
+  ; there's no need to skip it, as it will be
+  ; skipped by the corresponding prefix handler
+  ld a,(iy)
+  ; so IY will point to the actual number
+  and %11011111 ; cheap uppercase
+  cp 'X'
+  jr z,.hexprefix
+  cp 'B'
+  jr z,.binprefix
+  cp 'O'
+  jr z,.octprefix
+  cp 'D'
+  jr z,.decprefix ; must go through a `call`: .parse_as_dec ends with `ret`
+  ; do not reparse '0', no need to backup
+  jr .do_normal_decimal
+
+.decprefix:
+  ; skip prefix
+  inc iy
+  call .parse_as_dec
+  jr .after_prefix
+
+.parse_as_dec:
+  ld hl,0 ; accumulator
+  ; check first digit (as this is general parser)
+  ld a,(iy)
+  call convDigit
   ret c
-  cp 'Z'+1
+  cp 10
+  ccf
+  ret c
+.parse_as_dec_loop:
+  inc iy
+  ; HL=HL*10
+  add hl,hl
+  ld de,hl
+  add hl,hl
+  add hl,hl
+  add hl,de
+  ; HL=HL+A
+  ld e,a
+  ld d,0
+  add hl,de
+  ld a,(iy)
+  call convDigit
+  jr c,.parse_as_dec_done
+  cp 10
+  ccf
+  jr nc,.parse_as_dec_loop ; decimal digit: keep accumulating
+  ; clear carry flag (it is always set here)
+.parse_as_dec_done:
   ccf
   ret
 
+.must_be_hex_with_sfx:
+  ; reparse as hex, and check for suffix
+  pop iy
+  push iy
+  call .parse_as_hex
+  jr c,.not_a_number_carry_set
+  ld a,(iy)
+  inc iy
+  and %11011111 ; cheap uppercase
+  cp 'H'
+  jp z,.success ; jp: .success is out of `jr` range from here
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; check if A is a decimal digit
-;; carry set: not digit char
-;;
-isDigit:
-  cp '0'
+.not_a_number:
+  scf
+.not_a_number_carry_set:
+  pop iy
+  ret
+
+.bin_with_sfx:
+  ; reparse as bin, skip suffix (it is guaranteed to be there)
+  pop iy
+  push iy
+  call .parse_as_bin
+.done_guaranteed_suffix:
+  jr c,.not_a_number_carry_set
+  ; skip suffix
+  inc iy
+  jp .success
+
+.oct_with_sfx:
+  ; reparse as oct, skip suffix (it is guaranteed to be there)
+  pop iy
+  push iy
+  call .parse_as_oct
+  jr .done_guaranteed_suffix
+
+.parse_as_hex:
+  ld hl,0 ; accumulator
+  ; check first digit (as this is general parser)
+  ld a,(iy)
+  call convDigit
   ret c
-  cp '9'+1
+.parse_as_hex_loop:
+  inc iy
+  add hl,hl
+  add hl,hl
+  add hl,hl
+  add hl,hl
+  ld e,a
+  ld d,0
+  add hl,de
+  ld a,(iy)
+  call convDigit
+  jr nc,.parse_as_hex_loop
+  ; clear carry flag (it is always set here)
   ccf
   ret
 
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; check if A is a valid identifier char (including digits)
-;; carry set: not id char
-;;
-isIdChar:
-  call isAlpha
-  ret nc
-  call isDigit
-  ret nc
-  cp '_'
-  jr z,.goodchar
-  cp '$'
-  jr z,.goodchar
-  scf
+.parse_as_bin:
+  ld hl,0 ; accumulator
+  ; check first digit (as this is general parser)
+  ld a,(iy)
+  call convDigit
+  ret c
+  cp 2
+  ccf
+  ret c
+.parse_as_bin_loop:
+  inc iy
+  add hl,hl
+  ld e,a
+  ld d,0
+  add hl,de
+  ld a,(iy)
+  call convDigit
+  jr c,.parse_as_bin_done
+  cp 2
+  ccf
+  jr nc,.parse_as_bin_loop
+  ; clear carry flag (it is always set here)
+.parse_as_bin_done:
+  ccf
   ret
-.goodchar:
-  or a
+
+.parse_as_oct:
+  ld hl,0 ; accumulator
+  ; check first digit (as this is general parser)
+  ld a,(iy)
+  call convDigit
+  ret c
+  cp 8
+  ccf
+  ret c
+.parse_as_oct_loop:
+  inc iy
+  add hl,hl
+  add hl,hl
+  add hl,hl
+  ld e,a
+  ld d,0
+  add hl,de
+  ld a,(iy)
+  call convDigit
+  jr c,.parse_as_oct_done
+  cp 8
+  ccf
+  jr nc,.parse_as_oct_loop
+  ; clear carry flag (it is always set here)
+.parse_as_oct_done:
+  ccf
   ret
 
 
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; parse an integer expression -;; ;; IN: ;; IY: text buffer ;; OUT: @@ -100,30 +370,8 @@ PARSE_INT_EXPR: jr nz,.ccnum inc iy .ccnum: - ld a,(iy) - cp '0' + call PARSE_NUMBER jp c,error_integer_expected - cp '9'+1 - jp nc,error_integer_expected - ld hl,0 -.numloop: - ld a,(iy) - sub '0' - jr c,.numdone - cp 10 - jr nc,.numdone - ; hl = hl*10 - add hl,hl - ld de,hl - add hl,hl - add hl,hl - add hl,de - ld e,a - ld d,0 - add hl,de - inc iy - jr .numloop -.numdone: ld a,c or a jr z,.noneg diff --git a/parser_misc.zas b/parser_misc.zas new file mode 100644 index 0000000..2c1cd49 --- /dev/null +++ b/parser_misc.zas @@ -0,0 +1,98 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; math expression parser, misc routines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; skip blanks +;; returns current char in A +;; sets zero flag on EOL +;; IN: +;; IY: text buffer +;; OUT: +;; IY: text buffer at non-blank or EOL +;; A: non-blank or EOL char +;; zero flag is set on EOL +;; +skipBlanks: + ld a,(iy) + or a + ret z + cp 13 + ret z + inc iy + cp 33 + jr c,skipBlanks + dec iy + ; reset zero flag + or a + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; check if A is an alpha char +;; carry set: not an alpha char +;; +isAlpha: + cp 'a' + jr c,.notlower + cp 'z'+1 + jr nc,.notlower + or a + ret +.notlower: + cp 'A' + ret c + cp 'Z'+1 + ccf + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; check if A is a decimal digit +;; carry set: not a digit char +;; +isDigit: + cp '0' + ret c + cp '9'+1 + ccf + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; check if A is a valid identifier char (including digits) +;; carry set: not an id char +;; +isIdChar: + call isAlpha + ret nc + call isDigit + ret nc + cp '_' + jr z,.goodchar + cp '$' 
+ jr z,.goodchar + scf + ret +.goodchar: + or a + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; converts 'A' to digit (assume hex) +;; carry set: not a digit char (and A is destroyed) +;; +convDigit: + sub '0' + ret c + cp 10 + ccf + ret nc + add a,'0' + and %11011111 ; cheap uppercase + sub 'A'-10 + ret c + cp 16 + ccf + ret diff --git a/parser_stack.zas b/parser_stack.zas new file mode 100644 index 0000000..e2035c0 --- /dev/null +++ b/parser_stack.zas @@ -0,0 +1,138 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; math expression parser, general numeric stack management +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; clear numeric stack, setup stack vars +;; IN: +;; nothing +;; OUT: +;; HL,DE: dead +;; +EXPR_STACK_RESET: + ld de,EXPR_STACK_SIZE+1 + ld hl,de + add hl,hl + add hl,de + ex de,hl + ; DE=EXPR_STACK_SIZE*3 + ld hl,(EXPR_STACK_S0) + dec hl + dec hl + dec hl + ld (EXPR_STACK_SP),hl + add hl,de + ld (EXPR_STACK_END),hl + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; loads addess of the next numeric stack slot into HL +;; increments numeric stack pointer +;; checks for stack overflow +;; +;; IN: +;; nothing +;; OUT: +;; HL: stack address +;; DE,flags: dead +;; +EXPR_STACK_GET_NEXT_SLOT_HL: + ld hl,(EXPR_STACK_SP) + inc hl + inc hl + inc hl + ld (EXPR_STACK_SP),hl + push hl + ld de,(EXPR_STACK_END) + or a + sbc hl,de + pop hl + ret c + ; stack overflow + ld a,EXPR_ERR_STACK_OVERFLOW +EXPR_CALL_ERROR_CB: + ld hl,(EXPR_ERROR_CB) + jp (hl) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; loads addess of the last numeric stack slot into HL +;; decrements numeric stack pointer +;; checks for stack underfloaw +;; +;; IN: +;; nothing +;; OUT: +;; HL: stack address +;; DE,flags: dead +;; +EXPR_STACK_POP_LAST_SLOT_HL: + ld hl,(EXPR_STACK_SP) + push hl + dec hl + 
dec hl + dec hl + ld (EXPR_STACK_SP),hl + ld de,(EXPR_STACK_S0) + pop hl + push hl + or a + sbc hl,de + pop hl + ret nc + ; stack underflow + ld a,EXPR_ERR_STACK_UNDERFLOW + jp EXPR_CALL_ERROR_CB + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; push number to numeric stack +;; IN: +;; HL: number +;; OUT: +;; HL,DE,flags: dead +;; +EXPR_PUSH_HL: + push hl + call EXPR_STACK_GET_NEXT_SLOT_HL + pop de + ex de,hl + ld (hl),EXPR_STITEM_NUMBER + inc hl + ld (hl),e + inc hl + ld (hl),d + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; pop number from numeric stack +;; raises error if stack is empty or TOS is not a number +;; IN: +;; nothing +;; OUT: +;; HL: number +;; DE,flags: dead +;; +EXPR_POP_HL: + call EXPR_STACK_POP_LAST_SLOT_HL + ld a,(hl) + IF EXPR_STITEM_NUMBER == 0 + or a + ELSE + cp EXPR_STITEM_NUMBER + ENDIF + ld a,EXPR_ERR_INVALID + jp nz,EXPR_CALL_ERROR_CB + inc hl + ld e,(hl) + inc hl + ld d,(hl) + ex de,hl + ret -- 2.11.4.GIT