From 02481cbefc10c10a30c164a0b6d159e597fb3cbb Mon Sep 17 00:00:00 2001 From: ketmar Date: Thu, 2 Nov 2023 23:18:36 +0000 Subject: [PATCH] UrForth: some microoptimisations FossilOrigin-Name: 4972284d9912bd95c51280fe55b8b39dcfdb5760cf0a6a6c2e7e368b01b8d9e7 --- src/liburforth/urforth.c | 276 +++++++++++++++++++++++---------------- urflibs/sinopt-peephole/sinopt.f | 8 ++ 2 files changed, 171 insertions(+), 113 deletions(-) diff --git a/src/liburforth/urforth.c b/src/liburforth/urforth.c index efe8b6c..32a9ba7 100644 --- a/src/liburforth/urforth.c +++ b/src/liburforth/urforth.c @@ -1261,6 +1261,39 @@ UFO_FORCE_INLINE void ufoImgPutU32 (uint32_t addr, const uint32_t value) { //========================================================================== // +// ufoImgIOPtrU32 +// +// fast +// +//========================================================================== +UFO_FORCE_INLINE uint32_t *ufoImgIOPtrU32 (uint32_t addr) { + if ((addr & UFO_ADDR_SPECIAL_BITS_MASK) == 0) { + if (addr + 3u >= ufoImageSize) { + #ifdef UFO_HUGE_IMAGES + ufoFatal("Forth segmentation fault at address 0x%08X", addr); + #else + ufoImgEnsureSize(addr + 3u); + #endif + } + return (uint32_t *)((uint8_t *)ufoImage + addr); + } else if (addr & UFO_ADDR_TEMP_BIT) { + addr &= UFO_ADDR_TEMP_MASK; + if (addr + 3u >= ufoImageTempSize) { + #ifdef UFO_HUGE_IMAGES + ufoFatal("Forth segmentation fault at address 0x%08X", addr); + #else + ufoImgEnsureTemp(addr + 3u); + #endif + } + return (uint32_t *)((uint8_t *)ufoImageTemp + addr); + } else { + ufoFatal("Forth segmentation fault at address 0x%08X", addr); + } +} + + +//========================================================================== +// // ufoImgGetU8 // // false @@ -3506,13 +3539,13 @@ UFWORD(POKE_REGA) { // ( idx -- byte ) UFWORD(CPEEK_REGA_IDX) { if ((ufoRegA & UFO_ADDR_HANDLE_BIT) == 0) { - const uint32_t idx = ufoPop(); - const uint32_t newaddr = ufoRegA + idx; + UFO_STACK(1); + const uint32_t newaddr = ufoRegA + UFO_TOS; if ((ufoRegA & UFO_ADDR_SPECIAL_BITS_MASK) == (newaddr & UFO_ADDR_SPECIAL_BITS_MASK)) { - ufoPush(ufoImgGetU8(newaddr)); + UFO_TOS = ufoImgGetU8(newaddr); } else { ufoFatal("address offset out of range; addr=0x%08x; offset=%u; res=0x%08x", - ufoRegA, idx, newaddr); + ufoRegA, UFO_TOS, newaddr); } } else { ufoPush(ufoRegA); @@ -3524,15 +3557,15 @@ UFWORD(CPEEK_REGA_IDX) { // ( idx -- word ) UFWORD(WPEEK_REGA_IDX) { if ((ufoRegA & UFO_ADDR_HANDLE_BIT) == 0) { - const uint32_t idx = ufoPop(); - const uint32_t newaddr = ufoRegA + idx; + UFO_STACK(1); + const uint32_t newaddr = ufoRegA + UFO_TOS; if ((ufoRegA & UFO_ADDR_SPECIAL_BITS_MASK) == (newaddr & UFO_ADDR_SPECIAL_BITS_MASK) && (ufoRegA & UFO_ADDR_SPECIAL_BITS_MASK) == ((newaddr + 1u) & UFO_ADDR_SPECIAL_BITS_MASK)) { - ufoPush(ufoImgGetU16(newaddr)); + UFO_TOS = ufoImgGetU16(newaddr); } else { ufoFatal("address offset out of range; addr=0x%08x; offset=%u; res=0x%08x", - ufoRegA, idx, newaddr); + ufoRegA, UFO_TOS, newaddr); } } else { ufoPush(ufoRegA); @@ -3544,15 +3577,15 @@ UFWORD(WPEEK_REGA_IDX) { // ( idx -- value ) UFWORD(PEEK_REGA_IDX) { if ((ufoRegA & UFO_ADDR_HANDLE_BIT) == 0) { - const uint32_t idx = ufoPop(); - const uint32_t newaddr = ufoRegA + idx; + UFO_STACK(1); + const uint32_t newaddr = ufoRegA + UFO_TOS; if ((ufoRegA & UFO_ADDR_SPECIAL_BITS_MASK) == (newaddr & UFO_ADDR_SPECIAL_BITS_MASK) && (ufoRegA & UFO_ADDR_SPECIAL_BITS_MASK) == ((newaddr + 3u) & UFO_ADDR_SPECIAL_BITS_MASK)) { - ufoPush(ufoImgGetU32(newaddr)); + UFO_TOS = ufoImgGetU32(newaddr); } else { ufoFatal("address offset out of range; addr=0x%08x; offset=%u; res=0x%08x", - ufoRegA, idx, newaddr); + ufoRegA, UFO_TOS, newaddr); } } else { ufoPush(ufoRegA); @@ -3703,46 +3736,46 @@ UFWORD(PAR_BRANCH_ADDR_PEEK) { // C@ // ( addr -- value8 ) UFWORD(CPEEK) { - const uint32_t addr = ufoPop(); - ufoPush(ufoImgGetU8(addr)); + UFO_STACK(1); + UFO_TOS = ufoImgGetU8(UFO_TOS); } // W@ // ( addr -- value16 ) UFWORD(WPEEK) { - const uint32_t addr = ufoPop(); - ufoPush(ufoImgGetU16(addr)); + UFO_STACK(1); + UFO_TOS = ufoImgGetU16(UFO_TOS); } // @ // ( addr -- value32 ) UFWORD(PEEK) { - const uint32_t addr = ufoPop(); - ufoPush(ufoImgGetU32(addr)); + UFO_STACK(1); + UFO_TOS = ufoImgGetU32(UFO_TOS); } // C! // ( val8 addr -- ) UFWORD(CPOKE) { - const uint32_t addr = ufoPop(); - const uint32_t val = ufoPop(); - ufoImgPutU8(addr, val); + UFO_STACK(2); + ufoImgPutU8(UFO_TOS, UFO_S(1)); + ufoSP -= 2u; } // W! // ( val16 addr -- ) UFWORD(WPOKE) { - const uint32_t addr = ufoPop(); - const uint32_t val = ufoPop(); - ufoImgPutU16(addr, val); + UFO_STACK(2); + ufoImgPutU16(UFO_TOS, UFO_S(1)); + ufoSP -= 2u; } // ! // ( val32 addr -- ) UFWORD(POKE) { - const uint32_t addr = ufoPop(); - const uint32_t val = ufoPop(); - ufoImgPutU32(addr, val); + UFO_STACK(2); + ufoImgPutU32(UFO_TOS, UFO_S(1)); + ufoSP -= 2u; } // (DIRECT:@) @@ -3810,19 +3843,19 @@ UFWORD(DIRECT_SUB_POKE) { // ( addr -- value32 ) // code arg is offset UFWORD(DIRECT_OFS_PEEK) { - uint32_t addr = ufoPop(); - addr += ufoImgGetU32(ufoIP); ufoIP += 4u; - ufoPush(ufoImgGetU32(addr)); + UFO_STACK(1); + const uint32_t addr = UFO_TOS + ufoImgGetU32(ufoIP); ufoIP += 4u; + UFO_TOS = ufoImgGetU32(addr); } // (DIRECT:+:!) // ( value32 addr -- ) // code arg is offset UFWORD(DIRECT_OFS_POKE) { - uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - addr += ufoImgGetU32(ufoIP); ufoIP += 4u; - ufoImgPutU32(addr, val); + UFO_STACK(2); + const uint32_t addr = UFO_TOS + ufoImgGetU32(ufoIP); ufoIP += 4u; + ufoImgPutU32(addr, UFO_S(1)); + ufoSP -= 2u; } // (DIRECT:1+!) @@ -3900,134 +3933,151 @@ UFWORD(DIRECT_POKE_DEC8) { // SWAP-C! // ( addr value -- ) UFWORD(SWAP_CPOKE) { - const uint32_t val = ufoPop(); - const uint32_t addr = ufoPop(); - ufoImgPutU8(addr, val); + UFO_STACK(2); + ufoImgPutU8(UFO_S(1), UFO_TOS); + ufoSP -= 2u; } // SWAP-W! // ( addr value -- ) UFWORD(SWAP_WPOKE) { - const uint32_t val = ufoPop(); - const uint32_t addr = ufoPop(); - ufoImgPutU16(addr, val); + UFO_STACK(2); + ufoImgPutU16(UFO_S(1), UFO_TOS); + ufoSP -= 2u; } // SWAP! // ( addr value -- ) UFWORD(SWAP_POKE) { - const uint32_t val = ufoPop(); - const uint32_t addr = ufoPop(); - ufoImgPutU32(addr, val); + UFO_STACK(2); + ufoImgPutU32(UFO_S(1), UFO_TOS); + ufoSP -= 2u; } // OR-C! // ( value addr -- ) UFWORD(OR_CPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val |= ufoImgGetU8(addr); - ufoImgPutU8(addr, val); + UFO_STACK(2); + ufoImgPutU8(UFO_TOS, UFO_S(1) | ufoImgGetU8(UFO_TOS)); + ufoSP -= 2u; } // OR-W! // ( value addr -- ) UFWORD(OR_WPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val |= ufoImgGetU16(addr); - ufoImgPutU16(addr, val); + UFO_STACK(2); + ufoImgPutU16(UFO_TOS, UFO_S(1) | ufoImgGetU16(UFO_TOS)); + ufoSP -= 2u; } // OR! // ( value addr -- ) UFWORD(OR_POKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val |= ufoImgGetU32(addr); - ufoImgPutU32(addr, val); + UFO_STACK(2); + #ifdef UFO_FAST_MEM_ACCESS + if ((UFO_TOS & UFO_ADDR_HANDLE_BIT) == 0) { + uint32_t *uptr = ufoImgIOPtrU32(UFO_TOS); + *uptr |= UFO_S(1); + } else { + ufoImgPutU32(UFO_TOS, UFO_S(1) | ufoImgGetU32(UFO_TOS)); + } + #else + ufoImgPutU32(UFO_TOS, UFO_S(1) | ufoImgGetU32(UFO_TOS)); + #endif + ufoSP -= 2u; } // XOR-C! // ( value addr -- ) UFWORD(XOR_CPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val ^= ufoImgGetU8(addr); - ufoImgPutU8(addr, val); + UFO_STACK(2); + ufoImgPutU8(UFO_TOS, UFO_S(1) ^ ufoImgGetU8(UFO_TOS)); + ufoSP -= 2u; } // XOR-W! // ( value addr -- ) UFWORD(XOR_WPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val ^= ufoImgGetU16(addr); - ufoImgPutU16(addr, val); + UFO_STACK(2); + ufoImgPutU16(UFO_TOS, UFO_S(1) ^ ufoImgGetU16(UFO_TOS)); + ufoSP -= 2u; } // XOR! // ( value addr -- ) UFWORD(XOR_POKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val ^= ufoImgGetU32(addr); - ufoImgPutU32(addr, val); + UFO_STACK(2); + #ifdef UFO_FAST_MEM_ACCESS + if ((UFO_TOS & UFO_ADDR_HANDLE_BIT) == 0) { + uint32_t *uptr = ufoImgIOPtrU32(UFO_TOS); + *uptr ^= UFO_S(1); + } else { + ufoImgPutU32(UFO_TOS, UFO_S(1) ^ ufoImgGetU32(UFO_TOS)); + } + #else + ufoImgPutU32(UFO_TOS, UFO_S(1) ^ ufoImgGetU32(UFO_TOS)); + #endif + ufoSP -= 2u; } // ~AND-C! // ( value addr -- ) UFWORD(NAND_CPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val = ufoImgGetU8(addr)&~val; - ufoImgPutU8(addr, val); + UFO_STACK(2); + ufoImgPutU8(UFO_TOS, ufoImgGetU8(UFO_TOS) & ~UFO_S(1)); + ufoSP -= 2u; } // ~AND-W! // ( value addr -- ) UFWORD(NAND_WPOKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val = ufoImgGetU16(addr)&~val; - ufoImgPutU16(addr, val); + UFO_STACK(2); + ufoImgPutU16(UFO_TOS, ufoImgGetU16(UFO_TOS) & ~UFO_S(1)); + ufoSP -= 2u; } // ~AND! // ( value addr -- ) UFWORD(NAND_POKE) { - const uint32_t addr = ufoPop(); - uint32_t val = ufoPop(); - val = ufoImgGetU32(addr)&~val; - ufoImgPutU32(addr, val); + UFO_STACK(2); + #ifdef UFO_FAST_MEM_ACCESS + if ((UFO_TOS & UFO_ADDR_HANDLE_BIT) == 0) { + uint32_t *uptr = ufoImgIOPtrU32(UFO_TOS); + *uptr = *uptr & ~UFO_S(1); + } else { + ufoImgPutU32(UFO_TOS, ufoImgGetU32(UFO_TOS) & ~UFO_S(1)); + } + #else + ufoImgPutU32(UFO_TOS, ufoImgGetU32(UFO_TOS) & ~UFO_S(1)); + #endif + ufoSP -= 2u; } // COUNT // ( addr -- addr+4 addr@ ) UFWORD(COUNT) { - const uint32_t addr = ufoPop(); - const uint32_t count = ufoImgGetU32(addr); - ufoPush(addr + 4u); + UFO_STACK(1); + const uint32_t count = ufoImgGetU32(UFO_TOS); + UFO_TOS += 4u; ufoPush(count); } // ID-COUNT // ( addr -- addr+4 addr@&0xff ) UFWORD(ID_COUNT) { - const uint32_t addr = ufoPop(); - const uint32_t count = ufoImgGetU32(addr); - ufoPush(addr + 4u); + UFO_STACK(1); + const uint32_t count = ufoImgGetU32(UFO_TOS); + UFO_TOS += 4u; ufoPush(count & 0xffU); } - // BCOUNT // ( addr -- addr+1 addrC@ ) UFWORD(BCOUNT) { - const uint32_t addr = ufoPop(); - const uint32_t count = ufoImgGetU8(addr); - ufoPush(addr + 1u); - ufoPush(count & 0xffU); + UFO_STACK(1); + const uint32_t count = ufoImgGetU8(UFO_TOS); + UFO_TOS += 1u; + ufoPush(count); } // 0! @@ -4061,19 +4111,17 @@ UFWORD(POKE_DEC_1) { // +! // ( delta addr -- ) UFWORD(POKE_INC) { - const uint32_t addr = ufoPop(); - const uint32_t delta = ufoPop(); - const uint32_t val = ufoImgGetU32(addr); - ufoImgPutU32(addr, val + delta); + UFO_STACK(2); + ufoImgPutU32(UFO_TOS, ufoImgGetU32(UFO_TOS) + UFO_S(1)); + ufoSP -= 2u; } // -! // ( delta addr -- ) UFWORD(POKE_DEC) { - const uint32_t addr = ufoPop(); - const uint32_t delta = ufoPop(); - const uint32_t val = ufoImgGetU32(addr); - ufoImgPutU32(addr, val - delta); + UFO_STACK(2); + ufoImgPutU32(UFO_TOS, ufoImgGetU32(UFO_TOS) - UFO_S(1)); + ufoSP -= 2u; } @@ -4309,7 +4357,9 @@ UFWORD(FORTH_TAIL_CALL) { // (EXIT) UFWORD(PAR_EXIT) { if (ufoRP == 0) longjmp(ufoStopVMJP, 667); - ufoIP = ufoRPop(); + ufoRP -= 1u; + ufoIP = ufoRStack[ufoRP]; + //ufoIP = ufoRPop(); } // (SELF@) @@ -4867,30 +4917,30 @@ UFWORD(FLUSH_EMIT) { // simple math // -#define UF_UMATH(name_,op_) \ -UFWORD(name_) { \ - const uint32_t a = ufoPop(); \ - ufoPush(op_); \ -} - #define UF_BMATH(name_,op_) \ UFWORD(name_) { \ - const uint32_t b = ufoPop(); \ - const uint32_t a = ufoPop(); \ - ufoPush(op_); \ + UFO_STACK(2); \ + const uint32_t b = UFO_TOS; \ + const uint32_t a = UFO_S(1); \ + ufoSP -= 1u; \ + UFO_TOS = (op_); \ } #define UF_BDIV(name_,op_) \ UFWORD(name_) { \ - const uint32_t b = ufoPop(); \ - const uint32_t a = ufoPop(); \ + UFO_STACK(2); \ + const uint32_t b = UFO_TOS; \ + const uint32_t a = UFO_S(1); \ if (b == 0) ufoFatal("division by zero"); \ - ufoPush(op_); \ + ufoSP -= 1u; \ + UFO_TOS = (op_); \ } #define UFO_POP_U64() ({ \ - const uint32_t hi_ = ufoPop(); \ - const uint32_t lo_ = ufoPop(); \ + UFO_STACK(2); \ + const uint32_t hi_ = UFO_TOS; \ + const uint32_t lo_ = UFO_S(1); \ + ufoSP -= 2u; \ (((uint64_t)hi_ << 32) | lo_); \ }) diff --git a/urflibs/sinopt-peephole/sinopt.f b/urflibs/sinopt-peephole/sinopt.f index ef6b5cd..c7594b9 100644 --- a/urflibs/sinopt-peephole/sinopt.f +++ b/urflibs/sinopt-peephole/sinopt.f @@ -695,6 +695,14 @@ create; ['] swap = ifnot false exit endif ['] swap! endof + ['] w! of + ['] swap = ifnot false exit endif + ['] swap-w! + endof + ['] c! of + ['] swap = ifnot false exit endif + ['] swap-c! + endof otherwise 2drop false exit endcase $IF $DEBUG-SINOPT AND $DEBUG-SINOPT-STACK-OPS " stack-ops" .optim -- 2.11.4.GIT