added some builtins
[bugg-scheme-compiler.git] / src / sml / scanner.sml
blob852bd0a907815b510fefb13dccd8daed1272ad37
1 (* Scanner *)
3 signature SCANNER =
4 sig
5 val stringToTokens : string -> SchemeToken list;
6 end;
8 (* *********************************************************** Scanner *)
10 exception ErrorNothingAfterHash;
11 exception ErrorBadChar of char * string;
12 exception ErrorStringDoesntEnd of string;
13 exception ErrorNoMetaChar of string;
14 exception ErrorNoSuchMetaChar of string;
15 exception ErrorNoChar;
16 exception ErrorUnknownNamedChar of string;
17 exception ErrorHash of string;
19 structure Scanner : SCANNER =
20 struct
22 val whiteChar = makeIsChar(" \t\r\n");
23 val delimiterChar = makeIsChar("'()\";, \t\r\n");
24 val octalChar = makeCharInRange(#"0", #"7");
25 val upperChar = makeCharInRange(#"A", #"Z");
26 val lowerChar = makeCharInRange(#"a", #"z");
27 val digitChar = makeCharInRange(#"0", #"9");
28 val specialSymbolChar = makeIsChar("!@$%^*-_=+<>/?.#");
30 fun symbolChar (ch) =
31 lowerChar(ch) orelse
32 upperChar(ch) orelse
33 digitChar(ch) orelse
34 specialSymbolChar(ch);
36 local
37 fun stInit([]) = []
38 | stInit(#";" :: s) = stComment(s)
39 | stInit(#"(" :: s) = LparenToken :: stInit(s)
40 | stInit(#")" :: s) = RparenToken :: stInit(s)
41 | stInit(#"'" :: s) = QuoteToken :: stInit(s)
42 | stInit(#"#" :: s) = stHash(s)
43 | stInit(#"." :: s) = DotToken :: stInit(s)
44 | stInit(#"-" :: s) = stMinus(s)
45 | stInit(#"\"" ::s) = stString([], s)
46 | stInit(ch :: s) =
47 if whiteChar(ch) then stInit(s)
48 else if digitChar(ch) then stDecDigit([ch],s)
49 else if symbolChar(ch) then stSymbol([ch],s)
50 else raise ErrorBadChar(ch, implode(s))
52 and stMinus([]) = SymbolToken("-") :: stInit([])
53 | stMinus(ch :: s) =
54 (* [minus,digit,...] => number *)
55 if digitChar(ch) then
56 stDecDigit([#"-",ch],s)
58 (* [minus,non-digit] => symbol *)
59 else
60 stSymbol([#"-"],[ch] @ s)
62 and stDecDigit(num,[]) = IntToken(stringToInt num) :: stInit([])
63 | stDecDigit(num,ch :: s) =
64 if (delimiterChar(ch)) then (* end of int *)
65 IntToken(stringToInt num) :: stInit(ch :: s)
66 else
67 stDecDigit(num @ [ch], s)
69 and stSymbol(symname,[]) = SymbolToken(implode symname) :: stInit([])
70 | stSymbol(symname,ch :: s) =
71 if symbolChar(ch) then
72 stSymbol(symname @ [ch],s)
73 else
74 SymbolToken(implode symname) :: stInit(ch :: s)
76 and stString(str, []) = raise ErrorStringDoesntEnd(implode str)
77 | stString(str, #"\"" :: s) = StringToken(implode str) :: stInit(s)
78 | stString(str, #"\\" :: s) = stStringMetaChar(str,s)
79 | stString(str, ch :: s) = stString(str @ [ch], s)
81 and stStringMetaChar(str,[]) = raise ErrorNoMetaChar(implode str)
82 | stStringMetaChar(str, #"t" :: s) = stString(str @ [#"\t"],s)
83 | stStringMetaChar(str, #"r" :: s) = stString(str @ [#"\r"],s)
84 | stStringMetaChar(str, #"n" :: s) = stString(str @ [#"\n"],s)
85 | stStringMetaChar(str, #"\\" :: s) = stString(str @ [#"\\"],s)
86 | stStringMetaChar(str, #"\"" :: s) = stString(str @ [#"\""],s)
87 | stStringMetaChar(str, ch1 :: ch2 :: ch3 :: s) =
88 if andmap octalChar [ch1,ch2,ch3] then
89 stString(str @ [chr( digitToInt(ch1)*8*8
90 +digitToInt(ch2)*8
91 +digitToInt(ch3))
92 ],s)
93 else
94 raise (ErrorNoSuchMetaChar(implode([ch1,ch2,ch3]@s)))
95 | stStringMetaChar(str, s) = raise (ErrorNoSuchMetaChar(implode s))
97 and stHash([]) = raise ErrorNothingAfterHash
98 | stHash(#"t" :: s) = BoolToken(true) :: stInit(s)
99 | stHash(#"T" :: s) = BoolToken(true) :: stInit(s)
100 | stHash(#"f" :: s) = BoolToken(false) :: stInit(s)
101 | stHash(#"F" :: s) = BoolToken(false) :: stInit(s)
102 | stHash(#"\\" :: s) = stChar(s)
103 | stHash(#"(" :: s) = VectorToken :: stInit(s)
104 | stHash(s) = raise ErrorHash("#\\" ^ implode(s))
106 and stChar([]) = raise ErrorNoChar
107 | stChar(ch :: s) = stChar'(s, [ch])
109 and stChar'([], chars) = makeCharToken(chars) :: stInit([])
110 | stChar'(s as ch :: s', chars) =
111 if delimiterChar(ch) then makeCharToken(chars) :: stInit(s)
112 else stChar'(s', ch :: chars)
114 and stComment([]) = stInit([])
115 | stComment(#"\n" :: s) = stInit(s)
116 | stComment(ch :: s) = stComment(s)
118 and charsToString(s) = implode(rev(s))
120 and makeCharToken([ch]) = CharToken(ch)
121 | makeCharToken(chars as [ch1, ch2, ch3]) =
122 if (andmap octalChar chars) then
123 CharToken(chr(digitToInt(ch1) +
124 8 * (digitToInt(ch2) +
125 8 * digitToInt(ch3))))
126 else charNameToCharToken(charsToString(chars))
127 | makeCharToken(chars) = charNameToCharToken(charsToString(chars))
129 and charNameToCharToken(charName) =
130 if stringEqual(charName, "space") then CharToken(#" ")
131 else if stringEqual(charName, "return") then CharToken(#"\r")
132 else if stringEqual(charName, "newline") then CharToken(#"\n")
133 else if stringEqual(charName, "tab") then CharToken(#"\t")
134 else raise ErrorUnknownNamedChar(charName)
136 and digitToInt(ch) = ord(ch) - ord(#"0")
138 and stringToInt(str) = valOf( Int.fromString(implode str) )
140 fun stringToTokens(string) = stInit(explode(string))
143 end;