eval: remove utf8 encoding comment, to fix
[trinary.git] / digital_simulator / tokenizer.py
blobce7591a941bfc65ab32214084e03abfec698fa2d
1 # vim: set fileencoding=utf8
2 # tokenizer.py
3 #
5 # Created by Antonio on 2/10/08.
6 # Trinary Research Project: Digital logic simulator
7 # Update (02.17.2008) : Tokenizer will now identify integers.
10 import sys
12 from Keyword import *
13 from Identifier import *
14 from Token import *
15 from Trits import *
16 from Literal import *
18 # tokenizer
def removeWhiteSpace(infile):
    '''removeWhiteSpace: skip over leading whitespace in the stream.
    infile: file containing the chars to read
    return: the first non-whitespace character, or the empty string ""
    (falsy) when the stream is exhausted, so callers can test truthiness.
    '''
    value = infile.read(1)
    while value and value.isspace():
        value = infile.read(1)
    # Both branches of the original if/else returned the same expression;
    # collapsed into a single return.
    return value
def isKeyword(infile, value):
    '''isKeyword: classify a completed word as a keyword or an identifier.
    infile: object file
    value: string to identify
    return: Keyword if value is in the keyword table, else Identifier
    '''
    # Push back the one lookahead character that terminated the word so
    # the caller's next read starts on it.
    infile.seek(infile.tell() - 1)
    if value not in keywords:
        # Not a reserved word: it is a user-defined identifier.
        return Identifier(value)
    return Keyword(value)
def tokenizeVector(infile, value):
    '''tokenizeVector: read characters up to the closing '"' of a trit vector.
    infile: object file (the opening '"' was already consumed by the caller)
    value: trit characters accumulated so far
    return: Trits object containing the vector
    raise: EOFError if the file ends before the closing quote,
           ValueError on a character that is not a valid trit.
    Note: string exceptions (raise "...") were removed in Python 2.6 and
    raised a TypeError instead of the message; replaced with real exception
    types. The per-character recursion is also flattened into a loop so
    long vectors cannot hit the recursion limit.
    '''
    while True:
        next = infile.read(1)
        if not next:
            raise EOFError("EOF file before end of vector.")
        if next in trit_char:
            value = value + next
        elif next == "\"":
            return Trits(value)
        else:
            raise ValueError("Invalid symbol detected: |%s|" % (next, ))
def tokenizeTrit(infile):
    '''tokenizeTrit: read the single trit of a 't'-style literal.
    infile: object file (the opening "'" was already consumed by the caller)
    return: Trits object containing the one trit
    raise: ValueError on a malformed literal.
    Note: the original validated input with bare asserts, which are
    silently stripped under python -O; replaced with explicit raises.
    '''
    next = infile.read(1)
    if next not in trit_char:
        raise ValueError("Invalid trit: |%s|" % (next, ))
    trit = Trits(next)
    next = infile.read(1)
    if next != "'":
        raise ValueError("Unterminated trit literal: |%s|" % (next, ))
    return trit
def tokenizeString(infile, value):
    '''tokenizeString: find the next keyword or identifier in the file
    infile: object file
    value: current value of the keyword/identifier
    return: Keyword or Identifier token for the accumulated word
    '''
    next = infile.read(1)
    if next.isalnum():
        value = value + next
        return tokenizeString(infile, value)
    if not next:
        # EOF: read(1) returned "" so there is no lookahead character to
        # push back. The original still called isKeyword, whose
        # seek(tell() - 1) would re-expose the last character already
        # consumed into value; classify directly instead.
        if value in keywords:
            return Keyword(value)
        return Identifier(value)
    # A real terminator character was read; isKeyword pushes it back.
    return isKeyword(infile, value)
def tokenizeNumber(infile, value):
    '''tokenizeNumber: identify the next integer in the file
    infile: object file
    value: digits accumulated so far (string)
    return: Literal token for the accumulated integer text
    '''
    next = infile.read(1)
    if next.isdigit():
        value = value + next
        return tokenizeNumber(infile, value)
    # Push the terminator back only when a character was actually read;
    # at EOF read(1) returns "" and the original seek(tell() - 1) would
    # re-expose the last digit already consumed into value.
    if next:
        infile.seek(infile.tell() - 1)
    # value is already a str; the original str(value) wrapper was a no-op.
    return Literal(value)
def nextToken(infile):
    '''nextToken: read the next token from the given file
    infile: reference to file
    return: the next token object, or None when no more tokens remain.
    raise: ValueError on a character that starts no known token.
    '''
    # The docstring above was unterminated in the original, which made the
    # rest of the function part of the string literal; closed it. The
    # string exception on the error path is also replaced with ValueError
    # (string exceptions were removed in Python 2.6).
    value = removeWhiteSpace(infile)
    if value is None or len(value) == 0: # None/"" when no more tokens
        return None
    elif value == "'":
        return tokenizeTrit(infile) # returns a Trit
    elif value == "\"":
        return tokenizeVector(infile, "") # returns a Trit vector
    elif value.isalpha():
        return tokenizeString(infile, value) # returns an Identifier
    elif value.isdigit():
        return tokenizeNumber(infile, value) # returns a Literal
    elif value in symbols:
        return Token(value)
    else: #invalid symbol detected
        raise ValueError("Invalid symbol detected: |%s|" % (value, ))
125 if __name__ == "__main__":
126 f = file("testParser", "r")#sys.stdin
127 while True:
128 token = nextToken(f)
129 print token
130 if token is None:
131 break