Fixed issue with return address in cpu_sim.py. Fixed bugs in compiler generation...
[trinary.git] / digital_simulator / tokenizer.py
# vim: set fileencoding=utf8
# tokenizer.py
#
# Created by Antonio on 2/10/08.
# Trinary Research Project: Digital logic simulator
# Update (02.17.2008): Tokenizer will now identify integers.
import sys

from Keyword import *
from Identifier import *
from Token import *
from Trits import *
from Literal import *
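# Note: the sibling modules above are assumed to supply the token classes
# (Keyword, Identifier, Token, Trits, Literal) as well as the lookup tables
# keywords, symbols, and trit_char that this tokenizer relies on.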
# tokenizer
def removeWhiteSpace(infile):
    '''removeWhiteSpace: skip any leading white space in the buffer
    infile: file containing the chars to read
    return: the first non-whitespace character read, or the empty
    string if no more valid chars are in the buffer
    '''
    value = infile.read(1)
    while value and value.isspace():
        value = infile.read(1)
    return value
def isKeyword(infile, value):
    '''isKeyword: identifies a completed string as a keyword or an identifier
    infile: object file
    value: string to identify
    return: Keyword or Identifier token
    '''
    # Push back the character that terminated the string so that it can be
    # tokenized on the next call.
    infile.seek(infile.tell() - 1)
    if value in keywords:           # string is a keyword
        return Keyword(value)
    else:                           # string is an identifier
        return Identifier(value)
def tokenizeVector(infile, value):
    '''tokenizeVector: find the next trit vector in the file
    infile: object file
    value: characters of the trit vector read so far
    return: Trits object containing the vector
    '''
    next = infile.read(1)
    if not next:
        raise Exception("EOF reached before end of vector.")
    if next in trit_char:
        value = value + next
        return tokenizeVector(infile, value)
    elif next == "\"":              # closing quote ends the vector
        return Trits(value)
    else:
        raise Exception("Invalid symbol detected: |%s|" % (next, ))
def tokenizeTrit(infile):
    '''tokenizeTrit: find the next single trit in the file
    infile: object file
    return: Trits object containing the trit
    '''
    next = infile.read(1)
    assert next in trit_char, "Invalid trit character: |%s|" % (next, )
    trit = Trits(next)
    next = infile.read(1)
    assert next == "'", "Trit literal is missing its closing quote"
    return trit
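# Example for tokenizeTrit (assuming "1" is a member of trit_char): the input
# '1' yields Trits("1"); nextToken consumes the opening quote, and this
# function reads the trit character plus the closing quote.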
def tokenizeString(infile, value):
    '''tokenizeString: find the next keyword or identifier in the file
    infile: object file
    value: characters of the keyword/identifier read so far
    return: Keyword or Identifier token for the string
    '''
    next = infile.read(1)
    if next.isalnum():
        value = value + next
        return tokenizeString(infile, value)
    else:
        return isKeyword(infile, value)
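# Example for tokenizeString: a word such as foo is returned as
# Identifier("foo") unless it appears in the keywords table imported above,
# in which case it comes back as a Keyword.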
def tokenizeNumber(infile, value):
    '''tokenizeNumber: identify the next integer in the file
    infile: object file
    value: digits of the integer read so far
    return: Literal token containing the integer
    '''
    next = infile.read(1)
    if next.isdigit():
        value = value + next
        return tokenizeNumber(infile, value)
    else:
        # Push back the non-digit character so it can be tokenized next.
        infile.seek(infile.tell() - 1)
        return Literal(str(value))
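# Example for tokenizeNumber: the input 42 is returned as Literal("42").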
def nextToken(infile):
    '''nextToken: read the next token from the given file
    infile: reference to file
    return: the next token in the file, or None if there are no more tokens
    '''
    value = removeWhiteSpace(infile)
    if not value:                               # empty string: no more tokens
        return None
    elif value == "'":
        return tokenizeTrit(infile)             # returns a Trit
    elif value == "\"":
        return tokenizeVector(infile, "")       # returns a Trit vector
    elif value.isalpha():
        return tokenizeString(infile, value)    # returns a Keyword or Identifier
    elif value.isdigit():
        return tokenizeNumber(infile, value)    # returns a Literal
    elif value in symbols:
        return Token(value)
    else:                                       # invalid symbol detected
        raise Exception("Invalid symbol detected: |%s|" % (value, ))
if __name__ == "__main__":
    f = file("testParser", "r")     # or sys.stdin
    while True:
        token = nextToken(f)
        print token
        if token is None:
            break