cvs2svn_rcsparse/default.py

   1 # -*-python-*-
   2 #
   3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
   4 #
   5 # By using this file, you agree to the terms and conditions set forth in
   6 # the LICENSE.html file which can be found at the top level of the ViewVC
   7 # distribution or at http://viewvc.org/license-1.html.
   8 #
   9 # For more information, visit http://viewvc.org/
  10 #
  11 # -----------------------------------------------------------------------
  12 #
  13 # This file was originally based on portions of the blame.py script by
  14 # Curt Hagenlocher.
  15 #
  16 # -----------------------------------------------------------------------
  17
  18 import string
  19 import common
  20
  21 class _TokenStream:
  22   token_term = string.whitespace + ";:"
  23   try:
  24     token_term = frozenset(token_term)
  25   except NameError:
  26     pass
  27
  28   # the algorithm is about the same speed for any CHUNK_SIZE chosen.
  29   # grab a good-sized chunk, but not too large to overwhelm memory.
  30   # note: we use a multiple of a standard block size
  31   CHUNK_SIZE  = 192 * 512  # about 100k
  32
  33 # CHUNK_SIZE  = 5   # for debugging, make the function grind...
  34
  35   def __init__(self, file):
  36     self.rcsfile = file
  37     self.idx = 0
  38     self.buf = self.rcsfile.read(self.CHUNK_SIZE)
  39     if self.buf == '':
  40       raise RuntimeError, 'EOF'
  41
  42   def get(self):
  43     "Get the next token from the RCS file."
  44
  45     # Note: we can afford to loop within Python, examining individual
  46     # characters. For the whitespace and tokens, the number of iterations
  47     # is typically quite small. Thus, a simple iterative loop will beat
  48     # out more complex solutions.
  49
  50     buf = self.buf
  51     lbuf = len(buf)
  52     idx = self.idx
  53
  54     while 1:
  55       if idx == lbuf:
  56         buf = self.rcsfile.read(self.CHUNK_SIZE)
  57         if buf == '':
  58           # signal EOF by returning None as the token
  59           del self.buf   # so we fail if get() is called again
  60           return None
  61         lbuf = len(buf)
  62         idx = 0
  63
  64       if buf[idx] not in string.whitespace:
  65         break
  66
  67       idx = idx + 1
  68
  69     if buf[idx] in ';:':
  70       self.buf = buf
  71       self.idx = idx + 1
  72       return buf[idx]
  73
  74     if buf[idx] != '@':
  75       end = idx + 1
  76       token = ''
  77       while 1:
  78         # find token characters in the current buffer
  79         while end < lbuf and buf[end] not in self.token_term:
  80           end = end + 1
  81         token = token + buf[idx:end]
  82
  83         if end < lbuf:
  84           # we stopped before the end, so we have a full token
  85           idx = end
  86           break
  87
  88         # we stopped at the end of the buffer, so we may have a partial token
  89         buf = self.rcsfile.read(self.CHUNK_SIZE)
  90         lbuf = len(buf)
  91         idx = end = 0
  92
  93       self.buf = buf
  94       self.idx = idx
  95       return token
  96
  97     # a "string" which starts with the "@" character. we'll skip it when we
  98     # search for content.
  99     idx = idx + 1
 100
 101     chunks = [ ]
 102
 103     while 1:
 104       if idx == lbuf:
 105         idx = 0
 106         buf = self.rcsfile.read(self.CHUNK_SIZE)
 107         if buf == '':
 108           raise RuntimeError, 'EOF'
 109         lbuf = len(buf)
 110       i = string.find(buf, '@', idx)
 111       if i == -1:
 112         chunks.append(buf[idx:])
 113         idx = lbuf
 114         continue
 115       if i == lbuf - 1:
 116         chunks.append(buf[idx:i])
 117         idx = 0
 118         buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
 119         if buf == '@':
 120           raise RuntimeError, 'EOF'
 121         lbuf = len(buf)
 122         continue
 123       if buf[i + 1] == '@':
 124         chunks.append(buf[idx:i+1])
 125         idx = i + 2
 126         continue
 127
 128       chunks.append(buf[idx:i])
 129
 130       self.buf = buf
 131       self.idx = i + 1
 132
 133       return string.join(chunks, '')
 134
 135 #  _get = get
 136 #  def get(self):
 137     token = self._get()
 138     print 'T:', `token`
 139     return token
 140
 141   def match(self, match):
 142     "Try to match the next token from the input buffer."
 143
 144     token = self.get()
 145     if token != match:
 146       raise common.RCSExpected(token, match)
 147
 148   def unget(self, token):
 149     "Put this token back, for the next get() to return."
 150
 151     # Override the class' .get method with a function which clears the
 152     # overridden method then returns the pushed token. Since this function
 153     # will not be looked up via the class mechanism, it should be a "normal"
 154     # function, meaning it won't have "self" automatically inserted.
 155     # Therefore, we need to pass both self and the token thru via defaults.
 156
 157     # note: we don't put this into the input buffer because it may have been
 158     # @-unescaped already.
 159
 160     def give_it_back(self=self, token=token):
 161       del self.get
 162       return token
 163
 164     self.get = give_it_back
 165
 166   def mget(self, count):
 167     "Return multiple tokens. 'next' is at the end."
 168     result = [ ]
 169     for i in range(count):
 170       result.append(self.get())
 171     result.reverse()
 172     return result
 173
 174
 175 class Parser(common._Parser):
 176   stream_class = _TokenStream