cvs2svn_rcsparse/default.py

   1 # -*-python-*-
   2 #
   3 # Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved.
   4 #
   5 # By using this file, you agree to the terms and conditions set forth in
   6 # the LICENSE.html file which can be found at the top level of the ViewVC
   7 # distribution or at http://viewvc.org/license-1.html.
   8 #
   9 # For more information, visit http://viewvc.org/
  10 #
  11 # -----------------------------------------------------------------------
  12 #
  13 # This file was originally based on portions of the blame.py script by
  14 # Curt Hagenlocher.
  15 #
  16 # -----------------------------------------------------------------------
  17
  18 import string
  19 import common
  20
  21 class _TokenStream:
  22   token_term = string.whitespace + ";:"
  23   try:
  24     token_term = frozenset(token_term)
  25   except NameError:
  26     pass
  27
  28   # the algorithm is about the same speed for any CHUNK_SIZE chosen.
  29   # grab a good-sized chunk, but not too large to overwhelm memory.
  30   # note: we use a multiple of a standard block size
  31   CHUNK_SIZE  = 192 * 512  # about 100k
  32
  33 # CHUNK_SIZE  = 5   # for debugging, make the function grind...
  34
  35   def __init__(self, file):
  36     self.rcsfile = file
  37     self.idx = 0
  38     self.buf = self.rcsfile.read(self.CHUNK_SIZE)
  39     if self.buf == '':
  40       raise RuntimeError, 'EOF'
  41
  42   def get(self):
  43     "Get the next token from the RCS file."
  44
  45     # Note: we can afford to loop within Python, examining individual
  46     # characters. For the whitespace and tokens, the number of iterations
  47     # is typically quite small. Thus, a simple iterative loop will beat
  48     # out more complex solutions.
  49
  50     buf = self.buf
  51     lbuf = len(buf)
  52     idx = self.idx
  53
  54     while 1:
  55       if idx == lbuf:
  56         buf = self.rcsfile.read(self.CHUNK_SIZE)
  57         if buf == '':
  58           # signal EOF by returning None as the token
  59           del self.buf   # so we fail if get() is called again
  60           return None
  61         lbuf = len(buf)
  62         idx = 0
  63
  64       if buf[idx] not in string.whitespace:
  65         break
  66
  67       idx = idx + 1
  68
  69     if buf[idx] in ';:':
  70       self.buf = buf
  71       self.idx = idx + 1
  72       return buf[idx]
  73
  74     if buf[idx] != '@':
  75       end = idx + 1
  76       token = ''
  77       while 1:
  78         # find token characters in the current buffer
  79         while end < lbuf and buf[end] not in self.token_term:
  80           end = end + 1
  81         token = token + buf[idx:end]
  82
  83         if end < lbuf:
  84           # we stopped before the end, so we have a full token
  85           idx = end
  86           break
  87
  88         # we stopped at the end of the buffer, so we may have a partial token
  89         buf = self.rcsfile.read(self.CHUNK_SIZE)
  90         if buf == '':
  91           # signal EOF by returning None as the token
  92           del self.buf   # so we fail if get() is called again
  93           return None
  94         lbuf = len(buf)
  95         idx = end = 0
  96
  97       self.buf = buf
  98       self.idx = idx
  99       return token
 100
 101     # a "string" which starts with the "@" character. we'll skip it when we
 102     # search for content.
 103     idx = idx + 1
 104
 105     chunks = [ ]
 106
 107     while 1:
 108       if idx == lbuf:
 109         idx = 0
 110         buf = self.rcsfile.read(self.CHUNK_SIZE)
 111         if buf == '':
 112           raise RuntimeError, 'EOF'
 113         lbuf = len(buf)
 114       i = string.find(buf, '@', idx)
 115       if i == -1:
 116         chunks.append(buf[idx:])
 117         idx = lbuf
 118         continue
 119       if i == lbuf - 1:
 120         chunks.append(buf[idx:i])
 121         idx = 0
 122         buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
 123         if buf == '@':
 124           raise RuntimeError, 'EOF'
 125         lbuf = len(buf)
 126         continue
 127       if buf[i + 1] == '@':
 128         chunks.append(buf[idx:i+1])
 129         idx = i + 2
 130         continue
 131
 132       chunks.append(buf[idx:i])
 133
 134       self.buf = buf
 135       self.idx = i + 1
 136
 137       return string.join(chunks, '')
 138
 139 #  _get = get
 140 #  def get(self):
 141     token = self._get()
 142     print 'T:', `token`
 143     return token
 144
 145   def match(self, match):
 146     "Try to match the next token from the input buffer."
 147
 148     token = self.get()
 149     if token != match:
 150       raise common.RCSExpected(token, match)
 151
 152   def unget(self, token):
 153     "Put this token back, for the next get() to return."
 154
 155     # Override the class' .get method with a function which clears the
 156     # overridden method then returns the pushed token. Since this function
 157     # will not be looked up via the class mechanism, it should be a "normal"
 158     # function, meaning it won't have "self" automatically inserted.
 159     # Therefore, we need to pass both self and the token thru via defaults.
 160
 161     # note: we don't put this into the input buffer because it may have been
 162     # @-unescaped already.
 163
 164     def give_it_back(self=self, token=token):
 165       del self.get
 166       return token
 167
 168     self.get = give_it_back
 169
 170   def mget(self, count):
 171     "Return multiple tokens. 'next' is at the end."
 172     result = [ ]
 173     for i in range(count):
 174       result.append(self.get())
 175     result.reverse()
 176     return result
 177
 178
 179 class Parser(common._Parser):
 180   stream_class = _TokenStream