cvs2svn_rcsparse/default.py

   1 # -*-python-*-
   2 #
   3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
   4 #
   5 # By using this file, you agree to the terms and conditions set forth in
   6 # the LICENSE.html file which can be found at the top level of the ViewVC
   7 # distribution or at http://viewvc.org/license-1.html.
   8 #
   9 # For more information, visit http://viewvc.org/
  10 #
  11 # -----------------------------------------------------------------------
  12 #
  13 # This file was originally based on portions of the blame.py script by
  14 # Curt Hagenlocher.
  15 #
  16 # -----------------------------------------------------------------------
  17
  18 import string
  19 import common
  20
  21 class _TokenStream:
  22   token_term = string.whitespace + ';:'
  23
  24   # the algorithm is about the same speed for any CHUNK_SIZE chosen.
  25   # grab a good-sized chunk, but not too large to overwhelm memory.
  26   # note: we use a multiple of a standard block size
  27   CHUNK_SIZE  = 192 * 512  # about 100k
  28
  29 # CHUNK_SIZE  = 5   # for debugging, make the function grind...
  30
  31   def __init__(self, file):
  32     self.rcsfile = file
  33     self.idx = 0
  34     self.buf = self.rcsfile.read(self.CHUNK_SIZE)
  35     if self.buf == '':
  36       raise RuntimeError, 'EOF'
  37
  38   def get(self):
  39     "Get the next token from the RCS file."
  40
  41     # Note: we can afford to loop within Python, examining individual
  42     # characters. For the whitespace and tokens, the number of iterations
  43     # is typically quite small. Thus, a simple iterative loop will beat
  44     # out more complex solutions.
  45
  46     buf = self.buf
  47     idx = self.idx
  48
  49     while 1:
  50       if idx == len(buf):
  51         buf = self.rcsfile.read(self.CHUNK_SIZE)
  52         if buf == '':
  53           # signal EOF by returning None as the token
  54           del self.buf   # so we fail if get() is called again
  55           return None
  56         idx = 0
  57
  58       if buf[idx] not in string.whitespace:
  59         break
  60
  61       idx = idx + 1
  62
  63     if buf[idx] == ';' or buf[idx] == ':':
  64       self.buf = buf
  65       self.idx = idx + 1
  66       return buf[idx]
  67
  68     if buf[idx] != '@':
  69       end = idx + 1
  70       token = ''
  71       while 1:
  72         # find token characters in the current buffer
  73         while end < len(buf) and buf[end] not in self.token_term:
  74           end = end + 1
  75         token = token + buf[idx:end]
  76
  77         if end < len(buf):
  78           # we stopped before the end, so we have a full token
  79           idx = end
  80           break
  81
  82         # we stopped at the end of the buffer, so we may have a partial token
  83         buf = self.rcsfile.read(self.CHUNK_SIZE)
  84         idx = end = 0
  85
  86       self.buf = buf
  87       self.idx = idx
  88       return token
  89
  90     # a "string" which starts with the "@" character. we'll skip it when we
  91     # search for content.
  92     idx = idx + 1
  93
  94     chunks = [ ]
  95
  96     while 1:
  97       if idx == len(buf):
  98         idx = 0
  99         buf = self.rcsfile.read(self.CHUNK_SIZE)
 100         if buf == '':
 101           raise RuntimeError, 'EOF'
 102       i = string.find(buf, '@', idx)
 103       if i == -1:
 104         chunks.append(buf[idx:])
 105         idx = len(buf)
 106         continue
 107       if i == len(buf) - 1:
 108         chunks.append(buf[idx:i])
 109         idx = 0
 110         buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
 111         if buf == '@':
 112           raise RuntimeError, 'EOF'
 113         continue
 114       if buf[i + 1] == '@':
 115         chunks.append(buf[idx:i+1])
 116         idx = i + 2
 117         continue
 118
 119       chunks.append(buf[idx:i])
 120
 121       self.buf = buf
 122       self.idx = i + 1
 123
 124       return string.join(chunks, '')
 125
 126 #  _get = get
 127 #  def get(self):
 128     token = self._get()
 129     print 'T:', `token`
 130     return token
 131
 132   def match(self, match):
 133     "Try to match the next token from the input buffer."
 134
 135     token = self.get()
 136     if token != match:
 137       raise common.RCSExpected(token, match)
 138
 139   def unget(self, token):
 140     "Put this token back, for the next get() to return."
 141
 142     # Override the class' .get method with a function which clears the
 143     # overridden method then returns the pushed token. Since this function
 144     # will not be looked up via the class mechanism, it should be a "normal"
 145     # function, meaning it won't have "self" automatically inserted.
 146     # Therefore, we need to pass both self and the token thru via defaults.
 147
 148     # note: we don't put this into the input buffer because it may have been
 149     # @-unescaped already.
 150
 151     def give_it_back(self=self, token=token):
 152       del self.get
 153       return token
 154
 155     self.get = give_it_back
 156
 157   def mget(self, count):
 158     "Return multiple tokens. 'next' is at the end."
 159     result = [ ]
 160     for i in range(count):
 161       result.append(self.get())
 162     result.reverse()
 163     return result
 164
 165
 166 class Parser(common._Parser):
 167   stream_class = _TokenStream