Lib/idlelib/HyperParser.py

   1 """
   2 HyperParser
   3 ===========
   4 This module defines the HyperParser class, which provides advanced parsing
   5 abilities for the ParenMatch and other extensions.
   6 The HyperParser uses PyParser. PyParser is intended mostly to give information
   7 on the proper indentation of code. HyperParser gives some information on the
   8 structure of code, used by extensions to help the user.
   9 """
  10
  11 import string
  12 import keyword
  13 import PyParse
  14
  15 class HyperParser:
  16
  17     def __init__(self, editwin, index):
  18         """Initialize the HyperParser to analyze the surroundings of the given
  19         index.
  20         """
  21
  22         self.editwin = editwin
  23         self.text = text = editwin.text
  24
  25         parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
  26
  27         def index2line(index):
  28             return int(float(index))
  29         lno = index2line(text.index(index))
  30
  31         if not editwin.context_use_ps1:
  32             for context in editwin.num_context_lines:
  33                 startat = max(lno - context, 1)
  34                 startatindex = `startat` + ".0"
  35                 stopatindex = "%d.end" % lno
  36                 # We add the newline because PyParse requires a newline at end.
  37                 # We add a space so that index won't be at end of line, so that
  38                 # its status will be the same as the char before it, if should.
  39                 parser.set_str(text.get(startatindex, stopatindex)+' \n')
  40                 bod = parser.find_good_parse_start(
  41                           editwin._build_char_in_string_func(startatindex))
  42                 if bod is not None or startat == 1:
  43                     break
  44             parser.set_lo(bod or 0)
  45         else:
  46             r = text.tag_prevrange("console", index)
  47             if r:
  48                 startatindex = r[1]
  49             else:
  50                 startatindex = "1.0"
  51             stopatindex = "%d.end" % lno
  52             # We add the newline because PyParse requires a newline at end.
  53             # We add a space so that index won't be at end of line, so that
  54             # its status will be the same as the char before it, if should.
  55             parser.set_str(text.get(startatindex, stopatindex)+' \n')
  56             parser.set_lo(0)
  57
  58         # We want what the parser has, except for the last newline and space.
  59         self.rawtext = parser.str[:-2]
  60         # As far as I can see, parser.str preserves the statement we are in,
  61         # so that stopatindex can be used to synchronize the string with the
  62         # text box indices.
  63         self.stopatindex = stopatindex
  64         self.bracketing = parser.get_last_stmt_bracketing()
  65         # find which pairs of bracketing are openers. These always correspond
  66         # to a character of rawtext.
  67         self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1]
  68                          for i in range(len(self.bracketing))]
  69
  70         self.set_index(index)
  71
  72     def set_index(self, index):
  73         """Set the index to which the functions relate. Note that it must be
  74         in the same statement.
  75         """
  76         indexinrawtext = \
  77             len(self.rawtext) - len(self.text.get(index, self.stopatindex))
  78         if indexinrawtext < 0:
  79             raise ValueError("The index given is before the analyzed statement")
  80         self.indexinrawtext = indexinrawtext
  81         # find the rightmost bracket to which index belongs
  82         self.indexbracket = 0
  83         while self.indexbracket < len(self.bracketing)-1 and \
  84               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext:
  85             self.indexbracket += 1
  86         if self.indexbracket < len(self.bracketing)-1 and \
  87            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \
  88            not self.isopener[self.indexbracket+1]:
  89             self.indexbracket += 1
  90
  91     def is_in_string(self):
  92         """Is the index given to the HyperParser is in a string?"""
  93         # The bracket to which we belong should be an opener.
  94         # If it's an opener, it has to have a character.
  95         return self.isopener[self.indexbracket] and \
  96                self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'")
  97
  98     def is_in_code(self):
  99         """Is the index given to the HyperParser is in a normal code?"""
 100         return not self.isopener[self.indexbracket] or \
 101                self.rawtext[self.bracketing[self.indexbracket][0]] not in \
 102                                                                 ('#', '"', "'")
 103
 104     def get_surrounding_brackets(self, openers='([{', mustclose=False):
 105         """If the index given to the HyperParser is surrounded by a bracket
 106         defined in openers (or at least has one before it), return the
 107         indices of the opening bracket and the closing bracket (or the
 108         end of line, whichever comes first).
 109         If it is not surrounded by brackets, or the end of line comes before
 110         the closing bracket and mustclose is True, returns None.
 111         """
 112         bracketinglevel = self.bracketing[self.indexbracket][1]
 113         before = self.indexbracket
 114         while not self.isopener[before] or \
 115               self.rawtext[self.bracketing[before][0]] not in openers or \
 116               self.bracketing[before][1] > bracketinglevel:
 117             before -= 1
 118             if before < 0:
 119                 return None
 120             bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
 121         after = self.indexbracket + 1
 122         while after < len(self.bracketing) and \
 123               self.bracketing[after][1] >= bracketinglevel:
 124             after += 1
 125
 126         beforeindex = self.text.index("%s-%dc" %
 127             (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
 128         if after >= len(self.bracketing) or \
 129            self.bracketing[after][0] > len(self.rawtext):
 130             if mustclose:
 131                 return None
 132             afterindex = self.stopatindex
 133         else:
 134             # We are after a real char, so it is a ')' and we give the index
 135             # before it.
 136             afterindex = self.text.index("%s-%dc" %
 137                 (self.stopatindex,
 138                  len(self.rawtext)-(self.bracketing[after][0]-1)))
 139
 140         return beforeindex, afterindex
 141
 142     # This string includes all chars that may be in a white space
 143     _whitespace_chars = " \t\n\\"
 144     # This string includes all chars that may be in an identifier
 145     _id_chars = string.ascii_letters + string.digits + "_"
 146     # This string includes all chars that may be the first char of an identifier
 147     _id_first_chars = string.ascii_letters + "_"
 148
 149     # Given a string and pos, return the number of chars in the identifier
 150     # which ends at pos, or 0 if there is no such one. Saved words are not
 151     # identifiers.
 152     def _eat_identifier(self, str, limit, pos):
 153         i = pos
 154         while i > limit and str[i-1] in self._id_chars:
 155             i -= 1
 156         if i < pos and (str[i] not in self._id_first_chars or \
 157                         keyword.iskeyword(str[i:pos])):
 158             i = pos
 159         return pos - i
 160
 161     def get_expression(self):
 162         """Return a string with the Python expression which ends at the given
 163         index, which is empty if there is no real one.
 164         """
 165         if not self.is_in_code():
 166             raise ValueError("get_expression should only be called if index "\
 167                              "is inside a code.")
 168
 169         rawtext = self.rawtext
 170         bracketing = self.bracketing
 171
 172         brck_index = self.indexbracket
 173         brck_limit = bracketing[brck_index][0]
 174         pos = self.indexinrawtext
 175
 176         last_identifier_pos = pos
 177         postdot_phase = True
 178
 179         while 1:
 180             # Eat whitespaces, comments, and if postdot_phase is False - one dot
 181             while 1:
 182                 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
 183                     # Eat a whitespace
 184                     pos -= 1
 185                 elif not postdot_phase and \
 186                      pos > brck_limit and rawtext[pos-1] == '.':
 187                     # Eat a dot
 188                     pos -= 1
 189                     postdot_phase = True
 190                 # The next line will fail if we are *inside* a comment, but we
 191                 # shouldn't be.
 192                 elif pos == brck_limit and brck_index > 0 and \
 193                      rawtext[bracketing[brck_index-1][0]] == '#':
 194                     # Eat a comment
 195                     brck_index -= 2
 196                     brck_limit = bracketing[brck_index][0]
 197                     pos = bracketing[brck_index+1][0]
 198                 else:
 199                     # If we didn't eat anything, quit.
 200                     break
 201
 202             if not postdot_phase:
 203                 # We didn't find a dot, so the expression end at the last
 204                 # identifier pos.
 205                 break
 206
 207             ret = self._eat_identifier(rawtext, brck_limit, pos)
 208             if ret:
 209                 # There is an identifier to eat
 210                 pos = pos - ret
 211                 last_identifier_pos = pos
 212                 # Now, in order to continue the search, we must find a dot.
 213                 postdot_phase = False
 214                 # (the loop continues now)
 215
 216             elif pos == brck_limit:
 217                 # We are at a bracketing limit. If it is a closing bracket,
 218                 # eat the bracket, otherwise, stop the search.
 219                 level = bracketing[brck_index][1]
 220                 while brck_index > 0 and bracketing[brck_index-1][1] > level:
 221                     brck_index -= 1
 222                 if bracketing[brck_index][0] == brck_limit:
 223                     # We were not at the end of a closing bracket
 224                     break
 225                 pos = bracketing[brck_index][0]
 226                 brck_index -= 1
 227                 brck_limit = bracketing[brck_index][0]
 228                 last_identifier_pos = pos
 229                 if rawtext[pos] in "([":
 230                     # [] and () may be used after an identifier, so we
 231                     # continue. postdot_phase is True, so we don't allow a dot.
 232                     pass
 233                 else:
 234                     # We can't continue after other types of brackets
 235                     break
 236
 237             else:
 238                 # We've found an operator or something.
 239                 break
 240
 241         return rawtext[last_identifier_pos:self.indexinrawtext]