Credit Nir Aides for r77288
[python.git] / Lib / idlelib / HyperParser.py
blob519de74d15830d667cec06943de62d99ae04b6b0
1 """
2 HyperParser
3 ===========
4 This module defines the HyperParser class, which provides advanced parsing
5 abilities for the ParenMatch and other extensions.
6 The HyperParser uses PyParser. PyParser is intended mostly to give information
7 on the proper indentation of code. HyperParser gives some information on the
8 structure of code, used by extensions to help the user.
9 """
11 import string
12 import keyword
13 import PyParse
class HyperParser:
    """Analyze the structure of the statement surrounding a text index.

    The constructor feeds the text preceding the index to a PyParse.Parser
    and stores:

    rawtext      -- the parsed text, without the trailing ' \\n' that is
                    appended for the parser's benefit.
    stopatindex  -- the text-widget index matching the end of rawtext.
    bracketing   -- the parser's bracketing of the last statement: a
                    sequence of (position in rawtext, nesting level) pairs.
    isopener     -- for each bracketing entry, whether it raises the
                    nesting level relative to the previous entry.

    set_index() then selects the position that the query methods
    (is_in_string, is_in_code, get_surrounding_brackets, get_expression)
    refer to.
    """

    def __init__(self, editwin, index):
        """Initialize the HyperParser to analyze the surroundings of the given
        index.

        editwin -- the editor window; its text, indentwidth, tabwidth,
                   context_use_ps1 and num_context_lines attributes and its
                   _build_char_in_string_func method are used.
        index   -- a text-widget index inside the statement to analyze.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # "line.column" -> line number
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try each context size in turn until the parser finds a good
            # place to start parsing, or until we reach the first line.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                # repr() replaces the deprecated backtick syntax.
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline at end.
                # We add a space so that index won't be at end of line, so that
                # its status will be the same as the char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start parsing right after the previous "console"-tagged range,
            # or at the top of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires a newline at end.
            # We add a space so that index won't be at end of line, so that
            # its status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, except for the last newline and space.
        self.rawtext = parser.str[:-2]
        # As far as I can see, parser.str preserves the statement we are in,
        # so that stopatindex can be used to synchronize the string with the
        # text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always correspond
        # to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate. Note that it must be
        in the same statement.

        Raises ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the distance from index to
        # stopatindex gives the offset of index from the end of rawtext.
        indexinrawtext = \
            len(self.rawtext) - len(self.text.get(index, self.stopatindex))
        if indexinrawtext < 0:
            raise ValueError("The index given is before the analyzed statement")
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while self.indexbracket < len(self.bracketing)-1 and \
              self.bracketing[self.indexbracket+1][0] < self.indexinrawtext:
            self.indexbracket += 1
        # An entry that starts exactly at the index also contains it, unless
        # that entry is an opener (the index is then just before the opener).
        if self.indexbracket < len(self.bracketing)-1 and \
           self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \
           not self.isopener[self.indexbracket+1]:
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return self.isopener[self.indexbracket] and \
               self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'")

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code (that is,
        neither in a string nor in a comment)?"""
        return not self.isopener[self.indexbracket] or \
               self.rawtext[self.bracketing[self.indexbracket][0]] not in \
                                                                ('#', '"', "'")

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """If the index given to the HyperParser is surrounded by a bracket
        defined in openers (or at least has one before it), return the
        indices of the opening bracket and the closing bracket (or the
        end of line, whichever comes first).
        If it is not surrounded by brackets, or the end of line comes before
        the closing bracket and mustclose is True, returns None.
        """
        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener of a requested kind.
        before = self.indexbracket
        while not self.isopener[before] or \
              self.rawtext[self.bracketing[before][0]] not in openers or \
              self.bracketing[before][1] > bracketinglevel:
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the entry where the nesting level drops below the
        # level of the opener we found.
        after = self.indexbracket + 1
        while after < len(self.bracketing) and \
              self.bracketing[after][1] >= bracketinglevel:
            after += 1

        # Positions in rawtext are converted to text-widget indices by
        # counting characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if after >= len(self.bracketing) or \
           self.bracketing[after][0] > len(self.rawtext):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the index
            # before it.
            afterindex = self.text.index("%s-%dc" %
                (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # This string includes all chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # This string includes all chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"

    # Given a string and pos, return the number of chars in the identifier
    # which ends at pos, or 0 if there is no such one. Saved words are not
    # identifiers.
    def _eat_identifier(self, str, limit, pos):
        """Return the length of the identifier in str that ends at pos.

        The scan never goes back past limit. Return 0 if there is no
        identifier there, if the candidate starts with a char that cannot
        start an identifier, or if it is a keyword.
        """
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if i < pos and (str[i] not in self._id_first_chars or \
                        keyword.iskeyword(str[i:pos])):
            i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the given
        index, which is empty if there is no real one.

        Raises ValueError if the index is not inside code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called if index "\
                             "is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        # The scan goes backwards from the index; brck_limit is the start of
        # the bracketing entry currently being scanned.
        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier or a bracket is expected next; False while
        # a dot is required in order to continue.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - one dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif not postdot_phase and \
                     pos > brck_limit and rawtext[pos-1] == '.':
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment, but we
                # shouldn't be.
                elif pos == brck_limit and brck_index > 0 and \
                     rawtext[bracketing[brck_index-1][0]] == '#':
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the last
                # identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, in order to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing bracket,
                # eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # The bracket is a string literal: include its prefix
                        # letters, so that e.g. b"..." or u"..." keeps its
                        # type when the expression is evaluated.
                        while pos > 0 and rawtext[pos-1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]