1 # -*- coding: iso-8859-1 -*-
2 """A lexical analyzer class for simple shell-like syntaxes."""
4 # Module and documentation by Eric S. Raymond, 21 Dec 1998
5 # Input stacking and error message cleanup added by ESR, March 2000
6 # push_source() and pop_source() made explicit by ESR, January 2001.
7 # Posix compliance, split(), string arguments, and
8 # iterator interface by Gustavo Niemeyer, April 2003.
12 from collections
import deque
14 from io
import StringIO
16 __all__
= ["shlex", "split"]
19 "A lexical analyzer class for simple shell-like syntaxes."
20 def __init__(self
, instream
=None, infile
=None, posix
=False):
21 if isinstance(instream
, str):
22 instream
= StringIO(instream
)
23 if instream
is not None:
24 self
.instream
= instream
27 self
.instream
= sys
.stdin
35 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
36 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
38 self
.wordchars
+= ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
39 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
40 self
.whitespace
= ' \t\r\n'
41 self
.whitespace_split
= False
44 self
.escapedquotes
= '"'
46 self
.pushback
= deque()
50 self
.filestack
= deque()
53 print('shlex: reading from %s, line %d' \
54 % (self
.instream
, self
.lineno
))
56 def push_token(self
, tok
):
57 "Push a token onto the stack popped by the get_token method"
59 print("shlex: pushing token " + repr(tok
))
60 self
.pushback
.appendleft(tok
)
62 def push_source(self
, newstream
, newfile
=None):
63 "Push an input source onto the lexer's input source stack."
64 if isinstance(newstream
, str):
65 newstream
= StringIO(newstream
)
66 self
.filestack
.appendleft((self
.infile
, self
.instream
, self
.lineno
))
68 self
.instream
= newstream
71 if newfile
is not None:
72 print('shlex: pushing to file %s' % (self
.infile
,))
74 print('shlex: pushing to stream %s' % (self
.instream
,))
77 "Pop the input source stack."
79 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
.popleft()
81 print('shlex: popping to %s, line %d' \
82 % (self
.instream
, self
.lineno
))
86 "Get a token from the input stream (or from stack if it's nonempty)"
88 tok
= self
.pushback
.popleft()
90 print("shlex: popping token " + repr(tok
))
92 # No pushback. Get a token.
93 raw
= self
.read_token()
95 if self
.source
is not None:
96 while raw
== self
.source
:
97 spec
= self
.sourcehook(self
.read_token())
99 (newfile
, newstream
) = spec
100 self
.push_source(newstream
, newfile
)
101 raw
= self
.get_token()
102 # Maybe we got EOF instead?
103 while raw
== self
.eof
:
104 if not self
.filestack
:
108 raw
= self
.get_token()
109 # Neither inclusion nor EOF
112 print("shlex: token=" + repr(raw
))
114 print("shlex: token=EOF")
117 def read_token(self
):
121 nextchar
= self
.instream
.read(1)
123 self
.lineno
= self
.lineno
+ 1
125 print("shlex: in state", repr(self
.state
), \
126 "I see character:", repr(nextchar
))
127 if self
.state
is None:
128 self
.token
= '' # past end of file
130 elif self
.state
== ' ':
132 self
.state
= None # end of file
134 elif nextchar
in self
.whitespace
:
136 print("shlex: I see whitespace in whitespace state")
137 if self
.token
or (self
.posix
and quoted
):
138 break # emit current token
141 elif nextchar
in self
.commenters
:
142 self
.instream
.readline()
143 self
.lineno
= self
.lineno
+ 1
144 elif self
.posix
and nextchar
in self
.escape
:
146 self
.state
= nextchar
147 elif nextchar
in self
.wordchars
:
148 self
.token
= nextchar
150 elif nextchar
in self
.quotes
:
152 self
.token
= nextchar
153 self
.state
= nextchar
154 elif self
.whitespace_split
:
155 self
.token
= nextchar
158 self
.token
= nextchar
159 if self
.token
or (self
.posix
and quoted
):
160 break # emit current token
163 elif self
.state
in self
.quotes
:
165 if not nextchar
: # end of file
167 print("shlex: I see EOF in quotes state")
168 # XXX what error should be raised here?
169 raise ValueError("No closing quotation")
170 if nextchar
== self
.state
:
172 self
.token
= self
.token
+ nextchar
177 elif self
.posix
and nextchar
in self
.escape
and \
178 self
.state
in self
.escapedquotes
:
179 escapedstate
= self
.state
180 self
.state
= nextchar
182 self
.token
= self
.token
+ nextchar
183 elif self
.state
in self
.escape
:
184 if not nextchar
: # end of file
186 print("shlex: I see EOF in escape state")
187 # XXX what error should be raised here?
188 raise ValueError("No escaped character")
189 # In posix shells, only the quote itself or the escape
190 # character may be escaped within quotes.
191 if escapedstate
in self
.quotes
and \
192 nextchar
!= self
.state
and nextchar
!= escapedstate
:
193 self
.token
= self
.token
+ self
.state
194 self
.token
= self
.token
+ nextchar
195 self
.state
= escapedstate
196 elif self
.state
== 'a':
198 self
.state
= None # end of file
200 elif nextchar
in self
.whitespace
:
202 print("shlex: I see whitespace in word state")
204 if self
.token
or (self
.posix
and quoted
):
205 break # emit current token
208 elif nextchar
in self
.commenters
:
209 self
.instream
.readline()
210 self
.lineno
= self
.lineno
+ 1
213 if self
.token
or (self
.posix
and quoted
):
214 break # emit current token
217 elif self
.posix
and nextchar
in self
.quotes
:
218 self
.state
= nextchar
219 elif self
.posix
and nextchar
in self
.escape
:
221 self
.state
= nextchar
222 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes \
223 or self
.whitespace_split
:
224 self
.token
= self
.token
+ nextchar
226 self
.pushback
.appendleft(nextchar
)
228 print("shlex: I see punctuation in word state")
231 break # emit current token
236 if self
.posix
and not quoted
and result
== '':
240 print("shlex: raw token=" + repr(result
))
242 print("shlex: raw token=EOF")
def sourcehook(self, newfile):
    """Resolve a filename named in a source request and open it.

    If ``newfile`` begins with a double quote, its first and last
    characters are dropped.  When ``self.infile`` is a string and the
    resulting name is not absolute, the name is joined onto the
    directory part of ``self.infile`` (cpp-like relative inclusion).

    Returns a ``(filename, stream)`` tuple, where ``stream`` is the
    file opened in text read mode.  Errors from ``open()`` propagate
    to the caller.
    """
    target = newfile
    if target[0] == '"':
        # Only the leading character is inspected; the trailing one is
        # removed unconditionally together with it.
        target = target[1:-1]

    including_file = self.infile
    if isinstance(including_file, str) and not os.path.isabs(target):
        # Relative names are looked up next to the including file.
        base_dir = os.path.dirname(including_file)
        target = os.path.join(base_dir, target)

    stream = open(target, "r")
    return (target, stream)
254 def error_leader(self
, infile
=None, lineno
=None):
255 "Emit a C-compiler-like, Emacs-friendly error-message leader."
260 return "\"%s\", line %d: " % (infile
, lineno
)
266 token
= self
.get_token()
267 if token
== self
.eof
:
271 def split(s
, comments
=False, posix
=True):
272 lex
= shlex(s
, posix
=posix
)
273 lex
.whitespace_split
= True
278 if __name__
== '__main__':
279 if len(sys
.argv
) == 1:
283 lexer
= shlex(open(file), file)
285 tt
= lexer
.get_token()
287 print("Token: " + repr(tt
))