1 # -*- coding: iso-8859-1 -*-
2 """A lexical analyzer class for simple shell-like syntaxes."""
4 # Module and documentation by Eric S. Raymond, 21 Dec 1998
5 # Input stacking and error message cleanup added by ESR, March 2000
6 # push_source() and pop_source() made explicit by ESR, January 2001.
7 # Posix compliance, split(), string arguments, and
8 # iterator interface by Gustavo Niemeyer, April 2003.
12 from collections
import deque
15 from cStringIO
import StringIO
17 from StringIO
import StringIO
19 __all__
= ["shlex", "split"]
22 "A lexical analyzer class for simple shell-like syntaxes."
23 def __init__(self
, instream
=None, infile
=None, posix
=False):
24 if isinstance(instream
, basestring
):
25 instream
= StringIO(instream
)
26 if instream
is not None:
27 self
.instream
= instream
30 self
.instream
= sys
.stdin
38 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
39 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
41 self
.wordchars
+= ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
42 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
43 self
.whitespace
= ' \t\r\n'
44 self
.whitespace_split
= False
47 self
.escapedquotes
= '"'
49 self
.pushback
= deque()
53 self
.filestack
= deque()
56 print 'shlex: reading from %s, line %d' \
57 % (self
.instream
, self
.lineno
)
59 def push_token(self
, tok
):
60 "Push a token onto the stack popped by the get_token method"
62 print "shlex: pushing token " + repr(tok
)
63 self
.pushback
.appendleft(tok
)
65 def push_source(self
, newstream
, newfile
=None):
66 "Push an input source onto the lexer's input source stack."
67 if isinstance(newstream
, basestring
):
68 newstream
= StringIO(newstream
)
69 self
.filestack
.appendleft((self
.infile
, self
.instream
, self
.lineno
))
71 self
.instream
= newstream
74 if newfile
is not None:
75 print 'shlex: pushing to file %s' % (self
.infile
,)
77 print 'shlex: pushing to stream %s' % (self
.instream
,)
80 "Pop the input source stack."
82 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
.popleft()
84 print 'shlex: popping to %s, line %d' \
85 % (self
.instream
, self
.lineno
)
89 "Get a token from the input stream (or from stack if it's nonempty)"
91 tok
= self
.pushback
.popleft()
93 print "shlex: popping token " + repr(tok
)
95 # No pushback. Get a token.
96 raw
= self
.read_token()
98 if self
.source
is not None:
99 while raw
== self
.source
:
100 spec
= self
.sourcehook(self
.read_token())
102 (newfile
, newstream
) = spec
103 self
.push_source(newstream
, newfile
)
104 raw
= self
.get_token()
105 # Maybe we got EOF instead?
106 while raw
== self
.eof
:
107 if not self
.filestack
:
111 raw
= self
.get_token()
112 # Neither inclusion nor EOF
115 print "shlex: token=" + repr(raw
)
117 print "shlex: token=EOF"
120 def read_token(self
):
124 nextchar
= self
.instream
.read(1)
126 self
.lineno
= self
.lineno
+ 1
128 print "shlex: in state", repr(self
.state
), \
129 "I see character:", repr(nextchar
)
130 if self
.state
is None:
131 self
.token
= '' # past end of file
133 elif self
.state
== ' ':
135 self
.state
= None # end of file
137 elif nextchar
in self
.whitespace
:
139 print "shlex: I see whitespace in whitespace state"
140 if self
.token
or (self
.posix
and quoted
):
141 break # emit current token
144 elif nextchar
in self
.commenters
:
145 self
.instream
.readline()
146 self
.lineno
= self
.lineno
+ 1
147 elif self
.posix
and nextchar
in self
.escape
:
149 self
.state
= nextchar
150 elif nextchar
in self
.wordchars
:
151 self
.token
= nextchar
153 elif nextchar
in self
.quotes
:
155 self
.token
= nextchar
156 self
.state
= nextchar
157 elif self
.whitespace_split
:
158 self
.token
= nextchar
161 self
.token
= nextchar
162 if self
.token
or (self
.posix
and quoted
):
163 break # emit current token
166 elif self
.state
in self
.quotes
:
168 if not nextchar
: # end of file
170 print "shlex: I see EOF in quotes state"
171 # XXX what error should be raised here?
172 raise ValueError, "No closing quotation"
173 if nextchar
== self
.state
:
175 self
.token
= self
.token
+ nextchar
180 elif self
.posix
and nextchar
in self
.escape
and \
181 self
.state
in self
.escapedquotes
:
182 escapedstate
= self
.state
183 self
.state
= nextchar
185 self
.token
= self
.token
+ nextchar
186 elif self
.state
in self
.escape
:
187 if not nextchar
: # end of file
189 print "shlex: I see EOF in escape state"
190 # XXX what error should be raised here?
191 raise ValueError, "No escaped character"
192 # In posix shells, only the quote itself or the escape
193 # character may be escaped within quotes.
194 if escapedstate
in self
.quotes
and \
195 nextchar
!= self
.state
and nextchar
!= escapedstate
:
196 self
.token
= self
.token
+ self
.state
197 self
.token
= self
.token
+ nextchar
198 self
.state
= escapedstate
199 elif self
.state
== 'a':
201 self
.state
= None # end of file
203 elif nextchar
in self
.whitespace
:
205 print "shlex: I see whitespace in word state"
207 if self
.token
or (self
.posix
and quoted
):
208 break # emit current token
211 elif nextchar
in self
.commenters
:
212 self
.instream
.readline()
213 self
.lineno
= self
.lineno
+ 1
216 if self
.token
or (self
.posix
and quoted
):
217 break # emit current token
220 elif self
.posix
and nextchar
in self
.quotes
:
221 self
.state
= nextchar
222 elif self
.posix
and nextchar
in self
.escape
:
224 self
.state
= nextchar
225 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes \
226 or self
.whitespace_split
:
227 self
.token
= self
.token
+ nextchar
229 self
.pushback
.appendleft(nextchar
)
231 print "shlex: I see punctuation in word state"
234 break # emit current token
239 if self
.posix
and not quoted
and result
== '':
243 print "shlex: raw token=" + repr(result
)
245 print "shlex: raw token=EOF"
248 def sourcehook(self
, newfile
):
249 "Hook called on a filename to be sourced."
250 if newfile
[0] == '"':
251 newfile
= newfile
[1:-1]
252 # This implements cpp-like semantics for relative-path inclusion.
253 if isinstance(self
.infile
, basestring
) and not os
.path
.isabs(newfile
):
254 newfile
= os
.path
.join(os
.path
.dirname(self
.infile
), newfile
)
255 return (newfile
, open(newfile
, "r"))
257 def error_leader(self
, infile
=None, lineno
=None):
258 "Emit a C-compiler-like, Emacs-friendly error-message leader."
263 return "\"%s\", line %d: " % (infile
, lineno
)
269 token
= self
.get_token()
270 if token
== self
.eof
:
274 def split(s
, comments
=False, posix
=True):
275 lex
= shlex(s
, posix
=posix
)
276 lex
.whitespace_split
= True
281 if __name__
== '__main__':
282 if len(sys
.argv
) == 1:
286 lexer
= shlex(open(file), file)
288 tt
= lexer
.get_token()
290 print "Token: " + repr(tt
)