2 Tests for the tokenize module.
4 >>> import glob, random, sys
6 The tests can be really simple. Given a small fragment of source
7 code, print out a table with tokens. The ENDMARKER is omitted for
10 >>> dump_tokens("1 + 1")
11 NUMBER '1' (1, 0) (1, 1)
13 NUMBER '1' (1, 4) (1, 5)
15 >>> dump_tokens("if False:\\n"
17 ... " True = False # NEWLINE\\n")
18 NAME 'if' (1, 0) (1, 2)
19 NAME 'False' (1, 3) (1, 8)
21 NEWLINE '\\n' (1, 9) (1, 10)
22 COMMENT '# NL' (2, 4) (2, 8)
23 NL '\\n' (2, 8) (2, 9)
24 INDENT ' ' (3, 0) (3, 4)
25 NAME 'True' (3, 4) (3, 8)
27 NAME 'False' (3, 11) (3, 16)
28 COMMENT '# NEWLINE' (3, 17) (3, 26)
29 NEWLINE '\\n' (3, 26) (3, 27)
30 DEDENT '' (4, 0) (4, 0)
32 >>> indent_error_file = \"""
38 >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
39 Traceback (most recent call last):
41 IndentationError: unindent does not match any outer indentation level
43 Test roundtrip for `untokenize`. `f` is an open file or a string. The source
44 code in f is tokenized, converted back to source code via tokenize.untokenize(),
45 and tokenized again from the latter. The test fails if the second tokenization
46 doesn't match the first.
49 ... if isinstance(f, str): f = StringIO(f)
50 ... token_list = list(generate_tokens(f.readline))
52 ... tokens1 = [tok[:2] for tok in token_list]
53 ... new_text = untokenize(tokens1)
54 ... readline = iter(new_text.splitlines(1)).next
55 ... tokens2 = [tok[:2] for tok in generate_tokens(readline)]
56 ... return tokens1 == tokens2
59 There are some standard formatting practices that are easy to get right.
61 >>> roundtrip("if x == 1:\\n"
65 >>> roundtrip("# This is a comment\\n# This also")
68 Some people use different formatting conventions, which makes
69 untokenize a little trickier. Note that this test involves trailing
70 whitespace after the colon. Note that we use hex escapes to make the
71 two trailing blanks apparent in the expected output.
73 >>> roundtrip("if x == 1 : \\n"
77 >>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
78 >>> roundtrip(open(f))
81 >>> roundtrip("if x == 1:\\n"
82 ... " # A comment by itself.\\n"
83 ... " print x # Comment here, too.\\n"
84 ... " # Another comment.\\n"
85 ... "after_if = True\\n")
88 >>> roundtrip("if (x # The comments need to go in the right place\\n"
90 ... " print 'x==1'\\n")
93 >>> roundtrip("class Test: # A comment here\\n"
94 ... " # A comment with weird indent\\n"
95 ... " after_com = 5\\n"
96 ... " def x(m): return m*5 # a one liner\\n"
97 ... " def y(m): # A whitespace after the colon\\n"
98 ... " return y*4 # 3-space indent\\n")
101 Some error-handling code
103 >>> roundtrip("try: import somemodule\\n"
104 ... "except ImportError: # comment\\n"
105 ... " print 'Can not import' # comment2\\n"
106 ... "else: print 'Loaded'\\n")
109 Balancing continuation
111 >>> roundtrip("a = (3,4, \\n"
115 ... "z = {'a': 5,\\n"
116 ... "'b':15, 'c':True}\\n"
117 ... "x = len(y) + 5 - a[\\n"
119 ... "+ len(z) - z[\\n"
123 Ordinary integers and binary operators
125 >>> dump_tokens("0xff <= 255")
126 NUMBER '0xff' (1, 0) (1, 4)
127 OP '<=' (1, 5) (1, 7)
128 NUMBER '255' (1, 8) (1, 11)
129 >>> dump_tokens("0b10 <= 255")
130 NUMBER '0b10' (1, 0) (1, 4)
131 OP '<=' (1, 5) (1, 7)
132 NUMBER '255' (1, 8) (1, 11)
133 >>> dump_tokens("0o123 <= 0123")
134 NUMBER '0o123' (1, 0) (1, 5)
135 OP '<=' (1, 6) (1, 8)
136 NUMBER '0123' (1, 9) (1, 13)
137 >>> dump_tokens("01234567 > ~0x15")
138 NUMBER '01234567' (1, 0) (1, 8)
139 OP '>' (1, 9) (1, 10)
140 OP '~' (1, 11) (1, 12)
141 NUMBER '0x15' (1, 12) (1, 16)
142 >>> dump_tokens("2134568 != 01231515")
143 NUMBER '2134568' (1, 0) (1, 7)
144 OP '!=' (1, 8) (1, 10)
145 NUMBER '01231515' (1, 11) (1, 19)
146 >>> dump_tokens("(-124561-1) & 0200000000")
149 NUMBER '124561' (1, 2) (1, 8)
151 NUMBER '1' (1, 9) (1, 10)
152 OP ')' (1, 10) (1, 11)
153 OP '&' (1, 12) (1, 13)
154 NUMBER '0200000000' (1, 14) (1, 24)
155 >>> dump_tokens("0xdeadbeef != -1")
156 NUMBER '0xdeadbeef' (1, 0) (1, 10)
157 OP '!=' (1, 11) (1, 13)
158 OP '-' (1, 14) (1, 15)
159 NUMBER '1' (1, 15) (1, 16)
160 >>> dump_tokens("0xdeadc0de & 012345")
161 NUMBER '0xdeadc0de' (1, 0) (1, 10)
162 OP '&' (1, 11) (1, 12)
163 NUMBER '012345' (1, 13) (1, 19)
164 >>> dump_tokens("0xFF & 0x15 | 1234")
165 NUMBER '0xFF' (1, 0) (1, 4)
167 NUMBER '0x15' (1, 7) (1, 11)
168 OP '|' (1, 12) (1, 13)
169 NUMBER '1234' (1, 14) (1, 18)
173 >>> dump_tokens("x = 0L")
174 NAME 'x' (1, 0) (1, 1)
176 NUMBER '0L' (1, 4) (1, 6)
177 >>> dump_tokens("x = 0xfffffffffff")
178 NAME 'x' (1, 0) (1, 1)
180 NUMBER '0xffffffffff (1, 4) (1, 17)
181 >>> dump_tokens("x = 123141242151251616110l")
182 NAME 'x' (1, 0) (1, 1)
184 NUMBER '123141242151 (1, 4) (1, 26)
185 >>> dump_tokens("x = -15921590215012591L")
186 NAME 'x' (1, 0) (1, 1)
189 NUMBER '159215902150 (1, 5) (1, 23)
191 Floating point numbers
193 >>> dump_tokens("x = 3.14159")
194 NAME 'x' (1, 0) (1, 1)
196 NUMBER '3.14159' (1, 4) (1, 11)
197 >>> dump_tokens("x = 314159.")
198 NAME 'x' (1, 0) (1, 1)
200 NUMBER '314159.' (1, 4) (1, 11)
201 >>> dump_tokens("x = .314159")
202 NAME 'x' (1, 0) (1, 1)
204 NUMBER '.314159' (1, 4) (1, 11)
205 >>> dump_tokens("x = 3e14159")
206 NAME 'x' (1, 0) (1, 1)
208 NUMBER '3e14159' (1, 4) (1, 11)
209 >>> dump_tokens("x = 3E123")
210 NAME 'x' (1, 0) (1, 1)
212 NUMBER '3E123' (1, 4) (1, 9)
213 >>> dump_tokens("x+y = 3e-1230")
214 NAME 'x' (1, 0) (1, 1)
216 NAME 'y' (1, 2) (1, 3)
218 NUMBER '3e-1230' (1, 6) (1, 13)
219 >>> dump_tokens("x = 3.14e159")
220 NAME 'x' (1, 0) (1, 1)
222 NUMBER '3.14e159' (1, 4) (1, 12)
226 >>> dump_tokens("x = ''; y = \\\"\\\"")
227 NAME 'x' (1, 0) (1, 1)
229 STRING "''" (1, 4) (1, 6)
231 NAME 'y' (1, 8) (1, 9)
232 OP '=' (1, 10) (1, 11)
233 STRING '""' (1, 12) (1, 14)
234 >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
235 NAME 'x' (1, 0) (1, 1)
237 STRING '\\'"\\'' (1, 4) (1, 7)
239 NAME 'y' (1, 9) (1, 10)
240 OP '=' (1, 11) (1, 12)
241 STRING '"\\'"' (1, 13) (1, 16)
242 >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
243 NAME 'x' (1, 0) (1, 1)
245 STRING '"doesn\\'t "' (1, 4) (1, 14)
246 NAME 'shrink' (1, 14) (1, 20)
247 STRING '", does it"' (1, 20) (1, 31)
248 >>> dump_tokens("x = u'abc' + U'ABC'")
249 NAME 'x' (1, 0) (1, 1)
251 STRING "u'abc'" (1, 4) (1, 10)
252 OP '+' (1, 11) (1, 12)
253 STRING "U'ABC'" (1, 13) (1, 19)
254 >>> dump_tokens('y = u"ABC" + U"ABC"')
255 NAME 'y' (1, 0) (1, 1)
257 STRING 'u"ABC"' (1, 4) (1, 10)
258 OP '+' (1, 11) (1, 12)
259 STRING 'U"ABC"' (1, 13) (1, 19)
260 >>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
261 NAME 'x' (1, 0) (1, 1)
263 STRING "ur'abc'" (1, 4) (1, 11)
264 OP '+' (1, 12) (1, 13)
265 STRING "Ur'ABC'" (1, 14) (1, 21)
266 OP '+' (1, 22) (1, 23)
267 STRING "uR'ABC'" (1, 24) (1, 31)
268 OP '+' (1, 32) (1, 33)
269 STRING "UR'ABC'" (1, 34) (1, 41)
270 >>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
271 NAME 'y' (1, 0) (1, 1)
273 STRING 'ur"abc"' (1, 4) (1, 11)
274 OP '+' (1, 12) (1, 13)
275 STRING 'Ur"ABC"' (1, 14) (1, 21)
276 OP '+' (1, 22) (1, 23)
277 STRING 'uR"ABC"' (1, 24) (1, 31)
278 OP '+' (1, 32) (1, 33)
279 STRING 'UR"ABC"' (1, 34) (1, 41)
283 >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
284 NAME 'def' (1, 0) (1, 3)
285 NAME 'd22' (1, 4) (1, 7)
287 NAME 'a' (1, 8) (1, 9)
288 OP ',' (1, 9) (1, 10)
289 NAME 'b' (1, 11) (1, 12)
290 OP ',' (1, 12) (1, 13)
291 NAME 'c' (1, 14) (1, 15)
292 OP '=' (1, 15) (1, 16)
293 NUMBER '2' (1, 16) (1, 17)
294 OP ',' (1, 17) (1, 18)
295 NAME 'd' (1, 19) (1, 20)
296 OP '=' (1, 20) (1, 21)
297 NUMBER '2' (1, 21) (1, 22)
298 OP ',' (1, 22) (1, 23)
299 OP '*' (1, 24) (1, 25)
300 NAME 'k' (1, 25) (1, 26)
301 OP ')' (1, 26) (1, 27)
302 OP ':' (1, 27) (1, 28)
303 NAME 'pass' (1, 29) (1, 33)
304 >>> dump_tokens("def d01v_(a=1, *k, **w): pass")
305 NAME 'def' (1, 0) (1, 3)
306 NAME 'd01v_' (1, 4) (1, 9)
307 OP '(' (1, 9) (1, 10)
308 NAME 'a' (1, 10) (1, 11)
309 OP '=' (1, 11) (1, 12)
310 NUMBER '1' (1, 12) (1, 13)
311 OP ',' (1, 13) (1, 14)
312 OP '*' (1, 15) (1, 16)
313 NAME 'k' (1, 16) (1, 17)
314 OP ',' (1, 17) (1, 18)
315 OP '**' (1, 19) (1, 21)
316 NAME 'w' (1, 21) (1, 22)
317 OP ')' (1, 22) (1, 23)
318 OP ':' (1, 23) (1, 24)
319 NAME 'pass' (1, 25) (1, 29)
323 >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
324 ... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
325 NAME 'if' (1, 0) (1, 2)
326 NUMBER '1' (1, 3) (1, 4)
328 NUMBER '1' (1, 7) (1, 8)
329 OP '>' (1, 9) (1, 10)
330 NUMBER '1' (1, 11) (1, 12)
331 OP '==' (1, 13) (1, 15)
332 NUMBER '1' (1, 16) (1, 17)
333 OP '>=' (1, 18) (1, 20)
334 NUMBER '5' (1, 21) (1, 22)
335 OP '<=' (1, 23) (1, 25)
336 NUMBER '0x15' (1, 26) (1, 30)
337 OP '<=' (1, 31) (1, 33)
338 NUMBER '0x12' (1, 34) (1, 38)
339 OP '!=' (1, 39) (1, 41)
340 NUMBER '1' (1, 42) (1, 43)
341 NAME 'and' (1, 44) (1, 47)
342 NUMBER '5' (1, 48) (1, 49)
343 NAME 'in' (1, 50) (1, 52)
344 NUMBER '1' (1, 53) (1, 54)
345 NAME 'not' (1, 55) (1, 58)
346 NAME 'in' (1, 59) (1, 61)
347 NUMBER '1' (1, 62) (1, 63)
348 NAME 'is' (1, 64) (1, 66)
349 NUMBER '1' (1, 67) (1, 68)
350 NAME 'or' (1, 69) (1, 71)
351 NUMBER '5' (1, 72) (1, 73)
352 NAME 'is' (1, 74) (1, 76)
353 NAME 'not' (1, 77) (1, 80)
354 NUMBER '1' (1, 81) (1, 82)
355 OP ':' (1, 82) (1, 83)
356 NAME 'pass' (1, 84) (1, 88)
360 >>> dump_tokens("x = 1 << 1 >> 5")
361 NAME 'x' (1, 0) (1, 1)
363 NUMBER '1' (1, 4) (1, 5)
364 OP '<<' (1, 6) (1, 8)
365 NUMBER '1' (1, 9) (1, 10)
366 OP '>>' (1, 11) (1, 13)
367 NUMBER '5' (1, 14) (1, 15)
371 >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
372 NAME 'x' (1, 0) (1, 1)
374 NUMBER '1' (1, 4) (1, 5)
376 NAME 'y' (1, 8) (1, 9)
377 OP '+' (1, 10) (1, 11)
378 NUMBER '15' (1, 12) (1, 14)
379 OP '-' (1, 15) (1, 16)
380 NUMBER '01' (1, 17) (1, 19)
381 OP '+' (1, 20) (1, 21)
382 NUMBER '0x124' (1, 22) (1, 27)
383 OP '+' (1, 28) (1, 29)
384 NAME 'z' (1, 30) (1, 31)
385 OP '+' (1, 32) (1, 33)
386 NAME 'a' (1, 34) (1, 35)
387 OP '[' (1, 35) (1, 36)
388 NUMBER '5' (1, 36) (1, 37)
389 OP ']' (1, 37) (1, 38)
393 >>> dump_tokens("x = 1//1*1/5*12%0x12")
394 NAME 'x' (1, 0) (1, 1)
396 NUMBER '1' (1, 4) (1, 5)
397 OP '//' (1, 5) (1, 7)
398 NUMBER '1' (1, 7) (1, 8)
400 NUMBER '1' (1, 9) (1, 10)
401 OP '/' (1, 10) (1, 11)
402 NUMBER '5' (1, 11) (1, 12)
403 OP '*' (1, 12) (1, 13)
404 NUMBER '12' (1, 13) (1, 15)
405 OP '%' (1, 15) (1, 16)
406 NUMBER '0x12' (1, 16) (1, 20)
410 >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
412 NUMBER '1' (1, 1) (1, 2)
414 NUMBER '1' (1, 5) (1, 6)
416 NUMBER '1' (1, 9) (1, 10)
417 OP '|' (1, 11) (1, 12)
418 NUMBER '1' (1, 12) (1, 13)
419 OP '^' (1, 14) (1, 15)
420 OP '-' (1, 16) (1, 17)
421 NUMBER '1' (1, 17) (1, 18)
422 >>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
424 NUMBER '1' (1, 1) (1, 2)
426 NUMBER '1' (1, 3) (1, 4)
428 NUMBER '1' (1, 5) (1, 6)
430 NUMBER '1' (1, 7) (1, 8)
432 NUMBER '1' (1, 9) (1, 10)
433 OP '//' (1, 10) (1, 12)
434 NUMBER '1' (1, 12) (1, 13)
435 OP '-' (1, 14) (1, 15)
436 OP '-' (1, 16) (1, 17)
437 OP '-' (1, 17) (1, 18)
438 OP '-' (1, 18) (1, 19)
439 NUMBER '1' (1, 19) (1, 20)
440 OP '**' (1, 20) (1, 22)
441 NUMBER '1' (1, 22) (1, 23)
445 >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
446 NAME 'import' (1, 0) (1, 6)
447 NAME 'sys' (1, 7) (1, 10)
448 OP ',' (1, 10) (1, 11)
449 NAME 'time' (1, 12) (1, 16)
450 NEWLINE '\\n' (1, 16) (1, 17)
451 NAME 'x' (2, 0) (2, 1)
453 NAME 'sys' (2, 4) (2, 7)
455 NAME 'modules' (2, 8) (2, 15)
456 OP '[' (2, 15) (2, 16)
457 STRING "'time'" (2, 16) (2, 22)
458 OP ']' (2, 22) (2, 23)
459 OP '.' (2, 23) (2, 24)
460 NAME 'time' (2, 24) (2, 28)
461 OP '(' (2, 28) (2, 29)
462 OP ')' (2, 29) (2, 30)
466 >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
468 NAME 'staticmethod (1, 1) (1, 13)
469 NEWLINE '\\n' (1, 13) (1, 14)
470 NAME 'def' (2, 0) (2, 3)
471 NAME 'foo' (2, 4) (2, 7)
473 NAME 'x' (2, 8) (2, 9)
474 OP ',' (2, 9) (2, 10)
475 NAME 'y' (2, 10) (2, 11)
476 OP ')' (2, 11) (2, 12)
477 OP ':' (2, 12) (2, 13)
478 NAME 'pass' (2, 14) (2, 18)
480 Backslash means line continuation, except for comments
482 >>> roundtrip("x=1+\\\\n"
484 ... "# This is a comment\\\\n"
485 ... "# This also\\n")
487 >>> roundtrip("# Comment \\\\nx = 0")
490 Two string literals on the same line
492 >>> roundtrip("'' ''")
495 Test roundtrip on random python modules.
496 pass the '-ucompiler' option to process the full directory.
499 >>> tempdir = os.path.dirname(f) or os.curdir
500 >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
502 >>> if not test_support.is_resource_enabled("compiler"):
503 ... testfiles = random.sample(testfiles, 10)
505 >>> for testfile in testfiles:
506 ... if not roundtrip(open(testfile)):
507 ... print "Roundtrip failed for file %s" % testfile
513 >>> dump_tokens("def f():\\n\\tif x\\n \\tpass")
514 NAME 'def' (1, 0) (1, 3)
515 NAME 'f' (1, 4) (1, 5)
519 NEWLINE '\\n' (1, 8) (1, 9)
520 INDENT '\\t' (2, 0) (2, 1)
521 NAME 'if' (2, 1) (2, 3)
522 NAME 'x' (2, 4) (2, 5)
523 NEWLINE '\\n' (2, 5) (2, 6)
524 INDENT ' \\t' (3, 0) (3, 9)
525 NAME 'pass' (3, 9) (3, 13)
526 DEDENT '' (4, 0) (4, 0)
527 DEDENT '' (4, 0) (4, 0)
531 from test
import test_support
532 from tokenize
import (untokenize
, generate_tokens
, NUMBER
, NAME
, OP
,
533 STRING
, ENDMARKER
, tok_name
)
534 from StringIO
import StringIO
def dump_tokens(s):
    """Print out the tokens in s in a table format.

    Each token is printed as one line "TYPE TOKEN START END" (the token
    text is shown as a repr truncated to 13 characters, which is why long
    numbers appear clipped in the doctests above).  The ENDMARKER token
    and everything after it are omitted.
    """
    f = StringIO(s)
    for type, token, start, end, line in generate_tokens(f.readline):
        if type == ENDMARKER:
            break
        # Rebind 'type' to the token name so the %(type)s key in the
        # format string below picks it up from locals().
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            # Wrap the float literal in Decimal('...') by splicing in
            # four replacement tokens.
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
# Doctest collections picked up by test_support.run_doctest: the big
# module-level transcript plus decistmt's docstring.
__test__ = {"doctests": doctests, 'decistmt': decistmt}
def test_main():
    """Run all doctests in this module.

    The self-import gives run_doctest the module object so it can find
    both the module docstring and the __test__ dictionary.
    """
    from test import test_tokenize
    test_support.run_doctest(test_tokenize, True)

if __name__ == "__main__":
    test_main()