t/compilers/pge/perl6regex/01-regex.t

   1 #!./parrot
   2 # Copyright (C) 2001-2008, The Perl Foundation.
   3 # $Id$
   4
   5 =head1 NAME
   6
   7 t/compilers/pge/perl6regex/01-regex.t  -- Basic Perl6Regex tests
   8
   9 =head1 DESCRIPTION
  10
  11 These tests are based on L<http://dev.perl.org/perl6/doc/design/syn/S05.html>.
  12 Individual tests are stored in the C<rx_*> files in the same directory;
  13 There is one test per line: each test consists of the following
  14 columns (separated by one *or more* tabs):
  15
  16 =over 4
  17
  18 =item pattern
  19
  20 The Perl 6 regex to test.
  21
  22 =item target
  23
  24 The string that will be matched against the pattern. Use '' to indicate
  25 an empty string.
  26
  27 =item result
  28
  29 The expected result of the match. Either C<y> for a successful match, C<n>
  30 for a failed one. Otherwise the output is expected to begin and end with
  31 C</>.
  32
  33 This result is used in one of two ways: If an exception is thrown by the
  34 match, the result must be contained in the exception's message. If the match
  35 succeeds, then the message must be contained in a dump of the match object.
  36
  37 =item test id
  38
  39 A unique test identifier. This allows us to track TODO/SKIP information in
  40 *this* file instead of the associated test file, which lets us easily
  41 share the tests across implementations of perl6's regex engine.
  42
  43 =item description
  44
  45 Description of the test.
  46
  47 =back
  48
  49 =head1 SYNOPSIS
  50
  51     % prove t/compilers/pge/01-regex.t
  52
  53 =cut
  54
  55 .const string TESTS = 'no_plan'
  56
  57 .sub main :main
  58     load_bytecode 'Test/Builder.pir'
  59     load_bytecode 'PGE.pbc'
  60     load_bytecode 'PGE/Dumper.pbc'
  61     load_bytecode 'String/Utils.pbc'
  62     .include "iglobals.pasm"
  63
  64     # Variable declarations, initializations
  65     .local pmc test       # the test harness object.
  66                test = new [ 'Test'; 'Builder' ]
  67
  68     .local string test_dir # the directory containing tests
  69                   test_dir = 't/compilers/pge/perl6regex/'
  70
  71     .local pmc test_files # values are test file names to run.
  72                test_files = new 'ResizablePMCArray'
  73
  74     # populate the list of test files
  75     push test_files, 'rx_metachars'
  76     push test_files, 'rx_quantifiers'
  77     push test_files, 'rx_backtrack'
  78     push test_files, 'rx_charclass'
  79     push test_files, 'rx_subrules'
  80     push test_files, 'rx_lookarounds'
  81     push test_files, 'rx_captures'
  82     push test_files, 'rx_modifiers'
  83     push test_files, 'rx_syntax'
  84
  85     .local pmc interp     # a handle to our interpreter object.
  86                interp = getinterp
  87
  88     .local pmc config
  89                config = interp[.IGLOBALS_CONFIG_HASH]
  90
  91     .local int has_icu    # flag indicating presense of icu
  92                has_icu = config['has_icu']
  93
  94     .local pmc file_iterator # iterate over list of files..
  95                file_iterator = new 'Iterator', test_files
  96
  97     .local int test_number   # the number of the test we're running
  98                test_number = 0
  99
 100     # these vars are in the loops below
 101     .local pmc file_handle   # currently open file.
 102     .local string test_file  # name of the current test file
 103     .local string test_line  # one line of one test file, a single test
 104     .local int ok            # is this a passing test?
 105
 106     # for any given test:
 107     .local int skiptest       # if the test is '# skip'
 108     .local int todotest       # if the test is '# todo'
 109     .local string reason      # reason for todo/skip
 110     .local pmc rule           # the rule
 111     .local pmc match          # the match
 112     .local string pattern     # the regexp
 113     .local string target      # this string to match against the regex
 114     .local string result      # expected result of this test. (y/n/...)
 115     .local string description # user-facing description of the test.
 116
 117     # how many tests to run?
 118     # XXX: this should be summed automatically from test_files data
 119     #      until then, it's set to no plan
 120     test.'plan'(TESTS)
 121
 122
 123   outer_loop:
 124     unless file_iterator goto end_outer_loop
 125     .local string test_name
 126                   test_name = shift file_iterator
 127     # local test number in test file
 128     .local int local_test_number
 129                local_test_number = 0
 130
 131     # local line number in test file
 132     .local int local_line_number
 133                local_line_number = 0
 134
 135     # append the test directory and filename
 136     test_file = test_dir . test_name
 137
 138     # Open the test file
 139     file_handle = open test_file, '<'
 140     $S0 = typeof file_handle
 141     if $S0 == 'Undef' goto bad_file
 142
 143   next_test:
 144     skiptest = 0
 145     todotest = 0
 146     trace 0
 147
 148     # loop over the file, one at a time.
 149   loop:
 150     # read in the file one line at a time...
 151     $I0 = file_handle.'eof'()
 152     if $I0 goto end_loop
 153
 154     test_line = readline file_handle
 155     inc local_line_number
 156
 157     # if this line is not a comment, try a test
 158     $S0 = substr test_line, 0, 1
 159     if $S0 != '#' goto parse_test
 160
 161   parse_comment:
 162     # line is a comment, if no :pge<...> then skip comment
 163     $I0 = index test_line, ':pge<'
 164     if $I0 == -1 goto loop
 165
 166     # extract out the reason for skip or todo
 167     $I0 += 5
 168     $I1 = index test_line, '>', $I0
 169     $I1 -= $I0
 170     reason = substr test_line, $I0, $I1
 171
 172     # determine skip/todo
 173     $P0 = split ' ', test_line
 174     $S0 = $P0[1]
 175     if $S0 == 'skip' goto comment_skip
 176     if $S0 == 'todo' goto comment_todo
 177     if $S0 == 'trace' goto comment_trace
 178     goto loop
 179   comment_skip:
 180     skiptest = 1
 181     goto loop
 182   comment_todo:
 183     todotest = 1
 184     goto loop
 185   comment_trace:
 186     $I0 = reason
 187     trace $I0
 188     goto loop
 189
 190   parse_test:
 191     # skip lines without tabs
 192     $I0 = index test_line, "\t"
 193     if $I0 == -1 goto loop
 194     inc test_number
 195     inc local_test_number
 196
 197   parse_data:
 198     push_eh eh_bad_line
 199     ( pattern, target, result, description ) = parse_data( test_line )
 200     pop_eh
 201
 202     # prepend test filename and line number to description
 203     description = 'build_test_desc'( description, test_name, local_line_number )
 204
 205     if target != "''" goto got_target
 206     target = ''
 207
 208   got_target:
 209     target = 'backslash_escape'( target )
 210     result = 'backslash_escape'( result )
 211
 212     # Should this test be skipped?
 213     unless skiptest goto not_skip
 214     test.'skip'(1, reason)
 215     goto next_test
 216
 217   not_skip:
 218     push_eh thrown
 219     match = 'match_perl6regex'( pattern, target )
 220     pop_eh
 221
 222     if match goto matched
 223
 224     if result == 'n' goto is_ok
 225     if result == 'y' goto is_nok
 226     goto check_dump
 227
 228   matched:
 229     if result == 'y' goto is_ok
 230     if result == 'n' goto is_nok
 231     # goto check_dump
 232
 233   check_dump:
 234     $S1 = match.'dump_str'('mob', ' ', '')
 235
 236     # remove /'s
 237     $S0 = substr result, 0, 1
 238     if $S0 != "/" goto bad_line
 239     substr result, 0, 1, ''
 240     substr result, -1, 1, ''
 241
 242     $I0 = index $S1, result
 243     if $I0 == -1 goto is_nok
 244     # goto is_ok
 245
 246   is_ok:
 247     ok = 1
 248     goto emit_test
 249   is_nok:
 250     ok = 0
 251
 252   emit_test:
 253     unless todotest goto not_todo
 254     test.'todo'(ok, description, reason)
 255     goto next_test
 256   not_todo:
 257     test.'ok'(ok, description)
 258     goto next_test
 259
 260   end_loop:
 261     close file_handle
 262     goto outer_loop
 263   end_outer_loop:
 264
 265     test.'finish'()
 266     end
 267
 268   bad_file:
 269     print "Unable to open '"
 270     print test_file
 271     print "'\n"
 272
 273   thrown:
 274     .local pmc exception
 275     .local string message
 276     get_results '0,0', exception, message
 277     say message
 278     # remove /'s
 279     $S0 = substr result, 0, 1
 280     if $S0 != "/" goto bad_error
 281     substr result, 0, 1, ''
 282     substr result, -1, 1, ''
 283     $I0 = index message, result
 284     if $I0 == -1 goto bad_error
 285     ok = 1
 286     goto emit_test
 287   bad_error:
 288     ok = 0
 289     goto emit_test
 290   bad_line:
 291     $S0 = "Test not formatted properly!"
 292     test.'ok'(0, $S0)
 293     goto loop
 294   eh_bad_line:
 295     $S0 = "Test not formatted properly!"
 296     test.'ok'(0, $S0)
 297     goto loop
 298 .end
 299
 300
 301 .sub 'parse_data'
 302     .param string test_line   # the data record
 303
 304     .local pmc rule           # the rule
 305     .local pmc match          # the match
 306     .local string pattern     # the regexp
 307     .local string target      # this string to match against the regex
 308     .local string result      # expected result of this test. (y/n/...)
 309     .local string description # user-facing description of the test.
 310
 311     # NOTE: there can be multiple tabs between entries, so skip until
 312     # we have something.
 313     # remove the trailing newline from record
 314     chopn test_line, 1
 315
 316     $P1 = split "\t", test_line
 317     $I0 = elements $P1 # length of array
 318     .local int tab_number
 319                tab_number = 0
 320   get_pattern:
 321     if tab_number >= $I0 goto bad_line
 322     pattern     = $P1[tab_number]
 323     inc tab_number
 324   get_target:
 325     if tab_number >= $I0 goto bad_line
 326     target      = $P1[tab_number]
 327     inc tab_number
 328     if target == '' goto get_target
 329   get_result:
 330     if tab_number >= $I0 goto bad_line
 331     result      = $P1[tab_number]
 332     inc tab_number
 333     if result == '' goto get_result
 334   get_description:
 335     if tab_number >= $I0 goto bad_line
 336     description = $P1[tab_number]
 337     inc tab_number
 338     if description == '' goto get_description
 339
 340   return:
 341     .return ( pattern, target, result, description )
 342
 343   bad_line:
 344       $P1 = new 'Exception'
 345       $P1 = 'invalid data format'
 346       throw $P1
 347 .end
 348
 349
 350 .sub 'build_test_desc'
 351     .param string desc
 352     .param string test_name
 353     .param string local_test_number
 354
 355     $S0  = '['
 356     $S0 .= test_name
 357     $S0 .= ':'
 358     $S0 .= local_test_number
 359     $S0 .= '] '
 360
 361     desc = concat $S0, desc
 362
 363     .return (desc)
 364 .end
 365
 366
 367 .sub 'match_perl6regex'
 368     .param string pattern
 369     .param string target
 370
 371     .local pmc match
 372
 373     .local pmc p6rule     # the perl6 regex compiler
 374                p6rule = compreg 'PGE::Perl6Regex'
 375
 376     .local pmc rule
 377                rule = p6rule(pattern)
 378
 379     unless_null rule, match_it
 380     $P1 = new 'Exception'
 381     $P1 = 'rule error'
 382     throw $P1
 383   match_it:
 384     match = rule(target)
 385
 386     .return (match)
 387 .end
 388
 389
 390 # given a 2 digit string, convert to appropriate chr() value.
 391 .sub hex_chr
 392     .param string hex
 393
 394     $S0 = substr hex, 0, 1
 395     $S1 = substr hex, 1, 1
 396
 397     $I0 = hex_val($S0)
 398     $I1 = hex_val($S1)
 399
 400     $I0 *=16
 401     $I0 += $I1
 402
 403     $S2 = chr $I0
 404
 405     .return ($S2)
 406 .end
 407
 408
 409 # given a single digit hex value, return it's int value.
 410 .sub hex_val
 411   .param string digit
 412
 413   $I0 = ord digit
 414   if $I0 < 48 goto bad_digit
 415   if $I0 > 57 goto non_numeric
 416   $I0 -=48
 417   .return ($I0)
 418 non_numeric:
 419   if $I0 < 65 goto bad_digit
 420   if $I0 > 70 goto not_capital
 421   $I0 -= 55 # A is ascii 65, so reset to zero, add 10 for hex..
 422   .return ($I0)
 423 not_capital:
 424   if $I0 < 97 goto  bad_digit
 425   if $I0 > 102 goto bad_digit
 426   $I0 -= 87 # a is ascii 97, so reset to zero, add 10 for hex..
 427   .return ($I0)
 428
 429 bad_digit:
 430   $P1 = new 'Exception'
 431   $P1 = 'invalid hex digit'
 432   throw $P1
 433 .end
 434
 435
 436 .sub backslash_escape
 437     .param string target
 438
 439   target1:
 440     $I0 = index target, '\n'
 441     if $I0 == -1 goto target2
 442     substr target, $I0, 2, "\n"
 443     goto target1
 444   target2:
 445     $I0 = index target, '\r'
 446     if $I0 == -1 goto target3
 447     substr target, $I0, 2, "\r"
 448     goto target2
 449   target3:
 450     $I0 = index target, '\e'
 451     if $I0 == -1 goto target4
 452     substr target, $I0, 2, "\e"
 453     goto target3
 454   target4:
 455     $I0 = index target, '\t'
 456     if $I0 == -1 goto target5
 457     substr target, $I0, 2, "\t"
 458     goto target4
 459   target5:
 460     $I0 = index target, '\f'
 461     if $I0 == -1 goto target6
 462     substr target, $I0, 2, "\f"
 463     goto target5
 464   target6:
 465     # handle \xHH, hex escape.
 466
 467     $I0 = index target, '\x'
 468     if $I0 == -1 goto target7
 469
 470     $I1 = $I0 + 2
 471     $P0 = get_hll_global ['String';'Utils'], 'convert_digits_to_string'
 472     ($S0, $I2) = $P0(target, 'x', $I1)
 473     $S3 = substr target, $I1, $I2
 474     $I2 += 2
 475     substr target, $I0, $I2, $S0
 476     goto target6
 477   target7:
 478     .return (target)
 479 .end
 480
 481 =head1 BUGS AND LIMITATIONS
 482
 483 Note that while our job would be easier if we could use regular expressions
 484 in here, but we want to avoid any dependency on the thing we're testing.
 485
 486 Need to add in test ids, to avoid the precarious line numbering.
 487
 488 =cut
 489
 490 # Local Variables:
 491 #   mode: pir
 492 #   fill-column: 100
 493 # End:
 494 # vim: expandtab shiftwidth=4 ft=pir: