1 // -*- coding: utf-8 -*-
3 // Copyright (c) 2005 - 2006, Google Inc.
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
45 using pcrecpp::StringPiece
;
47 using pcrecpp::RE_Options
;
50 using pcrecpp::CRadix
;
52 static bool VERBOSE_TEST
= false;
54 // CHECK dies with a fatal error if condition is not true. It is *not*
55 // controlled by NDEBUG, so the check will be executed regardless of
56 // compilation mode. Therefore, it is safe to do things like:
57 // CHECK_EQ(fp->Write(x), 4)
58 #define CHECK(condition) do { \
60 fprintf(stderr, "%s:%d: Check failed: %s\n", \
61 __FILE__, __LINE__, #condition); \
66 #define CHECK_EQ(a, b) CHECK(a == b)
68 static void Timing1(int num_iters
) {
69 // Same pattern lots of times
70 RE
pattern("ruby:\\d+");
71 StringPiece
p("ruby:1234");
72 for (int j
= num_iters
; j
> 0; j
--) {
73 CHECK(pattern
.FullMatch(p
));
77 static void Timing2(int num_iters
) {
78 // Same pattern lots of times
79 RE
pattern("ruby:(\\d+)");
81 for (int j
= num_iters
; j
> 0; j
--) {
82 CHECK(pattern
.FullMatch("ruby:1234", &i
));
87 static void Timing3(int num_iters
) {
89 for (int j
= num_iters
; j
> 0; j
--) {
90 text_string
+= "this is another line\n";
93 RE
line_matcher(".*\n");
95 StringPiece
text(text_string
);
97 while (line_matcher
.Consume(&text
)) {
100 printf("Matched %d lines\n", counter
);
103 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
105 static void LeakTest() {
106 // Check for memory leaks
107 unsigned long long initial_size
= 0;
108 for (int i
= 0; i
< 100000; i
++) {
110 initial_size
= VirtualProcessSize();
111 printf("Size after 50000: %llu\n", initial_size
);
114 snprintf(buf
, sizeof(buf
), "pat%09d", i
);
117 uint64 final_size
= VirtualProcessSize();
118 printf("Size after 100000: %llu\n", final_size
);
119 const double growth
= double(final_size
- initial_size
) / final_size
;
120 printf("Growth: %0.2f%%", growth
* 100);
121 CHECK(growth
< 0.02); // Allow < 2% growth
126 static void RadixTests() {
127 printf("Testing hex\n");
129 #define CHECK_HEX(type, value) \
132 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
133 CHECK_EQ(v, 0x ## value); \
134 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
135 CHECK_EQ(v, 0x ## value); \
138 CHECK_HEX(short, 2bad
);
139 CHECK_HEX(unsigned short, 2badU
);
140 CHECK_HEX(int, dead
);
141 CHECK_HEX(unsigned int, deadU
);
142 CHECK_HEX(long, 7eadbeefL
);
143 CHECK_HEX(unsigned long, deadbeefUL
);
144 #ifdef HAVE_LONG_LONG
145 CHECK_HEX(long long, 12345678deadbeefLL
);
147 #ifdef HAVE_UNSIGNED_LONG_LONG
148 CHECK_HEX(unsigned long long, cafebabedeadbeefULL
);
153 printf("Testing octal\n");
155 #define CHECK_OCTAL(type, value) \
158 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
159 CHECK_EQ(v, 0 ## value); \
160 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
161 CHECK_EQ(v, 0 ## value); \
164 CHECK_OCTAL(short, 77777);
165 CHECK_OCTAL(unsigned short, 177777U);
166 CHECK_OCTAL(int, 17777777777);
167 CHECK_OCTAL(unsigned int, 37777777777U);
168 CHECK_OCTAL(long, 17777777777L);
169 CHECK_OCTAL(unsigned long, 37777777777UL);
170 #ifdef HAVE_LONG_LONG
171 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #ifdef HAVE_UNSIGNED_LONG_LONG
174 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
179 printf("Testing decimal\n");
181 #define CHECK_DECIMAL(type, value) \
184 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
185 CHECK_EQ(v, value); \
186 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
187 CHECK_EQ(v, value); \
190 CHECK_DECIMAL(short, -1);
191 CHECK_DECIMAL(unsigned short, 9999);
192 CHECK_DECIMAL(int, -1000);
193 CHECK_DECIMAL(unsigned int, 12345U);
194 CHECK_DECIMAL(long, -10000000L);
195 CHECK_DECIMAL(unsigned long, 3083324652U);
196 #ifdef HAVE_LONG_LONG
197 CHECK_DECIMAL(long long, -100000000000000LL);
199 #ifdef HAVE_UNSIGNED_LONG_LONG
200 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
207 static void TestReplace() {
208 printf("Testing Replace\n");
213 const char *original
;
217 static const ReplaceTest tests
[] = {
218 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
220 "the quick brown fox jumps over the lazy dogs.",
221 "ethay quick brown fox jumps over the lazy dogs.",
222 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
225 "paul.haahr@google.com",
226 "paul-NOSPAM.haahr@google.com",
227 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
267 "bbabbabbabbabbabb" },
272 "bbabbabb\nbbabbabb\nbb" },
277 "bbabbabb\rbbabbabb\rbb" },
282 "bbabbabb\r\nbbabbabb\r\nbb" },
286 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
287 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
288 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
291 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
292 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
293 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
294 "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
296 { "", NULL
, NULL
, NULL
, NULL
}
300 const bool support_utf8
= true;
302 const bool support_utf8
= false;
305 for (const ReplaceTest
*t
= tests
; t
->original
!= NULL
; ++t
) {
306 RE
re(t
->regexp
, RE_Options(PCRE_NEWLINE_CRLF
).set_utf8(support_utf8
));
307 assert(re
.error().empty());
308 string
one(t
->original
);
309 CHECK(re
.Replace(t
->rewrite
, &one
));
310 CHECK_EQ(one
, t
->single
);
311 string
all(t
->original
);
312 CHECK(re
.GlobalReplace(t
->rewrite
, &all
) > 0);
313 CHECK_EQ(all
, t
->global
);
316 // One final test: test \r\n replacement when we're not in CRLF mode
318 RE
re("b*", RE_Options(PCRE_NEWLINE_CR
).set_utf8(support_utf8
));
319 assert(re
.error().empty());
320 string
all("aa\r\naa\r\n");
321 CHECK(re
.GlobalReplace("bb", &all
) > 0);
322 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
325 RE
re("b*", RE_Options(PCRE_NEWLINE_LF
).set_utf8(support_utf8
));
326 assert(re
.error().empty());
327 string
all("aa\r\naa\r\n");
328 CHECK(re
.GlobalReplace("bb", &all
) > 0);
329 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
331 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
332 // Alas, the answer depends on how pcre was compiled.
335 static void TestExtract() {
336 printf("Testing Extract\n");
340 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s
));
341 CHECK_EQ(s
, "kremvax!boris");
343 // check the RE interface as well
344 CHECK(RE(".*").Extract("'\\0'", "foo", &s
));
345 CHECK_EQ(s
, "'foo'");
346 CHECK(!RE("bar").Extract("'\\0'", "baz", &s
));
347 CHECK_EQ(s
, "'foo'");
350 static void TestConsume() {
351 printf("Testing Consume\n");
355 string
s(" aaa b!@#$@#$cccc");
356 StringPiece
input(s
);
358 RE
r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
359 CHECK(r
.Consume(&input
, &word
));
360 CHECK_EQ(word
, "aaa");
361 CHECK(r
.Consume(&input
, &word
));
363 CHECK(! r
.Consume(&input
, &word
));
366 static void TestFindAndConsume() {
367 printf("Testing FindAndConsume\n");
371 string
s(" aaa b!@#$@#$cccc");
372 StringPiece
input(s
);
374 RE
r("(\\w+)"); // matches a word
375 CHECK(r
.FindAndConsume(&input
, &word
));
376 CHECK_EQ(word
, "aaa");
377 CHECK(r
.FindAndConsume(&input
, &word
));
379 CHECK(r
.FindAndConsume(&input
, &word
));
380 CHECK_EQ(word
, "cccc");
381 CHECK(! r
.FindAndConsume(&input
, &word
));
384 static void TestMatchNumberPeculiarity() {
385 printf("Testing match-number peculiaraity\n");
391 RE
r("(foo)|(bar)|(baz)");
392 CHECK(r
.PartialMatch("foo", &word1
, &word2
, &word3
));
393 CHECK_EQ(word1
, "foo");
396 CHECK(r
.PartialMatch("bar", &word1
, &word2
, &word3
));
398 CHECK_EQ(word2
, "bar");
400 CHECK(r
.PartialMatch("baz", &word1
, &word2
, &word3
));
403 CHECK_EQ(word3
, "baz");
404 CHECK(!r
.PartialMatch("f", &word1
, &word2
, &word3
));
407 CHECK(RE("(foo)|hello").FullMatch("hello", &a
));
411 static void TestRecursion() {
412 printf("Testing recursion\n");
414 // Get one string that passes (sometimes), one that never does.
415 string
text_good("abcdefghijk");
416 string
text_bad("acdefghijkl");
418 // According to pcretest, matching text_good against (\w+)*b
419 // requires match_limit of at least 8192, and match_recursion_limit
422 RE_Options options_ml
;
423 options_ml
.set_match_limit(8192);
424 RE
re("(\\w+)*b", options_ml
);
425 CHECK(re
.PartialMatch(text_good
) == true);
426 CHECK(re
.PartialMatch(text_bad
) == false);
427 CHECK(re
.FullMatch(text_good
) == false);
428 CHECK(re
.FullMatch(text_bad
) == false);
430 options_ml
.set_match_limit(1024);
431 RE
re2("(\\w+)*b", options_ml
);
432 CHECK(re2
.PartialMatch(text_good
) == false); // because of match_limit
433 CHECK(re2
.PartialMatch(text_bad
) == false);
434 CHECK(re2
.FullMatch(text_good
) == false);
435 CHECK(re2
.FullMatch(text_bad
) == false);
437 RE_Options options_mlr
;
438 options_mlr
.set_match_limit_recursion(50);
439 RE
re3("(\\w+)*b", options_mlr
);
440 CHECK(re3
.PartialMatch(text_good
) == true);
441 CHECK(re3
.PartialMatch(text_bad
) == false);
442 CHECK(re3
.FullMatch(text_good
) == false);
443 CHECK(re3
.FullMatch(text_bad
) == false);
445 options_mlr
.set_match_limit_recursion(10);
446 RE
re4("(\\w+)*b", options_mlr
);
447 CHECK(re4
.PartialMatch(text_good
) == false);
448 CHECK(re4
.PartialMatch(text_bad
) == false);
449 CHECK(re4
.FullMatch(text_good
) == false);
450 CHECK(re4
.FullMatch(text_bad
) == false);
453 // A meta-quoted string, interpreted as a pattern, should always match
454 // the original unquoted string.
455 static void TestQuoteMeta(string unquoted
, RE_Options options
= RE_Options()) {
456 string quoted
= RE::QuoteMeta(unquoted
);
457 RE
re(quoted
, options
);
458 CHECK(re
.FullMatch(unquoted
));
461 // A string containing meaningful regexp characters, which is then meta-
462 // quoted, should generally not match a string that the unquoted string matches.
463 static void NegativeTestQuoteMeta(string unquoted
, string should_not_match
,
464 RE_Options options
= RE_Options()) {
465 string quoted
= RE::QuoteMeta(unquoted
);
466 RE
re(quoted
, options
);
467 CHECK(!re
.FullMatch(should_not_match
));
470 // Tests that quoted meta characters match their original strings,
471 // and that a few things that shouldn't match indeed do not.
472 static void TestQuotaMetaSimple() {
473 TestQuoteMeta("foo");
474 TestQuoteMeta("foo.bar");
475 TestQuoteMeta("foo\\.bar");
476 TestQuoteMeta("[1-9]");
477 TestQuoteMeta("1.5-2.0?");
478 TestQuoteMeta("\\d");
479 TestQuoteMeta("Who doesn't like ice cream?");
480 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
481 TestQuoteMeta("((?!)xxx).*yyy");
485 static void TestQuoteMetaSimpleNegative() {
486 NegativeTestQuoteMeta("foo", "bar");
487 NegativeTestQuoteMeta("...", "bar");
488 NegativeTestQuoteMeta("\\.", ".");
489 NegativeTestQuoteMeta("\\.", "..");
490 NegativeTestQuoteMeta("(a)", "a");
491 NegativeTestQuoteMeta("(a|b)", "a");
492 NegativeTestQuoteMeta("(a|b)", "(a)");
493 NegativeTestQuoteMeta("(a|b)", "a|b");
494 NegativeTestQuoteMeta("[0-9]", "0");
495 NegativeTestQuoteMeta("[0-9]", "0-9");
496 NegativeTestQuoteMeta("[0-9]", "[9]");
497 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
500 static void TestQuoteMetaLatin1() {
501 TestQuoteMeta("3\xb2 = 9");
504 static void TestQuoteMetaUtf8() {
506 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
507 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
508 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
509 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
510 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
511 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
512 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
513 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf8 (degree symbol)
519 static void TestQuoteMetaAll() {
520 printf("Testing QuoteMeta\n");
521 TestQuotaMetaSimple();
522 TestQuoteMetaSimpleNegative();
523 TestQuoteMetaLatin1();
528 // Options tests contributed by
529 // Giuseppe Maxia, CTO, Stardata s.r.l.
532 static void GetOneOptionResult(
533 const char *option_name
,
540 printf("Testing Option <%s>\n", option_name
);
542 printf("/%s/ finds \"%s\" within \"%s\" \n",
548 RE(regex
,options
).FullMatch(str
, &captured
);
550 RE(regex
,options
).PartialMatch(str
, &captured
);
551 CHECK_EQ(captured
, expected
);
554 static void TestOneOption(
555 const char *option_name
,
560 bool assertive
= true) {
562 printf("Testing Option <%s>\n", option_name
);
564 printf("'%s' %s /%s/ \n",
566 (assertive
? "matches" : "doesn't match"),
570 CHECK(RE(regex
,options
).FullMatch(str
));
572 CHECK(RE(regex
,options
).PartialMatch(str
));
575 CHECK(!RE(regex
,options
).FullMatch(str
));
577 CHECK(!RE(regex
,options
).PartialMatch(str
));
581 static void Test_CASELESS() {
585 options
.set_caseless(true);
586 TestOneOption("CASELESS (class)", "HELLO", "hello", options
, false);
587 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2
.set_caseless(true), false);
588 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options
, false);
590 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
591 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
592 options
.set_caseless(false);
593 TestOneOption("no CASELESS", "HELLO", "hello", options
, false, false);
596 static void Test_MULTILINE() {
599 const char *str
= "HELLO\n" "cruel\n" "world\n";
601 options
.set_multiline(true);
602 TestOneOption("MULTILINE (class)", "^cruel$", str
, options
, false);
603 TestOneOption("MULTILINE (class2)", "^cruel$", str
, options2
.set_multiline(true), false);
604 TestOneOption("MULTILINE (function)", "^cruel$", str
, pcrecpp::MULTILINE(), false);
605 options
.set_multiline(false);
606 TestOneOption("no MULTILINE", "^cruel$", str
, options
, false, false);
609 static void Test_DOTALL() {
612 const char *str
= "HELLO\n" "cruel\n" "world";
614 options
.set_dotall(true);
615 TestOneOption("DOTALL (class)", "HELLO.*world", str
, options
, true);
616 TestOneOption("DOTALL (class2)", "HELLO.*world", str
, options2
.set_dotall(true), true);
617 TestOneOption("DOTALL (function)", "HELLO.*world", str
, pcrecpp::DOTALL(), true);
618 options
.set_dotall(false);
619 TestOneOption("no DOTALL", "HELLO.*world", str
, options
, true, false);
622 static void Test_DOLLAR_ENDONLY() {
625 const char *str
= "HELLO world\n";
627 TestOneOption("no DOLLAR_ENDONLY", "world$", str
, options
, false);
628 options
.set_dollar_endonly(true);
629 TestOneOption("DOLLAR_ENDONLY 1", "world$", str
, options
, false, false);
630 TestOneOption("DOLLAR_ENDONLY 2", "world$", str
, options2
.set_dollar_endonly(true), false, false);
633 static void Test_EXTRA() {
635 const char *str
= "HELLO";
637 options
.set_extra(true);
638 TestOneOption("EXTRA 1", "\\HELL\\O", str
, options
, true, false );
639 TestOneOption("EXTRA 2", "\\HELL\\O", str
, RE_Options().set_extra(true), true, false );
640 options
.set_extra(false);
641 TestOneOption("no EXTRA", "\\HELL\\O", str
, options
, true );
644 static void Test_EXTENDED() {
647 const char *str
= "HELLO world";
649 options
.set_extended(true);
650 TestOneOption("EXTENDED (class)", "HELLO world", str
, options
, false, false);
651 TestOneOption("EXTENDED (class2)", "HELLO world", str
, options2
.set_extended(true), false, false);
652 TestOneOption("EXTENDED (class)",
660 TestOneOption("EXTENDED (function)", "HELLO world", str
, pcrecpp::EXTENDED(), false, false);
661 TestOneOption("EXTENDED (function)",
669 options
.set_extended(false);
670 TestOneOption("no EXTENDED", "HELLO world", str
, options
, false);
673 static void Test_NO_AUTO_CAPTURE() {
675 const char *str
= "HELLO world";
678 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
680 printf("parentheses capture text\n");
681 RE
re("(world|universe)$", options
);
682 CHECK(re
.Extract("\\1", str
, &captured
));
683 CHECK_EQ(captured
, "world");
684 options
.set_no_auto_capture(true);
685 printf("testing Option <NO_AUTO_CAPTURE>\n");
687 printf("parentheses do not capture text\n");
688 re
.Extract("\\1",str
, &captured
);
689 CHECK_EQ(captured
, "world");
692 static void Test_UNGREEDY() {
694 const char *str
= "HELLO, 'this' is the 'world'";
696 options
.set_ungreedy(true);
697 GetOneOptionResult("UNGREEDY 1", "('.*')", str
, options
, false, "'this'" );
698 GetOneOptionResult("UNGREEDY 2", "('.*')", str
, RE_Options().set_ungreedy(true), false, "'this'" );
699 GetOneOptionResult("UNGREEDY", "('.*?')", str
, options
, false, "'this' is the 'world'" );
701 options
.set_ungreedy(false);
702 GetOneOptionResult("no UNGREEDY", "('.*')", str
, options
, false, "'this' is the 'world'" );
703 GetOneOptionResult("no UNGREEDY", "('.*?')", str
, options
, false, "'this'" );
706 static void Test_all_options() {
707 const char *str
= "HELLO\n" "cruel\n" "world";
709 options
.set_all_options(PCRE_CASELESS
| PCRE_DOTALL
);
711 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str
, options
, false);
712 options
.set_all_options(0);
713 TestOneOption("all_options (0)", "^hello.*WORLD", str
, options
, false, false);
714 options
.set_all_options(PCRE_MULTILINE
| PCRE_EXTENDED
);
716 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str
, options
, false);
717 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
720 RE_Options(PCRE_MULTILINE
| PCRE_EXTENDED
),
723 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
731 options
.set_all_options(0);
732 TestOneOption("all_options (0)", "^ c r u e l $", str
, options
, false, false);
736 static void TestOptions() {
737 printf("Testing Options\n");
741 Test_DOLLAR_ENDONLY();
743 Test_NO_AUTO_CAPTURE();
749 static void TestConstructors() {
750 printf("Testing constructors\n");
753 options
.set_dotall(true);
754 const char *str
= "HELLO\n" "cruel\n" "world";
756 RE
orig("HELLO.*world", options
);
757 CHECK(orig
.FullMatch(str
));
760 CHECK(copy1
.FullMatch(str
));
762 RE
copy2("not a match");
763 CHECK(!copy2
.FullMatch(str
));
765 CHECK(copy2
.FullMatch(str
));
767 CHECK(copy2
.FullMatch(str
));
769 // Make sure when we assign to ourselves, nothing bad happens
773 CHECK(orig
.FullMatch(str
));
774 CHECK(copy1
.FullMatch(str
));
775 CHECK(copy2
.FullMatch(str
));
778 int main(int argc
, char** argv
) {
779 // Treat any flag as --help
780 if (argc
> 1 && argv
[1][0] == '-') {
781 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
782 " If 'timingX ###' is specified, run the given timing test\n"
783 " with the given number of iterations, rather than running\n"
784 " the default corectness test.\n", argv
[0]);
789 if ( argc
== 2 || atoi(argv
[2]) == 0) {
790 printf("timing mode needs a num-iters argument\n");
793 if (!strcmp(argv
[1], "timing1"))
794 Timing1(atoi(argv
[2]));
795 else if (!strcmp(argv
[1], "timing2"))
796 Timing2(atoi(argv
[2]));
797 else if (!strcmp(argv
[1], "timing3"))
798 Timing3(atoi(argv
[2]));
800 printf("Unknown argument '%s'\n", argv
[1]);
804 printf("Testing FullMatch\n");
809 /***** FullMatch with no args *****/
811 CHECK(RE("h.*o").FullMatch("hello"));
812 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
813 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
814 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
815 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
816 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
818 /***** FullMatch with args *****/
821 CHECK(RE("\\d+").FullMatch("1001"));
824 CHECK(RE("(\\d+)").FullMatch("1001", &i
));
826 CHECK(RE("(-?\\d+)").FullMatch("-123", &i
));
828 CHECK(!RE("()\\d+").FullMatch("10", &i
));
829 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
832 // Digits surrounding integer-arg
833 CHECK(RE("1(\\d*)4").FullMatch("1234", &i
));
835 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i
));
837 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i
));
839 CHECK(RE("(\\d)").PartialMatch("1234", &i
));
841 CHECK(RE("(-\\d)").PartialMatch("-1234", &i
));
845 CHECK(RE("h(.*)o").FullMatch("hello", &s
));
846 CHECK_EQ(s
, string("ell"));
850 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp
, &i
));
851 CHECK_EQ(sp
.size(), 4);
852 CHECK(memcmp(sp
.data(), "ruby", 4) == 0);
856 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s
, &i
));
857 CHECK_EQ(s
, string("ruby"));
861 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s
, (void*)NULL
, &i
));
862 CHECK_EQ(s
, string("ruby"));
868 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
873 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
874 CHECK_EQ(c
, static_cast<unsigned char>('H'));
878 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
879 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
880 CHECK(RE("(-?\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
881 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v
)); CHECK_EQ(v
, -32768);
882 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v
));
883 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v
));
887 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
888 CHECK(RE("(\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
889 CHECK(RE("(\\d+)").FullMatch("65535", &v
)); CHECK_EQ(v
, 65535);
890 CHECK(!RE("(\\d+)").FullMatch("65536", &v
));
894 static const int max_value
= 0x7fffffff;
895 static const int min_value
= -max_value
- 1;
896 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
897 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
898 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v
)); CHECK_EQ(v
, max_value
);
899 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v
)); CHECK_EQ(v
, min_value
);
900 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v
));
901 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v
));
905 static const unsigned int max_value
= 0xfffffffful
;
906 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
907 CHECK(RE("(\\d+)").FullMatch("4294967295", &v
)); CHECK_EQ(v
, max_value
);
908 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v
));
910 #ifdef HAVE_LONG_LONG
911 # if defined(__MINGW__) || defined(__MINGW32__)
920 static const long long max_value
= 0x7fffffffffffffffLL
;
921 static const long long min_value
= -max_value
- 1;
924 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
925 CHECK(RE("(-?\\d+)").FullMatch("-100",&v
)); CHECK_EQ(v
, -100);
927 snprintf(buf
, sizeof(buf
), LLD
, max_value
);
928 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
930 snprintf(buf
, sizeof(buf
), LLD
, min_value
);
931 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, min_value
);
933 snprintf(buf
, sizeof(buf
), LLD
, max_value
);
934 assert(buf
[strlen(buf
)-1] != '9');
935 buf
[strlen(buf
)-1]++;
936 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
938 snprintf(buf
, sizeof(buf
), LLD
, min_value
);
939 assert(buf
[strlen(buf
)-1] != '9');
940 buf
[strlen(buf
)-1]++;
941 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
944 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
946 unsigned long long v
;
948 static const unsigned long long max_value
= 0xffffffffffffffffULL
;
951 CHECK(RE("(-?\\d+)").FullMatch("100",&v
)); CHECK_EQ(v
, 100);
952 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2
)); CHECK_EQ(v2
, -100);
954 snprintf(buf
, sizeof(buf
), LLU
, max_value
);
955 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
957 assert(buf
[strlen(buf
)-1] != '9');
958 buf
[strlen(buf
)-1]++;
959 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
964 CHECK(RE("(.*)").FullMatch("100", &v
));
965 CHECK(RE("(.*)").FullMatch("-100.", &v
));
966 CHECK(RE("(.*)").FullMatch("1e23", &v
));
970 CHECK(RE("(.*)").FullMatch("100", &v
));
971 CHECK(RE("(.*)").FullMatch("-100.", &v
));
972 CHECK(RE("(.*)").FullMatch("1e23", &v
));
975 // Check that matching is fully anchored
976 CHECK(!RE("(\\d+)").FullMatch("x1001", &i
));
977 CHECK(!RE("(\\d+)").FullMatch("1001x", &i
));
978 CHECK(RE("x(\\d+)").FullMatch("x1001", &i
)); CHECK_EQ(i
, 1001);
979 CHECK(RE("(\\d+)x").FullMatch("1001x", &i
)); CHECK_EQ(i
, 1001);
982 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
983 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
984 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
987 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
988 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
989 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
990 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
992 // Check full-match handling (needs '$' tacked on internally)
993 CHECK(RE("fo|foo").FullMatch("fo"));
994 CHECK(RE("fo|foo").FullMatch("foo"));
995 CHECK(RE("fo|foo$").FullMatch("fo"));
996 CHECK(RE("fo|foo$").FullMatch("foo"));
997 CHECK(RE("foo$").FullMatch("foo"));
998 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
999 CHECK(!RE("fo|bar").FullMatch("fox"));
1001 // Uncomment the following if we change the handling of '$' to
1002 // prevent it from matching a trailing newline
1004 // Check that we don't get bitten by pcre's special handling of a
1005 // '\n' at the end of the string matching '$'
1006 CHECK(!RE("foo$").PartialMatch("foo\n"));
1011 CHECK(RE("").FullMatch(""));
1013 memset(a
, 0, sizeof(0));
1014 CHECK(RE("(\\d){1}").FullMatch("1",
1018 memset(a
, 0, sizeof(0));
1019 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1024 memset(a
, 0, sizeof(0));
1025 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1026 &a
[0], &a
[1], &a
[2]));
1031 memset(a
, 0, sizeof(0));
1032 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1033 &a
[0], &a
[1], &a
[2], &a
[3]));
1039 memset(a
, 0, sizeof(0));
1040 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1041 &a
[0], &a
[1], &a
[2],
1049 memset(a
, 0, sizeof(0));
1050 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1051 &a
[0], &a
[1], &a
[2],
1052 &a
[3], &a
[4], &a
[5]));
1060 memset(a
, 0, sizeof(0));
1061 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1062 &a
[0], &a
[1], &a
[2], &a
[3],
1063 &a
[4], &a
[5], &a
[6]));
1072 memset(a
, 0, sizeof(0));
1073 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1074 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1076 &a
[0], &a
[1], &a
[2], &a
[3],
1077 &a
[4], &a
[5], &a
[6], &a
[7],
1078 &a
[8], &a
[9], &a
[10], &a
[11],
1079 &a
[12], &a
[13], &a
[14], &a
[15]));
1097 /***** PartialMatch *****/
1099 printf("Testing PartialMatch\n");
1101 CHECK(RE("h.*o").PartialMatch("hello"));
1102 CHECK(RE("h.*o").PartialMatch("othello"));
1103 CHECK(RE("h.*o").PartialMatch("hello!"));
1104 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1106 /***** other tests *****/
1112 TestFindAndConsume();
1114 TestMatchNumberPeculiarity();
1116 // Check the pattern() accessor
1118 const string kPattern
= "http://([^/]+)/.*";
1119 const RE
re(kPattern
);
1120 CHECK_EQ(kPattern
, re
.pattern());
1123 // Check RE error field.
1126 CHECK(re
.error().empty()); // Must have no error
1130 // Check UTF-8 handling
1132 printf("Testing UTF-8 handling\n");
1134 // Three Japanese characters (nihongo)
1135 const char utf8_string
[] = {
1136 0xe6, 0x97, 0xa5, // 65e5
1137 0xe6, 0x9c, 0xac, // 672c
1138 0xe8, 0xaa, 0x9e, // 8a9e
1141 const char utf8_pattern
[] = {
1143 0xe6, 0x9c, 0xac, // 672c
1148 // Both should match in either mode, bytes or UTF-8
1149 RE
re_test1(".........");
1150 CHECK(re_test1
.FullMatch(utf8_string
));
1151 RE
re_test2("...", pcrecpp::UTF8());
1152 CHECK(re_test2
.FullMatch(utf8_string
));
1154 // Check that '.' matches one byte or UTF-8 character
1155 // according to the mode.
1158 CHECK(re_test3
.PartialMatch(utf8_string
, &ss
));
1159 CHECK_EQ(ss
, string("\xe6"));
1160 RE
re_test4("(.)", pcrecpp::UTF8());
1161 CHECK(re_test4
.PartialMatch(utf8_string
, &ss
));
1162 CHECK_EQ(ss
, string("\xe6\x97\xa5"));
1164 // Check that string matches itself in either mode
1165 RE
re_test5(utf8_string
);
1166 CHECK(re_test5
.FullMatch(utf8_string
));
1167 RE
re_test6(utf8_string
, pcrecpp::UTF8());
1168 CHECK(re_test6
.FullMatch(utf8_string
));
1170 // Check that pattern matches string only in UTF8 mode
1171 RE
re_test7(utf8_pattern
);
1172 CHECK(!re_test7
.FullMatch(utf8_string
));
1173 RE
re_test8(utf8_pattern
, pcrecpp::UTF8());
1174 CHECK(re_test8
.FullMatch(utf8_string
));
1177 // Check that ungreedy, UTF8 regular expressions don't match when they
1178 // oughtn't -- see bug 82246.
1180 // This code always worked.
1181 const char* pattern
= "\\w+X";
1182 const string target
= "a aX";
1183 RE
match_sentence(pattern
);
1184 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1186 CHECK(!match_sentence
.FullMatch(target
));
1187 CHECK(!match_sentence_re
.FullMatch(target
));
1191 const char* pattern
= "(?U)\\w+X";
1192 const string target
= "a aX";
1193 RE
match_sentence(pattern
);
1194 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1196 CHECK(!match_sentence
.FullMatch(target
));
1197 CHECK(!match_sentence_re
.FullMatch(target
));
1199 #endif /* def SUPPORT_UTF8 */
1201 printf("Testing error reporting\n");
1203 { RE
re("a\\1"); CHECK(!re
.error().empty()); }
1206 CHECK(!re
.error().empty());
1210 CHECK(!re
.error().empty());
1213 RE
re("a[[:foobar:]]");
1214 CHECK(!re
.error().empty());
1218 CHECK(!re
.error().empty());
1222 CHECK(!re
.error().empty());
1225 // Test that recursion is stopped
1229 if (getenv("VERBOSE_TEST") != NULL
)
1230 VERBOSE_TEST
= true;
1233 // Test the constructors