1 // -*- coding: utf-8 -*-
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
41 #include <string.h> /* for memset and strcmp */
47 using pcrecpp::StringPiece
;
49 using pcrecpp::RE_Options
;
52 using pcrecpp::CRadix
;
// File-local flag. It starts out false and is set to true further down in
// this file when getenv("VERBOSE_TEST") returns non-NULL. Presumably it gates
// extra diagnostic output; its readers are not visible in this excerpt.
54 static bool VERBOSE_TEST
= false;
56 // CHECK dies with a fatal error if condition is not true. It is *not*
57 // controlled by NDEBUG, so the check will be executed regardless of
58 // compilation mode. Therefore, it is safe to do things like:
59 // CHECK_EQ(fp->Write(x), 4)
60 #define CHECK(condition) do { \
62 fprintf(stderr, "%s:%d: Check failed: %s\n", \
63 __FILE__, __LINE__, #condition); \
// CHECK_EQ(a, b) hands the comparison a == b to CHECK, which dies with a
// fatal error when it is false. Both arguments are parenthesized so that
// operands containing lower-precedence operators, e.g. CHECK_EQ(x & 1, 0),
// are compared as written instead of being re-associated around "==".
#define CHECK_EQ(a, b) CHECK((a) == (b))
70 static void Timing1(int num_iters
) {
71 // Same pattern lots of times
72 RE
pattern("ruby:\\d+");
73 StringPiece
p("ruby:1234");
74 for (int j
= num_iters
; j
> 0; j
--) {
75 CHECK(pattern
.FullMatch(p
));
79 static void Timing2(int num_iters
) {
80 // Same pattern lots of times
81 RE
pattern("ruby:(\\d+)");
83 for (int j
= num_iters
; j
> 0; j
--) {
84 CHECK(pattern
.FullMatch("ruby:1234", &i
));
89 static void Timing3(int num_iters
) {
91 for (int j
= num_iters
; j
> 0; j
--) {
92 text_string
+= "this is another line\n";
95 RE
line_matcher(".*\n");
97 StringPiece
text(text_string
);
99 while (line_matcher
.Consume(&text
)) {
102 printf("Matched %d lines\n", counter
);
105 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
107 static void LeakTest() {
108 // Check for memory leaks
109 unsigned long long initial_size
= 0;
110 for (int i
= 0; i
< 100000; i
++) {
112 initial_size
= VirtualProcessSize();
113 printf("Size after 50000: %llu\n", initial_size
);
115 char buf
[100]; // definitely big enough
116 sprintf(buf
, "pat%09d", i
);
119 uint64 final_size
= VirtualProcessSize();
120 printf("Size after 100000: %llu\n", final_size
);
121 const double growth
= double(final_size
- initial_size
) / final_size
;
122 printf("Growth: %0.2f%%", growth
* 100);
123 CHECK(growth
< 0.02); // Allow < 2% growth
128 static void RadixTests() {
129 printf("Testing hex\n");
131 #define CHECK_HEX(type, value) \
134 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
135 CHECK_EQ(v, 0x ## value); \
136 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
137 CHECK_EQ(v, 0x ## value); \
140 CHECK_HEX(short, 2bad
);
141 CHECK_HEX(unsigned short, 2badU
);
142 CHECK_HEX(int, dead
);
143 CHECK_HEX(unsigned int, deadU
);
144 CHECK_HEX(long, 7eadbeefL
);
145 CHECK_HEX(unsigned long, deadbeefUL
);
146 #ifdef HAVE_LONG_LONG
147 CHECK_HEX(long long, 12345678deadbeefLL
);
149 #ifdef HAVE_UNSIGNED_LONG_LONG
150 CHECK_HEX(unsigned long long, cafebabedeadbeefULL
);
155 printf("Testing octal\n");
157 #define CHECK_OCTAL(type, value) \
160 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
161 CHECK_EQ(v, 0 ## value); \
162 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
163 CHECK_EQ(v, 0 ## value); \
166 CHECK_OCTAL(short, 77777);
167 CHECK_OCTAL(unsigned short, 177777U);
168 CHECK_OCTAL(int, 17777777777);
169 CHECK_OCTAL(unsigned int, 37777777777U);
170 CHECK_OCTAL(long, 17777777777L);
171 CHECK_OCTAL(unsigned long, 37777777777UL);
172 #ifdef HAVE_LONG_LONG
173 CHECK_OCTAL(long long, 777777777777777777777LL);
175 #ifdef HAVE_UNSIGNED_LONG_LONG
176 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
181 printf("Testing decimal\n");
183 #define CHECK_DECIMAL(type, value) \
186 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
187 CHECK_EQ(v, value); \
188 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
189 CHECK_EQ(v, value); \
192 CHECK_DECIMAL(short, -1);
193 CHECK_DECIMAL(unsigned short, 9999);
194 CHECK_DECIMAL(int, -1000);
195 CHECK_DECIMAL(unsigned int, 12345U);
196 CHECK_DECIMAL(long, -10000000L);
197 CHECK_DECIMAL(unsigned long, 3083324652U);
198 #ifdef HAVE_LONG_LONG
199 CHECK_DECIMAL(long long, -100000000000000LL);
201 #ifdef HAVE_UNSIGNED_LONG_LONG
202 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
209 static void TestReplace() {
210 printf("Testing Replace\n");
215 const char *original
;
218 int global_count
; // the expected return value from ReplaceAll
220 static const ReplaceTest tests
[] = {
221 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
223 "the quick brown fox jumps over the lazy dogs.",
224 "ethay quick brown fox jumps over the lazy dogs.",
225 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
229 "paul.haahr@google.com",
230 "paul-NOSPAM.haahr@google.com",
231 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
285 "bbabbabb\nbbabbabb\nbb",
291 "bbabbabb\rbbabbabb\rbb",
297 "bbabbabb\r\nbbabbabb\r\nbb",
299 // Check empty-string matching (it's tricky!)
315 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
316 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
317 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
321 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
322 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
323 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
324 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
327 { "", NULL
, NULL
, NULL
, NULL
, 0 }
331 const bool support_utf8
= true;
333 const bool support_utf8
= false;
336 for (const ReplaceTest
*t
= tests
; t
->original
!= NULL
; ++t
) {
337 RE
re(t
->regexp
, RE_Options(PCRE_NEWLINE_CRLF
).set_utf8(support_utf8
));
338 assert(re
.error().empty());
339 string
one(t
->original
);
340 CHECK(re
.Replace(t
->rewrite
, &one
));
341 CHECK_EQ(one
, t
->single
);
342 string
all(t
->original
);
343 const int replace_count
= re
.GlobalReplace(t
->rewrite
, &all
);
344 CHECK_EQ(all
, t
->global
);
345 CHECK_EQ(replace_count
, t
->global_count
);
348 // One final test: test \r\n replacement when we're not in CRLF mode
350 RE
re("b*", RE_Options(PCRE_NEWLINE_CR
).set_utf8(support_utf8
));
351 assert(re
.error().empty());
352 string
all("aa\r\naa\r\n");
353 CHECK_EQ(re
.GlobalReplace("bb", &all
), 9);
354 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
357 RE
re("b*", RE_Options(PCRE_NEWLINE_LF
).set_utf8(support_utf8
));
358 assert(re
.error().empty());
359 string
all("aa\r\naa\r\n");
360 CHECK_EQ(re
.GlobalReplace("bb", &all
), 9);
361 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
363 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
364 // Alas, the answer depends on how pcre was compiled.
367 static void TestExtract() {
368 printf("Testing Extract\n");
372 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s
));
373 CHECK_EQ(s
, "kremvax!boris");
375 // check the RE interface as well
376 CHECK(RE(".*").Extract("'\\0'", "foo", &s
));
377 CHECK_EQ(s
, "'foo'");
378 CHECK(!RE("bar").Extract("'\\0'", "baz", &s
));
379 CHECK_EQ(s
, "'foo'");
382 static void TestConsume() {
383 printf("Testing Consume\n");
387 string
s(" aaa b!@#$@#$cccc");
388 StringPiece
input(s
);
390 RE
r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
391 CHECK(r
.Consume(&input
, &word
));
392 CHECK_EQ(word
, "aaa");
393 CHECK(r
.Consume(&input
, &word
));
395 CHECK(! r
.Consume(&input
, &word
));
398 static void TestFindAndConsume() {
399 printf("Testing FindAndConsume\n");
403 string
s(" aaa b!@#$@#$cccc");
404 StringPiece
input(s
);
406 RE
r("(\\w+)"); // matches a word
407 CHECK(r
.FindAndConsume(&input
, &word
));
408 CHECK_EQ(word
, "aaa");
409 CHECK(r
.FindAndConsume(&input
, &word
));
411 CHECK(r
.FindAndConsume(&input
, &word
));
412 CHECK_EQ(word
, "cccc");
413 CHECK(! r
.FindAndConsume(&input
, &word
));
416 static void TestMatchNumberPeculiarity() {
417 printf("Testing match-number peculiarity\n");
423 RE
r("(foo)|(bar)|(baz)");
424 CHECK(r
.PartialMatch("foo", &word1
, &word2
, &word3
));
425 CHECK_EQ(word1
, "foo");
428 CHECK(r
.PartialMatch("bar", &word1
, &word2
, &word3
));
430 CHECK_EQ(word2
, "bar");
432 CHECK(r
.PartialMatch("baz", &word1
, &word2
, &word3
));
435 CHECK_EQ(word3
, "baz");
436 CHECK(!r
.PartialMatch("f", &word1
, &word2
, &word3
));
439 CHECK(RE("(foo)|hello").FullMatch("hello", &a
));
443 static void TestRecursion() {
444 printf("Testing recursion\n");
446 // Get one string that passes (sometimes), one that never does.
447 string
text_good("abcdefghijk");
448 string
text_bad("acdefghijkl");
450 // According to pcretest, matching text_good against (\w+)*b
451 // requires match_limit of at least 8192, and match_recursion_limit
454 RE_Options options_ml
;
455 options_ml
.set_match_limit(8192);
456 RE
re("(\\w+)*b", options_ml
);
457 CHECK(re
.PartialMatch(text_good
) == true);
458 CHECK(re
.PartialMatch(text_bad
) == false);
459 CHECK(re
.FullMatch(text_good
) == false);
460 CHECK(re
.FullMatch(text_bad
) == false);
462 options_ml
.set_match_limit(1024);
463 RE
re2("(\\w+)*b", options_ml
);
464 CHECK(re2
.PartialMatch(text_good
) == false); // because of match_limit
465 CHECK(re2
.PartialMatch(text_bad
) == false);
466 CHECK(re2
.FullMatch(text_good
) == false);
467 CHECK(re2
.FullMatch(text_bad
) == false);
469 RE_Options options_mlr
;
470 options_mlr
.set_match_limit_recursion(50);
471 RE
re3("(\\w+)*b", options_mlr
);
472 CHECK(re3
.PartialMatch(text_good
) == true);
473 CHECK(re3
.PartialMatch(text_bad
) == false);
474 CHECK(re3
.FullMatch(text_good
) == false);
475 CHECK(re3
.FullMatch(text_bad
) == false);
477 options_mlr
.set_match_limit_recursion(10);
478 RE
re4("(\\w+)*b", options_mlr
);
479 CHECK(re4
.PartialMatch(text_good
) == false);
480 CHECK(re4
.PartialMatch(text_bad
) == false);
481 CHECK(re4
.FullMatch(text_good
) == false);
482 CHECK(re4
.FullMatch(text_bad
) == false);
485 // A meta-quoted string, interpreted as a pattern, should always match
486 // the original unquoted string.
487 static void TestQuoteMeta(string unquoted
, RE_Options options
= RE_Options()) {
488 string quoted
= RE::QuoteMeta(unquoted
);
489 RE
re(quoted
, options
);
490 CHECK(re
.FullMatch(unquoted
));
493 // A string containing meaningful regexp characters, which is then meta-
494 // quoted, should not generally match a string the unquoted string does.
495 static void NegativeTestQuoteMeta(string unquoted
, string should_not_match
,
496 RE_Options options
= RE_Options()) {
497 string quoted
= RE::QuoteMeta(unquoted
);
498 RE
re(quoted
, options
);
499 CHECK(!re
.FullMatch(should_not_match
));
502 // Tests that quoted meta characters match their original strings,
503 // and that a few things that shouldn't match indeed do not.
504 static void TestQuotaMetaSimple() {
505 TestQuoteMeta("foo");
506 TestQuoteMeta("foo.bar");
507 TestQuoteMeta("foo\\.bar");
508 TestQuoteMeta("[1-9]");
509 TestQuoteMeta("1.5-2.0?");
510 TestQuoteMeta("\\d");
511 TestQuoteMeta("Who doesn't like ice cream?");
512 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
513 TestQuoteMeta("((?!)xxx).*yyy");
515 TestQuoteMeta(string("foo\0bar", 7));
518 static void TestQuoteMetaSimpleNegative() {
519 NegativeTestQuoteMeta("foo", "bar");
520 NegativeTestQuoteMeta("...", "bar");
521 NegativeTestQuoteMeta("\\.", ".");
522 NegativeTestQuoteMeta("\\.", "..");
523 NegativeTestQuoteMeta("(a)", "a");
524 NegativeTestQuoteMeta("(a|b)", "a");
525 NegativeTestQuoteMeta("(a|b)", "(a)");
526 NegativeTestQuoteMeta("(a|b)", "a|b");
527 NegativeTestQuoteMeta("[0-9]", "0");
528 NegativeTestQuoteMeta("[0-9]", "0-9");
529 NegativeTestQuoteMeta("[0-9]", "[9]");
530 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
533 static void TestQuoteMetaLatin1() {
534 TestQuoteMeta("3\xb2 = 9");
537 static void TestQuoteMetaUtf8() {
539 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
540 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
541 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
542 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
543 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
544 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
545 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
546 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol)
552 static void TestQuoteMetaAll() {
553 printf("Testing QuoteMeta\n");
554 TestQuotaMetaSimple();
555 TestQuoteMetaSimpleNegative();
556 TestQuoteMetaLatin1();
561 // Options tests contributed by
562 // Giuseppe Maxia, CTO, Stardata s.r.l.
565 static void GetOneOptionResult(
566 const char *option_name
,
573 printf("Testing Option <%s>\n", option_name
);
575 printf("/%s/ finds \"%s\" within \"%s\" \n",
581 RE(regex
,options
).FullMatch(str
, &captured
);
583 RE(regex
,options
).PartialMatch(str
, &captured
);
584 CHECK_EQ(captured
, expected
);
587 static void TestOneOption(
588 const char *option_name
,
593 bool assertive
= true) {
595 printf("Testing Option <%s>\n", option_name
);
597 printf("'%s' %s /%s/ \n",
599 (assertive
? "matches" : "doesn't match"),
603 CHECK(RE(regex
,options
).FullMatch(str
));
605 CHECK(RE(regex
,options
).PartialMatch(str
));
608 CHECK(!RE(regex
,options
).FullMatch(str
));
610 CHECK(!RE(regex
,options
).PartialMatch(str
));
614 static void Test_CASELESS() {
618 options
.set_caseless(true);
619 TestOneOption("CASELESS (class)", "HELLO", "hello", options
, false);
620 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2
.set_caseless(true), false);
621 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options
, false);
623 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
624 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
625 options
.set_caseless(false);
626 TestOneOption("no CASELESS", "HELLO", "hello", options
, false, false);
629 static void Test_MULTILINE() {
632 const char *str
= "HELLO\n" "cruel\n" "world\n";
634 options
.set_multiline(true);
635 TestOneOption("MULTILINE (class)", "^cruel$", str
, options
, false);
636 TestOneOption("MULTILINE (class2)", "^cruel$", str
, options2
.set_multiline(true), false);
637 TestOneOption("MULTILINE (function)", "^cruel$", str
, pcrecpp::MULTILINE(), false);
638 options
.set_multiline(false);
639 TestOneOption("no MULTILINE", "^cruel$", str
, options
, false, false);
642 static void Test_DOTALL() {
645 const char *str
= "HELLO\n" "cruel\n" "world";
647 options
.set_dotall(true);
648 TestOneOption("DOTALL (class)", "HELLO.*world", str
, options
, true);
649 TestOneOption("DOTALL (class2)", "HELLO.*world", str
, options2
.set_dotall(true), true);
650 TestOneOption("DOTALL (function)", "HELLO.*world", str
, pcrecpp::DOTALL(), true);
651 options
.set_dotall(false);
652 TestOneOption("no DOTALL", "HELLO.*world", str
, options
, true, false);
655 static void Test_DOLLAR_ENDONLY() {
658 const char *str
= "HELLO world\n";
660 TestOneOption("no DOLLAR_ENDONLY", "world$", str
, options
, false);
661 options
.set_dollar_endonly(true);
662 TestOneOption("DOLLAR_ENDONLY 1", "world$", str
, options
, false, false);
663 TestOneOption("DOLLAR_ENDONLY 2", "world$", str
, options2
.set_dollar_endonly(true), false, false);
666 static void Test_EXTRA() {
668 const char *str
= "HELLO";
670 options
.set_extra(true);
671 TestOneOption("EXTRA 1", "\\HELL\\O", str
, options
, true, false );
672 TestOneOption("EXTRA 2", "\\HELL\\O", str
, RE_Options().set_extra(true), true, false );
673 options
.set_extra(false);
674 TestOneOption("no EXTRA", "\\HELL\\O", str
, options
, true );
677 static void Test_EXTENDED() {
680 const char *str
= "HELLO world";
682 options
.set_extended(true);
683 TestOneOption("EXTENDED (class)", "HELLO world", str
, options
, false, false);
684 TestOneOption("EXTENDED (class2)", "HELLO world", str
, options2
.set_extended(true), false, false);
685 TestOneOption("EXTENDED (class)",
693 TestOneOption("EXTENDED (function)", "HELLO world", str
, pcrecpp::EXTENDED(), false, false);
694 TestOneOption("EXTENDED (function)",
702 options
.set_extended(false);
703 TestOneOption("no EXTENDED", "HELLO world", str
, options
, false);
706 static void Test_NO_AUTO_CAPTURE() {
708 const char *str
= "HELLO world";
711 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
713 printf("parentheses capture text\n");
714 RE
re("(world|universe)$", options
);
715 CHECK(re
.Extract("\\1", str
, &captured
));
716 CHECK_EQ(captured
, "world");
717 options
.set_no_auto_capture(true);
718 printf("testing Option <NO_AUTO_CAPTURE>\n");
720 printf("parentheses do not capture text\n");
721 re
.Extract("\\1",str
, &captured
);
722 CHECK_EQ(captured
, "world");
725 static void Test_UNGREEDY() {
727 const char *str
= "HELLO, 'this' is the 'world'";
729 options
.set_ungreedy(true);
730 GetOneOptionResult("UNGREEDY 1", "('.*')", str
, options
, false, "'this'" );
731 GetOneOptionResult("UNGREEDY 2", "('.*')", str
, RE_Options().set_ungreedy(true), false, "'this'" );
732 GetOneOptionResult("UNGREEDY", "('.*?')", str
, options
, false, "'this' is the 'world'" );
734 options
.set_ungreedy(false);
735 GetOneOptionResult("no UNGREEDY", "('.*')", str
, options
, false, "'this' is the 'world'" );
736 GetOneOptionResult("no UNGREEDY", "('.*?')", str
, options
, false, "'this'" );
739 static void Test_all_options() {
740 const char *str
= "HELLO\n" "cruel\n" "world";
742 options
.set_all_options(PCRE_CASELESS
| PCRE_DOTALL
);
744 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str
, options
, false);
745 options
.set_all_options(0);
746 TestOneOption("all_options (0)", "^hello.*WORLD", str
, options
, false, false);
747 options
.set_all_options(PCRE_MULTILINE
| PCRE_EXTENDED
);
749 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str
, options
, false);
750 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
753 RE_Options(PCRE_MULTILINE
| PCRE_EXTENDED
),
756 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
764 options
.set_all_options(0);
765 TestOneOption("all_options (0)", "^ c r u e l $", str
, options
, false, false);
769 static void TestOptions() {
770 printf("Testing Options\n");
774 Test_DOLLAR_ENDONLY();
776 Test_NO_AUTO_CAPTURE();
782 static void TestConstructors() {
783 printf("Testing constructors\n");
786 options
.set_dotall(true);
787 const char *str
= "HELLO\n" "cruel\n" "world";
789 RE
orig("HELLO.*world", options
);
790 CHECK(orig
.FullMatch(str
));
793 CHECK(copy1
.FullMatch(str
));
795 RE
copy2("not a match");
796 CHECK(!copy2
.FullMatch(str
));
798 CHECK(copy2
.FullMatch(str
));
800 CHECK(copy2
.FullMatch(str
));
802 // Make sure when we assign to ourselves, nothing bad happens
806 CHECK(orig
.FullMatch(str
));
807 CHECK(copy1
.FullMatch(str
));
808 CHECK(copy2
.FullMatch(str
));
811 int main(int argc
, char** argv
) {
812 // Treat any flag as --help
813 if (argc
> 1 && argv
[1][0] == '-') {
814 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
815 " If 'timingX ###' is specified, run the given timing test\n"
816 " with the given number of iterations, rather than running\n"
817 " the default corectness test.\n", argv
[0]);
822 if ( argc
== 2 || atoi(argv
[2]) == 0) {
823 printf("timing mode needs a num-iters argument\n");
826 if (!strcmp(argv
[1], "timing1"))
827 Timing1(atoi(argv
[2]));
828 else if (!strcmp(argv
[1], "timing2"))
829 Timing2(atoi(argv
[2]));
830 else if (!strcmp(argv
[1], "timing3"))
831 Timing3(atoi(argv
[2]));
833 printf("Unknown argument '%s'\n", argv
[1]);
837 printf("PCRE C++ wrapper tests\n");
838 printf("Testing FullMatch\n");
843 /***** FullMatch with no args *****/
845 CHECK(RE("h.*o").FullMatch("hello"));
846 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
847 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
848 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
849 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
850 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
852 /***** FullMatch with args *****/
855 CHECK(RE("\\d+").FullMatch("1001"));
858 CHECK(RE("(\\d+)").FullMatch("1001", &i
));
860 CHECK(RE("(-?\\d+)").FullMatch("-123", &i
));
862 CHECK(!RE("()\\d+").FullMatch("10", &i
));
863 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
866 // Digits surrounding integer-arg
867 CHECK(RE("1(\\d*)4").FullMatch("1234", &i
));
869 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i
));
871 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i
));
873 CHECK(RE("(\\d)").PartialMatch("1234", &i
));
875 CHECK(RE("(-\\d)").PartialMatch("-1234", &i
));
879 CHECK(RE("h(.*)o").FullMatch("hello", &s
));
880 CHECK_EQ(s
, string("ell"));
884 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp
, &i
));
885 CHECK_EQ(sp
.size(), 4);
886 CHECK(memcmp(sp
.data(), "ruby", 4) == 0);
890 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s
, &i
));
891 CHECK_EQ(s
, string("ruby"));
894 // Ignore non-void* NULL arg
895 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL
));
896 CHECK(RE("h(.*)o").FullMatch("hello", (string
*)NULL
));
897 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece
*)NULL
));
898 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL
));
899 #ifdef HAVE_LONG_LONG
900 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL
));
902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL
));
903 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL
));
905 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
906 CHECK(!RE("h(.*)lo").FullMatch("hello", &s
, (char*)NULL
));
907 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL
));
908 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL
));
909 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL
));
910 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL
));
913 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s
, (void*)NULL
, &i
));
914 CHECK_EQ(s
, string("ruby"));
920 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
925 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
926 CHECK_EQ(c
, static_cast<unsigned char>('H'));
930 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
931 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
932 CHECK(RE("(-?\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
933 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v
)); CHECK_EQ(v
, -32768);
934 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v
));
935 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v
));
939 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
940 CHECK(RE("(\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
941 CHECK(RE("(\\d+)").FullMatch("65535", &v
)); CHECK_EQ(v
, 65535);
942 CHECK(!RE("(\\d+)").FullMatch("65536", &v
));
946 static const int max_value
= 0x7fffffff;
947 static const int min_value
= -max_value
- 1;
948 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
949 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
950 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v
)); CHECK_EQ(v
, max_value
);
951 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v
)); CHECK_EQ(v
, min_value
);
952 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v
));
953 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v
));
957 static const unsigned int max_value
= 0xfffffffful
;
958 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
959 CHECK(RE("(\\d+)").FullMatch("4294967295", &v
)); CHECK_EQ(v
, max_value
);
960 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v
));
962 #ifdef HAVE_LONG_LONG
963 # if defined(__MINGW__) || defined(__MINGW32__)
972 static const long long max_value
= 0x7fffffffffffffffLL
;
973 static const long long min_value
= -max_value
- 1;
974 char buf
[32]; // definitely big enough for a long long
976 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
977 CHECK(RE("(-?\\d+)").FullMatch("-100",&v
)); CHECK_EQ(v
, -100);
979 sprintf(buf
, LLD
, max_value
);
980 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
982 sprintf(buf
, LLD
, min_value
);
983 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, min_value
);
985 sprintf(buf
, LLD
, max_value
);
986 assert(buf
[strlen(buf
)-1] != '9');
987 buf
[strlen(buf
)-1]++;
988 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
990 sprintf(buf
, LLD
, min_value
);
991 assert(buf
[strlen(buf
)-1] != '9');
992 buf
[strlen(buf
)-1]++;
993 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
996 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
998 unsigned long long v
;
1000 static const unsigned long long max_value
= 0xffffffffffffffffULL
;
1001 char buf
[32]; // definitely big enough for an unsigned long long
1003 CHECK(RE("(-?\\d+)").FullMatch("100",&v
)); CHECK_EQ(v
, 100);
1004 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2
)); CHECK_EQ(v2
, -100);
1006 sprintf(buf
, LLU
, max_value
);
1007 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
1009 assert(buf
[strlen(buf
)-1] != '9');
1010 buf
[strlen(buf
)-1]++;
1011 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
1016 CHECK(RE("(.*)").FullMatch("100", &v
));
1017 CHECK(RE("(.*)").FullMatch("-100.", &v
));
1018 CHECK(RE("(.*)").FullMatch("1e23", &v
));
1022 CHECK(RE("(.*)").FullMatch("100", &v
));
1023 CHECK(RE("(.*)").FullMatch("-100.", &v
));
1024 CHECK(RE("(.*)").FullMatch("1e23", &v
));
1027 // Check that matching is fully anchored
1028 CHECK(!RE("(\\d+)").FullMatch("x1001", &i
));
1029 CHECK(!RE("(\\d+)").FullMatch("1001x", &i
));
1030 CHECK(RE("x(\\d+)").FullMatch("x1001", &i
)); CHECK_EQ(i
, 1001);
1031 CHECK(RE("(\\d+)x").FullMatch("1001x", &i
)); CHECK_EQ(i
, 1001);
1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1035 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1036 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1041 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1042 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1044 // Check full-match handling (needs '$' tacked on internally)
1045 CHECK(RE("fo|foo").FullMatch("fo"));
1046 CHECK(RE("fo|foo").FullMatch("foo"));
1047 CHECK(RE("fo|foo$").FullMatch("fo"));
1048 CHECK(RE("fo|foo$").FullMatch("foo"));
1049 CHECK(RE("foo$").FullMatch("foo"));
1050 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1051 CHECK(!RE("fo|bar").FullMatch("fox"));
1053 // Uncomment the following if we change the handling of '$' to
1054 // prevent it from matching a trailing newline
1056 // Check that we don't get bitten by pcre's special handling of a
1057 // '\n' at the end of the string matching '$'
1058 CHECK(!RE("foo$").PartialMatch("foo\n"));
1063 CHECK(RE("").FullMatch(""));
1065 memset(a
, 0, sizeof(0));
1066 CHECK(RE("(\\d){1}").FullMatch("1",
1070 memset(a
, 0, sizeof(0));
1071 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1076 memset(a
, 0, sizeof(0));
1077 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1078 &a
[0], &a
[1], &a
[2]));
1083 memset(a
, 0, sizeof(0));
1084 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1085 &a
[0], &a
[1], &a
[2], &a
[3]));
1091 memset(a
, 0, sizeof(0));
1092 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1093 &a
[0], &a
[1], &a
[2],
1101 memset(a
, 0, sizeof(0));
1102 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1103 &a
[0], &a
[1], &a
[2],
1104 &a
[3], &a
[4], &a
[5]));
1112 memset(a
, 0, sizeof(0));
1113 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1114 &a
[0], &a
[1], &a
[2], &a
[3],
1115 &a
[4], &a
[5], &a
[6]));
1124 memset(a
, 0, sizeof(0));
1125 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1126 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1128 &a
[0], &a
[1], &a
[2], &a
[3],
1129 &a
[4], &a
[5], &a
[6], &a
[7],
1130 &a
[8], &a
[9], &a
[10], &a
[11],
1131 &a
[12], &a
[13], &a
[14], &a
[15]));
1149 /***** PartialMatch *****/
1151 printf("Testing PartialMatch\n");
1153 CHECK(RE("h.*o").PartialMatch("hello"));
1154 CHECK(RE("h.*o").PartialMatch("othello"));
1155 CHECK(RE("h.*o").PartialMatch("hello!"));
1156 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1158 /***** other tests *****/
1164 TestFindAndConsume();
1166 TestMatchNumberPeculiarity();
1168 // Check the pattern() accessor
1170 const string kPattern
= "http://([^/]+)/.*";
1171 const RE
re(kPattern
);
1172 CHECK_EQ(kPattern
, re
.pattern());
1175 // Check RE error field.
1178 CHECK(re
.error().empty()); // Must have no error
1182 // Check UTF-8 handling
1184 printf("Testing UTF-8 handling\n");
1186 // Three Japanese characters (nihongo)
1187 const unsigned char utf8_string
[] = {
1188 0xe6, 0x97, 0xa5, // 65e5
1189 0xe6, 0x9c, 0xac, // 672c
1190 0xe8, 0xaa, 0x9e, // 8a9e
1193 const unsigned char utf8_pattern
[] = {
1195 0xe6, 0x9c, 0xac, // 672c
1200 // Both should match in either mode, bytes or UTF-8
1201 RE
re_test1(".........");
1202 CHECK(re_test1
.FullMatch(utf8_string
));
1203 RE
re_test2("...", pcrecpp::UTF8());
1204 CHECK(re_test2
.FullMatch(utf8_string
));
1206 // PH added these tests for leading option settings
1208 RE
re_testZ0("(*CR)(*NO_START_OPT).........");
1209 CHECK(re_testZ0
.FullMatch(utf8_string
));
1212 RE
re_testZ1("(*UTF8)...");
1213 CHECK(re_testZ1
.FullMatch(utf8_string
));
1215 RE
re_testZ2("(*UTF)...");
1216 CHECK(re_testZ2
.FullMatch(utf8_string
));
1219 RE
re_testZ3("(*UCP)(*UTF)...");
1220 CHECK(re_testZ3
.FullMatch(utf8_string
));
1222 RE
re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)...");
1223 CHECK(re_testZ4
.FullMatch(utf8_string
));
1225 RE
re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)...");
1226 CHECK(re_testZ5
.FullMatch(utf8_string
));
1230 // Check that '.' matches one byte or UTF-8 character
1231 // according to the mode.
1234 CHECK(re_test3
.PartialMatch(utf8_string
, &ss
));
1235 CHECK_EQ(ss
, string("\xe6"));
1236 RE
re_test4("(.)", pcrecpp::UTF8());
1237 CHECK(re_test4
.PartialMatch(utf8_string
, &ss
));
1238 CHECK_EQ(ss
, string("\xe6\x97\xa5"));
1240 // Check that string matches itself in either mode
1241 RE
re_test5(utf8_string
);
1242 CHECK(re_test5
.FullMatch(utf8_string
));
1243 RE
re_test6(utf8_string
, pcrecpp::UTF8());
1244 CHECK(re_test6
.FullMatch(utf8_string
));
1246 // Check that pattern matches string only in UTF8 mode
1247 RE
re_test7(utf8_pattern
);
1248 CHECK(!re_test7
.FullMatch(utf8_string
));
1249 RE
re_test8(utf8_pattern
, pcrecpp::UTF8());
1250 CHECK(re_test8
.FullMatch(utf8_string
));
1253 // Check that ungreedy, UTF8 regular expressions don't match when they
1254 // oughtn't -- see bug 82246.
1256 // This code always worked.
1257 const char* pattern
= "\\w+X";
1258 const string target
= "a aX";
1259 RE
match_sentence(pattern
);
1260 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1262 CHECK(!match_sentence
.FullMatch(target
));
1263 CHECK(!match_sentence_re
.FullMatch(target
));
1267 const char* pattern
= "(?U)\\w+X";
1268 const string target
= "a aX";
1269 RE
match_sentence(pattern
);
1270 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1272 CHECK(!match_sentence
.FullMatch(target
));
1273 CHECK(!match_sentence_re
.FullMatch(target
));
1275 #endif /* def SUPPORT_UTF */
1277 printf("Testing error reporting\n");
1279 { RE
re("a\\1"); CHECK(!re
.error().empty()); }
1282 CHECK(!re
.error().empty());
1286 CHECK(!re
.error().empty());
1289 RE
re("a[[:foobar:]]");
1290 CHECK(!re
.error().empty());
1294 CHECK(!re
.error().empty());
1298 CHECK(!re
.error().empty());
1301 // Test that recursion is stopped
1305 if (getenv("VERBOSE_TEST") != NULL
)
1306 VERBOSE_TEST
= true;
1309 // Test the constructors