1 // -*- coding: utf-8 -*-
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
41 #include <string.h> /* for memset and strcmp */
46 using pcrecpp::StringPiece
;
48 using pcrecpp::RE_Options
;
51 using pcrecpp::CRadix
;
// Verbose-output switch for the tests. main() sets it to true when the
// VERBOSE_TEST environment variable is defined.
53 static bool VERBOSE_TEST
= false;
55 // CHECK dies with a fatal error if condition is not true. It is *not*
56 // controlled by NDEBUG, so the check will be executed regardless of
57 // compilation mode. Therefore, it is safe to do things like:
58 // CHECK_EQ(fp->Write(x), 4)
// On failure the macro writes "<file>:<line>: Check failed: <condition>" to
// stderr, built from __FILE__, __LINE__ and the stringified condition.
59 #define CHECK(condition) do { \
61 fprintf(stderr, "%s:%d: Check failed: %s\n", \
62 __FILE__, __LINE__, #condition); \
// CHECK_EQ(a, b) dies via CHECK unless a == b.
// Both operands are parenthesized so that arguments containing operators of
// lower precedence than == (for example `x & mask`) are compared as written
// instead of being re-associated by the expansion.
#define CHECK_EQ(a, b) CHECK((a) == (b))
// Timing benchmark 1: builds the pattern "ruby:\d+" once and then runs
// FullMatch against the same StringPiece ("ruby:1234") num_iters times,
// CHECKing that every iteration matches.
69 static void Timing1(int num_iters
) {
70 // Same pattern lots of times
71 RE
pattern("ruby:\\d+");
72 StringPiece
p("ruby:1234");
73 for (int j
= num_iters
; j
> 0; j
--) {
74 CHECK(pattern
.FullMatch(p
));
// Timing benchmark 2: like Timing1, but the pattern has a capture group
// ("ruby:(\d+)") and each of the num_iters FullMatch calls passes &i as the
// capture argument.
78 static void Timing2(int num_iters
) {
79 // Same pattern lots of times
80 RE
pattern("ruby:(\\d+)");
82 for (int j
= num_iters
; j
> 0; j
--) {
83 CHECK(pattern
.FullMatch("ruby:1234", &i
));
// Timing benchmark 3: appends "this is another line\n" to text_string
// num_iters times, then repeatedly applies line_matcher (".*\n") with
// Consume() to a StringPiece over that text and prints counter as the number
// of matched lines.
88 static void Timing3(int num_iters
) {
90 for (int j
= num_iters
; j
> 0; j
--) {
91 text_string
+= "this is another line\n";
94 RE
line_matcher(".*\n");
96 StringPiece
text(text_string
);
98 while (line_matcher
.Consume(&text
)) {
101 printf("Matched %d lines\n", counter
);
104 #if 0 // uncomment this if you have a way of defining VirtualProcessSize()
// Memory-growth check. It is compiled out by the enclosing #if 0 because it
// needs a VirtualProcessSize() implementation. It records the process size
// twice (reported as "Size after 50000" and "Size after 100000"), computes
// growth = (final_size - initial_size) / final_size, and CHECKs that the
// growth is below 2%.
106 static void LeakTest() {
107 // Check for memory leaks
108 unsigned long long initial_size
= 0;
109 for (int i
= 0; i
< 100000; i
++) {
111 initial_size
= VirtualProcessSize();
112 printf("Size after 50000: %llu\n", initial_size
);
114 char buf
[100]; // definitely big enough
// Builds a distinct pattern string for each value of i.
115 sprintf(buf
, "pat%09d", i
);
118 uint64 final_size
= VirtualProcessSize();
119 printf("Size after 100000: %llu\n", final_size
);
120 const double growth
= double(final_size
- initial_size
) / final_size
;
121 printf("Growth: %0.2f%%", growth
* 100);
122 CHECK(growth
< 0.02); // Allow < 2% growth
// Exercises integer parsing in hex, octal and decimal through the helper
// macros CHECK_HEX, CHECK_OCTAL and CHECK_DECIMAL.
//  - CHECK_HEX matches the stringified literal with Hex(&v), then the
//    "0x"-prefixed form with CRadix(&v), and CHECKs v == 0x<value> each time.
//  - CHECK_OCTAL does the same with Octal(&v) and a "0" prefix.
//  - CHECK_DECIMAL matches with a plain &v and with CRadix(&v) on the
//    unprefixed literal.
// The long long / unsigned long long cases are guarded by HAVE_LONG_LONG and
// HAVE_UNSIGNED_LONG_LONG.
127 static void RadixTests() {
128 printf("Testing hex\n");
130 #define CHECK_HEX(type, value) \
133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134 CHECK_EQ(v, 0x ## value); \
135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136 CHECK_EQ(v, 0x ## value); \
139 CHECK_HEX(short, 2bad
);
140 CHECK_HEX(unsigned short, 2badU
);
141 CHECK_HEX(int, dead
);
142 CHECK_HEX(unsigned int, deadU
);
143 CHECK_HEX(long, 7eadbeefL
);
144 CHECK_HEX(unsigned long, deadbeefUL
);
145 #ifdef HAVE_LONG_LONG
146 CHECK_HEX(long long, 12345678deadbeefLL
);
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL
);
154 printf("Testing octal\n");
156 #define CHECK_OCTAL(type, value) \
159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160 CHECK_EQ(v, 0 ## value); \
161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162 CHECK_EQ(v, 0 ## value); \
165 CHECK_OCTAL(short, 77777);
166 CHECK_OCTAL(unsigned short, 177777U);
167 CHECK_OCTAL(int, 17777777777);
168 CHECK_OCTAL(unsigned int, 37777777777U);
169 CHECK_OCTAL(long, 17777777777L);
170 CHECK_OCTAL(unsigned long, 37777777777UL);
171 #ifdef HAVE_LONG_LONG
172 CHECK_OCTAL(long long, 777777777777777777777LL);
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
180 printf("Testing decimal\n");
182 #define CHECK_DECIMAL(type, value) \
185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186 CHECK_EQ(v, value); \
187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188 CHECK_EQ(v, value); \
191 CHECK_DECIMAL(short, -1);
192 CHECK_DECIMAL(unsigned short, 9999);
193 CHECK_DECIMAL(int, -1000);
194 CHECK_DECIMAL(unsigned int, 12345U);
195 CHECK_DECIMAL(long, -10000000L);
196 CHECK_DECIMAL(unsigned long, 3083324652U);
197 #ifdef HAVE_LONG_LONG
198 CHECK_DECIMAL(long long, -100000000000000LL);
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
// Table-driven test of RE::Replace and RE::GlobalReplace.
// For each ReplaceTest entry the regexp is compiled with PCRE_NEWLINE_CRLF
// (and UTF-8 according to support_utf8), and then:
//  - Replace(rewrite) applied to a copy of `original` must yield `single`;
//  - GlobalReplace(rewrite) applied to another copy must yield `global` and
//    return `global_count`.
// The table ends with an entry whose `original` is NULL. Two further checks
// run GlobalReplace on "aa\r\naa\r\n" under PCRE_NEWLINE_CR and
// PCRE_NEWLINE_LF.
208 static void TestReplace() {
209 printf("Testing Replace\n");
214 const char *original
;
217 int global_count
; // the expected return value from GlobalReplace
219 static const ReplaceTest tests
[] = {
220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
222 "the quick brown fox jumps over the lazy dogs.",
223 "ethay quick brown fox jumps over the lazy dogs.",
224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
228 "paul.haahr@google.com",
229 "paul-NOSPAM.haahr@google.com",
230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
284 "bbabbabb\nbbabbabb\nbb",
290 "bbabbabb\rbbabbabb\rbb",
296 "bbabbabb\r\nbbabbabb\r\nbb",
298 // Check empty-string matching (it's tricky!)
314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
// Sentinel entry: the NULL `original` stops the loop below.
326 { "", NULL
, NULL
, NULL
, NULL
, 0 }
330 const bool support_utf8
= true;
332 const bool support_utf8
= false;
335 for (const ReplaceTest
*t
= tests
; t
->original
!= NULL
; ++t
) {
336 RE
re(t
->regexp
, RE_Options(PCRE_NEWLINE_CRLF
).set_utf8(support_utf8
));
337 assert(re
.error().empty());
338 string
one(t
->original
);
339 CHECK(re
.Replace(t
->rewrite
, &one
));
340 CHECK_EQ(one
, t
->single
);
341 string
all(t
->original
);
342 const int replace_count
= re
.GlobalReplace(t
->rewrite
, &all
);
343 CHECK_EQ(all
, t
->global
);
344 CHECK_EQ(replace_count
, t
->global_count
);
347 // One final test: test \r\n replacement when we're not in CRLF mode
349 RE
re("b*", RE_Options(PCRE_NEWLINE_CR
).set_utf8(support_utf8
));
350 assert(re
.error().empty());
351 string
all("aa\r\naa\r\n");
352 CHECK_EQ(re
.GlobalReplace("bb", &all
), 9);
353 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
356 RE
re("b*", RE_Options(PCRE_NEWLINE_LF
).set_utf8(support_utf8
));
357 assert(re
.error().empty());
358 string
all("aa\r\naa\r\n");
359 CHECK_EQ(re
.GlobalReplace("bb", &all
), 9);
360 CHECK_EQ(all
, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
362 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363 // Alas, the answer depends on how pcre was compiled.
// Tests RE::Extract: the rewrite string may reference capture groups (\1, \2)
// or the whole match (\0). The last pair of checks verifies that a failed
// Extract leaves the output string s at its previous value ("'foo'").
366 static void TestExtract() {
367 printf("Testing Extract\n");
371 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s
));
372 CHECK_EQ(s
, "kremvax!boris");
374 // check the RE interface as well
375 CHECK(RE(".*").Extract("'\\0'", "foo", &s
));
376 CHECK_EQ(s
, "'foo'");
377 CHECK(!RE("bar").Extract("'\\0'", "baz", &s
));
378 CHECK_EQ(s
, "'foo'");
// Tests RE::Consume by calling it repeatedly on one StringPiece over
// " aaa b!@#$@#$cccc". The first two calls must succeed (the first yields
// "aaa") and the third must fail.
381 static void TestConsume() {
382 printf("Testing Consume\n");
386 string
s(" aaa b!@#$@#$cccc");
387 StringPiece
input(s
);
389 RE
r("\\s*(\\w+)"); // matches a word, possibly preceded by whitespace
390 CHECK(r
.Consume(&input
, &word
));
391 CHECK_EQ(word
, "aaa");
392 CHECK(r
.Consume(&input
, &word
));
394 CHECK(! r
.Consume(&input
, &word
));
// Tests RE::FindAndConsume by calling it repeatedly on one StringPiece over
// " aaa b!@#$@#$cccc". Three calls must succeed (the first yields "aaa", the
// third "cccc") and the fourth must fail.
397 static void TestFindAndConsume() {
398 printf("Testing FindAndConsume\n");
402 string
s(" aaa b!@#$@#$cccc");
403 StringPiece
input(s
);
405 RE
r("(\\w+)"); // matches a word
406 CHECK(r
.FindAndConsume(&input
, &word
));
407 CHECK_EQ(word
, "aaa");
408 CHECK(r
.FindAndConsume(&input
, &word
));
410 CHECK(r
.FindAndConsume(&input
, &word
));
411 CHECK_EQ(word
, "cccc");
412 CHECK(! r
.FindAndConsume(&input
, &word
));
// Tests PartialMatch against an alternation of three capture groups,
// "(foo)|(bar)|(baz)". For each input the argument of the group that matched
// must hold the text (word1 for "foo", word2 for "bar", word3 for "baz"),
// and "f" must not match at all. It also checks that "(foo)|hello"
// full-matches "hello" when an argument is supplied for the unmatched group.
415 static void TestMatchNumberPeculiarity() {
416 printf("Testing match-number peculiarity\n");
422 RE
r("(foo)|(bar)|(baz)");
423 CHECK(r
.PartialMatch("foo", &word1
, &word2
, &word3
));
424 CHECK_EQ(word1
, "foo");
427 CHECK(r
.PartialMatch("bar", &word1
, &word2
, &word3
));
429 CHECK_EQ(word2
, "bar");
431 CHECK(r
.PartialMatch("baz", &word1
, &word2
, &word3
));
434 CHECK_EQ(word3
, "baz");
435 CHECK(!r
.PartialMatch("f", &word1
, &word2
, &word3
));
438 CHECK(RE("(foo)|hello").FullMatch("hello", &a
));
// Tests the match_limit and match_limit_recursion options using the pattern
// "(\w+)*b". With a sufficient limit (match_limit 8192, recursion limit 50)
// PartialMatch(text_good) must succeed. With a reduced limit (match_limit
// 1024, recursion limit 10) the same call must fail. Every other
// combination of text and match kind is expected to fail in all four
// configurations.
442 static void TestRecursion() {
443 printf("Testing recursion\n");
445 // Get one string that passes (sometimes), one that never does.
446 string
text_good("abcdefghijk");
447 string
text_bad("acdefghijkl");
449 // According to pcretest, matching text_good against (\w+)*b
450 // requires match_limit of at least 8192, and match_recursion_limit
453 RE_Options options_ml
;
454 options_ml
.set_match_limit(8192);
455 RE
re("(\\w+)*b", options_ml
);
456 CHECK(re
.PartialMatch(text_good
) == true);
457 CHECK(re
.PartialMatch(text_bad
) == false);
458 CHECK(re
.FullMatch(text_good
) == false);
459 CHECK(re
.FullMatch(text_bad
) == false);
461 options_ml
.set_match_limit(1024);
462 RE
re2("(\\w+)*b", options_ml
);
463 CHECK(re2
.PartialMatch(text_good
) == false); // because of match_limit
464 CHECK(re2
.PartialMatch(text_bad
) == false);
465 CHECK(re2
.FullMatch(text_good
) == false);
466 CHECK(re2
.FullMatch(text_bad
) == false);
468 RE_Options options_mlr
;
469 options_mlr
.set_match_limit_recursion(50);
470 RE
re3("(\\w+)*b", options_mlr
);
471 CHECK(re3
.PartialMatch(text_good
) == true);
472 CHECK(re3
.PartialMatch(text_bad
) == false);
473 CHECK(re3
.FullMatch(text_good
) == false);
474 CHECK(re3
.FullMatch(text_bad
) == false);
476 options_mlr
.set_match_limit_recursion(10);
477 RE
re4("(\\w+)*b", options_mlr
);
478 CHECK(re4
.PartialMatch(text_good
) == false);
479 CHECK(re4
.PartialMatch(text_bad
) == false);
480 CHECK(re4
.FullMatch(text_good
) == false);
481 CHECK(re4
.FullMatch(text_bad
) == false);
484 // A meta-quoted string, interpreted as a pattern, should always match
485 // the original unquoted string.
// Quotes `unquoted` with RE::QuoteMeta, compiles the result with `options`
// (default-constructed RE_Options if omitted), and CHECKs that it
// full-matches `unquoted`.
486 static void TestQuoteMeta(string unquoted
, RE_Options options
= RE_Options()) {
487 string quoted
= RE::QuoteMeta(unquoted
);
488 RE
re(quoted
, options
);
489 CHECK(re
.FullMatch(unquoted
));
492 // A string containing meaningful regexp characters, once meta-quoted,
493 // should generally not match the strings that the unquoted pattern matches.
// Quotes `unquoted` with RE::QuoteMeta, compiles the result with `options`,
// and CHECKs that it does NOT full-match `should_not_match`.
494 static void NegativeTestQuoteMeta(string unquoted
, string should_not_match
,
495 RE_Options options
= RE_Options()) {
496 string quoted
= RE::QuoteMeta(unquoted
);
497 RE
re(quoted
, options
);
498 CHECK(!re
.FullMatch(should_not_match
));
501 // Tests that quoted meta characters match their original strings,
502 // and that a few things that shouldn't match indeed do not.
// Each call below runs the positive round-trip check (TestQuoteMeta) on one
// literal. The last one passes an explicit length so that the embedded NUL
// byte is part of the string.
503 static void TestQuotaMetaSimple() {
504 TestQuoteMeta("foo");
505 TestQuoteMeta("foo.bar");
506 TestQuoteMeta("foo\\.bar");
507 TestQuoteMeta("[1-9]");
508 TestQuoteMeta("1.5-2.0?");
509 TestQuoteMeta("\\d");
510 TestQuoteMeta("Who doesn't like ice cream?");
511 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512 TestQuoteMeta("((?!)xxx).*yyy");
514 TestQuoteMeta(string("foo\0bar", 7));
// Negative QuoteMeta cases: for each pair, the quoted form of the first
// string must not full-match the second string. In most pairs the second
// string is one that the first, read as an unquoted regexp, would match.
517 static void TestQuoteMetaSimpleNegative() {
518 NegativeTestQuoteMeta("foo", "bar");
519 NegativeTestQuoteMeta("...", "bar");
520 NegativeTestQuoteMeta("\\.", ".");
521 NegativeTestQuoteMeta("\\.", "..");
522 NegativeTestQuoteMeta("(a)", "a");
523 NegativeTestQuoteMeta("(a|b)", "a");
524 NegativeTestQuoteMeta("(a|b)", "(a)");
525 NegativeTestQuoteMeta("(a|b)", "a|b");
526 NegativeTestQuoteMeta("[0-9]", "0");
527 NegativeTestQuoteMeta("[0-9]", "0-9");
528 NegativeTestQuoteMeta("[0-9]", "[9]");
529 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
// QuoteMeta round-trip on a string containing a non-ASCII byte (0xb2), using
// default options.
532 static void TestQuoteMetaLatin1() {
533 TestQuoteMeta("3\xb2 = 9");
// QuoteMeta round-trip checks on multi-byte UTF-8 sequences (2-, 3- and
// 4-byte), compiled with pcrecpp::UTF8(), plus one case compiled with default
// options.
536 static void TestQuoteMetaUtf8() {
538 TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
539 TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8
540 TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol)
541 TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8()); // As a middle character
542 TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8()); // 3-byte utf8 (double prime)
543 TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
544 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
545 NegativeTestQuoteMeta("27\xc2\xb0", // 2-byte utf (degree symbol)
// Driver for the QuoteMeta tests: prints a banner and runs the individual
// QuoteMeta test functions in turn.
551 static void TestQuoteMetaAll() {
552 printf("Testing QuoteMeta\n");
553 TestQuotaMetaSimple();
554 TestQuoteMetaSimpleNegative();
555 TestQuoteMetaLatin1();
560 // Options tests contributed by
561 // Giuseppe Maxia, CTO, Stardata s.r.l.
// Helper for option tests that compare captured text. It compiles `regex`
// with `options`, matches it against `str` (both a FullMatch and a
// PartialMatch call appear below), stores the capture in `captured`, and
// CHECKs that it equals `expected`. `option_name` is only used in the
// printed banner.
564 static void GetOneOptionResult(
565 const char *option_name
,
572 printf("Testing Option <%s>\n", option_name
);
574 printf("/%s/ finds \"%s\" within \"%s\" \n",
580 RE(regex
,options
).FullMatch(str
, &captured
);
582 RE(regex
,options
).PartialMatch(str
, &captured
);
583 CHECK_EQ(captured
, expected
);
// Helper for option tests that only check match / no-match. It compiles
// `regex` with `options` and CHECKs the result of matching `str`. `assertive`
// (default true) appears to select whether a match or a non-match is
// expected, based on the banner text and the positive/negative CHECK pairs
// below. Both FullMatch and PartialMatch variants are present.
586 static void TestOneOption(
587 const char *option_name
,
592 bool assertive
= true) {
594 printf("Testing Option <%s>\n", option_name
);
596 printf("'%s' %s /%s/ \n",
598 (assertive
? "matches" : "doesn't match"),
602 CHECK(RE(regex
,options
).FullMatch(str
));
604 CHECK(RE(regex
,options
).PartialMatch(str
));
607 CHECK(!RE(regex
,options
).FullMatch(str
));
609 CHECK(!RE(regex
,options
).PartialMatch(str
));
// Tests the caseless option, set three ways: via set_caseless(true) on an
// existing options object, via a chained call on options2, and via the
// pcrecpp::CASELESS() helper. The final call turns the option off and expects
// "HELLO" not to match "hello".
613 static void Test_CASELESS() {
617 options
.set_caseless(true);
618 TestOneOption("CASELESS (class)", "HELLO", "hello", options
, false);
619 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2
.set_caseless(true), false);
620 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options
, false);
622 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624 options
.set_caseless(false);
625 TestOneOption("no CASELESS", "HELLO", "hello", options
, false, false);
// Tests the multiline option: "^cruel$" is expected to match the middle line
// of a three-line string when the option is set (via set_multiline or
// pcrecpp::MULTILINE()), and not to match when it is cleared.
628 static void Test_MULTILINE() {
631 const char *str
= "HELLO\n" "cruel\n" "world\n";
633 options
.set_multiline(true);
634 TestOneOption("MULTILINE (class)", "^cruel$", str
, options
, false);
635 TestOneOption("MULTILINE (class2)", "^cruel$", str
, options2
.set_multiline(true), false);
636 TestOneOption("MULTILINE (function)", "^cruel$", str
, pcrecpp::MULTILINE(), false);
637 options
.set_multiline(false);
638 TestOneOption("no MULTILINE", "^cruel$", str
, options
, false, false);
// Tests the dotall option: "HELLO.*world" is expected to match a string
// containing newlines when the option is set (via set_dotall or
// pcrecpp::DOTALL()), and not to match when it is cleared.
641 static void Test_DOTALL() {
644 const char *str
= "HELLO\n" "cruel\n" "world";
646 options
.set_dotall(true);
647 TestOneOption("DOTALL (class)", "HELLO.*world", str
, options
, true);
648 TestOneOption("DOTALL (class2)", "HELLO.*world", str
, options2
.set_dotall(true), true);
649 TestOneOption("DOTALL (function)", "HELLO.*world", str
, pcrecpp::DOTALL(), true);
650 options
.set_dotall(false);
651 TestOneOption("no DOTALL", "HELLO.*world", str
, options
, true, false);
// Tests the dollar_endonly option against a string ending in "\n".
// Without the option "world$" is expected to match; with the option set it
// is expected not to match.
654 static void Test_DOLLAR_ENDONLY() {
657 const char *str
= "HELLO world\n";
659 TestOneOption("no DOLLAR_ENDONLY", "world$", str
, options
, false);
660 options
.set_dollar_endonly(true);
661 TestOneOption("DOLLAR_ENDONLY 1", "world$", str
, options
, false, false);
662 TestOneOption("DOLLAR_ENDONLY 2", "world$", str
, options2
.set_dollar_endonly(true), false, false);
// Tests the extra option with the pattern "\HELL\O". With the option set the
// pattern is expected not to match "HELLO"; with it cleared the pattern is
// expected to match.
665 static void Test_EXTRA() {
667 const char *str
= "HELLO";
669 options
.set_extra(true);
670 TestOneOption("EXTRA 1", "\\HELL\\O", str
, options
, true, false );
671 TestOneOption("EXTRA 2", "\\HELL\\O", str
, RE_Options().set_extra(true), true, false );
672 options
.set_extra(false);
673 TestOneOption("no EXTRA", "\\HELL\\O", str
, options
, true );
// Tests the extended option: with it set (via set_extended or
// pcrecpp::EXTENDED()) the pattern "HELLO world" is expected not to match
// the string "HELLO world"; with it cleared the pattern is expected to match.
676 static void Test_EXTENDED() {
679 const char *str
= "HELLO world";
681 options
.set_extended(true);
682 TestOneOption("EXTENDED (class)", "HELLO world", str
, options
, false, false);
683 TestOneOption("EXTENDED (class2)", "HELLO world", str
, options2
.set_extended(true), false, false);
684 TestOneOption("EXTENDED (class)",
692 TestOneOption("EXTENDED (function)", "HELLO world", str
, pcrecpp::EXTENDED(), false, false);
693 TestOneOption("EXTENDED (function)",
701 options
.set_extended(false);
702 TestOneOption("no EXTENDED", "HELLO world", str
, options
, false);
// Tests the no_auto_capture option using Extract("\1", ...). First, with the
// option unset, the group in "(world|universe)$" must capture "world". Then
// the option is set on `options` and Extract is called again (its return
// value is not checked); `captured` is expected to still be "world".
705 static void Test_NO_AUTO_CAPTURE() {
707 const char *str
= "HELLO world";
710 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
712 printf("parentheses capture text\n");
713 RE
re("(world|universe)$", options
);
714 CHECK(re
.Extract("\\1", str
, &captured
));
715 CHECK_EQ(captured
, "world");
716 options
.set_no_auto_capture(true);
717 printf("testing Option <NO_AUTO_CAPTURE>\n");
719 printf("parentheses do not capture text\n");
// NOTE(review): `re` was constructed above, before set_no_auto_capture(true)
// was applied to `options`; this call reuses that object rather than one
// built from the updated options -- confirm this is intended.
720 re
.Extract("\\1",str
, &captured
);
721 CHECK_EQ(captured
, "world");
// Tests the ungreedy option by comparing what "('.*')" and "('.*?')" capture
// from a string containing two quoted words. With the option set, ".*"
// captures the shorter "'this'" and ".*?" captures the longer span. With the
// option cleared the roles are reversed.
724 static void Test_UNGREEDY() {
726 const char *str
= "HELLO, 'this' is the 'world'";
728 options
.set_ungreedy(true);
729 GetOneOptionResult("UNGREEDY 1", "('.*')", str
, options
, false, "'this'" );
730 GetOneOptionResult("UNGREEDY 2", "('.*')", str
, RE_Options().set_ungreedy(true), false, "'this'" );
731 GetOneOptionResult("UNGREEDY", "('.*?')", str
, options
, false, "'this' is the 'world'" );
733 options
.set_ungreedy(false);
734 GetOneOptionResult("no UNGREEDY", "('.*')", str
, options
, false, "'this' is the 'world'" );
735 GetOneOptionResult("no UNGREEDY", "('.*?')", str
, options
, false, "'this'" );
// Tests set_all_options() with combinations of raw PCRE_* flags:
// CASELESS|DOTALL and MULTILINE|EXTENDED are expected to make their
// respective patterns match, and resetting to 0 is expected to make the same
// patterns stop matching. Flags are also passed through the RE_Options
// constructor.
738 static void Test_all_options() {
739 const char *str
= "HELLO\n" "cruel\n" "world";
741 options
.set_all_options(PCRE_CASELESS
| PCRE_DOTALL
);
743 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str
, options
, false);
744 options
.set_all_options(0);
745 TestOneOption("all_options (0)", "^hello.*WORLD", str
, options
, false, false);
746 options
.set_all_options(PCRE_MULTILINE
| PCRE_EXTENDED
);
748 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str
, options
, false);
749 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
752 RE_Options(PCRE_MULTILINE
| PCRE_EXTENDED
),
755 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
763 options
.set_all_options(0);
764 TestOneOption("all_options (0)", "^ c r u e l $", str
, options
, false, false);
// Driver for the option tests: prints a banner and calls the individual
// Test_<OPTION>() functions.
768 static void TestOptions() {
769 printf("Testing Options\n");
773 Test_DOLLAR_ENDONLY();
775 Test_NO_AUTO_CAPTURE();
// Tests that RE objects named orig, copy1 and copy2 behave consistently.
// `orig` is built from "HELLO.*world" with dotall set and must full-match
// `str`. copy1 must match as well. copy2 starts as a non-matching pattern and
// must match after the intervening statements. All three must still match
// after the self-assignment step.
781 static void TestConstructors() {
782 printf("Testing constructors\n");
785 options
.set_dotall(true);
786 const char *str
= "HELLO\n" "cruel\n" "world";
788 RE
orig("HELLO.*world", options
);
789 CHECK(orig
.FullMatch(str
));
792 CHECK(copy1
.FullMatch(str
));
794 RE
copy2("not a match");
795 CHECK(!copy2
.FullMatch(str
));
797 CHECK(copy2
.FullMatch(str
));
799 CHECK(copy2
.FullMatch(str
));
801 // Make sure when we assign to ourselves, nothing bad happens
805 CHECK(orig
.FullMatch(str
));
806 CHECK(copy1
.FullMatch(str
));
807 CHECK(copy2
.FullMatch(str
));
810 int main(int argc
, char** argv
) {
811 // Treat any flag as --help
812 if (argc
> 1 && argv
[1][0] == '-') {
813 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814 " If 'timingX ###' is specified, run the given timing test\n"
815 " with the given number of iterations, rather than running\n"
816 " the default corectness test.\n", argv
[0]);
821 if ( argc
== 2 || atoi(argv
[2]) == 0) {
822 printf("timing mode needs a num-iters argument\n");
825 if (!strcmp(argv
[1], "timing1"))
826 Timing1(atoi(argv
[2]));
827 else if (!strcmp(argv
[1], "timing2"))
828 Timing2(atoi(argv
[2]));
829 else if (!strcmp(argv
[1], "timing3"))
830 Timing3(atoi(argv
[2]));
832 printf("Unknown argument '%s'\n", argv
[1]);
836 printf("PCRE C++ wrapper tests\n");
837 printf("Testing FullMatch\n");
842 /***** FullMatch with no args *****/
844 CHECK(RE("h.*o").FullMatch("hello"));
845 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
846 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
847 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
848 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
849 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
851 /***** FullMatch with args *****/
854 CHECK(RE("\\d+").FullMatch("1001"));
857 CHECK(RE("(\\d+)").FullMatch("1001", &i
));
859 CHECK(RE("(-?\\d+)").FullMatch("-123", &i
));
861 CHECK(!RE("()\\d+").FullMatch("10", &i
));
862 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
865 // Digits surrounding integer-arg
866 CHECK(RE("1(\\d*)4").FullMatch("1234", &i
));
868 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i
));
870 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i
));
872 CHECK(RE("(\\d)").PartialMatch("1234", &i
));
874 CHECK(RE("(-\\d)").PartialMatch("-1234", &i
));
878 CHECK(RE("h(.*)o").FullMatch("hello", &s
));
879 CHECK_EQ(s
, string("ell"));
883 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp
, &i
));
884 CHECK_EQ(sp
.size(), 4);
885 CHECK(memcmp(sp
.data(), "ruby", 4) == 0);
889 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s
, &i
));
890 CHECK_EQ(s
, string("ruby"));
893 // Ignore non-void* NULL arg
894 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL
));
895 CHECK(RE("h(.*)o").FullMatch("hello", (string
*)NULL
));
896 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece
*)NULL
));
897 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL
));
898 #ifdef HAVE_LONG_LONG
899 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL
));
901 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL
));
902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL
));
904 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905 CHECK(!RE("h(.*)lo").FullMatch("hello", &s
, (char*)NULL
));
906 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL
));
907 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL
));
908 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL
));
909 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL
));
912 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s
, (void*)NULL
, &i
));
913 CHECK_EQ(s
, string("ruby"));
919 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
924 CHECK(RE("(H)ello").FullMatch("Hello", &c
));
925 CHECK_EQ(c
, static_cast<unsigned char>('H'));
929 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
930 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
931 CHECK(RE("(-?\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
932 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v
)); CHECK_EQ(v
, -32768);
933 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v
));
934 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v
));
938 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
939 CHECK(RE("(\\d+)").FullMatch("32767", &v
)); CHECK_EQ(v
, 32767);
940 CHECK(RE("(\\d+)").FullMatch("65535", &v
)); CHECK_EQ(v
, 65535);
941 CHECK(!RE("(\\d+)").FullMatch("65536", &v
));
945 static const int max_value
= 0x7fffffff;
946 static const int min_value
= -max_value
- 1;
947 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
948 CHECK(RE("(-?\\d+)").FullMatch("-100", &v
)); CHECK_EQ(v
, -100);
949 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v
)); CHECK_EQ(v
, max_value
);
950 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v
)); CHECK_EQ(v
, min_value
);
951 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v
));
952 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v
));
956 static const unsigned int max_value
= 0xfffffffful
;
957 CHECK(RE("(\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
958 CHECK(RE("(\\d+)").FullMatch("4294967295", &v
)); CHECK_EQ(v
, max_value
);
959 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v
));
961 #ifdef HAVE_LONG_LONG
962 # if defined(__MINGW__) || defined(__MINGW32__)
971 static const long long max_value
= 0x7fffffffffffffffLL
;
972 static const long long min_value
= -max_value
- 1;
973 char buf
[32]; // definitely big enough for a long long
975 CHECK(RE("(-?\\d+)").FullMatch("100", &v
)); CHECK_EQ(v
, 100);
976 CHECK(RE("(-?\\d+)").FullMatch("-100",&v
)); CHECK_EQ(v
, -100);
978 sprintf(buf
, LLD
, max_value
);
979 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
981 sprintf(buf
, LLD
, min_value
);
982 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, min_value
);
984 sprintf(buf
, LLD
, max_value
);
985 assert(buf
[strlen(buf
)-1] != '9');
986 buf
[strlen(buf
)-1]++;
987 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
989 sprintf(buf
, LLD
, min_value
);
990 assert(buf
[strlen(buf
)-1] != '9');
991 buf
[strlen(buf
)-1]++;
992 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
995 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
997 unsigned long long v
;
999 static const unsigned long long max_value
= 0xffffffffffffffffULL
;
1000 char buf
[32]; // definitely big enough for a unsigned long long
1002 CHECK(RE("(-?\\d+)").FullMatch("100",&v
)); CHECK_EQ(v
, 100);
1003 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2
)); CHECK_EQ(v2
, -100);
1005 sprintf(buf
, LLU
, max_value
);
1006 CHECK(RE("(-?\\d+)").FullMatch(buf
,&v
)); CHECK_EQ(v
, max_value
);
1008 assert(buf
[strlen(buf
)-1] != '9');
1009 buf
[strlen(buf
)-1]++;
1010 CHECK(!RE("(-?\\d+)").FullMatch(buf
, &v
));
1015 CHECK(RE("(.*)").FullMatch("100", &v
));
1016 CHECK(RE("(.*)").FullMatch("-100.", &v
));
1017 CHECK(RE("(.*)").FullMatch("1e23", &v
));
1021 CHECK(RE("(.*)").FullMatch("100", &v
));
1022 CHECK(RE("(.*)").FullMatch("-100.", &v
));
1023 CHECK(RE("(.*)").FullMatch("1e23", &v
));
1026 // Check that matching is fully anchored
1027 CHECK(!RE("(\\d+)").FullMatch("x1001", &i
));
1028 CHECK(!RE("(\\d+)").FullMatch("1001x", &i
));
1029 CHECK(RE("x(\\d+)").FullMatch("x1001", &i
)); CHECK_EQ(i
, 1001);
1030 CHECK(RE("(\\d+)x").FullMatch("1001x", &i
)); CHECK_EQ(i
, 1001);
1033 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1043 // Check full-match handling (needs '$' tacked on internally)
1044 CHECK(RE("fo|foo").FullMatch("fo"));
1045 CHECK(RE("fo|foo").FullMatch("foo"));
1046 CHECK(RE("fo|foo$").FullMatch("fo"));
1047 CHECK(RE("fo|foo$").FullMatch("foo"));
1048 CHECK(RE("foo$").FullMatch("foo"));
1049 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050 CHECK(!RE("fo|bar").FullMatch("fox"));
1052 // Uncomment the following if we change the handling of '$' to
1053 // prevent it from matching a trailing newline
1055 // Check that we don't get bitten by pcre's special handling of a
1056 // '\n' at the end of the string matching '$'
1057 CHECK(!RE("foo$").PartialMatch("foo\n"));
1062 CHECK(RE("").FullMatch(""));
1064 memset(a
, 0, sizeof(0));
1065 CHECK(RE("(\\d){1}").FullMatch("1",
1069 memset(a
, 0, sizeof(0));
1070 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1075 memset(a
, 0, sizeof(0));
1076 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077 &a
[0], &a
[1], &a
[2]));
1082 memset(a
, 0, sizeof(0));
1083 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084 &a
[0], &a
[1], &a
[2], &a
[3]));
1090 memset(a
, 0, sizeof(0));
1091 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092 &a
[0], &a
[1], &a
[2],
1100 memset(a
, 0, sizeof(0));
1101 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102 &a
[0], &a
[1], &a
[2],
1103 &a
[3], &a
[4], &a
[5]));
1111 memset(a
, 0, sizeof(0));
1112 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113 &a
[0], &a
[1], &a
[2], &a
[3],
1114 &a
[4], &a
[5], &a
[6]));
1123 memset(a
, 0, sizeof(0));
1124 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1127 &a
[0], &a
[1], &a
[2], &a
[3],
1128 &a
[4], &a
[5], &a
[6], &a
[7],
1129 &a
[8], &a
[9], &a
[10], &a
[11],
1130 &a
[12], &a
[13], &a
[14], &a
[15]));
1148 /***** PartialMatch *****/
1150 printf("Testing PartialMatch\n");
1152 CHECK(RE("h.*o").PartialMatch("hello"));
1153 CHECK(RE("h.*o").PartialMatch("othello"));
1154 CHECK(RE("h.*o").PartialMatch("hello!"));
1155 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1157 /***** other tests *****/
1163 TestFindAndConsume();
1165 TestMatchNumberPeculiarity();
1167 // Check the pattern() accessor
1169 const string kPattern
= "http://([^/]+)/.*";
1170 const RE
re(kPattern
);
1171 CHECK_EQ(kPattern
, re
.pattern());
1174 // Check RE error field.
1177 CHECK(re
.error().empty()); // Must have no error
1181 // Check UTF-8 handling
1183 printf("Testing UTF-8 handling\n");
1185 // Three Japanese characters (nihongo)
1186 const unsigned char utf8_string
[] = {
1187 0xe6, 0x97, 0xa5, // 65e5
1188 0xe6, 0x9c, 0xac, // 627c
1189 0xe8, 0xaa, 0x9e, // 8a9e
1192 const unsigned char utf8_pattern
[] = {
1194 0xe6, 0x9c, 0xac, // 627c
1199 // Both should match in either mode, bytes or UTF-8
1200 RE
re_test1(".........");
1201 CHECK(re_test1
.FullMatch(utf8_string
));
1202 RE
re_test2("...", pcrecpp::UTF8());
1203 CHECK(re_test2
.FullMatch(utf8_string
));
1205 // Check that '.' matches one byte or UTF-8 character
1206 // according to the mode.
1209 CHECK(re_test3
.PartialMatch(utf8_string
, &ss
));
1210 CHECK_EQ(ss
, string("\xe6"));
1211 RE
re_test4("(.)", pcrecpp::UTF8());
1212 CHECK(re_test4
.PartialMatch(utf8_string
, &ss
));
1213 CHECK_EQ(ss
, string("\xe6\x97\xa5"));
1215 // Check that string matches itself in either mode
1216 RE
re_test5(utf8_string
);
1217 CHECK(re_test5
.FullMatch(utf8_string
));
1218 RE
re_test6(utf8_string
, pcrecpp::UTF8());
1219 CHECK(re_test6
.FullMatch(utf8_string
));
1221 // Check that pattern matches string only in UTF8 mode
1222 RE
re_test7(utf8_pattern
);
1223 CHECK(!re_test7
.FullMatch(utf8_string
));
1224 RE
re_test8(utf8_pattern
, pcrecpp::UTF8());
1225 CHECK(re_test8
.FullMatch(utf8_string
));
1228 // Check that ungreedy, UTF8 regular expressions don't match when they
1229 // oughtn't -- see bug 82246.
1231 // This code always worked.
1232 const char* pattern
= "\\w+X";
1233 const string target
= "a aX";
1234 RE
match_sentence(pattern
);
1235 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1237 CHECK(!match_sentence
.FullMatch(target
));
1238 CHECK(!match_sentence_re
.FullMatch(target
));
1242 const char* pattern
= "(?U)\\w+X";
1243 const string target
= "a aX";
1244 RE
match_sentence(pattern
);
1245 RE
match_sentence_re(pattern
, pcrecpp::UTF8());
1247 CHECK(!match_sentence
.FullMatch(target
));
1248 CHECK(!match_sentence_re
.FullMatch(target
));
1250 #endif /* def SUPPORT_UTF8 */
1252 printf("Testing error reporting\n");
1254 { RE
re("a\\1"); CHECK(!re
.error().empty()); }
1257 CHECK(!re
.error().empty());
1261 CHECK(!re
.error().empty());
1264 RE
re("a[[:foobar:]]");
1265 CHECK(!re
.error().empty());
1269 CHECK(!re
.error().empty());
1273 CHECK(!re
.error().empty());
1276 // Test that recursion is stopped
1280 if (getenv("VERBOSE_TEST") != NULL
)
1281 VERBOSE_TEST
= true;
1284 // Test the constructors