Busybox: Upgrade to 1.21.1 (stable). lsof active.
[tomato.git] / release / src / router / pcre / pcrecpp_unittest.cc
blob92cae8fbea57aed29b7327ad4d8a123f6ac1e53e
1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 // Author: Sanjay Ghemawat
34 // TODO: Test extractions for PartialMatch/Consume
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
40 #include <stdio.h>
41 #include <string.h> /* for memset and strcmp */
42 #include <cassert>
43 #include <vector>
44 #include "pcrecpp.h"
46 using pcrecpp::StringPiece;
47 using pcrecpp::RE;
48 using pcrecpp::RE_Options;
49 using pcrecpp::Hex;
50 using pcrecpp::Octal;
51 using pcrecpp::CRadix;
53 static bool VERBOSE_TEST = false;
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure it prints "<file>:<line>: Check failed: <condition text>" to
// stderr and terminates the process with exit status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies (via CHECK) unless a == b.  Both operands are parenthesized
// so that arguments containing operators that bind more loosely than `==`
// (for example `flags & 1`) are compared as the caller wrote them.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
69 static void Timing1(int num_iters) {
70 // Same pattern lots of times
71 RE pattern("ruby:\\d+");
72 StringPiece p("ruby:1234");
73 for (int j = num_iters; j > 0; j--) {
74 CHECK(pattern.FullMatch(p));
78 static void Timing2(int num_iters) {
79 // Same pattern lots of times
80 RE pattern("ruby:(\\d+)");
81 int i;
82 for (int j = num_iters; j > 0; j--) {
83 CHECK(pattern.FullMatch("ruby:1234", &i));
84 CHECK_EQ(i, 1234);
88 static void Timing3(int num_iters) {
89 string text_string;
90 for (int j = num_iters; j > 0; j--) {
91 text_string += "this is another line\n";
94 RE line_matcher(".*\n");
95 string line;
96 StringPiece text(text_string);
97 int counter = 0;
98 while (line_matcher.Consume(&text)) {
99 counter++;
101 printf("Matched %d lines\n", counter);
#if 0  // enable this if you have a way of defining VirtualProcessSize()

// Compiles 100000 distinct patterns and checks that the process size
// measured after the last one has grown by less than 2% relative to the
// size measured at iteration 50000.
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type the %llu format expects.
  unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
127 static void RadixTests() {
128 printf("Testing hex\n");
130 #define CHECK_HEX(type, value) \
131 do { \
132 type v; \
133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134 CHECK_EQ(v, 0x ## value); \
135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136 CHECK_EQ(v, 0x ## value); \
137 } while(0)
139 CHECK_HEX(short, 2bad);
140 CHECK_HEX(unsigned short, 2badU);
141 CHECK_HEX(int, dead);
142 CHECK_HEX(unsigned int, deadU);
143 CHECK_HEX(long, 7eadbeefL);
144 CHECK_HEX(unsigned long, deadbeefUL);
145 #ifdef HAVE_LONG_LONG
146 CHECK_HEX(long long, 12345678deadbeefLL);
147 #endif
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150 #endif
152 #undef CHECK_HEX
154 printf("Testing octal\n");
156 #define CHECK_OCTAL(type, value) \
157 do { \
158 type v; \
159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160 CHECK_EQ(v, 0 ## value); \
161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162 CHECK_EQ(v, 0 ## value); \
163 } while(0)
165 CHECK_OCTAL(short, 77777);
166 CHECK_OCTAL(unsigned short, 177777U);
167 CHECK_OCTAL(int, 17777777777);
168 CHECK_OCTAL(unsigned int, 37777777777U);
169 CHECK_OCTAL(long, 17777777777L);
170 CHECK_OCTAL(unsigned long, 37777777777UL);
171 #ifdef HAVE_LONG_LONG
172 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #endif
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176 #endif
178 #undef CHECK_OCTAL
180 printf("Testing decimal\n");
182 #define CHECK_DECIMAL(type, value) \
183 do { \
184 type v; \
185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186 CHECK_EQ(v, value); \
187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188 CHECK_EQ(v, value); \
189 } while(0)
191 CHECK_DECIMAL(short, -1);
192 CHECK_DECIMAL(unsigned short, 9999);
193 CHECK_DECIMAL(int, -1000);
194 CHECK_DECIMAL(unsigned int, 12345U);
195 CHECK_DECIMAL(long, -10000000L);
196 CHECK_DECIMAL(unsigned long, 3083324652U);
197 #ifdef HAVE_LONG_LONG
198 CHECK_DECIMAL(long long, -100000000000000LL);
199 #endif
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202 #endif
204 #undef CHECK_DECIMAL
208 static void TestReplace() {
209 printf("Testing Replace\n");
211 struct ReplaceTest {
212 const char *regexp;
213 const char *rewrite;
214 const char *original;
215 const char *single;
216 const char *global;
217 int global_count; // the expected return value from ReplaceAll
219 static const ReplaceTest tests[] = {
220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "\\2\\1ay",
222 "the quick brown fox jumps over the lazy dogs.",
223 "ethay quick brown fox jumps over the lazy dogs.",
224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225 9 },
226 { "\\w+",
227 "\\0-NOSPAM",
228 "paul.haahr@google.com",
229 "paul-NOSPAM.haahr@google.com",
230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231 4 },
232 { "^",
233 "(START)",
234 "foo",
235 "(START)foo",
236 "(START)foo",
237 1 },
238 { "^",
239 "(START)",
241 "(START)",
242 "(START)",
243 1 },
244 { "$",
245 "(END)",
247 "(END)",
248 "(END)",
249 1 },
250 { "b",
251 "bb",
252 "ababababab",
253 "abbabababab",
254 "abbabbabbabbabb",
255 5 },
256 { "b",
257 "bb",
258 "bbbbbb",
259 "bbbbbbb",
260 "bbbbbbbbbbbb",
261 6 },
262 { "b+",
263 "bb",
264 "bbbbbb",
265 "bb",
266 "bb",
267 1 },
268 { "b*",
269 "bb",
270 "bbbbbb",
271 "bb",
272 "bbbb",
273 2 },
274 { "b*",
275 "bb",
276 "aaaaa",
277 "bbaaaaa",
278 "bbabbabbabbabbabb",
279 6 },
280 { "b*",
281 "bb",
282 "aa\naa\n",
283 "bbaa\naa\n",
284 "bbabbabb\nbbabbabb\nbb",
285 7 },
286 { "b*",
287 "bb",
288 "aa\raa\r",
289 "bbaa\raa\r",
290 "bbabbabb\rbbabbabb\rbb",
291 7 },
292 { "b*",
293 "bb",
294 "aa\r\naa\r\n",
295 "bbaa\r\naa\r\n",
296 "bbabbabb\r\nbbabbabb\r\nbb",
297 7 },
298 // Check empty-string matching (it's tricky!)
299 { "aa|b*",
300 "@",
301 "aa",
302 "@",
303 "@@",
304 2 },
305 { "b*|aa",
306 "@",
307 "aa",
308 "@aa",
309 "@@@",
310 3 },
311 #ifdef SUPPORT_UTF8
312 { "b*",
313 "bb",
314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317 5 },
318 { "b*",
319 "bb",
320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324 9 },
325 #endif
326 { "", NULL, NULL, NULL, NULL, 0 }
329 #ifdef SUPPORT_UTF8
330 const bool support_utf8 = true;
331 #else
332 const bool support_utf8 = false;
333 #endif
335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337 assert(re.error().empty());
338 string one(t->original);
339 CHECK(re.Replace(t->rewrite, &one));
340 CHECK_EQ(one, t->single);
341 string all(t->original);
342 const int replace_count = re.GlobalReplace(t->rewrite, &all);
343 CHECK_EQ(all, t->global);
344 CHECK_EQ(replace_count, t->global_count);
347 // One final test: test \r\n replacement when we're not in CRLF mode
349 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350 assert(re.error().empty());
351 string all("aa\r\naa\r\n");
352 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
356 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357 assert(re.error().empty());
358 string all("aa\r\naa\r\n");
359 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
362 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363 // Alas, the answer depends on how pcre was compiled.
366 static void TestExtract() {
367 printf("Testing Extract\n");
369 string s;
371 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372 CHECK_EQ(s, "kremvax!boris");
374 // check the RE interface as well
375 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376 CHECK_EQ(s, "'foo'");
377 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378 CHECK_EQ(s, "'foo'");
381 static void TestConsume() {
382 printf("Testing Consume\n");
384 string word;
386 string s(" aaa b!@#$@#$cccc");
387 StringPiece input(s);
389 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390 CHECK(r.Consume(&input, &word));
391 CHECK_EQ(word, "aaa");
392 CHECK(r.Consume(&input, &word));
393 CHECK_EQ(word, "b");
394 CHECK(! r.Consume(&input, &word));
397 static void TestFindAndConsume() {
398 printf("Testing FindAndConsume\n");
400 string word;
402 string s(" aaa b!@#$@#$cccc");
403 StringPiece input(s);
405 RE r("(\\w+)"); // matches a word
406 CHECK(r.FindAndConsume(&input, &word));
407 CHECK_EQ(word, "aaa");
408 CHECK(r.FindAndConsume(&input, &word));
409 CHECK_EQ(word, "b");
410 CHECK(r.FindAndConsume(&input, &word));
411 CHECK_EQ(word, "cccc");
412 CHECK(! r.FindAndConsume(&input, &word));
415 static void TestMatchNumberPeculiarity() {
416 printf("Testing match-number peculiarity\n");
418 string word1;
419 string word2;
420 string word3;
422 RE r("(foo)|(bar)|(baz)");
423 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424 CHECK_EQ(word1, "foo");
425 CHECK_EQ(word2, "");
426 CHECK_EQ(word3, "");
427 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428 CHECK_EQ(word1, "");
429 CHECK_EQ(word2, "bar");
430 CHECK_EQ(word3, "");
431 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432 CHECK_EQ(word1, "");
433 CHECK_EQ(word2, "");
434 CHECK_EQ(word3, "baz");
435 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
437 string a;
438 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439 CHECK_EQ(a, "");
442 static void TestRecursion() {
443 printf("Testing recursion\n");
445 // Get one string that passes (sometimes), one that never does.
446 string text_good("abcdefghijk");
447 string text_bad("acdefghijkl");
449 // According to pcretest, matching text_good against (\w+)*b
450 // requires match_limit of at least 8192, and match_recursion_limit
451 // of at least 37.
453 RE_Options options_ml;
454 options_ml.set_match_limit(8192);
455 RE re("(\\w+)*b", options_ml);
456 CHECK(re.PartialMatch(text_good) == true);
457 CHECK(re.PartialMatch(text_bad) == false);
458 CHECK(re.FullMatch(text_good) == false);
459 CHECK(re.FullMatch(text_bad) == false);
461 options_ml.set_match_limit(1024);
462 RE re2("(\\w+)*b", options_ml);
463 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464 CHECK(re2.PartialMatch(text_bad) == false);
465 CHECK(re2.FullMatch(text_good) == false);
466 CHECK(re2.FullMatch(text_bad) == false);
468 RE_Options options_mlr;
469 options_mlr.set_match_limit_recursion(50);
470 RE re3("(\\w+)*b", options_mlr);
471 CHECK(re3.PartialMatch(text_good) == true);
472 CHECK(re3.PartialMatch(text_bad) == false);
473 CHECK(re3.FullMatch(text_good) == false);
474 CHECK(re3.FullMatch(text_bad) == false);
476 options_mlr.set_match_limit_recursion(10);
477 RE re4("(\\w+)*b", options_mlr);
478 CHECK(re4.PartialMatch(text_good) == false);
479 CHECK(re4.PartialMatch(text_bad) == false);
480 CHECK(re4.FullMatch(text_good) == false);
481 CHECK(re4.FullMatch(text_bad) == false);
484 // A meta-quoted string, interpreted as a pattern, should always match
485 // the original unquoted string.
486 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487 string quoted = RE::QuoteMeta(unquoted);
488 RE re(quoted, options);
489 CHECK(re.FullMatch(unquoted));
492 // A string containing meaningful regexp characters, which is then meta-
493 // quoted, should not generally match a string the unquoted string does.
494 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495 RE_Options options = RE_Options()) {
496 string quoted = RE::QuoteMeta(unquoted);
497 RE re(quoted, options);
498 CHECK(!re.FullMatch(should_not_match));
501 // Tests that quoted meta characters match their original strings,
502 // and that a few things that shouldn't match indeed do not.
503 static void TestQuotaMetaSimple() {
504 TestQuoteMeta("foo");
505 TestQuoteMeta("foo.bar");
506 TestQuoteMeta("foo\\.bar");
507 TestQuoteMeta("[1-9]");
508 TestQuoteMeta("1.5-2.0?");
509 TestQuoteMeta("\\d");
510 TestQuoteMeta("Who doesn't like ice cream?");
511 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512 TestQuoteMeta("((?!)xxx).*yyy");
513 TestQuoteMeta("([");
514 TestQuoteMeta(string("foo\0bar", 7));
517 static void TestQuoteMetaSimpleNegative() {
518 NegativeTestQuoteMeta("foo", "bar");
519 NegativeTestQuoteMeta("...", "bar");
520 NegativeTestQuoteMeta("\\.", ".");
521 NegativeTestQuoteMeta("\\.", "..");
522 NegativeTestQuoteMeta("(a)", "a");
523 NegativeTestQuoteMeta("(a|b)", "a");
524 NegativeTestQuoteMeta("(a|b)", "(a)");
525 NegativeTestQuoteMeta("(a|b)", "a|b");
526 NegativeTestQuoteMeta("[0-9]", "0");
527 NegativeTestQuoteMeta("[0-9]", "0-9");
528 NegativeTestQuoteMeta("[0-9]", "[9]");
529 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
532 static void TestQuoteMetaLatin1() {
533 TestQuoteMeta("3\xb2 = 9");
// QuoteMeta checks on UTF-8 encoded input; the body is compiled only when
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
  // No fancy utf8
  TestQuoteMeta("xyz", pcrecpp::UTF8());
  // 2-byte utf8 (degree symbol)
  TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());
  // As a middle character
  TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());
  // 3-byte utf8 (double prime)
  TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());
  // 4-byte utf8 (music note)
  TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8());
  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");
  // 2-byte utf (degree symbol): the subject has a backslash before each byte
  // of the character and must not match.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
551 static void TestQuoteMetaAll() {
552 printf("Testing QuoteMeta\n");
553 TestQuotaMetaSimple();
554 TestQuoteMetaSimpleNegative();
555 TestQuoteMetaLatin1();
556 TestQuoteMetaUtf8();
560 // Options tests contributed by
561 // Giuseppe Maxia, CTO, Stardata s.r.l.
562 // July 2005
564 static void GetOneOptionResult(
565 const char *option_name,
566 const char *regex,
567 const char *str,
568 RE_Options options,
569 bool full,
570 string expected) {
572 printf("Testing Option <%s>\n", option_name);
573 if(VERBOSE_TEST)
574 printf("/%s/ finds \"%s\" within \"%s\" \n",
575 regex,
576 expected.c_str(),
577 str);
578 string captured("");
579 if (full)
580 RE(regex,options).FullMatch(str, &captured);
581 else
582 RE(regex,options).PartialMatch(str, &captured);
583 CHECK_EQ(captured, expected);
586 static void TestOneOption(
587 const char *option_name,
588 const char *regex,
589 const char *str,
590 RE_Options options,
591 bool full,
592 bool assertive = true) {
594 printf("Testing Option <%s>\n", option_name);
595 if (VERBOSE_TEST)
596 printf("'%s' %s /%s/ \n",
597 str,
598 (assertive? "matches" : "doesn't match"),
599 regex);
600 if (assertive) {
601 if (full)
602 CHECK(RE(regex,options).FullMatch(str));
603 else
604 CHECK(RE(regex,options).PartialMatch(str));
605 } else {
606 if (full)
607 CHECK(!RE(regex,options).FullMatch(str));
608 else
609 CHECK(!RE(regex,options).PartialMatch(str));
613 static void Test_CASELESS() {
614 RE_Options options;
615 RE_Options options2;
617 options.set_caseless(true);
618 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
622 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624 options.set_caseless(false);
625 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
628 static void Test_MULTILINE() {
629 RE_Options options;
630 RE_Options options2;
631 const char *str = "HELLO\n" "cruel\n" "world\n";
633 options.set_multiline(true);
634 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637 options.set_multiline(false);
638 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
641 static void Test_DOTALL() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO\n" "cruel\n" "world";
646 options.set_dotall(true);
647 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650 options.set_dotall(false);
651 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
654 static void Test_DOLLAR_ENDONLY() {
655 RE_Options options;
656 RE_Options options2;
657 const char *str = "HELLO world\n";
659 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660 options.set_dollar_endonly(true);
661 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
665 static void Test_EXTRA() {
666 RE_Options options;
667 const char *str = "HELLO";
669 options.set_extra(true);
670 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672 options.set_extra(false);
673 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
676 static void Test_EXTENDED() {
677 RE_Options options;
678 RE_Options options2;
679 const char *str = "HELLO world";
681 options.set_extended(true);
682 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684 TestOneOption("EXTENDED (class)",
685 "^ HE L{2} O "
686 "\\s+ "
687 "\\w+ $ ",
688 str,
689 options,
690 false);
692 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693 TestOneOption("EXTENDED (function)",
694 "^ HE L{2} O "
695 "\\s+ "
696 "\\w+ $ ",
697 str,
698 pcrecpp::EXTENDED(),
699 false);
701 options.set_extended(false);
702 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
705 static void Test_NO_AUTO_CAPTURE() {
706 RE_Options options;
707 const char *str = "HELLO world";
708 string captured;
710 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711 if (VERBOSE_TEST)
712 printf("parentheses capture text\n");
713 RE re("(world|universe)$", options);
714 CHECK(re.Extract("\\1", str , &captured));
715 CHECK_EQ(captured, "world");
716 options.set_no_auto_capture(true);
717 printf("testing Option <NO_AUTO_CAPTURE>\n");
718 if (VERBOSE_TEST)
719 printf("parentheses do not capture text\n");
720 re.Extract("\\1",str, &captured );
721 CHECK_EQ(captured, "world");
724 static void Test_UNGREEDY() {
725 RE_Options options;
726 const char *str = "HELLO, 'this' is the 'world'";
728 options.set_ungreedy(true);
729 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
733 options.set_ungreedy(false);
734 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
738 static void Test_all_options() {
739 const char *str = "HELLO\n" "cruel\n" "world";
740 RE_Options options;
741 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
743 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744 options.set_all_options(0);
745 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
748 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750 " ^ c r u e l $ ",
751 str,
752 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753 false);
755 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756 " ^ c r u e l $ ",
757 str,
758 RE_Options()
759 .set_multiline(true)
760 .set_extended(true),
761 false);
763 options.set_all_options(0);
764 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
768 static void TestOptions() {
769 printf("Testing Options\n");
770 Test_CASELESS();
771 Test_MULTILINE();
772 Test_DOTALL();
773 Test_DOLLAR_ENDONLY();
774 Test_EXTENDED();
775 Test_NO_AUTO_CAPTURE();
776 Test_UNGREEDY();
777 Test_EXTRA();
778 Test_all_options();
781 static void TestConstructors() {
782 printf("Testing constructors\n");
784 RE_Options options;
785 options.set_dotall(true);
786 const char *str = "HELLO\n" "cruel\n" "world";
788 RE orig("HELLO.*world", options);
789 CHECK(orig.FullMatch(str));
791 RE copy1(orig);
792 CHECK(copy1.FullMatch(str));
794 RE copy2("not a match");
795 CHECK(!copy2.FullMatch(str));
796 copy2 = copy1;
797 CHECK(copy2.FullMatch(str));
798 copy2 = orig;
799 CHECK(copy2.FullMatch(str));
801 // Make sure when we assign to ourselves, nothing bad happens
802 orig = orig;
803 copy1 = copy1;
804 copy2 = copy2;
805 CHECK(orig.FullMatch(str));
806 CHECK(copy1.FullMatch(str));
807 CHECK(copy2.FullMatch(str));
810 int main(int argc, char** argv) {
811 // Treat any flag as --help
812 if (argc > 1 && argv[1][0] == '-') {
813 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814 " If 'timingX ###' is specified, run the given timing test\n"
815 " with the given number of iterations, rather than running\n"
816 " the default corectness test.\n", argv[0]);
817 return 0;
820 if (argc > 1) {
821 if ( argc == 2 || atoi(argv[2]) == 0) {
822 printf("timing mode needs a num-iters argument\n");
823 return 1;
825 if (!strcmp(argv[1], "timing1"))
826 Timing1(atoi(argv[2]));
827 else if (!strcmp(argv[1], "timing2"))
828 Timing2(atoi(argv[2]));
829 else if (!strcmp(argv[1], "timing3"))
830 Timing3(atoi(argv[2]));
831 else
832 printf("Unknown argument '%s'\n", argv[1]);
833 return 0;
836 printf("PCRE C++ wrapper tests\n");
837 printf("Testing FullMatch\n");
839 int i;
840 string s;
842 /***** FullMatch with no args *****/
844 CHECK(RE("h.*o").FullMatch("hello"));
845 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
846 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
847 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
848 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
849 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
851 /***** FullMatch with args *****/
853 // Zero-arg
854 CHECK(RE("\\d+").FullMatch("1001"));
856 // Single-arg
857 CHECK(RE("(\\d+)").FullMatch("1001", &i));
858 CHECK_EQ(i, 1001);
859 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860 CHECK_EQ(i, -123);
861 CHECK(!RE("()\\d+").FullMatch("10", &i));
862 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863 &i));
865 // Digits surrounding integer-arg
866 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867 CHECK_EQ(i, 23);
868 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869 CHECK_EQ(i, 1);
870 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871 CHECK_EQ(i, -1);
872 CHECK(RE("(\\d)").PartialMatch("1234", &i));
873 CHECK_EQ(i, 1);
874 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875 CHECK_EQ(i, -1);
877 // String-arg
878 CHECK(RE("h(.*)o").FullMatch("hello", &s));
879 CHECK_EQ(s, string("ell"));
881 // StringPiece-arg
882 StringPiece sp;
883 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884 CHECK_EQ(sp.size(), 4);
885 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886 CHECK_EQ(i, 1234);
888 // Multi-arg
889 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890 CHECK_EQ(s, string("ruby"));
891 CHECK_EQ(i, 1234);
893 // Ignore non-void* NULL arg
894 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898 #ifdef HAVE_LONG_LONG
899 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900 #endif
901 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
904 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
911 // Ignored arg
912 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913 CHECK_EQ(s, string("ruby"));
914 CHECK_EQ(i, 1234);
916 // Type tests
918 char c;
919 CHECK(RE("(H)ello").FullMatch("Hello", &c));
920 CHECK_EQ(c, 'H');
923 unsigned char c;
924 CHECK(RE("(H)ello").FullMatch("Hello", &c));
925 CHECK_EQ(c, static_cast<unsigned char>('H'));
928 short v;
929 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
931 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
932 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
933 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
937 unsigned short v;
938 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
939 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
940 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
941 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
944 int v;
945 static const int max_value = 0x7fffffff;
946 static const int min_value = -max_value - 1;
947 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
948 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
949 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
950 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
955 unsigned int v;
956 static const unsigned int max_value = 0xfffffffful;
957 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
959 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
961 #ifdef HAVE_LONG_LONG
962 # if defined(__MINGW__) || defined(__MINGW32__)
963 # define LLD "%I64d"
964 # define LLU "%I64u"
965 # else
966 # define LLD "%lld"
967 # define LLU "%llu"
968 # endif
970 long long v;
971 static const long long max_value = 0x7fffffffffffffffLL;
972 static const long long min_value = -max_value - 1;
973 char buf[32]; // definitely big enough for a long long
975 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
978 sprintf(buf, LLD, max_value);
979 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
981 sprintf(buf, LLD, min_value);
982 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
984 sprintf(buf, LLD, max_value);
985 assert(buf[strlen(buf)-1] != '9');
986 buf[strlen(buf)-1]++;
987 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
989 sprintf(buf, LLD, min_value);
990 assert(buf[strlen(buf)-1] != '9');
991 buf[strlen(buf)-1]++;
992 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
994 #endif
995 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
997 unsigned long long v;
998 long long v2;
999 static const unsigned long long max_value = 0xffffffffffffffffULL;
1000 char buf[32]; // definitely big enough for a unsigned long long
1002 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1005 sprintf(buf, LLU, max_value);
1006 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1008 assert(buf[strlen(buf)-1] != '9');
1009 buf[strlen(buf)-1]++;
1010 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1012 #endif
1014 float v;
1015 CHECK(RE("(.*)").FullMatch("100", &v));
1016 CHECK(RE("(.*)").FullMatch("-100.", &v));
1017 CHECK(RE("(.*)").FullMatch("1e23", &v));
1020 double v;
1021 CHECK(RE("(.*)").FullMatch("100", &v));
1022 CHECK(RE("(.*)").FullMatch("-100.", &v));
1023 CHECK(RE("(.*)").FullMatch("1e23", &v));
1026 // Check that matching is fully anchored
1027 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1028 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1029 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1032 // Braces
1033 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1037 // Complicated RE
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1043 // Check full-match handling (needs '$' tacked on internally)
1044 CHECK(RE("fo|foo").FullMatch("fo"));
1045 CHECK(RE("fo|foo").FullMatch("foo"));
1046 CHECK(RE("fo|foo$").FullMatch("fo"));
1047 CHECK(RE("fo|foo$").FullMatch("foo"));
1048 CHECK(RE("foo$").FullMatch("foo"));
1049 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050 CHECK(!RE("fo|bar").FullMatch("fox"));
1052 // Uncomment the following if we change the handling of '$' to
1053 // prevent it from matching a trailing newline
1054 if (false) {
1055 // Check that we don't get bitten by pcre's special handling of a
1056 // '\n' at the end of the string matching '$'
1057 CHECK(!RE("foo$").PartialMatch("foo\n"));
1060 // Number of args
1061 int a[16];
1062 CHECK(RE("").FullMatch(""));
1064 memset(a, 0, sizeof(0));
1065 CHECK(RE("(\\d){1}").FullMatch("1",
1066 &a[0]));
1067 CHECK_EQ(a[0], 1);
1069 memset(a, 0, sizeof(0));
1070 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071 &a[0], &a[1]));
1072 CHECK_EQ(a[0], 1);
1073 CHECK_EQ(a[1], 2);
1075 memset(a, 0, sizeof(0));
1076 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077 &a[0], &a[1], &a[2]));
1078 CHECK_EQ(a[0], 1);
1079 CHECK_EQ(a[1], 2);
1080 CHECK_EQ(a[2], 3);
1082 memset(a, 0, sizeof(0));
1083 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084 &a[0], &a[1], &a[2], &a[3]));
1085 CHECK_EQ(a[0], 1);
1086 CHECK_EQ(a[1], 2);
1087 CHECK_EQ(a[2], 3);
1088 CHECK_EQ(a[3], 4);
1090 memset(a, 0, sizeof(0));
1091 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092 &a[0], &a[1], &a[2],
1093 &a[3], &a[4]));
1094 CHECK_EQ(a[0], 1);
1095 CHECK_EQ(a[1], 2);
1096 CHECK_EQ(a[2], 3);
1097 CHECK_EQ(a[3], 4);
1098 CHECK_EQ(a[4], 5);
1100 memset(a, 0, sizeof(0));
1101 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102 &a[0], &a[1], &a[2],
1103 &a[3], &a[4], &a[5]));
1104 CHECK_EQ(a[0], 1);
1105 CHECK_EQ(a[1], 2);
1106 CHECK_EQ(a[2], 3);
1107 CHECK_EQ(a[3], 4);
1108 CHECK_EQ(a[4], 5);
1109 CHECK_EQ(a[5], 6);
1111 memset(a, 0, sizeof(0));
1112 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113 &a[0], &a[1], &a[2], &a[3],
1114 &a[4], &a[5], &a[6]));
1115 CHECK_EQ(a[0], 1);
1116 CHECK_EQ(a[1], 2);
1117 CHECK_EQ(a[2], 3);
1118 CHECK_EQ(a[3], 4);
1119 CHECK_EQ(a[4], 5);
1120 CHECK_EQ(a[5], 6);
1121 CHECK_EQ(a[6], 7);
1123 memset(a, 0, sizeof(0));
1124 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126 "1234567890123456",
1127 &a[0], &a[1], &a[2], &a[3],
1128 &a[4], &a[5], &a[6], &a[7],
1129 &a[8], &a[9], &a[10], &a[11],
1130 &a[12], &a[13], &a[14], &a[15]));
1131 CHECK_EQ(a[0], 1);
1132 CHECK_EQ(a[1], 2);
1133 CHECK_EQ(a[2], 3);
1134 CHECK_EQ(a[3], 4);
1135 CHECK_EQ(a[4], 5);
1136 CHECK_EQ(a[5], 6);
1137 CHECK_EQ(a[6], 7);
1138 CHECK_EQ(a[7], 8);
1139 CHECK_EQ(a[8], 9);
1140 CHECK_EQ(a[9], 0);
1141 CHECK_EQ(a[10], 1);
1142 CHECK_EQ(a[11], 2);
1143 CHECK_EQ(a[12], 3);
1144 CHECK_EQ(a[13], 4);
1145 CHECK_EQ(a[14], 5);
1146 CHECK_EQ(a[15], 6);
1148 /***** PartialMatch *****/
1150 printf("Testing PartialMatch\n");
1152 CHECK(RE("h.*o").PartialMatch("hello"));
1153 CHECK(RE("h.*o").PartialMatch("othello"));
1154 CHECK(RE("h.*o").PartialMatch("hello!"));
1155 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1157 /***** other tests *****/
1159 RadixTests();
1160 TestReplace();
1161 TestExtract();
1162 TestConsume();
1163 TestFindAndConsume();
1164 TestQuoteMetaAll();
1165 TestMatchNumberPeculiarity();
1167 // Check the pattern() accessor
1169 const string kPattern = "http://([^/]+)/.*";
1170 const RE re(kPattern);
1171 CHECK_EQ(kPattern, re.pattern());
1174 // Check RE error field.
1176 RE re("foo");
1177 CHECK(re.error().empty()); // Must have no error
1180 #ifdef SUPPORT_UTF8
1181 // Check UTF-8 handling
1183 printf("Testing UTF-8 handling\n");
1185 // Three Japanese characters (nihongo)
1186 const unsigned char utf8_string[] = {
1187 0xe6, 0x97, 0xa5, // 65e5
1188 0xe6, 0x9c, 0xac, // 672c
1189 0xe8, 0xaa, 0x9e, // 8a9e
1192 const unsigned char utf8_pattern[] = {
1193 '.',
1194 0xe6, 0x9c, 0xac, // 672c
1195 '.',
1199 // Both should match in either mode, bytes or UTF-8
1200 RE re_test1(".........");
1201 CHECK(re_test1.FullMatch(utf8_string));
1202 RE re_test2("...", pcrecpp::UTF8());
1203 CHECK(re_test2.FullMatch(utf8_string));
1205 // Check that '.' matches one byte or UTF-8 character
1206 // according to the mode.
1207 string ss;
1208 RE re_test3("(.)");
1209 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1210 CHECK_EQ(ss, string("\xe6"));
1211 RE re_test4("(.)", pcrecpp::UTF8());
1212 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1213 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1215 // Check that string matches itself in either mode
1216 RE re_test5(utf8_string);
1217 CHECK(re_test5.FullMatch(utf8_string));
1218 RE re_test6(utf8_string, pcrecpp::UTF8());
1219 CHECK(re_test6.FullMatch(utf8_string));
1221 // Check that pattern matches string only in UTF8 mode
1222 RE re_test7(utf8_pattern);
1223 CHECK(!re_test7.FullMatch(utf8_string));
1224 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1225 CHECK(re_test8.FullMatch(utf8_string));
1228 // Check that ungreedy, UTF8 regular expressions don't match when they
1229 // oughtn't -- see bug 82246.
1231 // This code always worked.
1232 const char* pattern = "\\w+X";
1233 const string target = "a aX";
1234 RE match_sentence(pattern);
1235 RE match_sentence_re(pattern, pcrecpp::UTF8());
1237 CHECK(!match_sentence.FullMatch(target));
1238 CHECK(!match_sentence_re.FullMatch(target));
1242 const char* pattern = "(?U)\\w+X";
1243 const string target = "a aX";
1244 RE match_sentence(pattern);
1245 RE match_sentence_re(pattern, pcrecpp::UTF8());
1247 CHECK(!match_sentence.FullMatch(target));
1248 CHECK(!match_sentence_re.FullMatch(target));
1250 #endif /* def SUPPORT_UTF8 */
1252 printf("Testing error reporting\n");
1254 { RE re("a\\1"); CHECK(!re.error().empty()); }
1256 RE re("a[x");
1257 CHECK(!re.error().empty());
1260 RE re("a[z-a]");
1261 CHECK(!re.error().empty());
1264 RE re("a[[:foobar:]]");
1265 CHECK(!re.error().empty());
1268 RE re("a(b");
1269 CHECK(!re.error().empty());
1272 RE re("a\\");
1273 CHECK(!re.error().empty());
1276 // Test that recursion is stopped
1277 TestRecursion();
1279 // Test Options
1280 if (getenv("VERBOSE_TEST") != NULL)
1281 VERBOSE_TEST = true;
1282 TestOptions();
1284 // Test the constructors
1285 TestConstructors();
1287 // Done
1288 printf("OK\n");
1290 return 0;