2009-06-25 Dimitri Glazkov <dglazkov@chromium.org>
[webbrowser.git] / JavaScriptCore / wrec / WRECGenerator.cpp
blobe2e8abaf05ec128cc5806c97d2020e2b0b9740fb
1 /*
2 * Copyright (C) 2008 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "config.h"
27 #include "WREC.h"
29 #if ENABLE(WREC)
31 #include "CharacterClassConstructor.h"
32 #include "Interpreter.h"
33 #include "WRECFunctors.h"
34 #include "WRECParser.h"
35 #include "pcre_internal.h"
37 using namespace WTF;
39 namespace JSC { namespace WREC {
41 void Generator::generateEnter()
43 #if PLATFORM(X86)
44 // On x86 edi & esi are callee preserved registers.
45 push(X86::edi);
46 push(X86::esi);
48 #if COMPILER(MSVC)
49 // Move the arguments into registers.
50 peek(input, 3);
51 peek(index, 4);
52 peek(length, 5);
53 peek(output, 6);
54 #else
55 // On gcc the function is regparm(3), so the input, index, and length registers
56 // (eax, edx, and ecx respectively) already contain the appropriate values.
57 // Just load the fourth argument (output) into edi
58 peek(output, 3);
59 #endif
60 #endif
63 void Generator::generateReturnSuccess()
65 ASSERT(returnRegister != index);
66 ASSERT(returnRegister != output);
68 // Set return value.
69 pop(returnRegister); // match begin
70 store32(returnRegister, output);
71 store32(index, Address(output, 4)); // match end
73 // Restore callee save registers.
74 #if PLATFORM(X86)
75 pop(X86::esi);
76 pop(X86::edi);
77 #endif
78 ret();
81 void Generator::generateSaveIndex()
83 push(index);
86 void Generator::generateIncrementIndex(Jump* failure)
88 peek(index);
89 if (failure)
90 *failure = branch32(Equal, length, index);
91 add32(Imm32(1), index);
92 poke(index);
95 void Generator::generateLoadCharacter(JumpList& failures)
97 failures.append(branch32(Equal, length, index));
98 load16(BaseIndex(input, index, TimesTwo), character);
101 // For the sake of end-of-line assertions, we treat one-past-the-end as if it
102 // were part of the input string.
103 void Generator::generateJumpIfNotEndOfInput(Label target)
105 branch32(LessThanOrEqual, index, length, target);
108 void Generator::generateReturnFailure()
110 pop();
111 move(Imm32(-1), returnRegister);
113 #if PLATFORM(X86)
114 pop(X86::esi);
115 pop(X86::edi);
116 #endif
117 ret();
120 void Generator::generateBacktrack1()
122 sub32(Imm32(1), index);
125 void Generator::generateBacktrackBackreference(unsigned subpatternId)
127 sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index);
128 add32(Address(output, (2 * subpatternId) * sizeof(int)), index);
131 void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max)
133 GenerateBackreferenceFunctor functor(subpatternId);
135 load32(Address(output, (2 * subpatternId) * sizeof(int)), character);
136 Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character);
138 ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy);
139 if (quantifierType == Quantifier::Greedy)
140 generateGreedyQuantifier(failures, functor, min, max);
141 else
142 generateNonGreedyQuantifier(failures, functor, min, max);
144 skipIfEmpty.link(this);
147 void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
149 JumpList atomFailedList;
150 JumpList alternativeFailedList;
152 // (0) Setup: Save, then init repeatCount.
153 push(repeatCount);
154 move(Imm32(0), repeatCount);
155 Jump start = jump();
157 // (4) Quantifier failed: No more atom reading possible.
158 Label quantifierFailed(this);
159 pop(repeatCount);
160 failures.append(jump());
162 // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again.
163 Label alternativeFailed(this);
164 pop(index);
165 if (max != Quantifier::Infinity)
166 branch32(Equal, repeatCount, Imm32(max), quantifierFailed);
168 // (1) Read an atom.
169 if (min)
170 start.link(this);
171 Label readAtom(this);
172 functor.generateAtom(this, atomFailedList);
173 atomFailedList.linkTo(quantifierFailed, this);
174 add32(Imm32(1), repeatCount);
176 // (2) Keep reading if we're under the minimum.
177 if (min > 1)
178 branch32(LessThan, repeatCount, Imm32(min), readAtom);
180 // (3) Test the rest of the alternative.
181 if (!min)
182 start.link(this);
183 push(index);
184 m_parser.parseAlternative(alternativeFailedList);
185 alternativeFailedList.linkTo(alternativeFailed, this);
187 pop();
188 pop(repeatCount);
191 void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
193 if (!max)
194 return;
196 JumpList doneReadingAtomsList;
197 JumpList alternativeFailedList;
199 // (0) Setup: Save, then init repeatCount.
200 push(repeatCount);
201 move(Imm32(0), repeatCount);
203 // (1) Greedily read as many copies of the atom as possible, then jump to (2).
204 Label readAtom(this);
205 functor.generateAtom(this, doneReadingAtomsList);
206 add32(Imm32(1), repeatCount);
207 if (max == Quantifier::Infinity)
208 jump(readAtom);
209 else if (max == 1)
210 doneReadingAtomsList.append(jump());
211 else {
212 branch32(NotEqual, repeatCount, Imm32(max), readAtom);
213 doneReadingAtomsList.append(jump());
216 // (5) Quantifier failed: No more backtracking possible.
217 Label quantifierFailed(this);
218 pop(repeatCount);
219 failures.append(jump());
221 // (4) Alternative failed: Backtrack, then fall through to (2) to try again.
222 Label alternativeFailed(this);
223 pop(index);
224 functor.backtrack(this);
225 sub32(Imm32(1), repeatCount);
227 // (2) Verify that we have enough atoms.
228 doneReadingAtomsList.link(this);
229 branch32(LessThan, repeatCount, Imm32(min), quantifierFailed);
231 // (3) Test the rest of the alternative.
232 push(index);
233 m_parser.parseAlternative(alternativeFailedList);
234 alternativeFailedList.linkTo(alternativeFailed, this);
236 pop();
237 pop(repeatCount);
240 void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count)
242 for (size_t i = 0; i < count;) {
243 if (i < count - 1) {
244 if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) {
245 i += 2;
246 continue;
250 generatePatternCharacter(failures, sequence[i]);
251 ++i;
255 bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2)
257 if (m_parser.ignoreCase()) {
258 // Non-trivial case folding requires more than one test, so we can't
259 // test as a pair with an adjacent character.
260 if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1))
261 return false;
262 if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2))
263 return false;
266 // Optimistically consume 2 characters.
267 add32(Imm32(2), index);
268 failures.append(branch32(GreaterThan, index, length));
270 // Load the characters we just consumed, offset -2 characters from index.
271 load32(BaseIndex(input, index, TimesTwo, -2 * 2), character);
273 if (m_parser.ignoreCase()) {
274 // Convert ASCII alphabet characters to upper case before testing for
275 // equality. (ASCII non-alphabet characters don't require upper-casing
276 // because they have no uppercase equivalents. Unicode characters don't
277 // require upper-casing because we only handle Unicode characters whose
278 // upper and lower cases are equal.)
279 int ch1Mask = 0;
280 if (isASCIIAlpha(ch1)) {
281 ch1 |= 32;
282 ch1Mask = 32;
285 int ch2Mask = 0;
286 if (isASCIIAlpha(ch2)) {
287 ch2 |= 32;
288 ch2Mask = 32;
291 int mask = ch1Mask | (ch2Mask << 16);
292 if (mask)
293 or32(Imm32(mask), character);
295 int pair = ch1 | (ch2 << 16);
297 failures.append(branch32(NotEqual, character, Imm32(pair)));
298 return true;
301 void Generator::generatePatternCharacter(JumpList& failures, int ch)
303 generateLoadCharacter(failures);
305 // used for unicode case insensitive
306 bool hasUpper = false;
307 Jump isUpper;
309 // if case insensitive match
310 if (m_parser.ignoreCase()) {
311 UChar lower, upper;
313 // check for ascii case sensitive characters
314 if (isASCIIAlpha(ch)) {
315 or32(Imm32(32), character);
316 ch |= 32;
317 } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) {
318 // handle unicode case sentitive characters - branch to success on upper
319 isUpper = branch32(Equal, character, Imm32(upper));
320 hasUpper = true;
321 ch = lower;
325 // checks for ch, or lower case version of ch, if insensitive
326 failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch)));
328 if (m_parser.ignoreCase() && hasUpper) {
329 // for unicode case insensitive matches, branch here if upper matches.
330 isUpper.link(this);
333 // on success consume the char
334 add32(Imm32(1), index);
337 void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
339 do {
340 // pick which range we're going to generate
341 int which = count >> 1;
342 char lo = ranges[which].begin;
343 char hi = ranges[which].end;
345 // check if there are any ranges or matches below lo. If not, just jl to failure -
346 // if there is anything else to check, check that first, if it falls through jmp to failure.
347 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
348 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
350 // generate code for all ranges before this one
351 if (which)
352 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
354 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
355 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
356 ++*matchIndex;
358 failures.append(jump());
360 loOrAbove.link(this);
361 } else if (which) {
362 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
364 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
365 failures.append(jump());
367 loOrAbove.link(this);
368 } else
369 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
371 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
372 ++*matchIndex;
374 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
375 // fall through to here, the value is above hi.
377 // shuffle along & loop around if there are any more matches to handle.
378 unsigned next = which + 1;
379 ranges += next;
380 count -= next;
381 } while (count);
384 void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass)
386 Jump unicodeFail;
387 if (charClass.numMatchesUnicode || charClass.numRangesUnicode) {
388 Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
390 if (charClass.numMatchesUnicode) {
391 for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) {
392 UChar ch = charClass.matchesUnicode[i];
393 matchDest.append(branch32(Equal, character, Imm32(ch)));
397 if (charClass.numRangesUnicode) {
398 for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) {
399 UChar lo = charClass.rangesUnicode[i].begin;
400 UChar hi = charClass.rangesUnicode[i].end;
402 Jump below = branch32(LessThan, character, Imm32(lo));
403 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
404 below.link(this);
408 unicodeFail = jump();
409 isAscii.link(this);
412 if (charClass.numRanges) {
413 unsigned matchIndex = 0;
414 JumpList failures;
415 generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches);
416 while (matchIndex < charClass.numMatches)
417 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++])));
419 failures.link(this);
420 } else if (charClass.numMatches) {
421 // optimization: gather 'a','A' etc back together, can mask & test once.
422 Vector<char> matchesAZaz;
424 for (unsigned i = 0; i < charClass.numMatches; ++i) {
425 char ch = charClass.matches[i];
426 if (m_parser.ignoreCase()) {
427 if (isASCIILower(ch)) {
428 matchesAZaz.append(ch);
429 continue;
431 if (isASCIIUpper(ch))
432 continue;
434 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
437 if (unsigned countAZaz = matchesAZaz.size()) {
438 or32(Imm32(32), character);
439 for (unsigned i = 0; i < countAZaz; ++i)
440 matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
444 if (charClass.numMatchesUnicode || charClass.numRangesUnicode)
445 unicodeFail.link(this);
448 void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert)
450 generateLoadCharacter(failures);
452 if (invert)
453 generateCharacterClassInverted(failures, charClass);
454 else {
455 JumpList successes;
456 generateCharacterClassInverted(successes, charClass);
457 failures.append(jump());
458 successes.link(this);
461 add32(Imm32(1), index);
464 void Generator::generateParenthesesAssertion(JumpList& failures)
466 JumpList disjunctionFailed;
468 push(index);
469 m_parser.parseDisjunction(disjunctionFailed);
470 Jump success = jump();
472 disjunctionFailed.link(this);
473 pop(index);
474 failures.append(jump());
476 success.link(this);
477 pop(index);
480 void Generator::generateParenthesesInvertedAssertion(JumpList& failures)
482 JumpList disjunctionFailed;
484 push(index);
485 m_parser.parseDisjunction(disjunctionFailed);
487 // If the disjunction succeeded, the inverted assertion failed.
488 pop(index);
489 failures.append(jump());
491 // If the disjunction failed, the inverted assertion succeeded.
492 disjunctionFailed.link(this);
493 pop(index);
496 void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail)
498 jump(start);
499 success.link(this);
500 failures.append(fail);
503 Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter)
505 Jump skip = jump();
506 newFailures.link(this);
507 for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) {
508 store32(Imm32(-1), Address(output, (2 * i) * sizeof(int)));
509 store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int)));
512 Jump newFailJump = jump();
513 skip.link(this);
515 return newFailJump;
518 void Generator::generateAssertionBOL(JumpList& failures)
520 if (m_parser.multiline()) {
521 JumpList previousIsNewline;
523 // begin of input == success
524 previousIsNewline.append(branch32(Equal, index, Imm32(0)));
526 // now check prev char against newline characters.
527 load16(BaseIndex(input, index, TimesTwo, -2), character);
528 generateCharacterClassInverted(previousIsNewline, CharacterClass::newline());
530 failures.append(jump());
532 previousIsNewline.link(this);
533 } else
534 failures.append(branch32(NotEqual, index, Imm32(0)));
537 void Generator::generateAssertionEOL(JumpList& failures)
539 if (m_parser.multiline()) {
540 JumpList nextIsNewline;
542 generateLoadCharacter(nextIsNewline); // end of input == success
543 generateCharacterClassInverted(nextIsNewline, CharacterClass::newline());
544 failures.append(jump());
545 nextIsNewline.link(this);
546 } else {
547 failures.append(branch32(NotEqual, length, index));
551 void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert)
553 JumpList wordBoundary;
554 JumpList notWordBoundary;
556 // (1) Check if the previous value was a word char
558 // (1.1) check for begin of input
559 Jump atBegin = branch32(Equal, index, Imm32(0));
560 // (1.2) load the last char, and chck if is word character
561 load16(BaseIndex(input, index, TimesTwo, -2), character);
562 JumpList previousIsWord;
563 generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar());
564 // (1.3) if we get here, previous is not a word char
565 atBegin.link(this);
567 // (2) Handle situation where previous was NOT a \w
569 generateLoadCharacter(notWordBoundary);
570 generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar());
571 // (2.1) If we get here, neither chars are word chars
572 notWordBoundary.append(jump());
574 // (3) Handle situation where previous was a \w
576 // (3.0) link success in first match to here
577 previousIsWord.link(this);
578 generateLoadCharacter(wordBoundary);
579 generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar());
580 // (3.1) If we get here, this is an end of a word, within the input.
582 // (4) Link everything up
584 if (invert) {
585 // handle the fall through case
586 wordBoundary.append(jump());
588 // looking for non word boundaries, so link boundary fails to here.
589 notWordBoundary.link(this);
591 failures.append(wordBoundary);
592 } else {
593 // looking for word boundaries, so link successes here.
594 wordBoundary.link(this);
596 failures.append(notWordBoundary);
600 void Generator::generateBackreference(JumpList& failures, unsigned subpatternId)
602 push(index);
603 push(repeatCount);
605 // get the start pos of the backref into repeatCount (multipurpose!)
606 load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount);
608 Jump skipIncrement = jump();
609 Label topOfLoop(this);
611 add32(Imm32(1), index);
612 add32(Imm32(1), repeatCount);
613 skipIncrement.link(this);
615 // check if we're at the end of backref (if we are, success!)
616 Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount);
618 load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character);
620 // check if we've run out of input (this would be a can o'fail)
621 Jump endOfInput = branch32(Equal, length, index);
623 branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop);
625 endOfInput.link(this);
627 // Failure
628 pop(repeatCount);
629 pop(index);
630 failures.append(jump());
632 // Success
633 endOfBackRef.link(this);
634 pop(repeatCount);
635 pop();
638 void Generator::terminateAlternative(JumpList& successes, JumpList& failures)
640 successes.append(jump());
642 failures.link(this);
643 peek(index);
646 void Generator::terminateDisjunction(JumpList& successes)
648 successes.link(this);
651 } } // namespace JSC::WREC
653 #endif // ENABLE(WREC)