1 // -*- c-basic-offset: 2 -*-
2 // krazy:excludeall=doublequote_chars (UStrings aren't QStrings)
4 * This file is part of the KDE libraries
5 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
6 * Copyright (C) 2003 Apple Computer, Inc.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "regexp_object.h"
27 #include "regexp_object.lut.h"
34 #include "interpreter.h"
35 #include "operations.h"
38 #include "error_object.h"
43 // ------------------------------ RegExpPrototype ---------------------------
// Class metadata for the RegExp prototype object; its class name is "RegExp".
// The remaining three fields are zero.
47 const ClassInfo
RegExpPrototype::info
= {"RegExp", 0, 0, 0};
// Constructor for the RegExp prototype object. Registers the exec, test,
// toString and compile functions on it, each with the DontEnum attribute.
49 RegExpPrototype::RegExpPrototype(ExecState
*exec
,
50 ObjectPrototype
*objProto
,
51 FunctionPrototype
*funcProto
)
// Function-local statics: each Identifier is allocated once and then reused
// for every prototype that gets constructed.
54 static const Identifier
* execPropertyName
= new Identifier("exec");
55 static const Identifier
* testPropertyName
= new Identifier("test");
56 static const Identifier
* compilePropertyName
= new Identifier("compile");
// The fourth RegExpProtoFunc argument becomes the function's `length`
// property: 0 for exec, test and toString, 1 for compile.
58 putDirectFunction(new RegExpProtoFunc(exec
, funcProto
, RegExpProtoFunc::Exec
, 0, *execPropertyName
), DontEnum
);
59 putDirectFunction(new RegExpProtoFunc(exec
, funcProto
, RegExpProtoFunc::Test
, 0, *testPropertyName
), DontEnum
);
// toString reuses the interpreter-wide property name instead of a local static.
60 putDirectFunction(new RegExpProtoFunc(exec
, funcProto
, RegExpProtoFunc::ToString
, 0, exec
->propertyNames().toString
), DontEnum
);
61 putDirectFunction(new RegExpProtoFunc(exec
, funcProto
, RegExpProtoFunc::Compile
, 1, *compilePropertyName
), DontEnum
);
64 // ------------------------------ RegExpProtoFunc ---------------------------
// Creates one function object of the RegExp prototype.
//   funcProto, name - forwarded to the InternalFunctionImp base.
//   i   - operation selector, stored in `id` (RegExpProtoFunc::Exec, Test, ...).
//   len - value published as the function's `length` property.
66 RegExpProtoFunc::RegExpProtoFunc(ExecState
* exec
, FunctionPrototype
* funcProto
, int i
, int len
, const Identifier
& name
)
67 : InternalFunctionImp(funcProto
, name
), id(i
)
// `length` is installed as DontDelete | ReadOnly | DontEnum.
69 putDirect(exec
->propertyNames().length
, len
, DontDelete
| ReadOnly
| DontEnum
);
// Entry point for the RegExp.prototype functions; the cases visible below are
// Test, ToString and Compile.
//   thisObj - the object the function was invoked on; must inherit RegExpImp,
//             otherwise a TypeError is thrown (with one exception: ToString on
//             an object inheriting RegExpPrototype yields the string "//").
//   args    - call arguments; args[0] is the subject string for matching, or
//             the pattern for Compile (with args[1] as its flags).
72 JSValue
*RegExpProtoFunc::callAsFunction(ExecState
*exec
, JSObject
*thisObj
, const List
&args
)
// Receiver check: anything that is not a RegExp instance is rejected below.
74 if (!thisObj
->inherits(&RegExpImp::info
)) {
75 if (thisObj
->inherits(&RegExpPrototype::info
)) {
77 case ToString
: return jsString("//");
81 return throwError(exec
, TypeError
);
85 case Test
: // 15.10.6.2
// regExp is the engine owned by this instance; regExpObj is the interpreter's
// builtin RegExp constructor object, through which the match is performed.
88 RegExp
*regExp
= static_cast<RegExpImp
*>(thisObj
)->regExp();
89 RegExpObjectImp
* regExpObj
= static_cast<RegExpObjectImp
*>(exec
->lexicalInterpreter()->builtinRegExp());
// The subject string is taken either from the constructor's `input` property
// or from the first argument.
93 input
= regExpObj
->get(exec
, exec
->propertyNames().input
)->toString(exec
);
95 input
= args
[0]->toString(exec
);
// lastIndex is kept as a double here; it is range-checked before being cast
// to int for performMatch.
97 double lastIndex
= thisObj
->get(exec
, exec
->propertyNames().lastIndex
)->toInteger(exec
);
99 bool globalFlag
= thisObj
->get(exec
, exec
->propertyNames().global
)->toBoolean(exec
);
// A lastIndex outside [0, input.size()] resets the stored lastIndex to 0.
102 if (lastIndex
< 0 || lastIndex
> input
.size()) {
103 thisObj
->put(exec
, exec
->propertyNames().lastIndex
, jsNumber(0), DontDelete
| DontEnum
);
// performMatch writes the position of the match into foundIndex.
108 regExp
->prepareMatch(input
);
109 UString match
= regExpObj
->performMatch(regExp
, exec
, input
, static_cast<int>(lastIndex
), &foundIndex
);
// performMatch may have raised an exception (see throwRegExpError).
111 if (exec
->hadException())
112 return jsUndefined();
// A null UString from performMatch means "no match".
114 bool didMatch
= !match
.isNull();
// NOTE(review): presumably the Test path - only the boolean outcome is returned.
118 return jsBoolean(didMatch
);
// Store the position just past the match (foundIndex + match length) in
// lastIndex, then return the array of matches.
123 thisObj
->put(exec
, exec
->propertyNames().lastIndex
, jsNumber(foundIndex
+ match
.size()), DontDelete
| DontEnum
);
124 return regExpObj
->arrayOfMatches(exec
, match
);
// NOTE(review): presumably the no-match path - lastIndex is reset to 0.
127 thisObj
->put(exec
, exec
->propertyNames().lastIndex
, jsNumber(0), DontDelete
| DontEnum
);
// Textual form: starts as "/" + source + "/"; the three checks that follow
// test the global, ignoreCase and multiline properties in that order.
133 UString result
= "/" + thisObj
->get(exec
, exec
->propertyNames().source
)->toString(exec
) + "/";
134 if (thisObj
->get(exec
, exec
->propertyNames().global
)->toBoolean(exec
)) {
137 if (thisObj
->get(exec
, exec
->propertyNames().ignoreCase
)->toBoolean(exec
)) {
140 if (thisObj
->get(exec
, exec
->propertyNames().multiline
)->toBoolean(exec
)) {
143 return jsString(result
);
145 case Compile
: { // JS1.2 legacy, but still in use in the wild somewhat
146 RegExpImp
* instance
= static_cast<RegExpImp
*>(thisObj
);
// Build a replacement engine from the pattern (args[0]) and flags (args[1]).
147 RegExp
* newEngine
= RegExpObjectImp::makeEngine(exec
, args
[0]->toString(exec
), args
[1]);
// NOTE(review): presumably taken only when makeEngine failed - returns the
// pending exception object.
149 return exec
->exception();
// Install the new engine on this instance.
150 instance
->setRegExp(exec
, newEngine
);
155 return jsUndefined();
158 // ------------------------------ RegExpImp ------------------------------------
// Class metadata for RegExp instances; the class name is "RegExp". This is the
// info object that inherits() checks elsewhere in this file compare against.
160 const ClassInfo
RegExpImp::info
= {"RegExp", 0, 0, 0};
// Creates a RegExp instance. regexpProto is forwarded to the JSObject base and
// the engine pointer `reg` starts out null (an engine is attached later via
// setRegExp).
162 RegExpImp::RegExpImp(RegExpPrototype
*regexpProto
)
163 : JSObject(regexpProto
), reg(0L)
// Destructor for RegExp instances.
167 RegExpImp::~RegExpImp()
// Attaches engine `r` to this instance and publishes its state as properties:
//   global, ignoreCase, multiline - booleans derived from r->flags()
//   source                        - r->pattern()
//   lastIndex                     - initialised to 0
// The first four are DontDelete | ReadOnly | DontEnum; lastIndex omits
// ReadOnly so that matching code can update it.
172 void RegExpImp::setRegExp(ExecState
* exec
, RegExp
* r
)
177 putDirect(exec
->propertyNames().global
, jsBoolean(r
->flags() & RegExp::Global
), DontDelete
| ReadOnly
| DontEnum
);
178 putDirect(exec
->propertyNames().ignoreCase
, jsBoolean(r
->flags() & RegExp::IgnoreCase
), DontDelete
| ReadOnly
| DontEnum
);
179 putDirect(exec
->propertyNames().multiline
, jsBoolean(r
->flags() & RegExp::Multiline
), DontDelete
| ReadOnly
| DontEnum
);
181 putDirect(exec
->propertyNames().source
, jsString(r
->pattern()), DontDelete
| ReadOnly
| DontEnum
);
182 putDirect(exec
->propertyNames().lastIndex
, jsNumber(0), DontDelete
| DontEnum
);
185 // ------------------------------ RegExpObjectImp ------------------------------
// Class metadata for the RegExp constructor object: class name "Function",
// a reference to InternalFunctionImp::info (presumably the parent class info),
// and RegExpTable as its static property table.
187 const ClassInfo
RegExpObjectImp::info
= {"Function", &InternalFunctionImp::info
, &RegExpTable
, 0};
189 /* Source for regexp_object.lut.h
190 @begin RegExpTable 20
191 input RegExpObjectImp::Input None
192 $_ RegExpObjectImp::Input DontEnum
193 multiline RegExpObjectImp::Multiline None
194 $* RegExpObjectImp::Multiline DontEnum
195 lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly
196 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum
197 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly
198 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum
199 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly
200 $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum
201 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly
202 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum
203 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly
204 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly
205 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly
206 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly
207 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly
208 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly
209 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly
210 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly
211 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly
// Private state of the RegExp constructor object. It remembers the most recent
// match so that the static properties (lastMatch, lastParen, leftContext,
// rightContext, $1..$9) can be computed on demand.
215 struct KJS::RegExpObjectImpPrivate
{
216 // Global search cache / settings
// Starts with an empty lastInput, no sub-patterns and multiline off.
217 RegExpObjectImpPrivate() : lastInput(""), lastNumSubPatterns(0), multiline(false) { }
// Offset vector of the last match; the accessors in this file read entry 2*i
// as the start and 2*i+1 as the end of group i (group 0 is the whole match).
219 OwnArrayPtr
<int> lastOvector
;
// Number of sub-patterns of the regexp used for the last match (31-bit field).
220 unsigned lastNumSubPatterns
: 31;
// Creates the RegExp constructor object.
//   funcProto - forwarded to the InternalFunctionImp base.
//   regProto  - installed as the constructor's `prototype` property.
// Also allocates the private match-cache state `d`.
224 RegExpObjectImp::RegExpObjectImp(ExecState
* exec
,
225 FunctionPrototype
*funcProto
,
226 RegExpPrototype
*regProto
)
228 : InternalFunctionImp(funcProto
),
229 d(new RegExpObjectImpPrivate
)
231 // ECMA 15.10.5.1 RegExp.prototype
232 putDirect(exec
->propertyNames().prototype
, regProto
, DontEnum
| DontDelete
| ReadOnly
);
234 // no. of arguments for constructor
235 putDirect(exec
->propertyNames().length
, jsNumber(2), ReadOnly
| DontDelete
| DontEnum
);
// Raises a RangeError on `exec` reporting that the regexp match ran out of
// resources.
238 void RegExpObjectImp::throwRegExpError(ExecState
* exec
)
240 throwError(exec
, RangeError
, "Resource exhaustion trying to perform regexp match.");
244 To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular
245 expression matching through the performMatch function. We use cached results to calculate,
246 e.g., RegExp.lastMatch and RegExp.lastParen.
// Runs regexp `r` against `s` starting at `startOffset` and returns the matched
// text (a null UString when nothing matched).
//   endOffset - receives the offset reported by RegExp::match. Despite its
//               name, the caller in this file treats it as the index where the
//               match starts (it adds the match length to it).
//   ovector   - receives the offset vector produced by the match.
// A non-null match is also recorded in the private cache `d`.
248 UString
RegExpObjectImp::performMatch(RegExp
* r
, ExecState
* exec
, const UString
& s
,
249 int startOffset
, int *endOffset
, int **ovector
)
254 UString match
= r
->match(s
, &error
, startOffset
, &tmpOffset
, &tmpOvector
);
// NOTE(review): presumably reached only when `error` was set by match().
258 throwRegExpError(exec
);
// Hand the results back through the out-parameters.
263 *endOffset
= tmpOffset
;
265 *ovector
= tmpOvector
;
// Only a successful match replaces the cached offset vector and sub-pattern
// count.
267 if (!match
.isNull()) {
271 d
->lastOvector
.set(tmpOvector
);
272 d
->lastNumSubPatterns
= r
->subPatterns();
// Builds the result array for a match from the cached match state: element 0
// is `result`, followed by one entry per sub-pattern (either the captured
// substring of lastInput or undefined). The array additionally gets an `index`
// property (start of the whole match) and an `input` property (lastInput).
278 JSObject
*RegExpObjectImp::arrayOfMatches(ExecState
*exec
, const UString
&result
) const
281 // The returned array contains 'result' as first item, followed by the list of matches
282 list
.append(jsString(result
));
// Sub-patterns are numbered from 1; group i occupies ovector[2*i] (start) and
// ovector[2*i+1] (end).
283 if ( d
->lastOvector
)
284 for ( int i
= 1 ; i
< d
->lastNumSubPatterns
+ 1 ; ++i
)
286 int start
= d
->lastOvector
[2*i
];
// NOTE(review): presumably for a group that did not take part in the match.
288 list
.append(jsUndefined());
290 UString substring
= d
->lastInput
.substr( start
, d
->lastOvector
[2*i
+1] - start
);
291 list
.append(jsString(substring
));
// Construct the JS array through the interpreter's builtin Array constructor.
294 JSObject
*arr
= exec
->lexicalInterpreter()->builtinArray()->construct(exec
, list
);
295 arr
->put(exec
, exec
->propertyNames().index
, jsNumber(d
->lastOvector
[0]));
296 arr
->put(exec
, exec
->propertyNames().input
, jsString(d
->lastInput
));
// Returns capture group `i` of the cached last match as a string, provided a
// match is cached and `i` does not exceed the cached sub-pattern count.
300 JSValue
*RegExpObjectImp::getBackref(int i
) const
302 if (d
->lastOvector
&& i
< int(d
->lastNumSubPatterns
+ 1)) {
// Slice lastInput from ovector[2*i] with length ovector[2*i+1] - ovector[2*i].
303 UString substring
= d
->lastInput
.substr(d
->lastOvector
[2*i
], d
->lastOvector
[2*i
+1] - d
->lastOvector
[2*i
] );
304 return jsString(substring
);
// Returns the text of the whole last match (ovector entries 0 and 1 applied to
// lastInput) when a match is cached.
310 JSValue
*RegExpObjectImp::getLastMatch() const
312 if (d
->lastOvector
) {
313 UString substring
= d
->lastInput
.substr(d
->lastOvector
[0], d
->lastOvector
[1] - d
->lastOvector
[0]);
314 return jsString(substring
);
// Returns the text captured by the highest-numbered sub-pattern of the last
// match, i.e. group number lastNumSubPatterns.
320 JSValue
*RegExpObjectImp::getLastParen() const
322 int i
= d
->lastNumSubPatterns
;
// The offset vector is expected to exist at this point.
324 ASSERT(d
->lastOvector
);
325 UString substring
= d
->lastInput
.substr(d
->lastOvector
[2*i
], d
->lastOvector
[2*i
+1] - d
->lastOvector
[2*i
]);
326 return jsString(substring
);
// Returns the part of lastInput that precedes the last match (everything up to
// ovector[0]) when a match is cached.
332 JSValue
*RegExpObjectImp::getLeftContext() const
334 if (d
->lastOvector
) {
335 UString substring
= d
->lastInput
.substr(0, d
->lastOvector
[0]);
336 return jsString(substring
);
// Returns the part of lastInput that follows the last match (from ovector[1]
// to the end of the string) when a match is cached.
342 JSValue
*RegExpObjectImp::getRightContext() const
344 if (d
->lastOvector
) {
345 UString s
= d
->lastInput
;
346 UString substring
= s
.substr(d
->lastOvector
[1], s
.size() - d
->lastOvector
[1]);
347 return jsString(substring
);
// Property lookup for the RegExp constructor. Delegates to getStaticValueSlot,
// passing RegExpTable as the static property table and InternalFunctionImp as
// the parent type.
353 bool RegExpObjectImp::getOwnPropertySlot(ExecState
*exec
, const Identifier
& propertyName
, PropertySlot
& slot
)
355 return getStaticValueSlot
<RegExpObjectImp
, InternalFunctionImp
>(exec
, &RegExpTable
, this, propertyName
, slot
);
// Produces the value of a static RegExp property identified by `token`.
// The returns below cover, in order: back-references 1 through 9, the cached
// input string, the multiline setting, and the last-match / last-paren /
// left-context / right-context accessors.
// NOTE(review): presumably one return per RegExpTable token (Dollar1..Dollar9,
// Input, Multiline, LastMatch, LastParen, LeftContext, RightContext) - confirm
// against the case labels.
358 JSValue
*RegExpObjectImp::getValueProperty(ExecState
*, int token
) const
362 return getBackref(1);
364 return getBackref(2);
366 return getBackref(3);
368 return getBackref(4);
370 return getBackref(5);
372 return getBackref(6);
374 return getBackref(7);
376 return getBackref(8);
378 return getBackref(9);
380 return jsString(d
->lastInput
);
382 return jsBoolean(d
->multiline
);
384 return getLastMatch();
386 return getLastParen();
388 return getLeftContext();
390 return getRightContext();
// Property assignment on the RegExp constructor. Delegates to lookupPut with
// RegExpTable as the static property table and InternalFunctionImp as the
// parent type.
398 void RegExpObjectImp::put(ExecState
*exec
, const Identifier
&propertyName
, JSValue
*value
, int attr
)
400 lookupPut
<RegExpObjectImp
, InternalFunctionImp
>(exec
, propertyName
, value
, attr
, &RegExpTable
, this);
// Stores a value into one of the writable static RegExp properties selected by
// `token`: the cached input string (converted with toString) or the multiline
// setting (converted with toBoolean). The attribute argument is unused.
403 void RegExpObjectImp::putValueProperty(ExecState
*exec
, int token
, JSValue
*value
, int /*attr*/)
407 d
->lastInput
= value
->toString(exec
);
410 d
->multiline
= value
->toBoolean(exec
);
// NOTE(review): presumably reports whether construct() is supported by this
// object - confirm against the function body.
417 bool RegExpObjectImp::implementsConstruct() const
// Creates a RegExp engine for pattern `p`.
//   flagsInput - flag string as a JS value; undefined is treated as "".
// Raises a SyntaxError on `exec` for invalid flags or for a pattern the engine
// reports as invalid. Callers in this file check exec for a pending exception
// after calling this function.
422 RegExp
* RegExpObjectImp::makeEngine(ExecState
*exec
, const UString
&p
, JSValue
*flagsInput
)
424 UString flags
= flagsInput
->isUndefined() ? UString("") : flagsInput
->toString(exec
);
426 // Check for validity of flags
427 for (int pos
= 0; pos
< flags
.size(); ++pos
) {
428 switch (flags
[pos
].unicode()) {
434 throwError(exec
, SyntaxError
,
435 "Invalid regular expression flags", 1, -1, "<regexp>");
// Detect each supported flag letter in the flag string.
441 bool global
= (flags
.find("g") >= 0);
442 bool ignoreCase
= (flags
.find("i") >= 0);
443 bool multiline
= (flags
.find("m") >= 0);
// Translate the detected letters into the engine's flag bits.
445 int reflags
= RegExp::None
;
447 reflags
|= RegExp::Global
;
449 reflags
|= RegExp::IgnoreCase
;
451 reflags
|= RegExp::Multiline
;
// The engine reports pattern errors through isValid().
453 RegExp
*re
= new RegExp(p
, reflags
);
454 if (!re
->isValid()) {
455 throwError(exec
, SyntaxError
,
456 "Invalid regular expression", 1, -1, "<regexp>");
// Implements `new RegExp(pattern, flags)`.
//   args[0] - the pattern; undefined is treated as "". When it is already a
//             RegExp object and flags are also supplied, a TypeError is thrown.
//   args[1] - the flags, passed on to makeEngine.
// On success a new RegExpImp is created with the interpreter's builtin RegExp
// prototype and the freshly built engine attached.
465 JSObject
*RegExpObjectImp::construct(ExecState
*exec
, const List
&args
)
// Special case: the first argument is itself a RegExp object.
467 JSObject
*o
= args
[0]->getObject();
468 if (o
&& o
->inherits(&RegExpImp::info
)) {
469 if (!args
[1]->isUndefined())
470 return throwError(exec
, TypeError
);
474 UString p
= args
[0]->isUndefined() ? UString("") : args
[0]->toString(exec
);
476 RegExp
* re
= makeEngine(exec
, p
, args
[1]);
// NOTE(review): presumably taken only when makeEngine failed - returns the
// pending exception converted to an object.
478 return exec
->exception()->toObject(exec
);
481 RegExpPrototype
*proto
= static_cast<RegExpPrototype
*>(exec
->lexicalInterpreter()->builtinRegExpPrototype());
482 RegExpImp
*dat
= new RegExpImp(proto
);
484 dat
->setRegExp(exec
, re
);
// Implements calling RegExp as a plain function (without `new`); the visible
// path forwards the arguments to construct(). The `this` object is unused.
490 JSValue
*RegExpObjectImp::callAsFunction(ExecState
*exec
, JSObject
* /*thisObj*/, const List
&args
)
492 // The RegExp argument case is handled by construct()
494 return construct(exec
, args
);