properly compute width of containing block of inline elements
[kdelibs.git] / kjs / regexp_object.cpp
blobaa6b0114856c0a239f180f1153f6b3c47648862a
1 // -*- c-basic-offset: 2 -*-
2 // krazy:excludeall=doublequote_chars (UStrings aren't QStrings)
3 /*
4 * This file is part of the KDE libraries
5 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
6 * Copyright (C) 2003 Apple Computer, Inc.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "regexp_object.h"
25 #include <config.h>
27 #include "regexp_object.lut.h"
29 #include <stdio.h>
30 #include "value.h"
31 #include "object.h"
32 #include "types.h"
33 #include "nodes.h"
34 #include "interpreter.h"
35 #include "operations.h"
36 #include "internal.h"
37 #include "regexp.h"
38 #include "error_object.h"
39 #include "lookup.h"
41 using namespace KJS;
43 // ------------------------------ RegExpPrototype ---------------------------
45 // ECMA 15.10.5
47 const ClassInfo RegExpPrototype::info = {"RegExp", 0, 0, 0};
49 RegExpPrototype::RegExpPrototype(ExecState *exec,
50 ObjectPrototype *objProto,
51 FunctionPrototype *funcProto)
52 : JSObject(objProto)
54 static const Identifier* execPropertyName = new Identifier("exec");
55 static const Identifier* testPropertyName = new Identifier("test");
56 static const Identifier* compilePropertyName = new Identifier("compile");
58 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Exec, 0, *execPropertyName), DontEnum);
59 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Test, 0, *testPropertyName), DontEnum);
60 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::ToString, 0, exec->propertyNames().toString), DontEnum);
61 putDirectFunction(new RegExpProtoFunc(exec, funcProto, RegExpProtoFunc::Compile, 1, *compilePropertyName), DontEnum);
64 // ------------------------------ RegExpProtoFunc ---------------------------
66 RegExpProtoFunc::RegExpProtoFunc(ExecState* exec, FunctionPrototype* funcProto, int i, int len, const Identifier& name)
67 : InternalFunctionImp(funcProto, name), id(i)
69 putDirect(exec->propertyNames().length, len, DontDelete | ReadOnly | DontEnum);
72 JSValue *RegExpProtoFunc::callAsFunction(ExecState *exec, JSObject *thisObj, const List &args)
74 if (!thisObj->inherits(&RegExpImp::info)) {
75 if (thisObj->inherits(&RegExpPrototype::info)) {
76 switch (id) {
77 case ToString: return jsString("//");
81 return throwError(exec, TypeError);
84 switch (id) {
85 case Test: // 15.10.6.2
86 case Exec:
88 RegExp *regExp = static_cast<RegExpImp*>(thisObj)->regExp();
89 RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp());
91 UString input;
92 if (args.isEmpty())
93 input = regExpObj->get(exec, exec->propertyNames().input)->toString(exec);
94 else
95 input = args[0]->toString(exec);
97 double lastIndex = thisObj->get(exec, exec->propertyNames().lastIndex)->toInteger(exec);
99 bool globalFlag = thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec);
100 if (!globalFlag)
101 lastIndex = 0;
102 if (lastIndex < 0 || lastIndex > input.size()) {
103 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
104 return jsNull();
107 int foundIndex;
108 regExp->prepareMatch(input);
109 UString match = regExpObj->performMatch(regExp, exec, input, static_cast<int>(lastIndex), &foundIndex);
110 regExp->doneMatch();
111 if (exec->hadException())
112 return jsUndefined();
114 bool didMatch = !match.isNull();
116 // Test
117 if (id == Test)
118 return jsBoolean(didMatch);
120 // Exec
121 if (didMatch) {
122 if (globalFlag)
123 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(foundIndex + match.size()), DontDelete | DontEnum);
124 return regExpObj->arrayOfMatches(exec, match);
125 } else {
126 if (globalFlag)
127 thisObj->put(exec, exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
128 return jsNull();
131 break;
132 case ToString: {
133 UString result = "/" + thisObj->get(exec, exec->propertyNames().source)->toString(exec) + "/";
134 if (thisObj->get(exec, exec->propertyNames().global)->toBoolean(exec)) {
135 result += "g";
137 if (thisObj->get(exec, exec->propertyNames().ignoreCase)->toBoolean(exec)) {
138 result += "i";
140 if (thisObj->get(exec, exec->propertyNames().multiline)->toBoolean(exec)) {
141 result += "m";
143 return jsString(result);
145 case Compile: { // JS1.2 legacy, but still in use in the wild somewhat
146 RegExpImp* instance = static_cast<RegExpImp*>(thisObj);
147 RegExp* newEngine = RegExpObjectImp::makeEngine(exec, args[0]->toString(exec), args[1]);
148 if (!newEngine)
149 return exec->exception();
150 instance->setRegExp(exec, newEngine);
151 return instance;
155 return jsUndefined();
158 // ------------------------------ RegExpImp ------------------------------------
160 const ClassInfo RegExpImp::info = {"RegExp", 0, 0, 0};
162 RegExpImp::RegExpImp(RegExpPrototype *regexpProto)
163 : JSObject(regexpProto), reg(0L)
167 RegExpImp::~RegExpImp()
169 delete reg;
172 void RegExpImp::setRegExp(ExecState* exec, RegExp* r)
174 delete reg;
175 reg = r;
177 putDirect(exec->propertyNames().global, jsBoolean(r->flags() & RegExp::Global), DontDelete | ReadOnly | DontEnum);
178 putDirect(exec->propertyNames().ignoreCase, jsBoolean(r->flags() & RegExp::IgnoreCase), DontDelete | ReadOnly | DontEnum);
179 putDirect(exec->propertyNames().multiline, jsBoolean(r->flags() & RegExp::Multiline), DontDelete | ReadOnly | DontEnum);
181 putDirect(exec->propertyNames().source, jsString(r->pattern()), DontDelete | ReadOnly | DontEnum);
182 putDirect(exec->propertyNames().lastIndex, jsNumber(0), DontDelete | DontEnum);
185 // ------------------------------ RegExpObjectImp ------------------------------
187 const ClassInfo RegExpObjectImp::info = {"Function", &InternalFunctionImp::info, &RegExpTable, 0};
189 /* Source for regexp_object.lut.h
190 @begin RegExpTable 20
191 input RegExpObjectImp::Input None
192 $_ RegExpObjectImp::Input DontEnum
193 multiline RegExpObjectImp::Multiline None
194 $* RegExpObjectImp::Multiline DontEnum
195 lastMatch RegExpObjectImp::LastMatch DontDelete|ReadOnly
196 $& RegExpObjectImp::LastMatch DontDelete|ReadOnly|DontEnum
197 lastParen RegExpObjectImp::LastParen DontDelete|ReadOnly
198 $+ RegExpObjectImp::LastParen DontDelete|ReadOnly|DontEnum
199 leftContext RegExpObjectImp::LeftContext DontDelete|ReadOnly
200 $` RegExpObjectImp::LeftContext DontDelete|ReadOnly|DontEnum
201 rightContext RegExpObjectImp::RightContext DontDelete|ReadOnly
202 $' RegExpObjectImp::RightContext DontDelete|ReadOnly|DontEnum
203 $1 RegExpObjectImp::Dollar1 DontDelete|ReadOnly
204 $2 RegExpObjectImp::Dollar2 DontDelete|ReadOnly
205 $3 RegExpObjectImp::Dollar3 DontDelete|ReadOnly
206 $4 RegExpObjectImp::Dollar4 DontDelete|ReadOnly
207 $5 RegExpObjectImp::Dollar5 DontDelete|ReadOnly
208 $6 RegExpObjectImp::Dollar6 DontDelete|ReadOnly
209 $7 RegExpObjectImp::Dollar7 DontDelete|ReadOnly
210 $8 RegExpObjectImp::Dollar8 DontDelete|ReadOnly
211 $9 RegExpObjectImp::Dollar9 DontDelete|ReadOnly
212 @end
215 struct KJS::RegExpObjectImpPrivate {
216 // Global search cache / settings
217 RegExpObjectImpPrivate() : lastInput(""), lastNumSubPatterns(0), multiline(false) { }
218 UString lastInput;
219 OwnArrayPtr<int> lastOvector;
220 unsigned lastNumSubPatterns : 31;
221 bool multiline : 1;
224 RegExpObjectImp::RegExpObjectImp(ExecState* exec,
225 FunctionPrototype *funcProto,
226 RegExpPrototype *regProto)
228 : InternalFunctionImp(funcProto),
229 d(new RegExpObjectImpPrivate)
231 // ECMA 15.10.5.1 RegExp.prototype
232 putDirect(exec->propertyNames().prototype, regProto, DontEnum | DontDelete | ReadOnly);
234 // no. of arguments for constructor
235 putDirect(exec->propertyNames().length, jsNumber(2), ReadOnly | DontDelete | DontEnum);
238 void RegExpObjectImp::throwRegExpError(ExecState* exec)
240 throwError(exec, RangeError, "Resource exhaustion trying to perform regexp match.");
/*
  To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular
  expression matching through the performMatch function. We use cached results to calculate,
  e.g., RegExp.lastMatch and RegExp.lastParen.
*/
248 UString RegExpObjectImp::performMatch(RegExp* r, ExecState* exec, const UString& s,
249 int startOffset, int *endOffset, int **ovector)
251 int tmpOffset;
252 int *tmpOvector;
253 bool error = false;
254 UString match = r->match(s, &error, startOffset, &tmpOffset, &tmpOvector);
255 if (error) {
256 if (endOffset)
257 *endOffset = -1;
258 throwRegExpError(exec);
259 return match;
262 if (endOffset)
263 *endOffset = tmpOffset;
264 if (ovector)
265 *ovector = tmpOvector;
267 if (!match.isNull()) {
268 ASSERT(tmpOvector);
270 d->lastInput = s;
271 d->lastOvector.set(tmpOvector);
272 d->lastNumSubPatterns = r->subPatterns();
275 return match;
278 JSObject *RegExpObjectImp::arrayOfMatches(ExecState *exec, const UString &result) const
280 List list;
281 // The returned array contains 'result' as first item, followed by the list of matches
282 list.append(jsString(result));
283 if ( d->lastOvector )
284 for ( int i = 1 ; i < d->lastNumSubPatterns + 1 ; ++i )
286 int start = d->lastOvector[2*i];
287 if (start == -1)
288 list.append(jsUndefined());
289 else {
290 UString substring = d->lastInput.substr( start, d->lastOvector[2*i+1] - start );
291 list.append(jsString(substring));
294 JSObject *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list);
295 arr->put(exec, exec->propertyNames().index, jsNumber(d->lastOvector[0]));
296 arr->put(exec, exec->propertyNames().input, jsString(d->lastInput));
297 return arr;
300 JSValue *RegExpObjectImp::getBackref(int i) const
302 if (d->lastOvector && i < int(d->lastNumSubPatterns + 1)) {
303 UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i] );
304 return jsString(substring);
307 return jsString("");
310 JSValue *RegExpObjectImp::getLastMatch() const
312 if (d->lastOvector) {
313 UString substring = d->lastInput.substr(d->lastOvector[0], d->lastOvector[1] - d->lastOvector[0]);
314 return jsString(substring);
317 return jsString("");
320 JSValue *RegExpObjectImp::getLastParen() const
322 int i = d->lastNumSubPatterns;
323 if (i > 0) {
324 ASSERT(d->lastOvector);
325 UString substring = d->lastInput.substr(d->lastOvector[2*i], d->lastOvector[2*i+1] - d->lastOvector[2*i]);
326 return jsString(substring);
329 return jsString("");
332 JSValue *RegExpObjectImp::getLeftContext() const
334 if (d->lastOvector) {
335 UString substring = d->lastInput.substr(0, d->lastOvector[0]);
336 return jsString(substring);
339 return jsString("");
342 JSValue *RegExpObjectImp::getRightContext() const
344 if (d->lastOvector) {
345 UString s = d->lastInput;
346 UString substring = s.substr(d->lastOvector[1], s.size() - d->lastOvector[1]);
347 return jsString(substring);
350 return jsString("");
353 bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot)
355 return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot);
358 JSValue *RegExpObjectImp::getValueProperty(ExecState*, int token) const
360 switch (token) {
361 case Dollar1:
362 return getBackref(1);
363 case Dollar2:
364 return getBackref(2);
365 case Dollar3:
366 return getBackref(3);
367 case Dollar4:
368 return getBackref(4);
369 case Dollar5:
370 return getBackref(5);
371 case Dollar6:
372 return getBackref(6);
373 case Dollar7:
374 return getBackref(7);
375 case Dollar8:
376 return getBackref(8);
377 case Dollar9:
378 return getBackref(9);
379 case Input:
380 return jsString(d->lastInput);
381 case Multiline:
382 return jsBoolean(d->multiline);
383 case LastMatch:
384 return getLastMatch();
385 case LastParen:
386 return getLastParen();
387 case LeftContext:
388 return getLeftContext();
389 case RightContext:
390 return getRightContext();
391 default:
392 ASSERT(0);
395 return jsString("");
398 void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, JSValue *value, int attr)
400 lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this);
403 void RegExpObjectImp::putValueProperty(ExecState *exec, int token, JSValue *value, int /*attr*/)
405 switch (token) {
406 case Input:
407 d->lastInput = value->toString(exec);
408 break;
409 case Multiline:
410 d->multiline = value->toBoolean(exec);
411 break;
412 default:
413 ASSERT(0);
417 bool RegExpObjectImp::implementsConstruct() const
419 return true;
422 RegExp* RegExpObjectImp::makeEngine(ExecState *exec, const UString &p, JSValue *flagsInput)
424 UString flags = flagsInput->isUndefined() ? UString("") : flagsInput->toString(exec);
426 // Check for validity of flags
427 for (int pos = 0; pos < flags.size(); ++pos) {
428 switch (flags[pos].unicode()) {
429 case 'g':
430 case 'i':
431 case 'm':
432 break;
433 default: {
434 throwError(exec, SyntaxError,
435 "Invalid regular expression flags", 1, -1, "<regexp>");
436 return 0;
441 bool global = (flags.find("g") >= 0);
442 bool ignoreCase = (flags.find("i") >= 0);
443 bool multiline = (flags.find("m") >= 0);
445 int reflags = RegExp::None;
446 if (global)
447 reflags |= RegExp::Global;
448 if (ignoreCase)
449 reflags |= RegExp::IgnoreCase;
450 if (multiline)
451 reflags |= RegExp::Multiline;
453 RegExp *re = new RegExp(p, reflags);
454 if (!re->isValid()) {
455 throwError(exec, SyntaxError,
456 "Invalid regular expression", 1, -1, "<regexp>");
457 delete re;
458 return 0;
460 return re;
464 // ECMA 15.10.4
465 JSObject *RegExpObjectImp::construct(ExecState *exec, const List &args)
467 JSObject *o = args[0]->getObject();
468 if (o && o->inherits(&RegExpImp::info)) {
469 if (!args[1]->isUndefined())
470 return throwError(exec, TypeError);
471 return o;
474 UString p = args[0]->isUndefined() ? UString("") : args[0]->toString(exec);
476 RegExp* re = makeEngine(exec, p, args[1]);
477 if (!re)
478 return exec->exception()->toObject(exec);
481 RegExpPrototype *proto = static_cast<RegExpPrototype*>(exec->lexicalInterpreter()->builtinRegExpPrototype());
482 RegExpImp *dat = new RegExpImp(proto);
484 dat->setRegExp(exec, re);
486 return dat;
489 // ECMA 15.10.3
490 JSValue *RegExpObjectImp::callAsFunction(ExecState *exec, JSObject * /*thisObj*/, const List &args)
492 // The RegExp argument case is handled by construct()
494 return construct(exec, args);