1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * The compiled representation of a RegExp, potentially shared among RegExp
9 * instances created during separate evaluations of a single RegExp literal in
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
19 #include "gc/Barrier.h"
20 #include "gc/Policy.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
38 class VectorMatchPairs
;
40 using RootedRegExpShared
= JS::Rooted
<RegExpShared
*>;
41 using HandleRegExpShared
= JS::Handle
<RegExpShared
*>;
42 using MutableHandleRegExpShared
= JS::MutableHandle
<RegExpShared
*>;
// Status code returned by the regexp execution entry points declared in this
// header (executeAtom, execute, ExecuteRegExpAtomRaw). The underlying type is
// fixed to int32_t and each enumerator has an explicit value.
enum RegExpRunStatus : int32_t {
  RegExpRunStatus_Error = -1,
  RegExpRunStatus_Success = 1,
  RegExpRunStatus_Success_NotFound = 0,
};
50 inline bool IsNativeRegExpEnabled() {
51 return jit::HasJitBackend() && jit::JitOptions
.nativeRegExp
;
/**
 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
 * table so that they can be reused when compiling the same regex string.
 *
 * To save memory, a RegExpShared is not created for a RegExpObject until it is
 * needed for execution. When a RegExpShared needs to be created, it is looked
 * up in a per-zone table (see RegExpZone) to allow reuse between objects.
 *
 * During a GC, RegExpShared instances are marked and swept like GC things.
 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
 * than explicitly tracing them, so that the RegExpShared and any jitcode can
 * be reclaimed more quickly. However, the RegExpShareds are traced through by
 * objects when we are preserving jitcode in their zone, to avoid the same
 * recompilation inefficiencies as normal Ion and baseline compilation.
 */
72 : public gc::CellWithTenuredGCPointer
<gc::TenuredCell
, JSAtom
> {
73 friend class js::gc::CellAllocator
;
// How the regexp is matched. The kind_ member of RegExpShared is initialized
// to Unparsed.
enum class Kind : uint32_t { Unparsed, Atom, RegExp };

// Selects which compiled form a query is about (bytecode, JIT code, or any).
enum class CodeKind { Bytecode, Jitcode, Any };
79 using ByteCode
= js::irregexp::ByteArrayData
;
80 using JitCodeTable
= js::irregexp::ByteArray
;
81 using JitCodeTables
= Vector
<JitCodeTable
, 0, SystemAllocPolicy
>;
84 friend class RegExpStatics
;
85 friend class RegExpZone
;
87 struct RegExpCompilation
{
88 HeapPtr
<jit::JitCode
*> jitCode
;
89 ByteCode
* byteCode
= nullptr;
91 bool compiled(CodeKind kind
= CodeKind::Any
) const {
93 case CodeKind::Bytecode
:
95 case CodeKind::Jitcode
:
98 return !!byteCode
|| !!jitCode
;
100 MOZ_CRASH("Unreachable");
103 size_t byteCodeLength() const {
104 MOZ_ASSERT(byteCode
);
105 return byteCode
->length
;
110 /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
111 JSAtom
* getSource() const { return headerPtr(); }
114 RegExpCompilation compilationArray
[2];
117 JS::RegExpFlags flags
;
119 RegExpShared::Kind kind_
= Kind::Unparsed
;
120 GCPtr
<JSAtom
*> patternAtom_
;
121 uint32_t maxRegisters_
= 0;
124 uint32_t numNamedCaptures_
= {};
125 uint32_t* namedCaptureIndices_
= {};
126 GCPtr
<PlainObject
*> groupsTemplate_
= {};
// Maps an input encoding to a slot in compilationArray: slot 0 when latin1 is
// true, slot 1 otherwise.
static int CompilationIndex(bool latin1) {
  return latin1 ? 0 : 1;
}
130 // Tables referenced by JIT code.
131 JitCodeTables tables
;
133 /* Internal functions. */
134 RegExpShared(JSAtom
* source
, JS::RegExpFlags flags
);
136 const RegExpCompilation
& compilation(bool latin1
) const {
137 return compilationArray
[CompilationIndex(latin1
)];
140 RegExpCompilation
& compilation(bool latin1
) {
141 return compilationArray
[CompilationIndex(latin1
)];
145 ~RegExpShared() = delete;
147 static bool compileIfNecessary(JSContext
* cx
, MutableHandleRegExpShared res
,
148 Handle
<JSLinearString
*> input
, CodeKind code
);
150 static RegExpRunStatus
executeAtom(MutableHandleRegExpShared re
,
151 Handle
<JSLinearString
*> input
,
152 size_t start
, VectorMatchPairs
* matches
);
154 // Execute this RegExp on input starting from searchIndex, filling in matches.
155 static RegExpRunStatus
execute(JSContext
* cx
, MutableHandleRegExpShared res
,
156 Handle
<JSLinearString
*> input
,
157 size_t searchIndex
, VectorMatchPairs
* matches
);
159 // Register a table with this RegExpShared, and take ownership.
160 bool addTable(JitCodeTable table
) { return tables
.append(std::move(table
)); }
164 size_t pairCount() const {
165 MOZ_ASSERT(kind() != Kind::Unparsed
);
169 RegExpShared::Kind
kind() const { return kind_
; }
171 // Use simple string matching for this regexp.
172 void useAtomMatch(Handle
<JSAtom
*> pattern
);
174 // Use the regular expression engine for this regexp.
175 void useRegExpMatch(size_t parenCount
);
177 static void InitializeNamedCaptures(JSContext
* cx
, HandleRegExpShared re
,
178 uint32_t numNamedCaptures
,
179 Handle
<PlainObject
*> templateObject
,
180 uint32_t* captureIndices
);
181 PlainObject
* getGroupsTemplate() { return groupsTemplate_
; }
184 bool markedForTierUp() const;
186 void setByteCode(ByteCode
* code
, bool latin1
) {
187 compilation(latin1
).byteCode
= code
;
189 ByteCode
* getByteCode(bool latin1
) const {
190 return compilation(latin1
).byteCode
;
192 void setJitCode(jit::JitCode
* code
, bool latin1
) {
193 compilation(latin1
).jitCode
= code
;
195 jit::JitCode
* getJitCode(bool latin1
) const {
196 return compilation(latin1
).jitCode
;
198 uint32_t getMaxRegisters() const { return maxRegisters_
; }
199 void updateMaxRegisters(uint32_t numRegisters
) {
200 maxRegisters_
= std::max(maxRegisters_
, numRegisters
);
203 uint32_t numNamedCaptures() const { return numNamedCaptures_
; }
204 int32_t getNamedCaptureIndex(uint32_t idx
) const {
205 MOZ_ASSERT(idx
< numNamedCaptures());
206 MOZ_ASSERT(namedCaptureIndices_
);
207 return namedCaptureIndices_
[idx
];
210 JSAtom
* patternAtom() const { return patternAtom_
; }
212 JS::RegExpFlags
getFlags() const { return flags
; }
214 bool hasIndices() const { return flags
.hasIndices(); }
215 bool global() const { return flags
.global(); }
216 bool ignoreCase() const { return flags
.ignoreCase(); }
217 bool multiline() const { return flags
.multiline(); }
218 bool dotAll() const { return flags
.dotAll(); }
219 bool unicode() const { return flags
.unicode(); }
220 bool unicodeSets() const { return flags
.unicodeSets(); }
221 bool sticky() const { return flags
.sticky(); }
223 bool isCompiled(bool latin1
, CodeKind codeKind
= CodeKind::Any
) const {
224 return compilation(latin1
).compiled(codeKind
);
226 bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
228 void traceChildren(JSTracer
* trc
);
229 void discardJitCode();
230 void finalize(JS::GCContext
* gcx
);
232 static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
234 static size_t offsetOfPatternAtom() {
235 return offsetof(RegExpShared
, patternAtom_
);
238 static size_t offsetOfFlags() { return offsetof(RegExpShared
, flags
); }
240 static size_t offsetOfPairCount() {
241 return offsetof(RegExpShared
, pairCount_
);
244 static size_t offsetOfKind() { return offsetof(RegExpShared
, kind_
); }
246 static size_t offsetOfJitCode(bool latin1
) {
247 return offsetof(RegExpShared
, compilationArray
) +
248 (CompilationIndex(latin1
) * sizeof(RegExpCompilation
)) +
249 offsetof(RegExpCompilation
, jitCode
);
252 static size_t offsetOfGroupsTemplate() {
253 return offsetof(RegExpShared
, groupsTemplate_
);
256 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf
);
259 static bool dumpBytecode(JSContext
* cx
, MutableHandleRegExpShared res
,
260 Handle
<JSLinearString
*> input
);
264 static const JS::TraceKind TraceKind
= JS::TraceKind::RegExpShared
;
269 JSAtom
* atom
= nullptr;
270 JS::RegExpFlags flags
= JS::RegExpFlag::NoFlags
;
273 Key(JSAtom
* atom
, JS::RegExpFlags flags
) : atom(atom
), flags(flags
) {}
274 MOZ_IMPLICIT
Key(const WeakHeapPtr
<RegExpShared
*>& shared
)
275 : atom(shared
.unbarrieredGet()->getSource()),
276 flags(shared
.unbarrieredGet()->getFlags()) {}
279 static HashNumber
hash(const Lookup
& l
) {
280 HashNumber hash
= DefaultHasher
<JSAtom
*>::hash(l
.atom
);
281 return mozilla::AddToHash(hash
, l
.flags
.value());
283 static bool match(Key l
, Key r
) {
284 return l
.atom
== r
.atom
&& l
.flags
== r
.flags
;
289 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
290 * that was not marked is deleted and removed from the set.
292 using Set
= JS::WeakCache
<
293 JS::GCHashSet
<WeakHeapPtr
<RegExpShared
*>, Key
, ZoneAllocPolicy
>>;
297 explicit RegExpZone(Zone
* zone
);
299 ~RegExpZone() { MOZ_ASSERT(set_
.empty()); }
301 bool empty() const { return set_
.empty(); }
303 RegExpShared
* maybeGet(JSAtom
* source
, JS::RegExpFlags flags
) const {
304 Set::Ptr p
= set_
.lookup(Key(source
, flags
));
305 return p
? *p
: nullptr;
308 RegExpShared
* get(JSContext
* cx
, Handle
<JSAtom
*> source
,
309 JS::RegExpFlags flags
);
312 void clear() { set_
.clear(); }
315 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf
) const;
// Index into matchResultShapes_. NumKinds is the number of real kinds and is
// used as that array's length.
enum ResultShapeKind { Normal, WithIndices, Indices, NumKinds };
322 // Information about the last regular expression match. This is used by the
323 // static RegExp properties such as RegExp.lastParen.
324 UniquePtr
<RegExpStatics
> regExpStatics
;
328 * The shapes used for the result object of re.exec(), if there is a result.
329 * These are used in CreateRegExpMatchResult. There are three shapes, each of
330 * which is an ArrayObject shape with some additional properties. We decide
331 * which to use based on the |hasIndices| (/d) flag.
333 * Normal: Has |index|, |input|, and |groups| properties.
334 * Used for the result object if |hasIndices| is not set.
336 * WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
337 * Used for the result object if |hasIndices| is set.
339 * Indices: Has a |groups| property. If |hasIndices| is set, used
340 * for the |.indices| property of the result object.
342 HeapPtr
<SharedShape
*> matchResultShapes_
[ResultShapeKind::NumKinds
];
345 * The shape of RegExp.prototype object that satisfies following:
346 * * RegExp.prototype.flags getter is not modified
347 * * RegExp.prototype.global getter is not modified
348 * * RegExp.prototype.ignoreCase getter is not modified
349 * * RegExp.prototype.multiline getter is not modified
350 * * RegExp.prototype.dotAll getter is not modified
351 * * RegExp.prototype.sticky getter is not modified
352 * * RegExp.prototype.unicode getter is not modified
353 * * RegExp.prototype.exec is an own data property
354 * * RegExp.prototype[@@match] is an own data property
355 * * RegExp.prototype[@@search] is an own data property
357 HeapPtr
<Shape
*> optimizableRegExpPrototypeShape_
;
360 * The shape of RegExp instance that satisfies following:
361 * * lastProperty is lastIndex
362 * * prototype is RegExp.prototype
364 HeapPtr
<Shape
*> optimizableRegExpInstanceShape_
;
366 SharedShape
* createMatchResultShape(JSContext
* cx
, ResultShapeKind kind
);
369 explicit RegExpRealm();
371 void trace(JSTracer
* trc
);
// Slot numbers of the extra properties on a match result object.
static const size_t MatchResultObjectIndexSlot = 0;
static const size_t MatchResultObjectInputSlot = 1;
static const size_t MatchResultObjectGroupsSlot = 2;
static const size_t MatchResultObjectIndicesSlot = 3;

// Number of used and allocated dynamic slots for a Normal match result
// object. These values are checked in createMatchResultShape.
static const size_t MatchResultObjectSlotSpan = 3;
static const size_t MatchResultObjectNumDynamicSlots = 6;

// Slot number of |groups| on the Indices-kind object.
static const size_t IndicesGroupsSlot = 0;
385 static size_t offsetOfMatchResultObjectIndexSlot() {
386 return sizeof(Value
) * MatchResultObjectIndexSlot
;
388 static size_t offsetOfMatchResultObjectInputSlot() {
389 return sizeof(Value
) * MatchResultObjectInputSlot
;
391 static size_t offsetOfMatchResultObjectGroupsSlot() {
392 return sizeof(Value
) * MatchResultObjectGroupsSlot
;
394 static size_t offsetOfMatchResultObjectIndicesSlot() {
395 return sizeof(Value
) * MatchResultObjectIndicesSlot
;
398 /* Get or create the shape used for the result of .exec(). */
399 SharedShape
* getOrCreateMatchResultShape(
400 JSContext
* cx
, ResultShapeKind kind
= ResultShapeKind::Normal
) {
401 if (matchResultShapes_
[kind
]) {
402 return matchResultShapes_
[kind
];
404 return createMatchResultShape(cx
, kind
);
407 Shape
* getOptimizableRegExpPrototypeShape() {
408 return optimizableRegExpPrototypeShape_
;
410 void setOptimizableRegExpPrototypeShape(Shape
* shape
) {
411 optimizableRegExpPrototypeShape_
= shape
;
413 Shape
* getOptimizableRegExpInstanceShape() {
414 return optimizableRegExpInstanceShape_
;
416 void setOptimizableRegExpInstanceShape(Shape
* shape
) {
417 optimizableRegExpInstanceShape_
= shape
;
420 static constexpr size_t offsetOfOptimizableRegExpPrototypeShape() {
421 return offsetof(RegExpRealm
, optimizableRegExpPrototypeShape_
);
423 static constexpr size_t offsetOfOptimizableRegExpInstanceShape() {
424 return offsetof(RegExpRealm
, optimizableRegExpInstanceShape_
);
426 static constexpr size_t offsetOfRegExpStatics() {
427 return offsetof(RegExpRealm
, regExpStatics
);
429 static constexpr size_t offsetOfNormalMatchResultShape() {
430 static_assert(sizeof(HeapPtr
<SharedShape
*>) == sizeof(uintptr_t));
431 return offsetof(RegExpRealm
, matchResultShapes_
) +
432 ResultShapeKind::Normal
* sizeof(uintptr_t);
436 RegExpRunStatus
ExecuteRegExpAtomRaw(RegExpShared
* re
, JSLinearString
* input
,
437 size_t start
, MatchPairs
* matchPairs
);
445 class Concrete
<js::RegExpShared
> : TracerConcrete
<js::RegExpShared
> {
447 explicit Concrete(js::RegExpShared
* ptr
)
448 : TracerConcrete
<js::RegExpShared
>(ptr
) {}
451 static void construct(void* storage
, js::RegExpShared
* ptr
) {
452 new (storage
) Concrete(ptr
);
455 CoarseType
coarseType() const final
{ return CoarseType::Other
; }
457 Size
size(mozilla::MallocSizeOf mallocSizeOf
) const override
;
459 const char16_t
* typeName() const override
{ return concreteTypeName
; }
460 static const char16_t concreteTypeName
[];
466 #endif /* vm_RegExpShared_h */