Backed out changeset 8f976ed899d7 (bug 1847231) for causing bc failures on browser_se...
[gecko.git] / js / src / vm / RegExpShared.h
blob1ccf1a4125ba75e481850f49d17ad91b5fc38b8c
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * The compiled representation of a RegExp, potentially shared among RegExp
 * instances created during separate evaluations of a single RegExp literal in
 * source code.
 */
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
19 #include "gc/Barrier.h"
20 #include "gc/Policy.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
31 namespace js {
33 class ArrayObject;
34 class PlainObject;
35 class RegExpRealm;
36 class RegExpShared;
37 class RegExpStatics;
38 class VectorMatchPairs;
40 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
41 using HandleRegExpShared = JS::Handle<RegExpShared*>;
42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
// Status code returned by the regexp execution entry points declared in this
// header (RegExpShared::execute, RegExpShared::executeAtom and
// ExecuteRegExpAtomRaw). The underlying type is fixed to int32_t and the
// numeric values are spelled out explicitly.
enum RegExpRunStatus : int32_t {
  RegExpRunStatus_Error = -1,
  RegExpRunStatus_Success = 1,
  RegExpRunStatus_Success_NotFound = 0,
};
50 inline bool IsNativeRegExpEnabled() {
51 return jit::HasJitBackend() && jit::JitOptions.nativeRegExp;
55 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
56 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
57 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
58 * table so that they can be reused when compiling the same regex string.
60 * To save memory, a RegExpShared is not created for a RegExpObject until it is
61 * needed for execution. When a RegExpShared needs to be created, it is looked
62 * up in a per-compartment table to allow reuse between objects.
64 * During a GC, RegExpShared instances are marked and swept like GC things.
65 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
66 * than explicitly tracing them, so that the RegExpShared and any jitcode can
67 * be reclaimed quicker. However, the RegExpShareds are traced through by
68 * objects when we are preserving jitcode in their zone, to avoid the same
69 * recompilation inefficiencies as normal Ion and baseline compilation.
71 class RegExpShared
72 : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> {
73 friend class js::gc::CellAllocator;
75 public:
76 enum class Kind : uint32_t { Unparsed, Atom, RegExp };
77 enum class CodeKind { Bytecode, Jitcode, Any };
79 using ByteCode = js::irregexp::ByteArrayData;
80 using JitCodeTable = js::irregexp::ByteArray;
81 using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
83 private:
84 friend class RegExpStatics;
85 friend class RegExpZone;
87 struct RegExpCompilation {
88 HeapPtr<jit::JitCode*> jitCode;
89 ByteCode* byteCode = nullptr;
91 bool compiled(CodeKind kind = CodeKind::Any) const {
92 switch (kind) {
93 case CodeKind::Bytecode:
94 return !!byteCode;
95 case CodeKind::Jitcode:
96 return !!jitCode;
97 case CodeKind::Any:
98 return !!byteCode || !!jitCode;
100 MOZ_CRASH("Unreachable");
103 size_t byteCodeLength() const {
104 MOZ_ASSERT(byteCode);
105 return byteCode->length;
109 public:
110 /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
111 JSAtom* getSource() const { return headerPtr(); }
113 private:
114 RegExpCompilation compilationArray[2];
116 uint32_t pairCount_;
117 JS::RegExpFlags flags;
119 RegExpShared::Kind kind_ = Kind::Unparsed;
120 GCPtr<JSAtom*> patternAtom_;
121 uint32_t maxRegisters_ = 0;
122 uint32_t ticks_ = 0;
124 uint32_t numNamedCaptures_ = {};
125 uint32_t* namedCaptureIndices_ = {};
126 GCPtr<PlainObject*> groupsTemplate_ = {};
128 static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
130 // Tables referenced by JIT code.
131 JitCodeTables tables;
133 /* Internal functions. */
134 RegExpShared(JSAtom* source, JS::RegExpFlags flags);
136 const RegExpCompilation& compilation(bool latin1) const {
137 return compilationArray[CompilationIndex(latin1)];
140 RegExpCompilation& compilation(bool latin1) {
141 return compilationArray[CompilationIndex(latin1)];
144 public:
145 ~RegExpShared() = delete;
147 static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
148 Handle<JSLinearString*> input, CodeKind code);
150 static RegExpRunStatus executeAtom(MutableHandleRegExpShared re,
151 Handle<JSLinearString*> input,
152 size_t start, VectorMatchPairs* matches);
154 // Execute this RegExp on input starting from searchIndex, filling in matches.
155 static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
156 Handle<JSLinearString*> input,
157 size_t searchIndex, VectorMatchPairs* matches);
159 // Register a table with this RegExpShared, and take ownership.
160 bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
162 /* Accessors */
164 size_t pairCount() const {
165 MOZ_ASSERT(kind() != Kind::Unparsed);
166 return pairCount_;
169 RegExpShared::Kind kind() const { return kind_; }
171 // Use simple string matching for this regexp.
172 void useAtomMatch(Handle<JSAtom*> pattern);
174 // Use the regular expression engine for this regexp.
175 void useRegExpMatch(size_t parenCount);
177 static void InitializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
178 uint32_t numNamedCaptures,
179 Handle<PlainObject*> templateObject,
180 uint32_t* captureIndices);
181 PlainObject* getGroupsTemplate() { return groupsTemplate_; }
183 void tierUpTick();
184 bool markedForTierUp() const;
186 void setByteCode(ByteCode* code, bool latin1) {
187 compilation(latin1).byteCode = code;
189 ByteCode* getByteCode(bool latin1) const {
190 return compilation(latin1).byteCode;
192 void setJitCode(jit::JitCode* code, bool latin1) {
193 compilation(latin1).jitCode = code;
195 jit::JitCode* getJitCode(bool latin1) const {
196 return compilation(latin1).jitCode;
198 uint32_t getMaxRegisters() const { return maxRegisters_; }
199 void updateMaxRegisters(uint32_t numRegisters) {
200 maxRegisters_ = std::max(maxRegisters_, numRegisters);
203 uint32_t numNamedCaptures() const { return numNamedCaptures_; }
204 int32_t getNamedCaptureIndex(uint32_t idx) const {
205 MOZ_ASSERT(idx < numNamedCaptures());
206 MOZ_ASSERT(namedCaptureIndices_);
207 return namedCaptureIndices_[idx];
210 JSAtom* patternAtom() const { return patternAtom_; }
212 JS::RegExpFlags getFlags() const { return flags; }
214 bool hasIndices() const { return flags.hasIndices(); }
215 bool global() const { return flags.global(); }
216 bool ignoreCase() const { return flags.ignoreCase(); }
217 bool multiline() const { return flags.multiline(); }
218 bool dotAll() const { return flags.dotAll(); }
219 bool unicode() const { return flags.unicode(); }
220 bool unicodeSets() const { return flags.unicodeSets(); }
221 bool sticky() const { return flags.sticky(); }
223 bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
224 return compilation(latin1).compiled(codeKind);
226 bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
228 void traceChildren(JSTracer* trc);
229 void discardJitCode();
230 void finalize(JS::GCContext* gcx);
232 static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
234 static size_t offsetOfPatternAtom() {
235 return offsetof(RegExpShared, patternAtom_);
238 static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
240 static size_t offsetOfPairCount() {
241 return offsetof(RegExpShared, pairCount_);
244 static size_t offsetOfKind() { return offsetof(RegExpShared, kind_); }
246 static size_t offsetOfJitCode(bool latin1) {
247 return offsetof(RegExpShared, compilationArray) +
248 (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
249 offsetof(RegExpCompilation, jitCode);
252 static size_t offsetOfGroupsTemplate() {
253 return offsetof(RegExpShared, groupsTemplate_);
256 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
258 #ifdef DEBUG
259 static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
260 Handle<JSLinearString*> input);
261 #endif
263 public:
264 static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
267 class RegExpZone {
268 struct Key {
269 JSAtom* atom = nullptr;
270 JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
272 Key() = default;
273 Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
274 MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
275 : atom(shared.unbarrieredGet()->getSource()),
276 flags(shared.unbarrieredGet()->getFlags()) {}
278 using Lookup = Key;
279 static HashNumber hash(const Lookup& l) {
280 HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
281 return mozilla::AddToHash(hash, l.flags.value());
283 static bool match(Key l, Key r) {
284 return l.atom == r.atom && l.flags == r.flags;
289 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
290 * that was not marked is deleted and removed from the set.
292 using Set = JS::WeakCache<
293 JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
294 Set set_;
296 public:
297 explicit RegExpZone(Zone* zone);
299 ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
301 bool empty() const { return set_.empty(); }
303 RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
304 Set::Ptr p = set_.lookup(Key(source, flags));
305 return p ? *p : nullptr;
308 RegExpShared* get(JSContext* cx, Handle<JSAtom*> source,
309 JS::RegExpFlags flags);
311 #ifdef DEBUG
312 void clear() { set_.clear(); }
313 #endif
315 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
318 class RegExpRealm {
319 public:
320 enum ResultShapeKind { Normal, WithIndices, Indices, NumKinds };
322 // Information about the last regular expression match. This is used by the
323 // static RegExp properties such as RegExp.lastParen.
324 UniquePtr<RegExpStatics> regExpStatics;
326 private:
328 * The shapes used for the result object of re.exec(), if there is a result.
329 * These are used in CreateRegExpMatchResult. There are three shapes, each of
330 * which is an ArrayObject shape with some additional properties. We decide
331 * which to use based on the |hasIndices| (/d) flag.
333 * Normal: Has |index|, |input|, and |groups| properties.
334 * Used for the result object if |hasIndices| is not set.
336 * WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
337 * Used for the result object if |hasIndices| is set.
339 * Indices: Has a |groups| property. If |hasIndices| is set, used
340 * for the |.indices| property of the result object.
342 HeapPtr<SharedShape*> matchResultShapes_[ResultShapeKind::NumKinds];
345 * The shape of RegExp.prototype object that satisfies following:
346 * * RegExp.prototype.flags getter is not modified
347 * * RegExp.prototype.global getter is not modified
348 * * RegExp.prototype.ignoreCase getter is not modified
349 * * RegExp.prototype.multiline getter is not modified
350 * * RegExp.prototype.dotAll getter is not modified
351 * * RegExp.prototype.sticky getter is not modified
352 * * RegExp.prototype.unicode getter is not modified
353 * * RegExp.prototype.exec is an own data property
354 * * RegExp.prototype[@@match] is an own data property
355 * * RegExp.prototype[@@search] is an own data property
357 HeapPtr<Shape*> optimizableRegExpPrototypeShape_;
360 * The shape of RegExp instance that satisfies following:
361 * * lastProperty is lastIndex
362 * * prototype is RegExp.prototype
364 HeapPtr<Shape*> optimizableRegExpInstanceShape_;
366 SharedShape* createMatchResultShape(JSContext* cx, ResultShapeKind kind);
368 public:
369 explicit RegExpRealm();
371 void trace(JSTracer* trc);
373 static const size_t MatchResultObjectIndexSlot = 0;
374 static const size_t MatchResultObjectInputSlot = 1;
375 static const size_t MatchResultObjectGroupsSlot = 2;
376 static const size_t MatchResultObjectIndicesSlot = 3;
378 // Number of used and allocated dynamic slots for a Normal match result
379 // object. These values are checked in createMatchResultShape.
380 static const size_t MatchResultObjectSlotSpan = 3;
381 static const size_t MatchResultObjectNumDynamicSlots = 6;
383 static const size_t IndicesGroupsSlot = 0;
385 static size_t offsetOfMatchResultObjectIndexSlot() {
386 return sizeof(Value) * MatchResultObjectIndexSlot;
388 static size_t offsetOfMatchResultObjectInputSlot() {
389 return sizeof(Value) * MatchResultObjectInputSlot;
391 static size_t offsetOfMatchResultObjectGroupsSlot() {
392 return sizeof(Value) * MatchResultObjectGroupsSlot;
394 static size_t offsetOfMatchResultObjectIndicesSlot() {
395 return sizeof(Value) * MatchResultObjectIndicesSlot;
398 /* Get or create the shape used for the result of .exec(). */
399 SharedShape* getOrCreateMatchResultShape(
400 JSContext* cx, ResultShapeKind kind = ResultShapeKind::Normal) {
401 if (matchResultShapes_[kind]) {
402 return matchResultShapes_[kind];
404 return createMatchResultShape(cx, kind);
407 Shape* getOptimizableRegExpPrototypeShape() {
408 return optimizableRegExpPrototypeShape_;
410 void setOptimizableRegExpPrototypeShape(Shape* shape) {
411 optimizableRegExpPrototypeShape_ = shape;
413 Shape* getOptimizableRegExpInstanceShape() {
414 return optimizableRegExpInstanceShape_;
416 void setOptimizableRegExpInstanceShape(Shape* shape) {
417 optimizableRegExpInstanceShape_ = shape;
420 static constexpr size_t offsetOfOptimizableRegExpPrototypeShape() {
421 return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
423 static constexpr size_t offsetOfOptimizableRegExpInstanceShape() {
424 return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
426 static constexpr size_t offsetOfRegExpStatics() {
427 return offsetof(RegExpRealm, regExpStatics);
429 static constexpr size_t offsetOfNormalMatchResultShape() {
430 static_assert(sizeof(HeapPtr<SharedShape*>) == sizeof(uintptr_t));
431 return offsetof(RegExpRealm, matchResultShapes_) +
432 ResultShapeKind::Normal * sizeof(uintptr_t);
436 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input,
437 size_t start, MatchPairs* matchPairs);
439 } /* namespace js */
441 namespace JS {
442 namespace ubi {
444 template <>
445 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
446 protected:
447 explicit Concrete(js::RegExpShared* ptr)
448 : TracerConcrete<js::RegExpShared>(ptr) {}
450 public:
451 static void construct(void* storage, js::RegExpShared* ptr) {
452 new (storage) Concrete(ptr);
455 CoarseType coarseType() const final { return CoarseType::Other; }
457 Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
459 const char16_t* typeName() const override { return concreteTypeName; }
460 static const char16_t concreteTypeName[];
463 } // namespace ubi
464 } // namespace JS
466 #endif /* vm_RegExpShared_h */