2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
22 #include "hphp/runtime/base/array-data-defs.h"
23 #include "hphp/runtime/base/mixed-array.h"
24 #include "hphp/runtime/base/tv-val.h"
25 #include "hphp/runtime/base/set-array.h"
26 #include "hphp/runtime/base/type-variant.h"
27 #include "hphp/runtime/vm/class-meth-data-ref.h"
28 #include "hphp/util/type-scan.h"
32 ///////////////////////////////////////////////////////////////////////////////
36 enum class IterTypeOp
{ NonLocal
, LocalBaseConst
, LocalBaseMutable
};
38 enum class IterNextIndex
: uint8_t {
44 // JIT-only "pointer iteration", designed for good specialized code-gen.
45 // In pointer iteration, the iterator has a pointer directly into the base.
47 // We only use this mode if all the following conditions are met:
48 // - The array is guaranteed to be unchanged during iteration
49 // - The array is a MixedArray (a dict or a darray)
50 // - The array is free of tombstones
53 // Helpers specific to bespoke array-likes.
57 // For iterator specialization, we pack all the information we need to generate
58 // specialized code in a single byte so that we can check it in one comparison.
60 // This byte should be 0 for unspecialized iterators, as created by calling the
61 // normal IterImpl constructor instead of using a specialized initializer.
62 struct IterSpecialization
{
63 enum BaseType
: uint8_t { Vec
= 0, Dict
, kNumBaseTypes
};
64 enum KeyTypes
: uint8_t { ArrayKey
= 0, Int
, Str
, StaticStr
, kNumKeyTypes
};
66 // Returns a generic (unspecialized) IterSpecialization value.
67 static IterSpecialization
generic() {
68 IterSpecialization result
;
70 assertx(!result
.specialized
);
77 // `base_type` and `key_types` are bit encodings of the enums above.
81 // When we JIT a specialized iterator, we set `specialized` to true,
82 // We set `output_key` for key-value iters but not for value-only iters.
83 // We set `base_const` if we know the base is const during iteration.
89 // A free bit. Maybe we'll need a 2-bit enum for the layout?
96 std::string
show(IterSpecialization type
);
97 std::string
show(IterSpecialization::BaseType type
);
98 std::string
show(IterSpecialization::KeyTypes type
);
101 * Iterator over an array, a collection, or an object implementing the Hack
102 * Iterator interface. This iterator is used by the JIT and its usage is
103 * mediated through the "Iter" wrapper below.
105 * By default, iterators inc-ref their base to ensure that it won't be mutated
106 * during the iteration. HHBBC can do an analysis that marks certain iterators
107 * as "local" iterators, which means that their base only changes in certain
108 * controlled ways during iteration. (Specifically: either the base does not
109 * change at all, or the current key is assigned a new value in the loop.)
111 * For local iterators, the base is kept in a frame local and passed to the
112 * iterator on each iteration. Local iterators are never used for objects,
113 * since we can't constrain writes to them in this way.
115 * The purpose of the local iter optimization is to try to keep local bases at
116 * a refcount of 1, so that they won't be COWed by the "set the current key"
117 * type of mutating operations. Apparently, this pattern is somewhat common...
120 enum NoInc
{ noInc
= 0 };
121 enum Local
{ local
= 0 };
124 * Constructors. Note that sometimes IterImpl objects are created
125 * without running their C++ constructor. (See new_iter_array.)
128 explicit IterImpl(const ArrayData
* data
);
129 IterImpl(const ArrayData
* data
, NoInc
) {
130 setArrayData
<false>(data
);
132 IterImpl(const ArrayData
* data
, Local
) {
133 setArrayData
<true>(data
);
135 explicit IterImpl(ObjectData
* obj
);
136 IterImpl(ObjectData
* obj
, NoInc
);
141 // Pass a non-NULL ad to checkInvariants iff this iterator is local.
142 // These invariants hold as long as the iterator hasn't yet reached the end.
143 bool checkInvariants(const ArrayData
* ad
= nullptr) const;
145 explicit operator bool() { return !end(); }
147 // Returns true if we've reached the end. endHelper is used for iterators
148 // over objects implementing the Iterator interface.
150 if (UNLIKELY(!hasArrayData())) return endHelper();
151 return getArrayData() == nullptr || m_pos
== m_end
;
153 bool endHelper() const;
155 // Advance the iterator's position. Assumes that end() is false. nextHelper
156 // is used for iterators over objects implementing the Iterator interface.
158 assertx(checkInvariants());
159 if (UNLIKELY(!hasArrayData())) return nextHelper();
160 m_pos
= getArrayData()->iter_advance(m_pos
);
164 bool nextLocal(const ArrayData
* ad
) {
165 assertx(checkInvariants(ad
));
166 m_pos
= ad
->iter_advance(m_pos
);
167 return m_pos
== m_end
;
170 // Return the key at the current position. firstHelper is used for Objects.
171 // This method and its variants inc-ref the key before returning it.
173 if (UNLIKELY(!hasArrayData())) return firstHelper();
174 return getArrayData()->getKey(m_pos
);
176 Variant
firstHelper();
178 // TypedValue versions of first. Used by the JIT iterator helpers.
179 // These methods do NOT inc-ref the key before returning it.
180 TypedValue
nvFirst() const {
181 return getArrayData()->nvGetKey(m_pos
);
183 TypedValue
nvFirstLocal(const ArrayData
* ad
) const {
184 assertx(getArrayData() == nullptr);
185 return ad
->nvGetKey(m_pos
);
188 // Return the value at the current position. firstHelper is used for Objects.
189 // This method and its variants inc-ref the value before returning it.
193 * Get the value at the current iterator position, without refcount ops.
195 * If called when iterating an Iterable object the secondVal() will fatal.
197 TypedValue
secondVal() const;
199 // TypedValue versions of second. Used by the JIT iterator helpers.
200 // These methods do NOT inc-ref the value before returning it.
201 TypedValue
nvSecond() const {
202 return getArrayData()->nvGetVal(m_pos
);
204 TypedValue
nvSecondLocal(const ArrayData
* ad
) const {
205 assertx(getArrayData() == nullptr);
206 return ad
->nvGetVal(m_pos
);
209 // This method returns null for local iterators, and for non-local iterators
210 // with an empty array base. It must be checked in end() for this reason.
211 bool hasArrayData() const {
212 return !((intptr_t)m_data
& objectBaseTag());
215 const ArrayData
* getArrayData() const {
216 assertx(hasArrayData());
219 ssize_t
getPos() const {
222 ssize_t
getEnd() const {
225 void setPos(ssize_t newPos
) {
229 // It's valid to call end() on a killed iter, but the iter is otherwise dead.
230 // In debug builds, this method will overwrite the iterator with garbage.
233 IterNextIndex
getHelperIndex() {
234 return m_nextHelperIdx
;
237 ObjectData
* getObject() const {
238 assertx(!hasArrayData());
239 return (ObjectData
*)((intptr_t)m_obj
& ~objectBaseTag());
242 // Used by native code and by the JIT to pack the m_typeFields components.
243 static uint32_t packTypeFields(IterNextIndex index
) {
244 return static_cast<uint32_t>(index
) << 24;
246 static uint32_t packTypeFields(
247 IterNextIndex index
, IterSpecialization spec
, uint16_t layout
) {
248 return static_cast<uint32_t>(index
) << 24 |
249 static_cast<uint32_t>(spec
.as_byte
) << 16 |
250 static_cast<uint32_t>(layout
);
253 // JIT helpers used for specializing iterators.
254 static constexpr size_t baseOffset() {
255 return offsetof(IterImpl
, m_data
);
257 static constexpr size_t baseSize() {
258 return sizeof(m_data
);
260 static constexpr size_t typeOffset() {
261 return offsetof(IterImpl
, m_typeFields
);
263 static constexpr size_t typeSize() {
264 return sizeof(m_typeFields
);
266 static constexpr size_t posOffset() {
267 return offsetof(IterImpl
, m_pos
);
269 static constexpr size_t posSize() {
270 return sizeof(m_pos
);
272 static constexpr size_t endOffset() {
273 return offsetof(IterImpl
, m_end
);
275 static constexpr size_t endSize() {
276 return sizeof(m_end
);
279 // When we specialize an iterator, we must *set* all m_type components (so as
280 // to be compatible with native helpers) but we only need to check this byte.
281 static constexpr size_t specializationOffset() {
282 return offsetof(IterImpl
, m_specialization
);
285 // ObjectData bases have this additional bit set; ArrayData bases do not.
286 static constexpr intptr_t objectBaseTag() {
291 template<IterTypeOp Type
>
292 friend int64_t new_iter_array(Iter
*, ArrayData
*, TypedValue
*);
293 template<IterTypeOp Type
>
294 friend int64_t new_iter_array_key(Iter
*, ArrayData
*, TypedValue
*,
296 template<bool HasKey
, bool Local
>
297 friend int64_t iter_next_packed_pointer(
298 Iter
*, TypedValue
*, TypedValue
*, ArrayData
*);
299 template<bool HasKey
, bool Local
>
300 friend int64_t iter_next_mixed_pointer(
301 Iter
*, TypedValue
*, TypedValue
*, ArrayData
*);
303 template <bool incRef
= true>
304 void arrInit(const ArrayData
* arr
);
306 template <bool incRef
>
307 void objInit(ObjectData
* obj
);
309 // Set all IterImpl fields for iteration over an array:
310 // - m_data is either the array, or null (for local iterators).
311 // - The type fields union is set based on the array type.
312 // - m_pos and m_end are set based on its virtual iter helpers.
313 template <bool Local
= false>
314 void setArrayData(const ArrayData
* ad
) {
315 assertx((intptr_t(ad
) & objectBaseTag()) == 0);
316 assertx(!Local
|| ad
);
317 m_data
= Local
? nullptr : ad
;
318 setArrayNext(IterNextIndex::Array
);
320 if (ad
->isVanillaVec()) {
321 setArrayNext(IterNextIndex::ArrayPacked
);
322 } else if (ad
->isVanillaDict()) {
323 setArrayNext(IterNextIndex::ArrayMixed
);
325 m_pos
= ad
->iter_begin();
326 m_end
= ad
->iter_end();
330 // Set all IterImpl fields for iteration over an object:
331 // - m_data is is always the object, with the lowest bit set as a flag.
332 // - We set the type fields union here.
333 void setObject(ObjectData
* obj
) {
334 assertx((intptr_t(obj
) & objectBaseTag()) == 0);
335 m_obj
= (ObjectData
*)((intptr_t)obj
| objectBaseTag());
336 m_typeFields
= packTypeFields(IterNextIndex::Object
);
337 assertx(m_nextHelperIdx
== IterNextIndex::Object
);
338 assertx(!m_specialization
.specialized
);
341 // Set the type fields of an array. These fields are packed so that we
342 // can set them with a single mov-immediate to the union.
343 void setArrayNext(IterNextIndex index
) {
344 m_typeFields
= packTypeFields(index
);
345 assertx(m_nextHelperIdx
== index
);
346 assertx(!m_specialization
.specialized
);
349 // The iterator base. Will be null for local iterators. We set the lowest
350 // bit for object iterators to distinguish them from array iterators.
352 const ArrayData
* m_data
;
355 // This field is a union so new_iter_array can set it in one instruction.
359 IterSpecialization m_specialization
;
360 IterNextIndex m_nextHelperIdx
;
362 uint32_t m_typeFields
;
364 // Current position. Beware that when m_data is null, m_pos is uninitialized.
365 // For the pointer iteration types, we use the appropriate pointers instead.
368 TypedValue
* m_packed_elm
;
369 MixedArrayElm
* m_mixed_elm
;
373 TypedValue
* m_packed_end
;
374 MixedArrayElm
* m_mixed_end
;
377 // These elements are always referenced elsewhere, either in the m_data field
378 // of this iterator or in a local. (If we weren't using pointer iteration, we
379 // would track elements by index, not by pointer, but GC would still work.)
380 TYPE_SCAN_IGNORE_FIELD(m_packed_end
);
381 TYPE_SCAN_IGNORE_FIELD(m_mixed_end
);
382 TYPE_SCAN_IGNORE_FIELD(m_packed_elm
);
383 TYPE_SCAN_IGNORE_FIELD(m_mixed_elm
);
386 ///////////////////////////////////////////////////////////////////////////////
389 * The iterator API used by the interpreter and the JIT. This API is relatively
390 * limited, because there are only two ways to interact with iterators in Hack:
391 * 1. In a "foreach" loop, using the *IterInit* / *IterNext* bytecodes.
392 * 2. As a delegated generator ("yield from").
394 * (*IterInit* here refers to {IterInit, IterInitK, LIterInit, LIterInitK}).
396 * The methods exposed here should be sufficient to implement both kinds of
397 * iterator behavior. To speed up "foreach" loops, we also provide helpers
398 * implementing *IterInit* / *IterNext* through helpers below.
400 * These helpers are faster than using the Iter class's methods directly
401 * because they do one vtable lookup on the array type and then execute the
402 * advance / bounds check / output key-value sequence based on that lookup,
403 * rather than doing a separate vtable lookup for each step.
405 * NOTE: If you initialize an iterator using the faster init helpers, you MUST
406 * use the faster next helpers for IterNext ops. That's because the helpers may
407 * make iterators that use pointer iteration, which Iter::next doesn't handle.
408 * doesn't handle. This invariant is checked in debug builds.
410 * In practice, this constraint shouldn't be a problem, because we always use
411 * the helpers to do IterNext. That's true both in the interpreter and the JIT.
413 struct alignas(16) Iter
{
417 // Returns true if the base is non-empty. Only used for non-local iterators.
418 // For local iterators, use new_iter_array / new_iter_array_key below.
419 bool init(TypedValue
* base
);
421 // Returns true if there are more elems. Only used for non-local iterators.
422 // For local iterators, use liter_next_ind / liter_next_key_ind below.
425 // Returns true if the iterator is at its end.
426 bool end() const { return m_iter
.end(); }
428 // Get the current key and value. Assumes that the iter is not at its end.
429 // These methods will inc-ref the key and value before returning it.
430 Variant
key() { return m_iter
.first(); }
431 Variant
val() { return m_iter
.second(); };
433 // It's valid to call end() on a killed iter, but the iter is otherwise dead.
434 // In debug builds, this method will overwrite the iterator with garbage.
435 void kill() { m_iter
.kill(); }
437 // Dec-refs the base, for non-local iters. Safe to call for local iters.
440 // Debug string, used when printing a frame.
441 std::string
toString() const;
444 // Used to implement the separate helper functions below. These functions
445 // peek into the Iter and directly manipulate m_iter's fields.
446 friend IterImpl
* unwrap(Iter
*);
451 // Native helpers for the interpreter + JIT used to implement *IterInit* ops.
452 // These helpers return 1 if the base has any elements and 0 otherwise.
453 // (They would return a bool, but native method calls from the JIT produce GP
454 // register outputs, so we extend the return type to an int64_t.)
456 // If these helpers return 1, they set `val` (and `key`, for key-value iters)
457 // from the first key-value pair of the base.
459 // For non-local iters, if these helpers return 0, they also dec-ref the base.
461 // For the array helpers, first provide an IterTypeOp to get an IterInit helper
462 // to call, then call it. This indirection lets us burn the appropriate helper
463 // into the JIT (where we know IterTypeOp statically). For objects, we don't
464 // need it because the type is always NonLocal.
465 using IterInitArr
= int64_t(*)(Iter
*, ArrayData
*, TypedValue
*);
466 using IterInitArrKey
= int64_t(*)(Iter
*, ArrayData
*, TypedValue
*, TypedValue
*);
468 IterInitArr
new_iter_array_helper(IterTypeOp type
);
469 IterInitArrKey
new_iter_array_key_helper(IterTypeOp type
);
471 int64_t new_iter_object(Iter
* dest
, ObjectData
* obj
, Class
* ctx
,
472 TypedValue
* val
, TypedValue
* key
);
475 // Native helpers for the interpreter + JIT used to implement *IterInit* ops.
476 // These helpers return 1 if the base has more elements and 0 otherwise.
477 // (As above, they return a logical bool which we extend to a GP register.)
479 // If these helpers return 1, they set `val` (and `key`, for key-value iters)
480 // from the next key-value pair of the base.
482 // For non-local iters, if these helpers return 0, they also dec-ref the base.
483 NEVER_INLINE
int64_t iter_next_ind(Iter
* iter
, TypedValue
* valOut
);
484 NEVER_INLINE
int64_t iter_next_key_ind(Iter
* iter
, TypedValue
* valOut
, TypedValue
* keyOut
);
485 NEVER_INLINE
int64_t liter_next_ind(Iter
*, TypedValue
*, ArrayData
*);
486 NEVER_INLINE
int64_t liter_next_key_ind(Iter
*, TypedValue
*, TypedValue
*, ArrayData
*);
488 //////////////////////////////////////////////////////////////////////