Optimize unaligned vanilla vec iteration
[hiphop-php.git] / hphp / runtime / vm / iter.h
blob1007775b5bb5be8dedc840a79c6720b8ea81e6b3
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #pragma once
19 #include <array>
20 #include <cstdint>
22 #include "hphp/runtime/base/array-data-defs.h"
23 #include "hphp/runtime/base/tv-val.h"
24 #include "hphp/runtime/base/type-variant.h"
25 #include "hphp/runtime/base/vanilla-dict.h"
26 #include "hphp/runtime/base/unaligned-typed-value.h"
27 #include "hphp/runtime/vm/class-meth-data-ref.h"
28 #include "hphp/util/type-scan.h"
30 namespace HPHP {
32 ///////////////////////////////////////////////////////////////////////////////
34 struct Iter;
36 enum class IterTypeOp { NonLocal, LocalBaseConst, LocalBaseMutable };
38 enum class IterNextIndex : uint8_t {
39 VanillaVec = 0,
40 ArrayMixed,
41 Array,
42 Object,
44 // JIT-only "pointer iteration", designed for good specialized code-gen.
45 // In pointer iteration, the iterator has a pointer directly into the base.
47 // We only use this mode if all the following conditions are met:
48 // - The array is guaranteed to be unchanged during iteration
49 // - The array is a VanillaDict (a dict or a darray) or VanillaVec storing unaligned tvs
50 // - (For dicts) The array is free of tombstones
51 ArrayMixedPointer,
52 VanillaVecPointer,
54 // Helpers specific to bespoke array-likes.
55 StructDict,
58 // For iterator specialization, we pack all the information we need to generate
59 // specialized code in a single byte so that we can check it in one comparison.
61 // This byte should be 0 for unspecialized iterators, as created by calling the
62 // normal IterImpl constructor instead of using a specialized initializer.
63 struct IterSpecialization {
64 enum BaseType : uint8_t { Vec = 0, Dict, kNumBaseTypes };
65 enum KeyTypes : uint8_t { ArrayKey = 0, Int, Str, StaticStr, kNumKeyTypes };
67 // Returns a generic (unspecialized) IterSpecialization value.
68 static IterSpecialization generic() {
69 IterSpecialization result;
70 result.as_byte = 0;
71 assertx(!result.specialized);
72 return result;
75 union {
76 uint8_t as_byte;
77 struct {
78 // `base_type` and `key_types` are bit encodings of the enums above.
79 uint8_t base_type: 1;
80 uint8_t key_types: 2;
82 // When we JIT a specialized iterator, we set `specialized` to true,
83 // We set `output_key` for key-value iters but not for value-only iters.
84 // We set `base_const` if we know the base is const during iteration.
85 bool specialized: 1;
86 bool output_key: 1;
87 bool base_const: 1;
88 bool bespoke: 1;
90 // A free bit. Maybe we'll need a 2-bit enum for the layout?
91 bool padding: 1;
96 // Debugging output.
97 std::string show(IterSpecialization type);
98 std::string show(IterSpecialization::BaseType type);
99 std::string show(IterSpecialization::KeyTypes type);
102 * Iterator over an array, a collection, or an object implementing the Hack
103 * Iterator interface. This iterator is used by the JIT and its usage is
104 * mediated through the "Iter" wrapper below.
106 * By default, iterators inc-ref their base to ensure that it won't be mutated
107 * during the iteration. HHBBC can do an analysis that marks certain iterators
108 * as "local" iterators, which means that their base only changes in certain
109 * controlled ways during iteration. (Specifically: either the base does not
110 * change at all, or the current key is assigned a new value in the loop.)
112 * For local iterators, the base is kept in a frame local and passed to the
113 * iterator on each iteration. Local iterators are never used for objects,
114 * since we can't constrain writes to them in this way.
116 * The purpose of the local iter optimization is to try to keep local bases at
117 * a refcount of 1, so that they won't be COWed by the "set the current key"
118 * type of mutating operations. Apparently, this pattern is somewhat common...
120 struct IterImpl {
121 enum NoInc { noInc = 0 };
122 enum Local { local = 0 };
125 * Constructors. Note that sometimes IterImpl objects are created
126 * without running their C++ constructor. (See new_iter_array.)
128 IterImpl() = delete;
129 explicit IterImpl(const ArrayData* data);
130 IterImpl(const ArrayData* data, NoInc) {
131 setArrayData<false>(data);
133 IterImpl(const ArrayData* data, Local) {
134 setArrayData<true>(data);
136 explicit IterImpl(ObjectData* obj);
137 IterImpl(ObjectData* obj, NoInc);
139 // Destructor
140 ~IterImpl();
142 // Pass a non-NULL ad to checkInvariants iff this iterator is local.
143 // These invariants hold as long as the iterator hasn't yet reached the end.
144 bool checkInvariants(const ArrayData* ad = nullptr) const;
146 explicit operator bool() { return !end(); }
148 // Returns true if we've reached the end. endHelper is used for iterators
149 // over objects implementing the Iterator interface.
150 bool end() const {
151 if (UNLIKELY(!hasArrayData())) return endHelper();
152 return getArrayData() == nullptr || m_pos == m_end;
154 bool endHelper() const;
156 // Advance the iterator's position. Assumes that end() is false. nextHelper
157 // is used for iterators over objects implementing the Iterator interface.
158 void next() {
159 assertx(checkInvariants());
160 if (UNLIKELY(!hasArrayData())) return nextHelper();
161 m_pos = getArrayData()->iter_advance(m_pos);
163 void nextHelper();
165 bool nextLocal(const ArrayData* ad) {
166 assertx(checkInvariants(ad));
167 m_pos = ad->iter_advance(m_pos);
168 return m_pos == m_end;
171 // Return the key at the current position. firstHelper is used for Objects.
172 // This method and its variants inc-ref the key before returning it.
173 Variant first() {
174 if (UNLIKELY(!hasArrayData())) return firstHelper();
175 return getArrayData()->getKey(m_pos);
177 Variant firstHelper();
179 // TypedValue versions of first. Used by the JIT iterator helpers.
180 // These methods do NOT inc-ref the key before returning it.
181 TypedValue nvFirst() const {
182 return getArrayData()->nvGetKey(m_pos);
184 TypedValue nvFirstLocal(const ArrayData* ad) const {
185 assertx(getArrayData() == nullptr);
186 return ad->nvGetKey(m_pos);
189 // Return the value at the current position. firstHelper is used for Objects.
190 // This method and its variants inc-ref the value before returning it.
191 Variant second();
194 * Get the value at the current iterator position, without refcount ops.
196 * If called when iterating an Iterable object the secondVal() will fatal.
198 TypedValue secondVal() const;
200 // TypedValue versions of second. Used by the JIT iterator helpers.
201 // These methods do NOT inc-ref the value before returning it.
202 TypedValue nvSecond() const {
203 return getArrayData()->nvGetVal(m_pos);
205 TypedValue nvSecondLocal(const ArrayData* ad) const {
206 assertx(getArrayData() == nullptr);
207 return ad->nvGetVal(m_pos);
210 // This method returns null for local iterators, and for non-local iterators
211 // with an empty array base. It must be checked in end() for this reason.
212 bool hasArrayData() const {
213 return !((intptr_t)m_data & objectBaseTag());
216 const ArrayData* getArrayData() const {
217 assertx(hasArrayData());
218 return m_data;
220 ssize_t getPos() const {
221 return m_pos;
223 ssize_t getEnd() const {
224 return m_end;
226 void setPos(ssize_t newPos) {
227 m_pos = newPos;
230 // It's valid to call end() on a killed iter, but the iter is otherwise dead.
231 // In debug builds, this method will overwrite the iterator with garbage.
232 void kill();
234 IterNextIndex getHelperIndex() {
235 return m_nextHelperIdx;
238 ObjectData* getObject() const {
239 assertx(!hasArrayData());
240 return (ObjectData*)((intptr_t)m_obj & ~objectBaseTag());
243 // Used by native code and by the JIT to pack the m_typeFields components.
244 static uint32_t packTypeFields(IterNextIndex index) {
245 return static_cast<uint32_t>(index) << 24;
247 static uint32_t packTypeFields(
248 IterNextIndex index, IterSpecialization spec, uint16_t layout) {
249 return static_cast<uint32_t>(index) << 24 |
250 static_cast<uint32_t>(spec.as_byte) << 16 |
251 static_cast<uint32_t>(layout);
254 // JIT helpers used for specializing iterators.
255 static constexpr size_t baseOffset() {
256 return offsetof(IterImpl, m_data);
258 static constexpr size_t baseSize() {
259 return sizeof(m_data);
261 static constexpr size_t typeOffset() {
262 return offsetof(IterImpl, m_typeFields);
264 static constexpr size_t typeSize() {
265 return sizeof(m_typeFields);
267 static constexpr size_t posOffset() {
268 return offsetof(IterImpl, m_pos);
270 static constexpr size_t posSize() {
271 return sizeof(m_pos);
273 static constexpr size_t endOffset() {
274 return offsetof(IterImpl, m_end);
276 static constexpr size_t endSize() {
277 return sizeof(m_end);
280 // When we specialize an iterator, we must *set* all m_type components (so as
281 // to be compatible with native helpers) but we only need to check this byte.
282 static constexpr size_t specializationOffset() {
283 return offsetof(IterImpl, m_specialization);
286 // ObjectData bases have this additional bit set; ArrayData bases do not.
287 static constexpr intptr_t objectBaseTag() {
288 return 0b1;
291 private:
292 template<IterTypeOp Type>
293 friend int64_t new_iter_array(Iter*, ArrayData*, TypedValue*);
294 template<IterTypeOp Type>
295 friend int64_t new_iter_array_key(Iter*, ArrayData*, TypedValue*,
296 TypedValue*);
297 template<bool HasKey, bool Local>
298 friend int64_t iter_next_packed_pointer(
299 Iter*, TypedValue*, TypedValue*, ArrayData*);
300 template<bool HasKey, bool Local>
301 friend int64_t iter_next_mixed_pointer(
302 Iter*, TypedValue*, TypedValue*, ArrayData*);
304 template <bool incRef = true>
305 void arrInit(const ArrayData* arr);
307 template <bool incRef>
308 void objInit(ObjectData* obj);
310 // Set all IterImpl fields for iteration over an array:
311 // - m_data is either the array, or null (for local iterators).
312 // - The type fields union is set based on the array type.
313 // - m_pos and m_end are set based on its virtual iter helpers.
314 template <bool Local = false>
315 void setArrayData(const ArrayData* ad) {
316 assertx((intptr_t(ad) & objectBaseTag()) == 0);
317 assertx(!Local || ad);
318 m_data = Local ? nullptr : ad;
319 setArrayNext(IterNextIndex::Array);
320 if (ad != nullptr) {
321 if (ad->isVanillaVec()) {
322 setArrayNext(IterNextIndex::VanillaVec);
323 } else if (ad->isVanillaDict()) {
324 setArrayNext(IterNextIndex::ArrayMixed);
326 m_pos = ad->iter_begin();
327 m_end = ad->iter_end();
331 // Set all IterImpl fields for iteration over an object:
332 // - m_data is is always the object, with the lowest bit set as a flag.
333 // - We set the type fields union here.
334 void setObject(ObjectData* obj) {
335 assertx((intptr_t(obj) & objectBaseTag()) == 0);
336 m_obj = (ObjectData*)((intptr_t)obj | objectBaseTag());
337 m_typeFields = packTypeFields(IterNextIndex::Object);
338 assertx(m_nextHelperIdx == IterNextIndex::Object);
339 assertx(!m_specialization.specialized);
342 // Set the type fields of an array. These fields are packed so that we
343 // can set them with a single mov-immediate to the union.
344 void setArrayNext(IterNextIndex index) {
345 m_typeFields = packTypeFields(index);
346 assertx(m_nextHelperIdx == index);
347 assertx(!m_specialization.specialized);
350 public:
351 // The iterator base. Will be null for local iterators. We set the lowest
352 // bit for object iterators to distinguish them from array iterators.
353 union {
354 const ArrayData* m_data;
355 ObjectData* m_obj;
357 // This field is a union so new_iter_array can set it in one instruction.
358 union {
359 struct {
360 uint16_t m_layout;
361 IterSpecialization m_specialization;
362 IterNextIndex m_nextHelperIdx;
364 uint32_t m_typeFields;
366 // Current position. Beware that when m_data is null, m_pos is uninitialized.
367 // For the pointer iteration types, we use the appropriate pointers instead.
368 union {
369 size_t m_pos;
370 UnalignedTypedValue* m_unaligned_elm;
371 VanillaDictElm* m_mixed_elm;
373 union {
374 size_t m_end;
375 UnalignedTypedValue* m_unaligned_end;
376 VanillaDictElm* m_mixed_end;
379 // These elements are always referenced elsewhere, either in the m_data field
380 // of this iterator or in a local. (If we weren't using pointer iteration, we
381 // would track elements by index, not by pointer, but GC would still work.)
382 TYPE_SCAN_IGNORE_FIELD(m_mixed_end);
383 TYPE_SCAN_IGNORE_FIELD(m_unaligned_end);
384 TYPE_SCAN_IGNORE_FIELD(m_mixed_elm);
385 TYPE_SCAN_IGNORE_FIELD(m_unaligned_elm);
388 ///////////////////////////////////////////////////////////////////////////////
391 * The iterator API used by the interpreter and the JIT. This API is relatively
392 * limited, because there are only two ways to interact with iterators in Hack:
393 * 1. In a "foreach" loop, using the *IterInit* / *IterNext* bytecodes.
394 * 2. As a delegated generator ("yield from").
396 * (*IterInit* here refers to {IterInit, IterInitK, LIterInit, LIterInitK}).
398 * The methods exposed here should be sufficient to implement both kinds of
399 * iterator behavior. To speed up "foreach" loops, we also provide helpers
400 * implementing *IterInit* / *IterNext* through helpers below.
402 * These helpers are faster than using the Iter class's methods directly
403 * because they do one vtable lookup on the array type and then execute the
404 * advance / bounds check / output key-value sequence based on that lookup,
405 * rather than doing a separate vtable lookup for each step.
407 * NOTE: If you initialize an iterator using the faster init helpers, you MUST
408 * use the faster next helpers for IterNext ops. That's because the helpers may
409 * make iterators that use pointer iteration, which Iter::next doesn't handle.
410 * doesn't handle. This invariant is checked in debug builds.
412 * In practice, this constraint shouldn't be a problem, because we always use
413 * the helpers to do IterNext. That's true both in the interpreter and the JIT.
415 struct alignas(16) Iter {
416 Iter() = delete;
417 ~Iter() = delete;
419 // Returns true if the base is non-empty. Only used for non-local iterators.
420 // For local iterators, use new_iter_array / new_iter_array_key below.
421 bool init(TypedValue* base);
423 // Returns true if there are more elems. Only used for non-local iterators.
424 // For local iterators, use liter_next_ind / liter_next_key_ind below.
425 bool next();
427 // Returns true if the iterator is at its end.
428 bool end() const { return m_iter.end(); }
430 // Get the current key and value. Assumes that the iter is not at its end.
431 // These methods will inc-ref the key and value before returning it.
432 Variant key() { return m_iter.first(); }
433 Variant val() { return m_iter.second(); };
435 // It's valid to call end() on a killed iter, but the iter is otherwise dead.
436 // In debug builds, this method will overwrite the iterator with garbage.
437 void kill() { m_iter.kill(); }
439 // Dec-refs the base, for non-local iters. Safe to call for local iters.
440 void free();
442 // Debug string, used when printing a frame.
443 std::string toString() const;
445 private:
446 // Used to implement the separate helper functions below. These functions
447 // peek into the Iter and directly manipulate m_iter's fields.
448 friend IterImpl* unwrap(Iter*);
450 IterImpl m_iter;
453 // Native helpers for the interpreter + JIT used to implement *IterInit* ops.
454 // These helpers return 1 if the base has any elements and 0 otherwise.
455 // (They would return a bool, but native method calls from the JIT produce GP
456 // register outputs, so we extend the return type to an int64_t.)
458 // If these helpers return 1, they set `val` (and `key`, for key-value iters)
459 // from the first key-value pair of the base.
461 // For non-local iters, if these helpers return 0, they also dec-ref the base.
463 // For the array helpers, first provide an IterTypeOp to get an IterInit helper
464 // to call, then call it. This indirection lets us burn the appropriate helper
465 // into the JIT (where we know IterTypeOp statically). For objects, we don't
466 // need it because the type is always NonLocal.
467 using IterInitArr = int64_t(*)(Iter*, ArrayData*, TypedValue*);
468 using IterInitArrKey = int64_t(*)(Iter*, ArrayData*, TypedValue*, TypedValue*);
470 IterInitArr new_iter_array_helper(IterTypeOp type);
471 IterInitArrKey new_iter_array_key_helper(IterTypeOp type);
473 int64_t new_iter_object(Iter* dest, ObjectData* obj, Class* ctx,
474 TypedValue* val, TypedValue* key);
477 // Native helpers for the interpreter + JIT used to implement *IterInit* ops.
478 // These helpers return 1 if the base has more elements and 0 otherwise.
479 // (As above, they return a logical bool which we extend to a GP register.)
481 // If these helpers return 1, they set `val` (and `key`, for key-value iters)
482 // from the next key-value pair of the base.
484 // For non-local iters, if these helpers return 0, they also dec-ref the base.
485 NEVER_INLINE int64_t iter_next_ind(Iter* iter, TypedValue* valOut);
486 NEVER_INLINE int64_t iter_next_key_ind(Iter* iter, TypedValue* valOut, TypedValue* keyOut);
487 NEVER_INLINE int64_t liter_next_ind(Iter*, TypedValue*, ArrayData*);
488 NEVER_INLINE int64_t liter_next_key_ind(Iter*, TypedValue*, TypedValue*, ArrayData*);
490 //////////////////////////////////////////////////////////////////////