Make comparison IR ops layout agnostic
[hiphop-php.git] / hphp / runtime / base / array-provenance.cpp
blob81eb04a78450c7c8f7fb2056f683ca8ac95936ec
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/base/array-provenance.h"
19 #include "hphp/runtime/base/apc-array.h"
20 #include "hphp/runtime/base/apc-typed-value.h"
21 #include "hphp/runtime/base/array-data.h"
22 #include "hphp/runtime/base/backtrace.h"
23 #include "hphp/runtime/base/init-fini-node.h"
24 #include "hphp/runtime/base/mixed-array.h"
25 #include "hphp/runtime/base/string-data.h"
26 #include "hphp/runtime/base/packed-array.h"
27 #include "hphp/runtime/vm/vm-regs.h"
28 #include "hphp/runtime/vm/srckey.h"
30 #include "hphp/util/stack-trace.h"
31 #include "hphp/util/rds-local.h"
32 #include "hphp/util/type-scan.h"
33 #include "hphp/util/type-traits.h"
35 #include <folly/AtomicHashMap.h>
36 #include <folly/container/F14Map.h>
37 #include <folly/Format.h>
38 #include <folly/SharedMutex.h>
39 #include <tbb/concurrent_hash_map.h>
41 #include <type_traits>
43 namespace HPHP { namespace arrprov {
45 TRACE_SET_MOD(runtime);
47 ////////////////////////////////////////////////////////////////////////////////
49 namespace {
51 static auto constexpr kKindMask = 0x7;
53 Tag::Kind extractKind(const char* ptr) {
54 return static_cast<Tag::Kind>(uintptr_t(ptr) & kKindMask);
57 const StringData* extractName(const char* ptr) {
58 return reinterpret_cast<const StringData*>(uintptr_t(ptr) & ~kKindMask);
61 const char* packKindAndName(Tag::Kind kind, const StringData* name) {
62 auto const ptr = reinterpret_cast<uintptr_t>(name);
63 assertx(!(ptr & kKindMask));
64 return reinterpret_cast<const char*>(ptr + static_cast<uintptr_t>(kind));
69 ////////////////////////////////////////////////////////////////////////////////
71 Tag::Tag(Tag::Kind kind, const StringData* name, int32_t line)
72 : m_name(packKindAndName(kind, name))
73 , m_line(line) {}
75 Tag::Kind Tag::kind() const {
76 return extractKind(m_name.get());
78 const StringData* Tag::name() const {
79 return extractName(m_name.get());
81 int32_t Tag::line() const {
82 return kind() == Kind::Known ? m_line : -1;
84 uint64_t Tag::hash() const {
85 return folly::hash::hash_combine(uintptr_t(m_name.get()), m_line);
88 bool Tag::operator==(const Tag& other) const {
89 return m_name == other.m_name && m_line == other.m_line;
91 bool Tag::operator!=(const Tag& other) const {
92 return m_name != other.m_name || m_line != other.m_line;
95 std::string Tag::toString() const {
96 switch (kind()) {
97 case Kind::Invalid:
98 return "unknown location (no tag)";
99 case Kind::Known:
100 return folly::sformat("{}:{}", name()->slice(), line());
101 case Kind::UnknownRepo:
102 return "unknown location (repo union)";
103 case Kind::KnownTraitMerge:
104 return folly::sformat("{}:{} (trait xinit merge)", name()->slice(), -1);
105 case Kind::KnownLargeEnum:
106 return folly::sformat("{}:{} (large enum)", name()->slice(), -1);
107 case Kind::RuntimeLocation:
108 return folly::sformat("{} (c++ runtime location)", name()->slice());
109 case Kind::RuntimeLocationPoison:
110 return folly::sformat("unknown location (poisoned c++ runtime location was {})",
111 name()->slice());
113 always_assert(false);
116 ///////////////////////////////////////////////////////////////////////////////
118 namespace {
120 RDS_LOCAL(Tag, rl_tag_override);
121 RDS_LOCAL(ArrayProvenanceTable, rl_array_provenance);
124 * Flush the table after each request since none of the ArrayData*s will be
125 * valid anymore.
127 InitFiniNode flushTable([]{
128 if (!RO::EvalArrayProvenance) return;
129 rl_array_provenance->tags.clear();
130 assert_flog(!rl_tag_override->valid(),
131 "contents: {}",
132 rl_tag_override->toString());
133 }, InitFiniNode::When::RequestFini);
135 } // anonymous namespace
137 ///////////////////////////////////////////////////////////////////////////////
139 bool arrayWantsTag(const ArrayData* ad) {
140 return RO::EvalArrayProvenance && ad->isDVArray() && !ad->isLegacyArray();
143 bool arrayWantsTag(const APCArray* a) {
144 return RO::EvalArrayProvenance && (a->isVArray() || a->isDArray());
147 bool arrayWantsTag(const AsioExternalThreadEvent* ev) {
148 return true;
151 namespace {
154 * Whether provenance for a given array should be request-local.
156 * True for refcounted request arrays, else false.
158 bool wants_local_prov(const ArrayData* ad) { return ad->isRefCounted(); }
159 constexpr bool wants_local_prov(const APCArray* a) { return false; }
160 constexpr bool wants_local_prov(const AsioExternalThreadEvent* ev) {
161 return true;
165 * Get the provenance slot for a non-request array.
167 Tag& tag_for_nonreq(ArrayData* ad) {
168 assertx(arrayWantsTag(ad));
169 assertx(!wants_local_prov(ad));
171 auto const mem = reinterpret_cast<char*>(ad)
172 - sizeof(Tag)
173 - (ad->hasStrKeyTable() ? sizeof(StrKeyTable) : 0)
174 - (ad->hasApcTv() ? sizeof(APCTypedValue) : 0);
176 return *reinterpret_cast<Tag*>(mem);
179 Tag& tag_for_nonreq(APCArray* a) {
180 auto const mem = reinterpret_cast<char*>(a) - sizeof(Tag);
181 return *reinterpret_cast<Tag*>(mem);
184 Tag& tag_for_nonreq(AsioExternalThreadEvent*) {
185 always_assert(false); // only req-local provenance is supported
188 template<typename A>
189 Tag tag_for_nonreq(const A* a) { return tag_for_nonreq(const_cast<A*>(a)); }
192 * Used to override the provenance tag reported for ArrayData*'s in a given
193 * thread.
195 * This is pretty hacky, but it's only used for one specific purpose: for
196 * obtaining a copy of a static array which has specific provenance.
198 * The static array cache is set up to distinguish arrays by provenance tag.
199 * However, it's a tbb::concurrent_hash_set, which we can't jam a tag into.
200 * Instead, its hash and equal functions look up the provenance tag of an array
201 * in order to allow for multiple identical static arrays with different source
202 * tags.
204 * As a result, there's no real way to thread a tag into the lookups and
205 * inserts of the hash set. We could pass in tagged temporary empty arrays,
206 * but we don't want to keep allocating those. We could keep one around for
207 * each thread... but that's pretty much the moral equivalent of doing things
208 * this way:
210 * So instead, we have a thread-local tag that is only "active" when we're
211 * trying to retrieve or create a specifically tagged copy of a static array,
212 * which facilitates the desired behavior in the static array cache.
214 thread_local folly::Optional<Tag> tl_tag_override = folly::none;
216 template<typename A>
217 Tag getTagImpl(const A* a) {
218 using ProvenanceTable = decltype(rl_array_provenance->tags);
220 auto const get = [] (
221 const A* a,
222 const ProvenanceTable& tbl
223 ) -> Tag {
224 auto const it = tbl.find(a);
225 if (it == tbl.cend()) return {};
226 assertx(it->second.valid());
227 return it->second;
230 if (wants_local_prov(a)) {
231 return get(a, rl_array_provenance->tags);
232 } else {
233 return tag_for_nonreq(a);
237 template<Mode mode, typename A>
238 bool setTagImpl(A* a, Tag tag) {
239 assertx(tag.valid());
240 if (!arrayWantsTag(a)) return false;
242 if (wants_local_prov(a)) {
243 assertx(mode == Mode::Emplace || !getTag(a) || tl_tag_override);
244 rl_array_provenance->tags[a] = tag;
245 } else {
246 tag_for_nonreq(a) = tag;
248 return true;
251 template<typename A>
252 void clearTagImpl(const A* a) {
253 if (!arrayWantsTag(a)) return;
255 if (wants_local_prov(a)) {
256 rl_array_provenance->tags.erase(a);
257 } else {
258 tag_for_nonreq(a) = Tag{};
263 } // namespace
265 Tag getTag(const ArrayData* ad) {
266 if (tl_tag_override) return *tl_tag_override;
267 if (!ad->hasProvenanceData()) return {};
268 auto const tag = getTagImpl(ad);
269 assertx(tag.valid());
270 return tag;
272 Tag getTag(const APCArray* a) {
273 return getTagImpl(a);
275 Tag getTag(const AsioExternalThreadEvent* ev) {
276 return getTagImpl(ev);
279 template<Mode mode>
280 void setTag(ArrayData* ad, Tag tag) {
281 if (setTagImpl<mode>(ad, tag)) {
282 ad->setHasProvenanceData(true);
285 template<Mode mode>
286 void setTag(APCArray* a, Tag tag) {
287 setTagImpl<mode>(a, tag);
290 template <Mode mode>
291 void setTag(AsioExternalThreadEvent* ev, Tag tag) {
292 setTagImpl<mode>(ev, tag);
295 template void setTag<Mode::Insert>(ArrayData*, Tag);
296 template void setTag<Mode::Emplace>(ArrayData*, Tag);
297 template void setTag<Mode::Insert>(APCArray*, Tag);
298 template void setTag<Mode::Emplace>(APCArray*, Tag);
299 template void setTag<Mode::Insert>(AsioExternalThreadEvent*, Tag);
300 template void setTag<Mode::Emplace>(AsioExternalThreadEvent*, Tag);
302 void clearTag(ArrayData* ad) {
303 ad->setHasProvenanceData(false);
304 clearTagImpl(ad);
306 void clearTag(APCArray* a) {
307 clearTagImpl(a);
309 void clearTag(AsioExternalThreadEvent* ev) {
310 clearTagImpl(ev);
313 void reassignTag(ArrayData* ad) {
314 if (arrayWantsTag(ad)) {
315 if (auto const tag = tagFromPC()) {
316 setTag<Mode::Emplace>(ad, tag);
317 return;
321 clearTag(ad);
324 ArrayData* tagStaticArr(ArrayData* ad, Tag tag /* = {} */) {
325 assertx(RO::EvalArrayProvenance);
326 assertx(ad->isStatic());
327 assertx(arrayWantsTag(ad));
329 if (!tag.valid()) tag = tagFromPC();
330 if (!tag.valid()) return ad;
332 tl_tag_override = tag;
333 SCOPE_EXIT { tl_tag_override = folly::none; };
335 ArrayData::GetScalarArray(&ad, tag);
336 return ad;
339 ///////////////////////////////////////////////////////////////////////////////
341 TagOverride::TagOverride(Tag tag)
342 : m_valid(RO::EvalArrayProvenance)
344 if (m_valid) {
345 m_saved_tag = *rl_tag_override;
346 *rl_tag_override = tag;
350 TagOverride::TagOverride(Tag tag, TagOverride::ForceTag)
351 : m_valid(true)
353 if (m_valid) {
354 m_saved_tag = *rl_tag_override;
355 *rl_tag_override = tag;
359 TagOverride::~TagOverride() {
360 if (m_valid) {
361 *rl_tag_override = m_saved_tag;
365 Tag tagFromPC() {
366 auto log_violation = [&](const char* why) {
367 auto const rate = RO::EvalLogArrayProvenanceDiagnosticsSampleRate;
368 if (StructuredLog::coinflip(rate)) {
369 StructuredLogEntry sle;
370 sle.setStr("reason", why);
371 sle.setStackTrace("stack", StackTrace{StackTrace::Force{}});
372 FTRACE_MOD(Trace::runtime, 2, "arrprov {} {}\n", why, show(sle));
373 StructuredLog::log("hphp_arrprov_diagnostics", sle);
377 if (rl_tag_override->valid()) {
378 if (rl_tag_override->kind() == Tag::Kind::RuntimeLocationPoison) {
379 log_violation("poison");
381 return *rl_tag_override;
384 VMRegAnchor _(VMRegAnchor::Soft);
386 if (tl_regState != VMRegState::CLEAN || vmfp() == nullptr) {
387 log_violation("no_fixup");
388 return {};
391 auto const make_tag = [&] (
392 const ActRec* fp,
393 Offset offset
395 auto const func = fp->func();
396 auto const unit = fp->unit();
397 // Builtins have empty filenames, so use the unit; else use func->filename
398 // in order to resolve the original filenames of flattened traits.
399 auto const file = func->isBuiltin() ? unit->filepath() : func->filename();
400 auto const line = unit->getLineNumber(offset);
401 return Tag { file, line };
404 auto const skip_frame = [] (const ActRec* fp) {
405 return !fp->func()->isProvenanceSkipFrame() &&
406 !fp->func()->isCPPBuiltin();
409 auto const tag = fromLeaf(make_tag, skip_frame);
410 assertx(!tag.valid() || tag.concrete());
411 return tag;
414 Tag tagFromSK(SrcKey sk) {
415 assert(sk.valid());
416 auto const unit = sk.unit();
417 auto const func = sk.func();
418 // Builtins have empty filenames, so use the unit; else use func->filename
419 // in order to resolve the original filenames of flattened traits.
420 auto const file = func->isBuiltin() ? unit->filepath() : func->filename();
421 auto const line = unit->getLineNumber(sk.offset());
422 return Tag { file, line };
425 ///////////////////////////////////////////////////////////////////////////////
427 namespace {
429 static auto const kMaxMutationStackDepth = 512;
431 template <typename Mutation>
432 struct MutationState {
433 Mutation& mutation;
434 const char* function_name;
435 bool recursive = true;
436 bool raised_stack_notice = false;
439 // Returns a copy of the given array that the caller may mutate in place.
440 // Iterator positions in the original array are also valid for the new one.
441 ArrayData* copy_if_needed(ArrayData* in, bool cow) {
442 TRACE(3, "%s %d-element rc %d %s array\n",
443 cow ? "Copying" : "Reusing", safe_cast<int32_t>(in->size()),
444 in->count(), ArrayData::kindToString(in->kind()));
445 auto const result = cow ? in->copy() : in;
446 assertx(result->hasExactlyOneRef());
447 assertx(result->iter_end() == in->iter_end());
448 return result;
451 // This function applies `state.mutation` to `tv` to get a modified array-like.
452 // Then, if `state.recursive` is set, it recursively applies the mutation to
453 // the values of the array-like. It does so with the minimum number of copies,
454 // mutating each array in-place if possible.
456 // `state.mutation` should take an ArrayData* and a `cow` param. If it can
457 // mutate the array in place (that is, either `cow` is false or no mutation is
458 // needed at all), it should do so and return nullptr. Otherwise, it must copy
459 // the array, mutate it, and return the updated result.
461 // We pass the mutation callback a `cow` param instead of checking ad->cowCheck
462 // to handle cases such as a refcount 1 array contained in a refcount 2 array;
463 // even though cowCheck return false for the refcount 1 array, we still need to
464 // copy it to get a new value to store in the COW-ed containing array.
466 // This method doesn't recurse into object properties; instead, if we encounter
467 // an object, we'll log (up to one) notice including `state.function_name`.
468 template <typename State>
469 ArrayData* apply_mutation(TypedValue tv, State& state,
470 bool cow = false, uint32_t depth = 0) {
471 if (depth == kMaxMutationStackDepth) {
472 if (!state.raised_stack_notice) {
473 raise_notice("%s stack depth exceeded!", state.function_name);
474 state.raised_stack_notice = true;
476 return nullptr;
479 if (!isArrayLikeType(type(tv))) {
480 return nullptr;
483 // Apply the mutation to the top-level array.
484 auto const in = val(tv).parr;
485 cow |= in->cowCheck();
486 auto result = state.mutation(in, cow);
487 if (!state.recursive) return result;
489 // Recursively apply the mutation to the array's contents.
490 auto const end = in->iter_end();
491 for (auto pos = in->iter_begin(); pos != end; pos = in->iter_advance(pos)) {
492 auto const prev = in->nvGetVal(pos);
493 auto const ad = apply_mutation(prev, state, cow, depth + 1);
494 if (!ad) continue;
495 auto const next = make_array_like_tv(ad);
496 result = result ? result : copy_if_needed(in, cow);
498 assertx(!result->cowCheck());
499 if (in->hasVanillaPackedLayout()) {
500 tvMove(next, PackedArray::LvalUncheckedInt(result, pos));
501 } else if (in->hasVanillaMixedLayout()) {
502 tvMove(next, &MixedArray::asMixed(result)->data()[pos].data);
503 } else {
504 auto const key = in->nvGetKey(pos);
505 auto const escalated = isStringType(type(key))
506 ? result->set(val(key).pstr, make_array_like_tv(ad))
507 : result->set(val(key).num, make_array_like_tv(ad));
508 assertx(escalated->size() == in->size());
509 if (escalated == result) continue;
510 if (result != in) result->release();
511 result = escalated;
514 return result == in ? nullptr : result;
517 TypedValue markTvImpl(TypedValue in, bool legacy, bool recursive) {
518 // Closure state: whether or not we've raised notices for array-likes.
519 auto raised_hack_array_notice = false;
520 auto raised_non_hack_array_notice = false;
521 auto warn_once = [](bool& warned, const char* message) {
522 if (!warned) raise_warning("%s", message);
523 warned = true;
526 // The closure: pre-HAM, tag dvarrays and notice on vec / dicts / PHP arrays;
527 // post-HAM, tag vecs and dicts and notice on any other array-like inputs.
528 auto const mark_tv = [&](ArrayData* ad, bool cow) -> ArrayData* {
529 if (!RO::EvalHackArrDVArrs) {
530 if (ad->isVecType()) {
531 warn_once(raised_hack_array_notice, Strings::ARRAY_MARK_LEGACY_VEC);
532 return nullptr;
533 } else if (ad->isDictType()) {
534 warn_once(raised_hack_array_notice, Strings::ARRAY_MARK_LEGACY_DICT);
535 return nullptr;
536 } else if (ad->isNotDVArray()) {
537 warn_once(raised_non_hack_array_notice,
538 "array_mark_legacy expects a varray or darray");
539 return nullptr;
541 } else if (!ad->isVecType() && !ad->isDictType()) {
542 warn_once(raised_non_hack_array_notice,
543 "array_mark_legacy expects a dict or vec");
544 return nullptr;
547 if (!RO::EvalHackArrDVArrs) assertx(ad->isDVArray());
548 if (RO::EvalHackArrDVArrs) assertx(ad->isVecType() || ad->isDictType());
549 if (ad->isLegacyArray()) return nullptr;
551 auto result = copy_if_needed(ad, cow);
552 result->setLegacyArray(true);
553 return cow ? result : nullptr;
556 // Unmark legacy vecs/dicts to silence logging,
557 // e.g. while casting to regular vecs or dicts.
558 auto const unmark_tv = [&](ArrayData* ad, bool cow) -> ArrayData* {
559 if (RO::EvalHackArrDVArrs && !ad->isVecType() && !ad->isDictType()) {
560 return nullptr;
562 if (!RO::EvalHackArrDVArrs && !ad->isDVArray()) return nullptr;
563 if (!ad->isLegacyArray()) return nullptr;
565 auto result = copy_if_needed(ad, cow);
566 result->setLegacyArray(false);
567 return cow ? result : nullptr;
570 auto const ad = [&] {
571 if (legacy) {
572 auto state = MutationState<decltype(mark_tv)>{
573 mark_tv, "array_mark_legacy", recursive};
574 return apply_mutation(in, state);
575 } else {
576 auto state = MutationState<decltype(unmark_tv)>{
577 unmark_tv, "array_unmark_legacy", recursive};
578 return apply_mutation(in, state);
580 }();
581 return ad ? make_array_like_tv(ad) : tvReturn(tvAsCVarRef(&in));
584 TypedValue tagTvImpl(TypedValue in, int64_t flags) {
585 // Closure state: an expensive-to-compute provenance tag.
586 folly::Optional<arrprov::Tag> tag = folly::none;
588 // The closure: tag array-likes if they want tags, else leave them alone.
589 auto const tag_tv = [&](ArrayData* ad, bool cow) -> ArrayData* {
590 if (!arrprov::arrayWantsTag(ad)) return nullptr;
591 if (!tag) tag = arrprov::tagFromPC();
592 if (!tag->valid()) return nullptr;
593 auto result = copy_if_needed(ad, cow);
594 arrprov::setTag<arrprov::Mode::Emplace>(result, *tag);
595 return cow ? result : nullptr;
598 auto state = MutationState<decltype(tag_tv)>{tag_tv, "tag_provenance_here"};
599 auto const ad = apply_mutation(in, state);
600 return ad ? make_array_like_tv(ad) : tvReturn(tvAsCVarRef(&in));
605 TypedValue tagTvRecursively(TypedValue in, int64_t flags) {
606 if (!RO::EvalArrayProvenance) return tvReturn(tvAsCVarRef(&in));
607 return tagTvImpl(in, flags);
610 TypedValue markTvRecursively(TypedValue in, bool legacy) {
611 return markTvImpl(in, legacy, /*recursive=*/true);
614 TypedValue markTvShallow(TypedValue in, bool legacy) {
615 return markTvImpl(in, legacy, /*recursive=*/false);
618 ///////////////////////////////////////////////////////////////////////////////