Deshim VirtualExecutor in folly
[hiphop-php.git] / hphp / hhbbc / wide-func.cpp
blobf0547762274b9207cee4bfbef82f2b4a7e7766e5
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/wide-func.h"
18 #include "hphp/hhbbc/bc.h"
19 #include "hphp/hhbbc/func-util.h"
20 #include "hphp/hhbbc/interp.h"
21 #include "hphp/util/trace.h"
23 #ifdef __GNUG__
24 #include <cxxabi.h>
25 #endif // __GNUG__
27 namespace HPHP::HHBBC { namespace php {
29 //////////////////////////////////////////////////////////////////////
31 namespace {
33 //////////////////////////////////////////////////////////////////////
35 TRACE_SET_MOD(hhbbc_mem);
37 using Buffer = CompressedBytecode;
39 static_assert(std::is_same<LSString, LowStringPtr>::value);
41 constexpr int32_t kNoSrcLoc = -1;
43 constexpr uint8_t k16BitCode = 0xfe;
44 constexpr uint8_t k32BitCode = 0xff;
46 // HHBC uses "9-bit" opcodes...that is, we have more than 256 valid
47 // bytecode ops, but less than 512. How convenient!
48 constexpr uint8_t k9BitOpShift = 0xff;
50 // Most static strings will have addresses that fit in 4 bytes. We set
51 // this flag when encoding a string address that needs the full 8 bytes.
52 constexpr uint64_t kStringDataFlag = 0x1;
54 template <typename>
55 struct is_compact_vector : std::false_type {};
57 template <typename T, typename A>
58 struct is_compact_vector<CompactVector<T, A>> : std::true_type {};
// Returns a printable name for `type`; used to label trace output.
//
// On GNU-compatible compilers the mangled name is passed through
// __cxa_demangle. That call reports failure through `status` and by
// returning nullptr; constructing a std::string from a null pointer is
// undefined behavior, so in that case we fall back to the raw type.name().
std::string name(const std::type_info& type) {
#ifdef __GNUG__
  auto status = int{0};
  // The demangled buffer is malloc'd by the ABI library; free() it on exit.
  std::unique_ptr<char, decltype(&std::free)> demangled(
    __cxxabiv1::__cxa_demangle(type.name(), nullptr, nullptr, &status),
    &std::free);
  if (status != 0 || !demangled) return type.name();
  return demangled.get();
#else
  return type.name();
#endif // __GNUG__
}
73 BytecodeVec decodeBytecodeVec(const Buffer& buffer, size_t& pos);
74 void encodeBytecodeVec(Buffer& buffer, const BytecodeVec& bcs);
76 //////////////////////////////////////////////////////////////////////
78 template <typename T>
79 T decode_as_bytes(const Buffer& buffer, size_t& pos) {
80 static_assert(std::is_trivially_copyable<T>::value);
81 alignas(alignof(T)) char data[sizeof(T)];
82 memmove(&data[0], &buffer[pos], sizeof(T));
83 pos += sizeof(T);
84 return *reinterpret_cast<const T*>(&data[0]);
87 #define DECODE_MEMBER(x) decode<decltype(std::declval<T>().x)>(buffer, pos)
89 template <typename T>
90 T decode(const Buffer& buffer, size_t& pos) {
91 assertx(pos < buffer.size());
92 ITRACE(5, "at {}: {}\n", pos, name(typeid(T)));
93 Trace::Indent _;
95 if constexpr (std::is_same<T, BlockUpdateInfo>::value) {
96 T result;
97 result.fallthrough = DECODE_MEMBER(fallthrough);
98 result.unchangedBcs = DECODE_MEMBER(unchangedBcs);
99 result.replacedBcs = decodeBytecodeVec(buffer, pos);
100 return result;
103 if constexpr (std::is_same<T, FCallArgs>::value) {
104 using FCA = FCallArgsBase;
105 auto const base = decode<FCA>(buffer, pos);
106 auto const context = decode<LSString>(buffer, pos);
107 auto const aeTarget = decode<BlockId>(buffer, pos) + NoBlockId;
108 auto inout = std::unique_ptr<uint8_t[]>();
109 if (base.flags & FCallArgsFlags::EnforceInOut) {
110 auto const bytes = (base.numArgs + 7) / 8;
111 inout = std::make_unique<uint8_t[]>(bytes);
112 memmove(inout.get(), &buffer[pos], bytes);
113 pos += bytes;
115 auto readonly = std::unique_ptr<uint8_t[]>();
116 if (base.flags & FCallArgsFlags::EnforceReadonly) {
117 auto const bytes = (base.numArgs + 7) / 8;
118 readonly = std::make_unique<uint8_t[]>(bytes);
119 memmove(readonly.get(), &buffer[pos], bytes);
120 pos += bytes;
122 return FCallArgs(static_cast<FCallArgsFlags>(base.flags & FCA::kInternalFlags),
123 base.numArgs, base.numRets, std::move(inout),
124 std::move(readonly), aeTarget, context);
127 if constexpr (std::is_same<T, IterArgs>::value) {
128 auto const flags = DECODE_MEMBER(flags);
129 auto const iterId = DECODE_MEMBER(iterId);
130 auto const keyId = DECODE_MEMBER(keyId) + IterArgs::kNoKey;
131 auto const valId = DECODE_MEMBER(valId);
132 return T(flags, iterId, keyId, valId);
135 if constexpr (std::is_same<T, LocalRange>::value) {
136 auto const first = DECODE_MEMBER(first);
137 auto const count = DECODE_MEMBER(count);
138 return T{first, count};
141 if constexpr (std::is_same<T, LowStringPtr>::value) {
142 auto const lo = decode_as_bytes<uint32_t>(buffer, pos);
143 if (!(lo & kStringDataFlag)) {
144 return LowStringPtr(reinterpret_cast<const StringData*>(lo));
146 auto const hi = decode_as_bytes<uint32_t>(buffer, pos);
147 auto const both = (uint64_t(hi) << 32) | (uint64_t(lo) & ~kStringDataFlag);
148 return LowStringPtr(reinterpret_cast<const StringData*>(both));
151 if constexpr (std::is_same<T, MKey>::value) {
152 auto const mcode = DECODE_MEMBER(mcode);
153 switch (mcode) {
154 case MET: case MPT: case MQT: {
155 auto const litstr = DECODE_MEMBER(litstr);
156 return T(mcode, litstr, DECODE_MEMBER(rop));
158 case MEI: case MEC: case MPC: {
159 auto const iva = DECODE_MEMBER(int64);
160 return T(mcode, iva, DECODE_MEMBER(rop));
162 case MEL: case MPL: {
163 auto const local = DECODE_MEMBER(local);
164 return T(mcode, local, DECODE_MEMBER(rop));
166 case MW:
167 return T();
171 if constexpr (std::is_same<T, NamedLocal>::value) {
172 auto const base = safe_cast<int32_t>(decode<uint32_t>(buffer, pos));
173 auto const name = base + kInvalidLocalName;
174 auto const id = DECODE_MEMBER(id) + NoLocalId;
175 return T(name, id);
178 if constexpr (std::is_same<T, SSwitchTabEnt>::value) {
179 auto const first = DECODE_MEMBER(first);
180 auto const second = DECODE_MEMBER(second);
181 return T{first, second};
184 if constexpr (is_compact_vector<T>::value) {
185 auto data = T(decode<uint32_t>(buffer, pos));
186 for (auto& item : data) {
187 using Item = typename std::remove_reference<decltype(item)>::type;
188 item = decode<Item>(buffer, pos);
190 return data;
193 if constexpr (std::is_same<T, uint32_t>::value) {
194 auto const byte = decode_as_bytes<uint8_t>(buffer, pos);
195 return byte == k32BitCode ? decode_as_bytes<uint32_t>(buffer, pos) :
196 byte == k16BitCode ? decode_as_bytes<uint16_t>(buffer, pos) : byte;
199 if constexpr (std::is_same<T, Op>::value) {
200 static_assert(sizeof(Op) <= sizeof(uint16_t), "");
201 auto const byte = decode_as_bytes<uint8_t>(buffer, pos);
202 if (sizeof(Op) == sizeof(uint8_t) || byte < k9BitOpShift) return Op(byte);
203 auto const next = decode_as_bytes<uint8_t>(buffer, pos);
204 return Op(safe_cast<uint16_t>(next) + k9BitOpShift);
207 if constexpr (std::is_trivially_copyable<T>::value) {
208 return decode_as_bytes<T>(buffer, pos);
212 #undef DECODE_MEMBER
214 //////////////////////////////////////////////////////////////////////
216 template <typename T>
217 void encode_as_bytes(Buffer& buffer, const T& data) {
218 static_assert(std::is_trivially_copyable<T>::value);
219 auto const ptr = reinterpret_cast<const char*>(&data);
220 buffer.insert(buffer.end(), ptr, ptr + sizeof(T));
223 template <typename T>
224 void encode(Buffer& buffer, const T& data) {
225 ITRACE(5, "at {}: {}\n", buffer.size(), name(typeid(T)));
226 Trace::Indent _;
228 if constexpr (std::is_same<T, BlockUpdateInfo>::value) {
229 encode(buffer, data.fallthrough);
230 encode(buffer, data.unchangedBcs);
231 encodeBytecodeVec(buffer, data.replacedBcs);
233 } else if constexpr (std::is_same<T, FCallArgs>::value) {
234 auto base = data.base();
235 if (data.enforceInOut()) {
236 base.flags = base.flags | FCallArgsFlags::EnforceInOut;
238 if (data.enforceReadonly()) {
239 base.flags = base.flags | FCallArgsFlags::EnforceReadonly;
241 encode(buffer, base);
242 encode(buffer, data.context());
243 encode(buffer, data.asyncEagerTarget() - NoBlockId);
244 if (data.enforceInOut()) {
245 auto const start = buffer.size();
246 auto const bytes = (data.numArgs() + 7) / 8;
247 buffer.insert(buffer.end(), bytes, 0);
248 for (auto i = 0; i < data.numArgs(); i++) {
249 if (data.isInOut(i)) buffer[start + (i / 8)] |= (1 << (i % 8));
252 if (data.enforceReadonly()) {
253 auto const start = buffer.size();
254 auto const bytes = (data.numArgs() + 7) / 8;
255 buffer.insert(buffer.end(), bytes, 0);
256 for (auto i = 0; i < data.numArgs(); i++) {
257 if (data.isReadonly(i)) buffer[start + (i / 8)] |= (1 << (i % 8));
261 } else if constexpr (std::is_same<T, IterArgs>::value) {
262 encode(buffer, data.flags);
263 encode(buffer, data.iterId);
264 encode(buffer, data.keyId - IterArgs::kNoKey);
265 encode(buffer, data.valId);
267 } else if constexpr (std::is_same<T, LocalRange>::value) {
268 encode(buffer, data.first);
269 encode(buffer, data.count);
271 } else if constexpr (std::is_same<T, LowStringPtr>::value) {
272 static_assert(alignof(StringData) % 2 == 0);
273 auto const raw = uintptr_t(data.get());
274 if (raw <= std::numeric_limits<uint32_t>::max()) {
275 encode_as_bytes(buffer, safe_cast<uint32_t>(raw));
276 } else {
277 auto const hi = raw >> 32;
278 auto const lo = (raw & 0xffffffff) | kStringDataFlag;
279 encode_as_bytes(buffer, safe_cast<uint32_t>(lo));
280 encode_as_bytes(buffer, safe_cast<uint32_t>(hi));
283 } else if constexpr (std::is_same<T, MKey>::value) {
284 encode(buffer, data.mcode);
285 switch (data.mcode) {
286 case MET: case MPT: case MQT:
287 encode(buffer, data.litstr);
288 encode(buffer, data.rop);
289 break;
290 case MEI: case MEC: case MPC:
291 encode(buffer, data.int64);
292 encode(buffer, data.rop);
293 break;
294 case MEL: case MPL:
295 encode(buffer, data.local);
296 encode(buffer, data.rop);
297 break;
298 case MW:
299 break;
302 } else if constexpr (std::is_same<T, NamedLocal>::value) {
303 encode(buffer, safe_cast<uint32_t>(data.name - kInvalidLocalName));
304 encode(buffer, data.id - NoLocalId);
306 } else if constexpr (std::is_same<T, SSwitchTabEnt>::value) {
307 encode(buffer, data.first);
308 encode(buffer, data.second);
310 } else if constexpr (is_compact_vector<T>::value) {
311 encode(buffer, safe_cast<uint32_t>(data.size()));
312 for (auto const& item : data) encode(buffer, item);
314 } else if constexpr (std::is_same<T, uint32_t>::value) {
315 if (data < std::min(k16BitCode, k32BitCode)) {
316 encode_as_bytes(buffer, safe_cast<uint8_t>(data));
317 } else if (data <= std::numeric_limits<uint16_t>::max()) {
318 encode_as_bytes(buffer, k16BitCode);
319 encode_as_bytes(buffer, safe_cast<uint16_t>(data));
320 } else {
321 encode_as_bytes(buffer, k32BitCode);
322 encode_as_bytes(buffer, data);
325 } else if constexpr (std::is_same<T, Op>::value) {
326 static_assert(sizeof(Op) <= sizeof(uint16_t), "");
327 auto const raw = uint16_t(data);
328 if (sizeof(Op) == sizeof(uint8_t) || raw < k9BitOpShift) {
329 encode_as_bytes(buffer, safe_cast<uint8_t>(raw));
330 } else {
331 encode_as_bytes(buffer, k9BitOpShift);
332 encode_as_bytes(buffer, safe_cast<uint8_t>(raw - k9BitOpShift));
335 } else if constexpr (std::is_trivially_copyable<T>::value) {
336 encode_as_bytes(buffer, data);
340 //////////////////////////////////////////////////////////////////////
342 #define IMM_NA
343 #define IMM_ONE(x) IMM(x, 1)
344 #define IMM_TWO(x, y) IMM_ONE(x) IMM(y, 2)
345 #define IMM_THREE(x, y, z) IMM_TWO(x, y) IMM(z, 3)
346 #define IMM_FOUR(x, y, z, n) IMM_THREE(x, y, z) IMM(n, 4)
347 #define IMM_FIVE(x, y, z, n, m) IMM_FOUR(x, y, z, n) IMM(m, 5)
348 #define IMM_SIX(x, y, z, n, m, o) IMM_FIVE(x, y, z, n, m) IMM(o, 6)
350 BytecodeVec decodeBytecodeVec(const Buffer& buffer, size_t& pos) {
351 FTRACE(3, "\ndecodeBytecodeVec: {} bytes\n", buffer.size());
352 Trace::Indent _;
353 auto bcs = BytecodeVec{};
355 #define IMM(type, n) \
356 decode<decltype(std::declval<T>().IMM_NAME_##type(n))>(buffer, pos),
357 #define O(op, imms, ...) \
358 auto const decode_##op = [&] { \
359 using T = bc::op; \
360 return T { IMM_##imms }; \
362 OPCODES
363 #undef O
364 #undef IMM
366 bcs.resize(decode<uint32_t>(buffer, pos));
367 for (auto& inst : bcs) {
368 inst.op = decode<Op>(buffer, pos);
369 inst.srcLoc = safe_cast<int32_t>(decode<uint32_t>(buffer, pos)) + kNoSrcLoc;
370 ITRACE(4, "at {}: {}:\n", pos, opcodeToName(inst.op));
371 Trace::Indent _;
372 #define O(op, ...) \
373 case Op::op: new (&inst.op) bc::op(decode_##op()); break;
374 switch (inst.op) { OPCODES }
375 #undef O
378 bcs.shrink_to_fit();
379 return bcs;
382 void encodeBytecodeVec(Buffer& buffer, const BytecodeVec& bcs) {
383 FTRACE(3, "\nencodeBytecodeVec: {} elements\n", bcs.size());
384 Trace::Indent _;
386 #define IMM(type, n) encode(buffer, data.IMM_NAME_##type(n));
387 #define O(op, imms, ...) \
388 auto const encode_##op = [&](const bc::op& data) { \
389 IMM_##imms \
391 OPCODES
392 #undef O
393 #undef IMM
395 encode(buffer, safe_cast<uint32_t>(bcs.size()));
396 for (auto const& inst : bcs) {
397 encode(buffer, inst.op);
398 encode(buffer, safe_cast<uint32_t>(inst.srcLoc - kNoSrcLoc));
399 ITRACE(4, "at {}: {}\n", buffer.size(), opcodeToName(inst.op));
400 Trace::Indent _;
401 #define O(op, ...) case Op::op: encode_##op(inst.op); break;
402 switch (inst.op) { OPCODES }
403 #undef O
407 #undef IMM_NA
408 #undef IMM_ONE
409 #undef IMM_TWO
410 #undef IMM_THREE
411 #undef IMM_FOUR
412 #undef IMM_FIVE
413 #undef IMM_SIX
415 //////////////////////////////////////////////////////////////////////
417 BlockVec decodeBlockVec(const Buffer& buffer, size_t& pos) {
418 auto blocks = BlockVec{};
419 blocks.resize(decode<uint32_t>(buffer, pos));
420 for (auto& block : blocks) {
421 auto tmp = Block {
422 decodeBytecodeVec(buffer, pos),
423 decode<ExnNodeId>(buffer, pos) + NoExnNodeId,
424 decode<BlockId>(buffer, pos) + NoBlockId,
425 decode<BlockId>(buffer, pos) + NoBlockId,
426 {decode<uint8_t>(buffer, pos)}
428 block.emplace(std::move(tmp));
430 blocks.shrink_to_fit();
431 return blocks;
434 void encodeBlockVec(Buffer& buffer, const BlockVec& blocks) {
435 encode(buffer, safe_cast<uint32_t>(blocks.size()));
436 for (auto const& block : blocks) {
437 encodeBytecodeVec(buffer, block->hhbcs);
438 encode(buffer, block->exnNodeId - NoExnNodeId);
439 encode(buffer, block->fallthrough - NoBlockId);
440 encode(buffer, block->throwExit - NoBlockId);
441 encode(buffer, block->initializer);
443 buffer.shrink_to_fit();
446 //////////////////////////////////////////////////////////////////////
448 size_t estimateHeapSize(const BlockVec& blocks) {
449 auto result = blocks.size() * sizeof(decltype(blocks[0]));
450 for (auto const& block : blocks) {
451 result += sizeof(Block);
452 result += block->hhbcs.size() * sizeof(decltype(block->hhbcs[0]));
454 return result;
457 bool checkBlockVecs(const Func& func, const BlockVec& a, const BlockVec& b) {
458 always_assert(a.size() == b.size());
459 for (auto i = 0; i < a.size(); i++) {
460 auto const& ai = a[i];
461 auto const& bi = b[i];
462 always_assert(ai->hhbcs.size() == bi->hhbcs.size());
463 for (auto j = 0; j < ai->hhbcs.size(); j++) {
464 SCOPE_ASSERT_DETAIL("test_compression") {
465 return folly::format("Original:\n{}\n\nFinal:\n{}",
466 show(func, ai->hhbcs[j]),
467 show(func, bi->hhbcs[j])).str();
469 always_assert(ai->hhbcs[j] == bi->hhbcs[j]);
471 always_assert(ai->exnNodeId == bi->exnNodeId);
472 always_assert(ai->fallthrough == bi->fallthrough);
473 always_assert(ai->throwExit == bi->throwExit);
474 always_assert(ai->initializer == bi->initializer);
476 return true;
479 //////////////////////////////////////////////////////////////////////
483 //////////////////////////////////////////////////////////////////////
485 WideFunc::WideFunc(const Func* func, bool mut, bool create)
486 : m_func(const_cast<Func*>(func))
487 , m_mut(mut)
489 assertx(m_func);
490 FTRACE(2, "WideFunc::{}: {}\n",
491 m_mut ? "mut" : "cns",
492 func_fullname(*m_func));
493 if (mut && create && !m_func->rawBlocks) return;
494 always_assert_flog(
495 m_func->rawBlocks && !m_func->rawBlocks->empty(),
496 "Attempting to decompress empty bytecode for {}",
497 func_fullname(*m_func)
499 auto pos = size_t{0};
500 m_blocks = decodeBlockVec(*func->rawBlocks, pos);
501 assertx(pos == func->rawBlocks->size());
504 WideFunc::~WideFunc() {
505 if (!m_func) return;
506 FTRACE(2, "~WideFunc::{}: {}\n",
507 m_mut ? "mut" : "cns",
508 func_fullname(*m_func));
509 if (!m_mut) return;
510 auto buffer = Buffer{};
511 encodeBlockVec(buffer, m_blocks);
512 if (!m_func->rawBlocks || buffer != *m_func->rawBlocks) {
513 FTRACE(2, "~WideFunc::mut: updating blocks!\n");
514 m_func->rawBlocks.emplace(std::move(buffer));
518 void WideFunc::release() {
519 m_func = nullptr;
520 m_mut = false;
521 m_blocks.clear();
524 BlockVec WideFunc::uncompress(const CompressedBytecode& b) {
525 auto pos = size_t{0};
526 auto d = decodeBlockVec(b, pos);
527 assertx(pos == b.size());
528 return d;
531 CompressedBytecode WideFunc::compress(const BlockVec& v) {
532 Buffer buffer;
533 encodeBlockVec(buffer, v);
534 return buffer;
537 //////////////////////////////////////////////////////////////////////
541 //////////////////////////////////////////////////////////////////////
543 CompressedBlockUpdate::CompressedBlockUpdate(BlockUpdateInfo&& in) {
544 php::encode(raw, in);
545 in = {};
546 raw.shrink_to_fit();
549 void CompressedBlockUpdate::expand(BlockUpdateInfo& out) {
550 assertx(!raw.empty());
551 auto pos = size_t{0};
552 auto result = php::decode<BlockUpdateInfo>(raw, pos);
553 assertx(pos == raw.size());
554 out = std::move(result);
555 raw.clear();
558 //////////////////////////////////////////////////////////////////////