2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/util/asm-x64.h"
18 #include <folly/Format.h>
20 #include "hphp/util/safe-cast.h"
22 namespace HPHP
{ namespace jit
{
24 // These are in order according to the binary encoding of the X64 condition codes.
// Printable mnemonics for the sixteen condition codes, "O" through "G".
// The position of a name in the table is what callers index by.
26 const char* cc_names
[] = {
27 "O", "NO", "B", "AE", "E", "NE", "BE", "A",
28 "S", "NS", "P", "NP", "L", "GE", "LE", "G"
// Returns a fixed lowercase name ("nearest", "floor", "ceil", "truncate")
// for the given RoundDirection enumerator.
// NOTE(review): the switch head and whatever follows the last case are not
// visible in this view.
31 const char* show(RoundDirection rd
) {
33 case RoundDirection::nearest
: return "nearest";
34 case RoundDirection::floor
: return "floor";
35 case RoundDirection::ceil
: return "ceil";
36 case RoundDirection::truncate
: return "truncate";
// Decodes the instruction that starts at `ip`, one stage at a time:
// legacy prefixes, then REX/VEX/XOP prefixes, then the opcode, then the
// ModRM part, and finally the displacement and immediate bytes.
// NOTE(review): the loop bodies and any state reset are not visible here;
// presumably each loop advances `ip` past the prefix it consumed.
43 void DecodedInstruction::decode(uint8_t* ip
) {
// Keep going while decodePrefix() recognizes the current byte.
51 while (decodePrefix(ip
)) {
// A non-zero result from decodeRexVexXop() keeps the loop running.
54 while (int sz
= decodeRexVexXop(ip
)) {
// Each stage returns how many bytes to step over.
58 ip
+= decodeOpcode(ip
);
59 ip
+= decodeModRm(ip
);
// Skip the displacement and the immediate.
60 ip
+= m_offSz
+ m_immSz
;
// Recognizes a single legacy prefix byte, sets the matching bit in m_flags
// and returns true.
// NOTE(review): the switch head and the fall-through return are not visible;
// presumably it switches on *ip and returns false for any other byte.
// NOTE(review): the Intel SDM lists 0x2e as the "branch not taken" hint and
// 0x3e as the "branch taken" hint, so bTaken/bNotTaken below look swapped.
// Confirm before relying on these two flags.
64 bool DecodedInstruction::decodePrefix(uint8_t* ip
) {
// Lock and repeat prefixes.
66 case 0xf0: m_flags
.lock
= 1; return true;
67 case 0xf2: m_flags
.repNE
= 1; return true;
68 case 0xf3: m_flags
.rep
= 1; return true;
// Segment-override bytes (0x2e and 0x3e are recorded as branch hints).
70 case 0x26: m_flags
.es
= 1; return true;
71 case 0x2e: m_flags
.bTaken
= 1; return true;
72 case 0x36: m_flags
.ss
= 1; return true;
73 case 0x3e: m_flags
.bNotTaken
= 1; return true;
74 case 0x64: m_flags
.fs
= 1; return true;
75 case 0x65: m_flags
.gs
= 1; return true;
// Operand-size and address-size overrides.
77 case 0x66: m_flags
.opndSzOvr
= 1; return true;
78 case 0x67: m_flags
.addrSzOvr
= 1; return true;
// Decodes a REX prefix, or a VEX/XOP prefix, at `ip` and records the
// w/r/x/b/l bits, m_map_select and m_xtra_op.
// NOTE(review): large parts of this function are not visible in this view,
// including the return statements. Presumably the result is the number of
// prefix bytes consumed and 0 when there is no such prefix; confirm.
83 int DecodedInstruction::decodeRexVexXop(uint8_t* ip
) {
// A byte whose high nibble is 4 is a REX prefix; its low four bits are W, R, X, B.
84 if ((*ip
& 0xf0) == 0x40) {
86 m_flags
.w
= *ip
& 8 ? 1 : 0;
87 m_flags
.r
= *ip
& 4 ? 1 : 0;
88 m_flags
.x
= *ip
& 2 ? 1 : 0;
89 m_flags
.b
= *ip
& 1 ? 1 : 0;
100 // 0x8f is both a valid one-byte opcode and the first byte of the
101 // 3-byte XOP prefix. Figure out which one we have here by inspecting
// In this form r, x and b are stored inverted in the top bits of ip[1],
// and the low five bits of ip[1] select the opcode map.
111 m_flags
.r
= ip
[1] & 0x80 ? 0 : 1;
112 m_flags
.x
= ip
[1] & 0x40 ? 0 : 1;
113 m_flags
.b
= ip
[1] & 0x20 ? 0 : 1;
114 m_map_select
= ip
[1] & 0x1f;
// Map 0 is never valid here; without the xop flag only maps 1..3 are accepted.
115 assert(m_map_select
>= 1 && (m_flags
.xop
|| m_map_select
<= 3));
116 m_flags
.w
= ip
[2] & 0x80 ? 1 : 0;
// Another prefix form that carries only the inverted r bit in ip[1].
122 m_flags
.r
= ip
[1] & 0x80 ? 0 : 1;
130 // The final 7 bits of all VEX/XOP prefixes are the same:
// Bits 3..6 hold an inverted 4-bit value, bit 2 is the l flag.
131 m_xtra_op
= (~ip
[0] >> 3) & 0x0f;
132 m_flags
.l
= ip
[0] & 0x04 ? 1 : 0;
// NOTE(review): the switch subject is not visible; the three values map
// onto the same flags that the 0x66, 0xf3 and 0xf2 legacy prefixes set.
135 case 1: m_flags
.opndSzOvr
= 1; break;
136 case 2: m_flags
.rep
= 1; break;
137 case 3: m_flags
.repNE
= 1; break;
// Decodes the opcode at `ip` and hands over to the determineOperandsMapN()
// helper that matches m_map_select. decode() adds the result to `ip`.
// NOTE(review): the escape-byte handling before the switch is only partly
// visible, as is the return statement; presumably the result is the opcode
// length in bytes.
142 int DecodedInstruction::decodeOpcode(uint8_t* ip
) {
152 } else if (*ip
== 0x3a) {
// One helper per opcode map; any other map value is a decoder bug.
160 switch (m_map_select
) {
161 case 0: determineOperandsMap0(ip
); break;
162 case 1: determineOperandsMap1(ip
); break;
163 case 2: determineOperandsMap2(ip
); break;
164 case 3: determineOperandsMap3(ip
); break;
165 default: assert(false);
// For an opcode in map 0, works out which trailing fields the instruction
// has: m_flags.hasModRm, the immediate size (m_immSz), the offset size
// (m_offSz) and m_flags.picOff. Dispatch is on the high nibble of m_opcode.
// NOTE(review): most case labels are not visible in this view, so the
// opcode range each fragment below belongs to cannot be stated here.
170 void DecodedInstruction::determineOperandsMap0(uint8_t* ip
) {
171 switch (m_opcode
>> 4) {
176 if ((m_opcode
& 0x04) == 0) {
177 m_flags
.hasModRm
= true;
178 } else if ((m_opcode
& 0x07) == 4) {
179 } else if ((m_opcode
& 0x07) == 5) {
// Immediate is word-sized under an operand-size override, dword otherwise.
181 m_immSz
= m_flags
.opndSzOvr
? sz::word
: sz::dword
;
184 case 0x04: break; // REX
189 if ((m_opcode
& 0x0c) == 0x08) {
// Bit 1 of the opcode selects a byte immediate over the full-size one.
190 m_immSz
= m_opcode
& 0x02 ? sz::byte
:
191 m_flags
.opndSzOvr
? sz::word
: sz::dword
;
195 m_flags
.picOff
= true;
199 m_flags
.hasModRm
= true;
200 if ((m_opcode
& 0x0c) == 0) {
// Only low nibble 1 takes the full-size immediate; the rest take a byte.
201 m_immSz
= (m_opcode
& 0x0f) != 1 ? sz::byte
:
202 m_flags
.opndSzOvr
? sz::word
: sz::dword
;
208 if ((m_opcode
& 0x0c) == 0) {
// Offset is dword under an address-size override, qword otherwise.
209 m_offSz
= m_flags
.addrSzOvr
? sz::dword
: sz::qword
;
210 } else if ((m_opcode
& 0x0e) == 8) {
211 m_immSz
= !(m_opcode
& 1) ? sz::byte
:
212 m_flags
.opndSzOvr
? sz::word
: sz::dword
;
// Byte immediate when bit 3 is clear; otherwise qword if m_flags.w is set,
// else word or dword depending on the operand-size override.
216 m_immSz
= ((m_opcode
& 8) == 0 ? sz::byte
:
217 m_flags
.w
? sz::qword
:
218 m_flags
.opndSzOvr
? sz::word
: sz::dword
);
221 m_flags
.hasModRm
= !(m_opcode
& 8) && (m_opcode
& 6) != 2;
222 switch (m_opcode
& 0x0f) {
223 case 0: case 1: case 6: case 13:
230 m_immSz
= m_flags
.opndSzOvr
? sz::word
: sz::dword
;
239 m_flags
.hasModRm
= (m_opcode
& 0x0c) == 0;
240 m_immSz
= (m_opcode
& 0x0e) == 4 ? sz::byte
: sz::nosize
;
// `siz` starts as "no size"; the branches below (bodies not visible) may set it.
243 uint8_t siz
= sz::nosize
;
244 if ((m_opcode
& 0x08) == 0 || (m_opcode
& 0x0f) == 0xb) {
246 } else if ((m_opcode
& 0x0e) == 0x8) {
249 if (siz
!= sz::nosize
) {
250 if (!(m_opcode
& 0x04)) {
252 m_flags
.picOff
= true;
260 if ((m_opcode
& 0x06) == 0x06) {
261 m_flags
.hasModRm
= true;
// There is an immediate only when bits 3..5 of the byte after the opcode
// are all zero.
262 if (!(m_opcode
& 0x08) && !((ip
[1] >> 3) & 7)) {
263 m_immSz
= !(m_opcode
& 1) ? sz::byte
:
264 m_flags
.opndSzOvr
? sz::word
: sz::dword
;
// For an opcode in map 1 (the two-byte map whose first byte is 0x0f), works
// out m_flags.hasModRm, m_immSz and m_flags.picOff. Dispatch is on the high
// nibble of m_opcode; the instruction-pointer argument is unused.
// NOTE(review): most case labels are not visible in this view.
271 void DecodedInstruction::determineOperandsMap1(uint8_t* /*ip*/) {
272 switch (m_opcode
>> 4) {
274 if ((m_opcode
& 15) < 4 || (m_opcode
& 15) == 13) {
275 m_flags
.hasModRm
= true;
// All opcodes in these rows take a ModRM byte.
278 case 0x01: case 0x02: case 0x04: case 0x05: case 0x06:
279 m_flags
.hasModRm
= true;
// Every opcode in this row except low nibble 7 has ModRM; the first four
// also carry a byte immediate.
284 m_flags
.hasModRm
= (m_opcode
& 15) != 7;
285 m_immSz
= (m_opcode
& 15) < 4 ? sz::byte
: sz::nosize
;
289 m_flags
.picOff
= true;
292 m_flags
.hasModRm
= true;
295 m_flags
.hasModRm
= (m_opcode
& 7) >= 3;
296 if ((m_opcode
& 7) == 4) m_immSz
= sz::byte
;
299 m_flags
.hasModRm
= true;
300 if ((m_opcode
& 15) == 0x0a) m_immSz
= sz::byte
;
303 if (!(m_opcode
& 8)) {
304 m_flags
.hasModRm
= true;
305 switch (m_opcode
& 7) {
318 m_flags
.hasModRm
= true;
// Operand layout for opcode map 2: every opcode has a ModRM byte, and
// opcode 0x13 also carries a one-byte immediate. The argument is unused.
323 void DecodedInstruction::determineOperandsMap2(uint8_t* /*ip*/) {
324 m_flags
.hasModRm
= true;
325 if (m_opcode
== 0x13) m_immSz
= sz::byte
;
// Operand layout for opcode map 3: every opcode has a ModRM byte.
// The argument is unused.
// NOTE(review): anything after the hasModRm assignment is not visible here.
328 void DecodedInstruction::determineOperandsMap3(uint8_t* /*ip*/) {
329 m_flags
.hasModRm
= true;
// Examines the ModRM byte at `ip`. Returns 0 straight away when the
// instruction has no ModRM byte; otherwise records whether the operand is
// rip-relative (picOff) and whether a SIB byte follows (hasSib).
// NOTE(review): the bodies that set the displacement size and the final
// return are not visible; presumably the result is the number of ModRM and
// SIB bytes consumed.
333 int DecodedInstruction::decodeModRm(uint8_t* ip
) {
334 if (!m_flags
.hasModRm
) return 0;
// Top two bits 00 with low three bits 101 marks the operand as pic-relative.
336 if ((*ip
& 0xc7) == 0x05) {
337 m_flags
.picOff
= true;
// Top two bits other than 11 with low three bits 100 means a SIB byte follows.
340 if ((*ip
& 0xc0) != 0xc0 &&
341 (*ip
& 0x07) == 0x04) {
342 m_flags
.hasSib
= true;
// The remaining branches distinguish the three non-register values of the
// top two bits (00, 01, 10).
345 if ((*ip
& 0xc0) == 0x00) {
// With top bits 00, a SIB byte whose low three bits are 101 is a special case.
346 if (m_flags
.hasSib
&& (ip
[1] & 7) == 0x05) {
349 } else if ((*ip
& 0xc0) == 0x40) {
351 } else if ((*ip
& 0xc0) == 0x80) {
359 * Read the numeric value stored at `ip`. It's widened to 64 bits (if smaller
360 * than 8 serialized bytes) but the MSB's sign is preserved.
// Reads a `size`-byte signed value stored at `ip`, least-significant byte
// first, and returns it as int64_t.
// NOTE(review): the declaration of `value` and the loop around the
// accumulation step are not visible in this view.
362 static int64_t readValue(uint8_t* ip
, int size
) {
364 // This is the most-significant byte, so keep its sign.
365 value
= (signed char)ip
[--size
];
367 // Shift left 8 bits. (UBSAN doesn't like `<<` with signed values.)
// Add in the next lower byte as an unsigned quantity.
369 value
+= (uint64_t) ip
[size
];
// Stores `v` at `ip` as a `size`-byte value, least-significant byte first.
// Returns false without writing when `v` does not fit in `size` bytes as a
// signed number.
// NOTE(review): the byte loop and the final return are not visible;
// presumably it returns true after writing.
374 static bool writeValue(uint8_t* ip
, int size
, int64_t v
) {
375 auto value
= uint64_t(v
);
// For fields narrower than 64 bits, range-check in unsigned arithmetic:
// adding the sign-bit weight maps the valid signed range onto [0, 2*topBit).
376 if (size
* CHAR_BIT
< 64) {
377 auto topBit
= uint64_t(1) << (size
* CHAR_BIT
- 1);
378 if (value
+ topBit
>= topBit
* 2) return false;
// Emit the current low byte and advance.
382 *ip
++ = (uint8_t)value
;
// Builds a human-readable description of the decoded instruction: a leading
// pair of hex fields, then the ModRM and SIB bytes when present, then the
// offset and the immediate.
// NOTE(review): the format arguments and the guards around the offset and
// immediate sections are not visible in this view.
389 std::string
DecodedInstruction::toString() {
390 auto str
= folly::format("{:08x} {:02x}",
393 if (m_flags
.hasModRm
) {
394 auto modRm
= getModRm();
395 str
+= folly::format(" ModRM({:02b} {} {})",
399 if (m_flags
.hasSib
) {
// The SIB byte is the last byte before the offset and immediate fields.
400 auto sib
= m_ip
[m_size
- m_immSz
- m_offSz
- 1];
401 str
+= folly::format(" SIB({:02b} {} {})",
// `ip` now points at the first byte of the offset field.
408 auto ip
= m_ip
+ m_size
- m_immSz
- m_offSz
;
410 int64_t value
= readValue(ip
, m_offSz
);
// pic-relative offsets are printed with a "rip" prefix.
412 str
+= folly::format(" {}{:+x}",
413 m_flags
.picOff
? "rip" : "",
415 if (m_flags
.picOff
) {
// Also print the address obtained by adding the offset to the end of the instruction.
416 str
+= folly::format("({:08x})", uintptr_t(m_ip
+ m_size
+ value
)).str();
421 int64_t value
= readValue(ip
, m_immSz
);
423 str
+= folly::format(" #{}", value
).str();
// Returns the instruction's offset field as a 32-bit value, read from the
// last m_offSz bytes of the instruction; safe_cast guards the narrowing.
// NOTE(review): unlike picAddress(), this does not step back over m_immSz,
// so it reads the right bytes only when no immediate follows the offset.
// Confirm that callers guarantee this (one line of the body is not visible).
428 int32_t DecodedInstruction::offset() const {
430 auto const addr
= m_ip
+ m_size
;
431 return safe_cast
<int32_t>(readValue(addr
- m_offSz
, m_offSz
));
// Returns the address a pic-relative operand refers to: the end of the
// instruction measured from m_base, plus the stored offset.
// Must only be called when hasPicOffset() holds.
434 uint8_t* DecodedInstruction::picAddress() const {
435 assert(hasPicOffset());
// `addr` is the end of the bytes being read; `rel` is the end of the
// instruction relative to m_base, which the offset is applied to.
436 uint8_t* addr
= m_ip
+ m_size
;
437 uint8_t* rel
= m_base
+ m_size
;
// The offset field sits just before the immediate, if any.
438 return rel
+ readValue(addr
- m_immSz
- m_offSz
, m_offSz
);
// Rewrites the pic-relative offset so the operand refers to `target`.
// Returns the result of writeValue(), which rejects a distance that does not
// fit in the m_offSz-byte field. Must only be called when hasPicOffset() holds.
441 bool DecodedInstruction::setPicAddress(uint8_t* target
) {
442 assert(hasPicOffset());
443 uint8_t* addr
= m_ip
+ m_size
;
444 uint8_t* rel
= m_base
+ m_size
;
// Distance from the end of the instruction (relative to m_base) to the target.
445 ptrdiff_t diff
= target
- rel
;
// Overwrite the offset field, which precedes the immediate.
447 return writeValue(addr
- m_offSz
- m_immSz
, m_offSz
, diff
);
// Returns the instruction's immediate operand, read from its last m_immSz
// bytes and widened to 64 bits. Must only be called when hasImmediate() holds.
450 int64_t DecodedInstruction::immediate() const {
451 assert(hasImmediate());
452 return readValue(m_ip
+ m_size
- m_immSz
, m_immSz
);
// Overwrites the instruction's immediate operand (its last m_immSz bytes)
// with `value`. Returns the result of writeValue(), which rejects a value
// that does not fit. Must only be called when hasImmediate() holds.
455 bool DecodedInstruction::setImmediate(int64_t value
) {
456 assert(hasImmediate());
457 return writeValue(m_ip
+ m_size
- m_immSz
, m_immSz
, value
);
// Reports whether this instruction is one of the recognized no-op encodings.
460 bool DecodedInstruction::isNop() const {
// Opcode 0x90 counts only as a bare one-byte instruction, or as a two-byte
// one whose extra byte is the operand-size override.
461 if (m_opcode
== 0x90) {
462 return m_size
== 1 || (m_size
== 2 && m_flags
.opndSzOvr
);
// Otherwise only opcode 0x1f in map 1 qualifies.
464 return m_opcode
== 0x1f && m_map_select
== 1;
// Reports whether this instruction is a pc-relative branch of a kind that
// `branchType` selects (a mask tested against Conditional and Unconditional).
// NOTE(review): the fall-through return is not visible; presumably false.
467 bool DecodedInstruction::isBranch(BranchType branchType
469 Unconditional */) const {
// Every branch recognized here has a pic-relative offset.
470 if (!m_flags
.picOff
) return false;
471 if (m_map_select
== 0) {
472 // The one-byte opcode map
474 ((m_opcode
& 0xf0) == 0x70 /* 8-bit conditional branch */ &&
475 (branchType
& Conditional
)) ||
476 ((m_opcode
== 0xe9 /* 32-bit unconditional branch */ ||
477 m_opcode
== 0xeb /* 8-bit unconditional branch */) &&
478 (branchType
& Unconditional
));
480 if (m_map_select
== 1 && (branchType
& Conditional
)) {
481 // The two-byte opcode map (first byte is 0x0f)
482 return (m_opcode
& 0xf0) == 0x80 /* 32-bit conditional branch */;
// Reports whether this instruction is a call. Only map-0 opcodes qualify:
// 0xe8 always, and 0xff when the 3-bit field in bits 3..5 of its ModRM byte
// is 2 or 3 (the low bit of that field is masked off before comparing).
487 bool DecodedInstruction::isCall() const {
488 if (m_map_select
!= 0) return false;
489 if (m_opcode
== 0xe8) return true;
490 if (m_opcode
!= 0xff) return false;
491 return ((getModRm() >> 3) & 0x6) == 2;
// Reports whether this instruction is the map-0 opcode 0xe9, which isBranch()
// labels the 32-bit unconditional branch. The 8-bit form (0xeb) is not
// counted here.
494 bool DecodedInstruction::isJmp() const {
495 if (m_map_select
!= 0) return false;
496 return m_opcode
== 0xe9;
// Reports whether this instruction is the map-0 opcode 0x8d.
499 bool DecodedInstruction::isLea() const {
500 if (m_map_select
!= 0) return false;
501 return m_opcode
== 0x8d;
// Returns the condition code of a conditional jump, taken from the low
// nibble of the opcode. The asserts require the instruction to be either a
// 0x7x opcode in map 0 or a 0x8x opcode in map 1.
504 ConditionCode
DecodedInstruction::jccCondCode() const {
505 if (m_map_select
== 0) {
506 assert((m_opcode
& 0xf0) == 0x70); // 8-bit jcc
508 assert(m_map_select
== 1);
509 assert((m_opcode
& 0xf0) == 0x80); // 32-bit jcc
511 return static_cast<ConditionCode
>(m_opcode
& 0x0f);
// Tries to re-encode a branch with a 32-bit offset as the form with an
// 8-bit offset, in place. Returns false when the offset is not dword-sized,
// when the instruction has the vex flag set, or when the adjusted offset
// does not fit in [-128, 127].
// NOTE(review): the statements that adjust delta, write the new bytes and
// re-decode are not visible; presumably it returns true on success.
514 bool DecodedInstruction::shrinkBranch() {
516 if (m_offSz
!= sz::dword
) return false;
// `addr` points at the first byte of the offset field.
517 auto addr
= m_ip
+ m_size
- m_offSz
;
518 auto delta
= readValue(addr
, m_offSz
);
519 if (m_map_select
== 1) {
520 if (m_flags
.vex
) return false;
521 assert((m_opcode
& 0xf0) == 0x80); // must be a 32-bit conditional branch
523 The pc-relative offset is from the end of the instruction, and the
524 instruction is shrinking by 4 bytes (opcode goes from 2 bytes to 1,
525 and offset goes from 4 to 1), so we need to adjust delta by 4.
528 if (-128 > delta
|| delta
> 127) return false;
529 addr
[-2] = 0x70 | (m_opcode
& 0x0f); // make it an 8 bit conditional branch
532 assert(m_opcode
== 0xe9); // must be a 32-bit unconditional branch
534 As above, but opcode was already 1 byte, so the reduction is only 3
538 if (-128 > delta
|| delta
> 127) return false;
// The result must be a branch with a one-byte offset.
543 assert(isBranch() && m_offSz
== 1);
// Re-encodes a branch with an 8-bit offset as the form with a 32-bit
// offset, in place. The instruction must be a branch with a one-byte offset
// on entry and is asserted to have a four-byte offset on exit.
// NOTE(review): the opcode-byte writes and the re-decode are not visible.
// NOTE(review): widening moves the end of the instruction forward, yet the
// stored displacement is increased by 3 (or 4). Confirm the sign of this
// adjustment against how callers relocate the code.
547 void DecodedInstruction::widenBranch() {
548 assert(m_offSz
== 1 && isBranch());
// `addr` points at the single offset byte.
549 auto addr
= m_ip
+ m_size
- m_offSz
;
550 auto delta
= readValue(addr
, 1);
// Unconditional 8-bit branch: the 4-byte offset goes where the 1-byte one was.
551 if (m_opcode
== 0xeb) {
553 writeValue(addr
, 4, delta
+ 3);
// Conditional branch: a 0x8x opcode byte carrying the same condition code
// takes the old offset slot, and the 4-byte offset follows it.
556 addr
[0] = 0x80 | (m_opcode
& 0xf);
557 writeValue(addr
+ 1, 4, delta
+ 4);
560 assert(isBranch() && m_offSz
== 4);
// Returns the instruction's ModRM byte. Must only be called when the
// instruction has one.
563 uint8_t DecodedInstruction::getModRm() const {
564 assert(m_flags
.hasModRm
);
// Counting back from the end of the instruction: immediate, offset, the
// optional SIB byte, then the ModRM byte itself.
565 return m_ip
[m_size
- m_immSz
- m_offSz
- m_flags
.hasSib
- 1];
// X-macro table used by isFuseable(). Each entry names an instruction family
// and gives (opcode, opcode extension, operand1IsDest); 0xFF is the extension
// value used by every entry visible here. The per-family helper macros
// further down expand an entry into X(opcode, opExt, operand1IsDest, <the
// condition codes that family is paired with>).
// NOTE(review): several entries and helper #define lines are not visible in
// this view.
568 #define FUSEABLE_INSTRUCTIONS \
577 AND(0x20, 0xFF, true) \
578 AND(0x21, 0xFF, true) \
579 AND(0x22, 0xFF, false) \
580 AND(0x23, 0xFF, false) \
581 AND(0x24, 0xFF, false) \
582 AND(0x25, 0xFF, false) \
595 ADD(0x00, 0xFF, true) \
596 ADD(0x01, 0xFF, true) \
597 ADD(0x02, 0xFF, false) \
598 ADD(0x03, 0xFF, false) \
599 ADD(0x04, 0xFF, false) \
600 ADD(0x05, 0xFF, false) \
604 SUB(0x28, 0xFF, true) \
605 SUB(0x29, 0xFF, true) \
606 SUB(0x2A, 0xFF, false) \
607 SUB(0x2B, 0xFF, false) \
608 SUB(0x2C, 0xFF, false) \
609 SUB(0x2D, 0xFF, false) \
619 X(i, j, false, CC_O, CC_NO, CC_B, CC_NAE, CC_AE, CC_NB, CC_NC, CC_E, CC_Z, \
620 CC_NE, CC_NZ, CC_BE, CC_NA, CC_A, CC_NBE, CC_S, CC_NS, CC_P, CC_NP, CC_L, \
621 CC_NGE, CC_GE, CC_NL, CC_LE, CC_NG, CC_G, CC_NLE)
622 #define AND(i, j, k) \
623 X(i, j, k, CC_O, CC_NO, CC_B, CC_NAE, CC_AE, CC_NB, CC_NC, CC_E, CC_Z, \
624 CC_NE, CC_NZ, CC_BE, CC_NA, CC_A, CC_NBE, CC_S, CC_NS, CC_P, CC_NP, CC_L, \
625 CC_NGE, CC_GE, CC_NL, CC_LE, CC_NG, CC_G, CC_NLE)
627 X(i, j, false, CC_B, CC_NAE, CC_AE, CC_NB, CC_NC, CC_E, CC_Z, CC_NE, CC_NZ, \
628 CC_BE, CC_NA, CC_A, CC_NBE, CC_L, CC_NGE, CC_GE, CC_NL, CC_LE, CC_NG, \
630 #define ADD(i, j, k) \
631 X(i, j, k, CC_B, CC_NAE, CC_AE, CC_NB, CC_NC, CC_E, CC_Z, CC_NE, CC_NZ, \
632 CC_BE, CC_NA, CC_A, CC_NBE, CC_L, CC_NGE, CC_GE, CC_NL, CC_LE, CC_NG, \
634 #define SUB(i, j, k) \
635 X(i, j, k, CC_B, CC_NAE, CC_AE, CC_NB, CC_NC, CC_E, CC_Z, CC_NE, CC_NZ, \
636 CC_BE, CC_NA, CC_A, CC_NBE, CC_L, CC_NGE, CC_GE, CC_NL, CC_LE, CC_NG, \
639 X(i, j, true, CC_E, CC_Z, CC_NE, CC_NZ, CC_L, CC_NGE, CC_GE, CC_NL, CC_LE, \
642 X(i, j, true, CC_E, CC_Z, CC_NE, CC_NZ, CC_L, CC_NGE, CC_GE, CC_NL, CC_LE, \
// Reports whether this instruction can be fused with `next`, which must be a
// conditional branch. The decision is table-driven: FUSEABLE_INSTRUCTIONS is
// expanded into one switch case per (opcode extension, opcode) pair, and each
// case checks the operand form and then looks for next's condition code in
// that entry's list.
// NOTE(review): the return statements and the declaration of `e` are not
// visible in this view; presumably the early exits return false.
645 bool DecodedInstruction::isFuseable(const DecodedInstruction
& next
) const {
646 // Assumes no invalid instructions.
647 if (m_map_select
!= 0 || // No multibyte instructions are fuseable.
648 hasPicOffset() || // No rip relative addressing
649 !next
.isBranch(Conditional
)) {
653 // Find extra 3 bits of opcode in the modrm byte if this opcode has it.
656 case 0xF6: case 0xF7: case 0x80: case 0x81: case 0x83: case 0xFE: case 0xFF:
// Bits 3..5 of the ModRM byte, or -1 when there is no ModRM byte.
657 e
= m_flags
.hasModRm
? (getModRm() & 0x38) >> 3 : -1;
// The key combines the extension (upper bits) with the opcode (low bits).
663 switch (e
<< 16 | m_opcode
) {
664 #define X(opcode, opExt, operand1IsDest, ...) \
665 case (opExt << 16 | opcode): { \
666 if ((operand1IsDest || hasImmediate()) && \
667 m_flags.hasModRm && (getModRm() & 0xC0) != 0xC0) { \
668 /* No fusing instruction with immediate and memory operands. */ \
669 /* Also cannot fuse instructions with destination memory operand. */ \
672 const ConditionCode ccs[] = { __VA_ARGS__ }; \
673 for (size_t i = 0; i < sizeof(ccs) / sizeof(ConditionCode); ++i) { \
674 if (next.jccCondCode() == ccs[i]) { \
680 FUSEABLE_INSTRUCTIONS
696 #undef FUSEABLE_INSTRUCTIONS