Deshim VirtualExecutor in folly
[hiphop-php.git] / hphp / util / test / asm.cpp
blobd6324966526eab0bcc7c33cea86e7b851184770c
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #if defined(__x86_64__)
18 #include "hphp/util/asm-x64.h"
19 #include <gtest/gtest.h>
21 #include <vector>
22 #include <cstdio>
23 #include <fstream>
24 #include <sstream>
25 #include <cstdlib>
26 #include <cstring>
28 #include <boost/regex.hpp>
29 #include <boost/algorithm/string.hpp>
31 #include <folly/Format.h>
32 #include <folly/String.h>
34 #include "hphp/util/disasm.h"
36 namespace HPHP::jit {
38 typedef X64Assembler Asm;
39 using namespace reg;
41 namespace {
43 struct TestDataBlock {
44 explicit TestDataBlock(size_t sz) {
45 auto code = (Address)mmap(0, sz, PROT_READ | PROT_WRITE | PROT_EXEC,
46 MAP_ANON | MAP_PRIVATE, -1, 0);
47 always_assert(code != MAP_FAILED);
48 m_db.init(code, sz, "TestBlock");
51 ~TestDataBlock() {
52 munmap(m_db.base(), m_db.capacity());
55 Address frontier() const {
56 return m_db.frontier();
59 /* implicit */ operator DataBlock&() {
60 return m_db;
63 private:
64 DataBlock m_db;
67 //////////////////////////////////////////////////////////////////////
/*
 * The following environment variables can be used to turn up the number of
 * combinations.  Off by default to keep the test running fast normally ...
 *
 * Only the presence of each variable matters; its value is never inspected.
 */
const bool testMore = getenv("ASM_TEST_MORE") != nullptr;
const bool testMax = getenv("ASM_TEST_MAX") != nullptr;
76 bool match_opcode_line(const std::string& line,
77 std::string& opName,
78 std::string& opArgs) {
79 static boost::regex re{ R"(([^\s]*)\s+(.*))" };
80 boost::smatch cm;
81 if (!regex_match(line, cm, re)) return false;
82 opName = cm[1];
83 opArgs = cm[2];
84 return true;
87 void compare(const char* expectedOpName,
88 const std::vector<std::string>& actuals,
89 const std::vector<std::string>& expecteds) {
90 auto expectIt = expecteds.begin();
92 std::string expect;
93 std::string opName, opArgs;
94 for (auto& real : actuals) {
95 if (!match_opcode_line(real, opName, opArgs)) continue;
97 // The xed library will add operand size suffixes on any opcode
98 // that affects memory. We could figure this out and check for
99 // it, but it's good enough just to see that the opcode has the
100 // prefix we expect.
101 EXPECT_TRUE(boost::starts_with(opName, expectedOpName))
102 << "expected " << expectedOpName << ", got " << opName;
104 if (expectIt == expecteds.end()) {
105 EXPECT_EQ(1, 0) << "Incorrect number of assembler lines";
106 break;
109 EXPECT_EQ(*expectIt, opArgs)
110 << "in opcode: " << expectedOpName;
111 ++expectIt;
114 EXPECT_EQ(expectIt, expecteds.end())
115 << "More lines expected than read";
118 std::vector<std::string> dump_disasm(Asm& a) {
119 Disasm dis(Disasm::Options()
120 .addresses(false)
121 .forceAttSyntax(true));
122 std::ostringstream sstream;
123 dis.disasm(sstream, a.base(), a.base() + a.used(), 0);
125 std::vector<std::string> ret;
126 folly::split('\n', sstream.str(), ret);
127 return ret;
130 void expect_asm(Asm& a, const std::string& str) {
131 auto const dump = dump_disasm(a);
133 std::ostringstream out;
134 out << '\n';
136 std::string opName, opArgs;
137 for (auto& line : dump) {
138 if (match_opcode_line(line, opName, opArgs)) {
139 out << opName << ' ' << opArgs << '\n';
142 EXPECT_EQ(out.str(), str);
145 // Generate a bunch of operands of a given type.
146 template<class T> struct Gen;
148 template<> struct Gen<Reg64> {
149 static const std::vector<Reg64>& gen() {
150 if (testMax) {
151 static const std::vector<Reg64> v = {
152 rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi,
153 r8, r9, r10, r11, r12, r13, r14, r15
155 return v;
157 if (testMore) {
158 static const std::vector<Reg64> v = {
159 rax, rbx, rsp, rbp, r12, r13, r15
161 return v;
163 static const std::vector<Reg64> v = { rax, rbp, r15 };
164 return v;
169 template<> struct Gen<Reg32> {
170 static const std::vector<Reg32>& gen() {
171 if (testMax) {
172 static const std::vector<Reg32> v = {
173 eax, ecx, edx, ebx, esp, ebp, esi, edi,
174 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d
176 return v;
178 if (testMore) {
179 static const std::vector<Reg32> v = {
180 eax, ebx, esp, ebp, r12d, r13d, r14d
182 return v;
184 static const std::vector<Reg32> v = { eax, ebp, r15d };
185 return v;
189 template<> struct Gen<Reg8> {
190 static const std::vector<Reg8>& gen() {
191 // We can't really test the high-byte regs here because they can't be
192 // used any time we have a REX byte.
193 if (testMax) {
194 static const std::vector<Reg8> v = {
195 al, cl, dl, bl, spl, bpl, sil, dil, r8b, r9b, r10b,
196 r11b, r12b, r13b, r14b, r15b
198 return v;
200 static const std::vector<Reg8> v = { al, r8b, sil };
201 return v;
205 template<> struct Gen<MemoryRef> {
206 static const std::vector<MemoryRef>& gen_mr() {
207 static bool inited = false;
208 static std::vector<MemoryRef> vec;
209 if (inited) return vec;
210 auto& regs = Gen<Reg64>::gen();
211 const std::vector<int> disps = { -1024, 0, 12, 1024 };
212 for (auto& r : regs) {
213 for (auto& d : disps) {
214 vec.push_back(r[d]);
217 inited = true;
218 return vec;
220 static const std::vector<MemoryRef>& gen() {
221 static bool inited = false;
222 static std::vector<MemoryRef> vec;
223 if (inited) return vec;
224 auto& indexes = Gen<Reg64>::gen();
225 auto& mrs = Gen<MemoryRef>::gen_mr();
226 std::vector<int> scales = { 4 };
227 for (auto& mr : mrs) {
228 for (auto& idx : indexes) {
229 if (idx == rsp) continue;
230 for (auto& s : scales) {
231 vec.push_back(*(IndexedDispReg(mr.r.base, idx * s) + mr.r.disp));
235 return vec;
239 static bool doingByteOpcodes = false;
241 template<> struct Gen<Immed> {
242 static const std::vector<Immed>& gen() {
243 if (doingByteOpcodes) {
244 // Don't use any immediates that don't fit in a byte. (Normally we
245 // want to though because they can be encoded different.)
246 static const std::vector<Immed> vec { 1, 127 };
247 return vec;
249 static const std::vector<Immed> vec { 1, 1 << 20 };
250 return vec;
254 const char* expected_str(Reg64 r) { return regname(r); }
255 const char* expected_str(Reg32 r) { return regname(r); }
256 const char* expected_str(Reg8 r) { return regname(r); }
257 #undef X
/*
 * Writes a displacement to `out' as a signed hexadecimal literal: "0x<hex>"
 * for non-negative values and "-0x<hex>" for negative ones.
 *
 * The magnitude of a negative displacement is computed in unsigned
 * arithmetic so that the most negative intptr_t is handled without signed
 * overflow.
 *
 * Note that std::hex is left set on `out' afterwards, so integers the caller
 * writes to the same stream later are also formatted in hexadecimal.
 */
void expected_disp_str(intptr_t disp, std::ostream& out) {
  auto const bits = static_cast<uintptr_t>(disp);
  if (disp < 0) {
    // 0 - bits is well-defined modular arithmetic and equals |disp|.
    out << "-0x" << std::hex << (uintptr_t{0} - bits);
  } else {
    out << "0x" << std::hex << bits;
  }
}
267 std::string expected_str(MemoryRef mr) {
268 std::ostringstream out;
269 if (mr.r.disp != 0) {
270 expected_disp_str(mr.r.disp, out);
272 if (int(mr.r.index) != -1) {
273 out << '(' << expected_str(mr.r.base)
274 << ',' << expected_str(mr.r.index)
275 << ',' << mr.r.scale
276 << ')';
277 } else {
278 out << '(' << expected_str(mr.r.base) << ')';
280 return out.str();
283 std::string expected_str(Immed i) {
284 std::ostringstream out;
285 out << "$0x" << std::hex << i.q();
286 return out.str();
289 //////////////////////////////////////////////////////////////////////
291 template<class Arg>
292 void dotest(const char* opName, Asm& a, void (Asm::*memFn)(Arg)) {
293 std::vector<std::string> expecteds;
295 auto& args = Gen<Arg>::gen();
296 for (auto& ar : args) {
297 expecteds.push_back(expected_str(ar));
298 (a.*memFn)(ar);
301 auto const dump = dump_disasm(a);
302 compare(opName, dump, expecteds);
303 a.clear();
306 template<class Arg1, class Arg2>
307 void dotest(const char* opName, Asm& a, void (Asm::*memFn)(Arg1, Arg2),
308 const std::vector<Arg1>& args1, const std::vector<Arg2>& args2) {
309 std::vector<std::string> expecteds;
311 for (auto& ar1 : args1) {
312 for (auto& ar2 : args2) {
313 expecteds.push_back(
314 folly::format("{}, {}", expected_str(ar1), expected_str(ar2)).str()
316 (a.*memFn)(ar1, ar2);
320 auto const dump = dump_disasm(a);
321 compare(opName, dump, expecteds);
322 a.clear();
325 template<class Arg1, class Arg2>
326 void dotest(const char* opName, Asm& a, void (Asm::*memFn)(Arg1, Arg2)) {
327 dotest(opName, a, memFn, Gen<Arg1>::gen(), Gen<Arg2>::gen());
331 //////////////////////////////////////////////////////////////////////
333 // Wrappers for generating test cases for various addressing modes.
335 typedef void (Asm::*OpR64)(Reg64);
336 typedef void (Asm::*OpR32)(Reg32);
337 typedef void (Asm::*OpR8)(Reg8);
338 typedef void (Asm::*OpRR64)(Reg64, Reg64);
339 typedef void (Asm::*OpRR32)(Reg32, Reg32);
340 typedef void (Asm::*OpRR8)(Reg8, Reg8);
341 typedef void (Asm::*OpR8R32)(Reg8, Reg32);
342 typedef void (Asm::*OpR8R64)(Reg8, Reg64);
343 typedef void (Asm::*OpMR64)(MemoryRef, Reg64);
344 typedef void (Asm::*OpMR32)(MemoryRef, Reg32);
345 typedef void (Asm::*OpMR8)(MemoryRef, Reg8);
346 typedef void (Asm::*OpSMR64)(MemoryRef, Reg64);
347 typedef void (Asm::*OpSMR32)(MemoryRef, Reg32);
348 typedef void (Asm::*OpSMR8)(MemoryRef, Reg8);
349 typedef void (Asm::*OpRM64)(Reg64, MemoryRef);
350 typedef void (Asm::*OpRM32)(Reg32, MemoryRef);
351 typedef void (Asm::*OpRM8)(Reg8, MemoryRef);
352 typedef void (Asm::*OpRSM64)(Reg64, MemoryRef);
353 typedef void (Asm::*OpRSM32)(Reg32, MemoryRef);
354 typedef void (Asm::*OpRSM8)(Reg8, MemoryRef);
355 typedef void (Asm::*OpIR64)(Immed, Reg64);
356 typedef void (Asm::*OpIR32)(Immed, Reg32);
357 typedef void (Asm::*OpIR8)(Immed, Reg8);
358 typedef void (Asm::*OpIM64)(Immed, MemoryRef);
359 typedef void (Asm::*OpIM32)(Immed, MemoryRef);
360 typedef void (Asm::*OpISM64)(Immed, MemoryRef);
361 typedef void (Asm::*OpISM32)(Immed, MemoryRef);
363 //////////////////////////////////////////////////////////////////////
367 TEST(Asm, General) {
368 TestDataBlock db(10 << 24);
369 Asm a { db };
372 * Test is a little different, so we have this BASIC_OP stuff.
374 * Skips using a memory source operand---there's actually only a test
375 * instruction with memory destination (even though our API allows
376 * writing it the other way), so when we disassemble the args look like
377 * the wrong order.
380 #define BASIC_OP(op) \
381 dotest(#op, a, OpRR32(&Asm::op##l)); \
382 dotest(#op, a, OpRR64(&Asm::op##q)); \
383 dotest(#op, a, OpRM32(&Asm::op##l)); \
384 dotest(#op, a, OpRM64(&Asm::op##q)); \
385 dotest(#op, a, OpRSM32(&Asm::op##l)); \
386 dotest(#op, a, OpRSM64(&Asm::op##q)); \
387 dotest(#op, a, OpIR64(&Asm::op##q)); \
388 dotest(#op, a, OpIR32(&Asm::op##l)); \
389 dotest(#op "q", a, OpIM64(&Asm::op##q)); \
390 dotest(#op "l", a, OpIM32(&Asm::op##l)); \
391 dotest(#op "q", a, OpISM64(&Asm::op##q)); \
392 dotest(#op "l", a, OpISM32(&Asm::op##l));
394 #define FULL_OP(op) \
395 BASIC_OP(op) \
396 dotest(#op, a, OpMR32(&Asm::op##l)); \
397 dotest(#op, a, OpMR64(&Asm::op##q)); \
398 dotest(#op, a, OpSMR32(&Asm::op##l)); \
399 dotest(#op, a, OpSMR64(&Asm::op##q));
402 #define BASIC_BYTE_OP(op) \
403 dotest(#op, a, OpIR8(&Asm::op##b)); \
404 dotest(#op, a, OpRR8(&Asm::op##b)); \
405 dotest(#op, a, OpRM8(&Asm::op##b)); \
406 dotest(#op, a, OpRSM8(&Asm::op##b)); \
407 dotest(#op, a, OpIR8(&Asm::op##b));
409 #define FULL_BYTE_OP(op) \
410 BASIC_BYTE_OP(op) \
411 dotest(#op, a, OpMR8(&Asm::op##b)); \
412 dotest(#op, a, OpSMR8(&Asm::op##b));
414 #define UNARY_BYTE_OP(op) \
415 dotest(#op, a, OpR8(&Asm::op##b));
417 dotest("inc", a, OpR32(&Asm::incl));
418 dotest("inc", a, OpR64(&Asm::incq));
420 dotest("mov", a, OpRR8(&Asm::movb));
421 dotest("mov", a, OpRR32(&Asm::movl));
422 dotest("mov", a, OpRR64(&Asm::movq));
423 dotest("mov", a, OpMR8(&Asm::loadb));
424 dotest("mov", a, OpMR32(&Asm::loadl));
425 dotest("mov", a, OpMR64(&Asm::loadq));
426 dotest("mov", a, OpSMR8(&Asm::loadb));
427 dotest("mov", a, OpSMR32(&Asm::loadl));
428 dotest("mov", a, OpSMR64(&Asm::loadq));
429 dotest("mov", a, OpRM8(&Asm::storeb));
430 dotest("mov", a, OpRM32(&Asm::storel));
431 dotest("mov", a, OpRM64(&Asm::storeq));
432 dotest("movl",a, OpISM32(&Asm::storel));
433 dotest("movq",a, OpISM64(&Asm::storeq));
434 dotest("mov", a, OpRSM8(&Asm::storeb));
435 dotest("mov", a, OpRSM32(&Asm::storel));
436 dotest("mov", a, OpRSM64(&Asm::storeq));
438 dotest("movzx", a, OpMR32(&Asm::loadzbl));
439 dotest("movzx", a, OpSMR32(&Asm::loadzbl));
440 dotest("movzx", a, OpR8R32(&Asm::movzbl));
442 dotest("movsx", a, OpMR64(&Asm::loadsbq));
443 dotest("movsx", a, OpSMR64(&Asm::loadsbq));
444 dotest("movsx", a, OpR8R64(&Asm::movsbq));
446 FULL_OP(add);
447 FULL_OP(xor);
448 FULL_OP(sub);
449 FULL_OP(and);
450 FULL_OP(or);
451 FULL_OP(cmp);
452 BASIC_OP(test);
454 // Note: objdump disassembles xchg %rax,%rax as rex.W nop, so we're just
455 // leaving it out.
457 doingByteOpcodes = true;
459 FULL_BYTE_OP(cmp);
460 BASIC_BYTE_OP(test);
461 UNARY_BYTE_OP(not);
462 UNARY_BYTE_OP(neg);
464 doingByteOpcodes = false;
467 TEST(Asm, WordSizeInstructions) {
468 TestDataBlock db(10 << 24);
469 Asm a { db };
471 // single register operations
472 a. incw (ax);
473 // single memory operations
474 a. decw (*r8);
475 // register-register operations
476 a. addw (ax, bx);
477 a. xorw (r10w, r11w);
478 a. movw (cx, si);
479 // register-memory operations
480 a. storew (ax, *rbx);
481 a. testw (r10w, rsi[0x10]);
482 // memory-register operations
483 a. subw (*rcx, ax);
484 a. orw (r11[0x100], dx);
485 // immediate-register operations
486 a. shlw (0x3, di);
487 a. andw (0x5555, r12w);
488 // immediate-memory operations
489 a. storew (0x1, *r9);
490 a. storew (0x1, rax[0x100]);
492 expect_asm(a, R"(
493 inc %ax
494 decw (%r8)
495 add %ax, %bx
496 xor %r10w, %r11w
497 mov %cx, %si
498 movw %ax, (%rbx)
499 testw %r10w, 0x10(%rsi)
500 subw (%rcx), %ax
501 orw 0x100(%r11), %dx
502 shl $0x3, %di
503 and $0x5555, %r12w
504 movw $0x1, (%r9)
505 movw $0x1, 0x100(%rax)
506 )");
509 TEST(Asm, RetImmediate) {
510 TestDataBlock db(10 << 24);
511 Asm a { db };
513 a.ret(8);
514 ASSERT_FALSE(a.base()[0] == kOpsizePrefix);
517 TEST(Asm, IncDecRegs) {
518 TestDataBlock db(10 << 24);
519 Asm a { db };
521 // incq, incl, incw
522 a. incq(rax);
523 a. incl(eax);
524 a. incw(ax);
525 a. incq(r15);
526 a. incl(r15d);
527 a. incw(r15w);
528 // decq, decl, decw
529 a. decq(rax);
530 a. decl(eax);
531 a. decw(ax);
532 a. decq(r15);
533 a. decl(r15d);
534 a. decw(r15w);
536 expect_asm(a, R"(
537 inc %rax
538 inc %eax
539 inc %ax
540 inc %r15
541 inc %r15d
542 inc %r15w
543 dec %rax
544 dec %eax
545 dec %ax
546 dec %r15
547 dec %r15d
548 dec %r15w
549 )");
552 TEST(Asm, HighByteReg) {
553 TestDataBlock db(10 << 24);
554 Asm a { db };
556 // Test movzbl with high byte regs, avoiding destination registers
557 // that need a rex prefix
558 std::vector<Reg8> hiregs = {ah, bh, ch, dh};
559 std::vector<Reg32> reg32s = {eax, ecx, esi, ebp};
560 dotest("movzx", a, OpR8R32(&Asm::movzbl), hiregs, reg32s);
562 a. movb (al, ah);
563 a. testb (0x1, ah);
564 a. cmpb (ch, dh);
566 expect_asm(a, R"(
567 mov %al, %ah
568 test $0x1, %ah
569 cmp %ch, %dh
570 )");
573 TEST(Asm, RandomJunk) {
574 TestDataBlock db(10 << 24);
575 Asm a { db };
577 a. push (rbp);
578 a. movq (rsp, rbp);
579 a. subq (0x80, rsp);
581 a. movl (0, eax);
582 a. incq (rax);
583 a. storeq (rax, rsp[0x8]);
584 a. loadq (rsp[0x8], rdi);
586 a. pop (rbp);
587 a. ret ();
589 expect_asm(a, R"(
590 pushq %rbp
591 mov %rsp, %rbp
592 sub $0x80, %rsp
593 mov $0x0, %eax
594 inc %rax
595 movq %rax, 0x8(%rsp)
596 movq 0x8(%rsp), %rdi
597 popq %rbp
598 retq )" "\n"); // string concat to avoid space at end of line after retq
601 TEST(Asm, AluBytes) {
602 TestDataBlock db(10 << 24);
603 Asm a { db };
605 #define INSTRS \
606 FROB(cmp) \
607 FROB(add) \
608 FROB(sub) \
609 FROB(and) \
610 FROB(or) \
611 FROB(xor)
613 #define FROB(instr) \
614 a. instr ## b(sil, al); \
615 a. instr ## b(0xf, al); \
616 a. instr ## b(sil, rcx[0x10]); \
617 a. instr ## b(rsp[0x10], sil); \
618 a. instr ## b(rcx[rsi * 8], al); \
619 a. instr ## b(al, rcx[rsi * 8]);
621 INSTRS
623 #undef FROB
625 #define FROB(name) \
626 #name " %sil, %al\n" \
627 #name " $0xf, %al\n" \
628 #name "b %sil, 0x10(%rcx)\n" \
629 #name "b 0x10(%rsp), %sil\n" \
630 #name "b (%rcx,%rsi,8), %al\n" \
631 #name "b %al, (%rcx,%rsi,8)\n"
633 expect_asm(a, "\n" INSTRS "");
635 #undef FROB
636 #undef INSTRS
638 // test is asymmetric.
639 a.clear();
640 a. testb(sil, al);
641 a. testb(0xf, al);
642 a. testb(sil, rcx[0x10]);
643 a. testb(sil, rcx[rsi * 8]);
645 expect_asm(a, R"(
646 test %sil, %al
647 test $0xf, %al
648 testb %sil, 0x10(%rcx)
649 testb %sil, (%rcx,%rsi,8)
650 )");
653 TEST(Asm, CMov) {
654 TestDataBlock db(10 << 24);
655 Asm a { db };
656 a. testq (rax, rax);
657 a. cload_reg64_disp_reg64(CC_Z, rax, 0, rax);
658 a. cload_reg64_disp_reg32(CC_Z, rax, 0, eax);
659 expect_asm(a, R"(
660 test %rax, %rax
661 cmovzq (%rax), %rax
662 cmovzl (%rax), %eax
663 )");
666 TEST(Asm, SimpleLabelTest) {
667 TestDataBlock db(10 << 24);
668 Asm a { db };
670 Label loop;
672 auto loopCallee = [] (int* counter) { ++*counter; };
674 // Function that calls loopCallee N times.
675 auto function = reinterpret_cast<int (*)(int, int*)>(a.frontier());
676 a. push (rbp);
677 a. movq (rsp, rbp);
678 a. push (r15);
679 a. push (r12);
680 a. push (r10);
681 a. push (rbx);
683 a. movl (edi, r12d);
684 a. movq (rsi, r15);
685 a. movl (0, ebx);
687 asm_label(a, loop);
688 a. movq (r15, rdi);
689 a. movq (CodeAddress(static_cast<void (*)(int*)>(loopCallee)), r10);
690 a. call (r10);
691 a. incl (ebx);
692 a. cmpl (ebx, r12d);
693 a. jne (loop);
695 a. pop (rbx);
696 a. pop (r10);
697 a. pop (r12);
698 a. pop (r15);
699 a. pop (rbp);
700 a. ret ();
702 auto test_case = [&] (int n) {
703 int counter = 0;
704 function(n, &counter);
705 EXPECT_EQ(n, counter);
707 for (int i = 1; i < 15; ++i) test_case(i);
708 test_case(51);
709 test_case(127);
712 TEST(Asm, ShiftingWithCl) {
713 TestDataBlock db(10 << 24);
714 Asm a { db };
716 a. shlq(rax);
717 a. shlq(rdx);
718 a. shlq(r8);
719 a. sarq(rbx);
720 a. sarq(rsi);
721 a. sarq(r8);
722 expect_asm(a, R"(
723 shl %cl, %rax
724 shl %cl, %rdx
725 shl %cl, %r8
726 sar %cl, %rbx
727 sar %cl, %rsi
728 sar %cl, %r8
729 )");
732 TEST(Asm, FloatRounding) {
733 if (folly::CpuId().sse41()) {
734 TestDataBlock db(10 << 24);
735 Asm a { db };
737 a. roundsd(RoundDirection::nearest, xmm1, xmm2);
738 a. roundsd(RoundDirection::floor, xmm2, xmm4);
739 a. roundsd(RoundDirection::ceil, xmm8, xmm7);
740 a. roundsd(RoundDirection::truncate, xmm12, xmm9);
742 expect_asm(a, R"(
743 roundsd $0x0, %xmm1, %xmm2
744 roundsd $0x1, %xmm2, %xmm4
745 roundsd $0x2, %xmm8, %xmm7
746 roundsd $0x3, %xmm12, %xmm9
747 )");
751 TEST(Asm, SSEDivision) {
752 TestDataBlock db(10 << 24);
753 Asm a { db };
754 a. divsd(xmm0, xmm1);
755 a. divsd(xmm1, xmm2);
756 a. divsd(xmm2, xmm0);
757 a. divsd(xmm15, xmm0);
758 a. divsd(xmm12, xmm8);
759 expect_asm(a, R"(
760 divsd %xmm0, %xmm1
761 divsd %xmm1, %xmm2
762 divsd %xmm2, %xmm0
763 divsd %xmm15, %xmm0
764 divsd %xmm12, %xmm8
765 )");
768 TEST(Asm, SSESqrt) {
769 TestDataBlock db(10 << 24);
770 Asm a { db };
771 a. sqrtsd(xmm0, xmm1);
772 a. sqrtsd(xmm1, xmm2);
773 a. sqrtsd(xmm2, xmm0);
774 a. sqrtsd(xmm15, xmm0);
775 a. sqrtsd(xmm12, xmm8);
776 expect_asm(a, R"(
777 sqrtsd %xmm0, %xmm1
778 sqrtsd %xmm1, %xmm2
779 sqrtsd %xmm2, %xmm0
780 sqrtsd %xmm15, %xmm0
781 sqrtsd %xmm12, %xmm8
782 )");
785 TEST(Asm, DoubleToIntConv) {
786 TestDataBlock db(10 << 24);
787 Asm a { db };
788 a. cvttsd2siq(xmm0, rax);
789 a. cvttsd2siq(xmm1, rbx);
790 a. cvttsd2siq(xmm2, rcx);
791 a. cvttsd2siq(xmm15, rdx);
792 a. cvttsd2siq(xmm12, r12);
793 expect_asm(a, R"(
794 cvttsd2si %xmm0, %rax
795 cvttsd2si %xmm1, %rbx
796 cvttsd2si %xmm2, %rcx
797 cvttsd2si %xmm15, %rdx
798 cvttsd2si %xmm12, %r12
799 )");
802 TEST(Asm, Baseless) {
803 TestDataBlock db(10 << 24);
804 Asm a { db };
806 a. call (baseless(rax*8 + 0x400));
807 a. call (baseless(rax*8 + 0x40));
808 a. call (baseless(rsi*4 + 0x42));
809 a. call (baseless(rbx*2 + 0x700));
810 expect_asm(a, R"(
811 callq 0x400(,%rax,8)
812 callq 0x40(,%rax,8)
813 callq 0x42(,%rsi,4)
814 callq 0x700(,%rbx,2)
815 )");
818 TEST(Asm, IncDecIndexed) {
819 TestDataBlock db(10 << 24);
820 Asm a { db };
822 a. incw (rax[rdi*2 + 0x10]);
823 a. incw (rax[rsi*4 + 0x15]);
824 a. decw (rbp[r15*2 + 0x12]);
825 a. incl (rax[rdi*2 + 0x10]);
826 a. incl (rax[rsi*4 + 0x15]);
827 a. decl (rbp[r15*2 + 0x12]);
828 a. incq (rax[rdi*2 + 0x10]);
829 a. incq (rax[rsi*4 + 0x15]);
830 a. decq (rbp[r15*2 + 0x12]);
831 expect_asm(a, R"(
832 incw 0x10(%rax,%rdi,2)
833 incw 0x15(%rax,%rsi,4)
834 decw 0x12(%rbp,%r15,2)
835 incl 0x10(%rax,%rdi,2)
836 incl 0x15(%rax,%rsi,4)
837 decl 0x12(%rbp,%r15,2)
838 incq 0x10(%rax,%rdi,2)
839 incq 0x15(%rax,%rsi,4)
840 decq 0x12(%rbp,%r15,2)
841 )");
844 TEST(Asm, Unpcklpd) {
845 TestDataBlock db(256);
846 Asm a { db };
848 Address current = db.frontier();
849 a.unpcklpd(xmm0, xmm1);
850 EXPECT_EQ(0x66, current[0]);
851 EXPECT_EQ(0x0f, current[1]);
852 EXPECT_EQ(0x14, current[2]);
854 current = db.frontier();
855 a.unpcklpd(xmm11, xmm1);
856 EXPECT_EQ(0x66, current[0]);
857 EXPECT_EQ(0x41, current[1]);
858 EXPECT_EQ(0x0f, current[2]);
859 EXPECT_EQ(0x14, current[3]);
861 current = db.frontier();
862 a.unpcklpd(xmm5, xmm13);
863 EXPECT_EQ(0x66, current[0]);
864 EXPECT_EQ(0x44, current[1]);
865 EXPECT_EQ(0x0f, current[2]);
866 EXPECT_EQ(0x14, current[3]);
868 current = db.frontier();
869 a.unpcklpd(xmm11, xmm13);
870 EXPECT_EQ(0x66, current[0]);
871 EXPECT_EQ(0x45, current[1]);
872 EXPECT_EQ(0x0f, current[2]);
873 EXPECT_EQ(0x14, current[3]);
876 TEST(Asm, Ucomisd) {
877 TestDataBlock db(256);
878 Asm a { db };
880 Address current = db.frontier();
881 a.ucomisd(xmm0, xmm1);
882 EXPECT_EQ(0x66, current[0]);
883 EXPECT_EQ(0x0f, current[1]);
884 EXPECT_EQ(0x2e, current[2]);
886 current = db.frontier();
887 a.ucomisd(xmm9, xmm2);
888 EXPECT_EQ(0x66, current[0]);
889 EXPECT_EQ(0x44, current[1]);
890 EXPECT_EQ(0x0f, current[2]);
891 EXPECT_EQ(0x2e, current[3]);
893 current = db.frontier();
894 a.ucomisd(xmm3, xmm12);
895 EXPECT_EQ(0x66, current[0]);
896 EXPECT_EQ(0x41, current[1]);
897 EXPECT_EQ(0x0f, current[2]);
898 EXPECT_EQ(0x2e, current[3]);
900 current = db.frontier();
901 a.ucomisd(xmm11, xmm12);
902 EXPECT_EQ(0x66, current[0]);
903 EXPECT_EQ(0x45, current[1]);
904 EXPECT_EQ(0x0f, current[2]);
905 EXPECT_EQ(0x2e, current[3]);
908 TEST(Asm, Pxor) {
909 TestDataBlock db(256);
910 Asm a { db };
912 Address current = db.frontier();
913 a.pxor(xmm0, xmm1);
914 EXPECT_EQ(0x66, current[0]);
915 EXPECT_EQ(0x0f, current[1]);
916 EXPECT_EQ(0xef, current[2]);
918 current = db.frontier();
919 a.pxor(xmm8, xmm1);
920 EXPECT_EQ(0x66, current[0]);
921 EXPECT_EQ(0x41, current[1]);
922 EXPECT_EQ(0x0f, current[2]);
923 EXPECT_EQ(0xef, current[3]);
925 current = db.frontier();
926 a.pxor(xmm0, xmm15);
927 EXPECT_EQ(0x66, current[0]);
928 EXPECT_EQ(0x44, current[1]);
929 EXPECT_EQ(0x0f, current[2]);
930 EXPECT_EQ(0xef, current[3]);
932 current = db.frontier();
933 a.pxor(xmm11, xmm15);
934 EXPECT_EQ(0x66, current[0]);
935 EXPECT_EQ(0x45, current[1]);
936 EXPECT_EQ(0x0f, current[2]);
937 EXPECT_EQ(0xef, current[3]);
940 TEST(Asm, Psrlq) {
941 TestDataBlock db(256);
942 Asm a { db };
944 Address current = db.frontier();
945 a.psrlq(3, xmm1);
946 EXPECT_EQ(0x66, current[0]);
947 EXPECT_EQ(0x0f, current[1]);
948 EXPECT_EQ(0x73, current[2]);
950 current = db.frontier();
951 a.psrlq(3, xmm9);
952 EXPECT_EQ(0x66, current[0]);
953 EXPECT_EQ(0x41, current[1]);
954 EXPECT_EQ(0x0f, current[2]);
955 EXPECT_EQ(0x73, current[3]);
958 TEST(Asm, Psllq) {
959 TestDataBlock db(256);
960 Asm a { db };
962 Address current = db.frontier();
963 a.psllq(3, xmm1);
964 EXPECT_EQ(0x66, current[0]);
965 EXPECT_EQ(0x0f, current[1]);
966 EXPECT_EQ(0x73, current[2]);
968 current = db.frontier();
969 a.psllq(3, xmm9);
970 EXPECT_EQ(0x66, current[0]);
971 EXPECT_EQ(0x41, current[1]);
972 EXPECT_EQ(0x0f, current[2]);
973 EXPECT_EQ(0x73, current[3]);
976 #if defined(USE_HWCRC) && defined(__SSE4_2__)
977 TEST(Asm, Crc32q) {
978 TestDataBlock db(10 << 24);
979 Asm a { db };
981 a. push (rax);
982 a. push (rbx);
983 a. movq (0x15, rax);
984 a. crc32q (rax, rbx);
985 expect_asm(a, R"(
986 pushq %rax
987 pushq %rbx
988 mov $0x15, %rax
989 crc32 %rax, %rbx
990 )");
992 #endif
996 #endif