// [ruby/etc] bump up to 1.3.1
// [ruby-80x24.org.git] / yjit_asm.c
// blob 4cc3a538fa3d7c6763f2663d1d9820ef64cc81f5
// This file is a fragment of the yjit.o compilation unit. See yjit.c.
//
// Note that the definitions of some of these functions don't specify
// static inline, but their declarations in yjit_asm.h do. The resulting
// linkage is the same as if both specified it. The relevant sections in
// N1256 are 6.2.2p4, 6.2.2p5, and 6.7.4p5.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>

// For mmap(), sysconf()
#ifndef _WIN32
#include <unistd.h>
#include <sys/mman.h>
#endif

#include "yjit_asm.h"
// Compute the number of bits needed to encode a signed value
uint32_t sig_imm_size(int64_t imm)
{
    // Return the smallest of the encodable widths (8/16/32/64)
    // whose two's-complement range contains imm.
    if (imm >= INT8_MIN && imm <= INT8_MAX) {
        return 8;
    }
    else if (imm >= INT16_MIN && imm <= INT16_MAX) {
        return 16;
    }
    else if (imm >= INT32_MIN && imm <= INT32_MAX) {
        return 32;
    }
    else {
        return 64;
    }
}
// Compute the number of bits needed to encode an unsigned value
uint32_t unsig_imm_size(uint64_t imm)
{
    // Return the smallest of the encodable widths (8/16/32/64)
    // that can hold imm.
    if (imm <= UINT8_MAX) {
        return 8;
    }
    if (imm <= UINT16_MAX) {
        return 16;
    }
    if (imm <= UINT32_MAX) {
        return 32;
    }
    return 64;
}
51 x86opnd_t mem_opnd(uint32_t num_bits, x86opnd_t base_reg, int32_t disp)
53 bool is_iprel = base_reg.as.reg.reg_type == REG_IP;
55 x86opnd_t opnd = {
56 OPND_MEM,
57 num_bits,
58 .as.mem = { base_reg.as.reg.reg_no, 0, 0, false, is_iprel, disp }
61 return opnd;
64 x86opnd_t mem_opnd_sib(uint32_t num_bits, x86opnd_t base_reg, x86opnd_t index_reg, int32_t scale, int32_t disp)
66 uint8_t scale_exp;
67 switch (scale) {
68 case 8:
69 scale_exp = 3;
70 break;
71 case 4:
72 scale_exp = 2;
73 break;
74 case 2:
75 scale_exp = 1;
76 break;
77 case 1:
78 scale_exp = 0;
79 break;
80 default:
81 rb_bug("yjit: scale not one of 1,2,4,8");
82 break;
85 bool is_iprel = base_reg.as.reg.reg_type == REG_IP;
87 x86opnd_t opnd = {
88 OPND_MEM,
89 num_bits,
90 .as.mem = {
91 .base_reg_no = base_reg.as.reg.reg_no,
92 .idx_reg_no = index_reg.as.reg.reg_no,
93 .has_idx = 1,
94 .scale_exp = scale_exp,
95 .is_iprel = is_iprel,
96 .disp = disp
100 return opnd;
103 static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits)
105 assert (num_bits % 8 == 0);
106 x86opnd_t sub = opnd;
107 sub.num_bits = num_bits;
108 return sub;
111 x86opnd_t imm_opnd(int64_t imm)
113 x86opnd_t opnd = {
114 OPND_IMM,
115 sig_imm_size(imm),
116 .as.imm = imm
119 return opnd;
122 x86opnd_t const_ptr_opnd(const void *ptr)
124 x86opnd_t opnd = {
125 OPND_IMM,
127 .as.unsig_imm = (uint64_t)ptr
130 return opnd;
// Round a pointer up to the next multiple of the given alignment (in bytes).
// Returns ptr unchanged when it is already aligned.
static uint8_t *align_ptr(uint8_t *ptr, uint32_t multiple)
{
    // Compute the pointer modulo the given alignment boundary.
    // Use uintptr_t so the remainder is correct even for pointers that do
    // not fit in 32 bits when the alignment is not a power of two
    // (a uint32_t truncation would compute the wrong remainder there).
    uintptr_t rem = (uintptr_t)ptr % multiple;

    // If the pointer is already aligned, stop
    if (rem == 0) {
        return ptr;
    }

    // Pad the pointer by the necessary amount to align it
    uintptr_t pad = multiple - rem;
    return ptr + pad;
}
149 // Allocate a block of executable memory
150 static uint8_t *alloc_exec_mem(uint32_t mem_size)
152 uint8_t *mem_block;
154 // On Linux
155 #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
156 // Align the requested address to page size
157 uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
158 uint8_t *req_addr = align_ptr((uint8_t*)&alloc_exec_mem, page_size);
160 do {
161 // Try to map a chunk of memory as executable
162 mem_block = (uint8_t*)mmap(
163 (void*)req_addr,
164 mem_size,
165 PROT_READ | PROT_EXEC,
166 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
171 // If we succeeded, stop
172 if (mem_block != MAP_FAILED) {
173 break;
176 // +4MB
177 req_addr += 4 * 1024 * 1024;
178 } while (req_addr < (uint8_t*)&alloc_exec_mem + INT32_MAX);
180 // On MacOS and other platforms
181 #else
182 // Try to map a chunk of memory as executable
183 mem_block = (uint8_t*)mmap(
184 (void*)alloc_exec_mem,
185 mem_size,
186 PROT_READ | PROT_EXEC,
187 MAP_PRIVATE | MAP_ANONYMOUS,
191 #endif
193 // Fallback
194 if (mem_block == MAP_FAILED) {
195 // Try again without the address hint (e.g., valgrind)
196 mem_block = (uint8_t*)mmap(
197 NULL,
198 mem_size,
199 PROT_READ | PROT_EXEC,
200 MAP_PRIVATE | MAP_ANONYMOUS,
206 // Check that the memory mapping was successful
207 if (mem_block == MAP_FAILED) {
208 perror("mmap call failed");
209 exit(-1);
212 codeblock_t block;
213 codeblock_t *cb = &block;
215 cb_init(cb, mem_block, mem_size);
217 // Fill the executable memory with PUSH DS (0x1E) so that
218 // executing uninitialized memory will fault with #UD in
219 // 64-bit mode.
220 cb_mark_all_writeable(cb);
221 memset(mem_block, 0x1E, mem_size);
222 cb_mark_all_executable(cb);
224 return mem_block;
227 // Initialize a code block object
228 void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size)
230 assert (mem_block);
231 cb->mem_block_ = mem_block;
232 cb->mem_size = mem_size;
233 cb->write_pos = 0;
234 cb->num_labels = 0;
235 cb->num_refs = 0;
236 cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
239 // Set the current write position
240 void cb_set_pos(codeblock_t *cb, uint32_t pos)
242 // Assert here since while assembler functions do bounds checking, there is
243 // nothing stopping users from taking out an out-of-bounds pointer and
244 // doing bad accesses with it.
245 assert (pos < cb->mem_size);
246 cb->write_pos = pos;
249 // Align the current write position to a multiple of bytes
250 void cb_align_pos(codeblock_t *cb, uint32_t multiple)
252 // Compute the pointer modulo the given alignment boundary
253 uint8_t *ptr = cb_get_write_ptr(cb);
254 uint8_t *aligned_ptr = align_ptr(ptr, multiple);
255 const uint32_t write_pos = cb->write_pos;
257 // Pad the pointer by the necessary amount to align it
258 ptrdiff_t pad = aligned_ptr - ptr;
259 cb_set_pos(cb, write_pos + (int32_t)pad);
262 // Set the current write position from a pointer
263 void cb_set_write_ptr(codeblock_t *cb, uint8_t *code_ptr)
265 intptr_t pos = code_ptr - cb->mem_block_;
266 assert (pos < cb->mem_size);
267 cb_set_pos(cb, (uint32_t)pos);
270 // Get a direct pointer into the executable memory block
271 uint8_t *cb_get_ptr(const codeblock_t *cb, uint32_t index)
273 if (index < cb->mem_size) {
274 return &cb->mem_block_[index];
276 else {
277 return NULL;
281 // Get a direct pointer to the current write position
282 uint8_t *cb_get_write_ptr(const codeblock_t *cb)
284 return cb_get_ptr(cb, cb->write_pos);
287 // Write a byte at the current position
288 void cb_write_byte(codeblock_t *cb, uint8_t byte)
290 assert (cb->mem_block_);
291 if (cb->write_pos < cb->mem_size) {
292 cb_mark_position_writeable(cb, cb->write_pos);
293 cb->mem_block_[cb->write_pos] = byte;
294 cb->write_pos++;
296 else {
297 cb->dropped_bytes = true;
301 // Write multiple bytes starting from the current position
302 void cb_write_bytes(codeblock_t *cb, uint32_t num_bytes, ...)
304 va_list va;
305 va_start(va, num_bytes);
307 for (uint32_t i = 0; i < num_bytes; ++i)
309 uint8_t byte = va_arg(va, int);
310 cb_write_byte(cb, byte);
313 va_end(va);
316 // Write a signed integer over a given number of bits at the current position
317 void cb_write_int(codeblock_t *cb, uint64_t val, uint32_t num_bits)
319 assert (num_bits > 0);
320 assert (num_bits % 8 == 0);
322 // Switch on the number of bits
323 switch (num_bits) {
324 case 8:
325 cb_write_byte(cb, (uint8_t)val);
326 break;
328 case 16:
329 cb_write_bytes(
332 (uint8_t)((val >> 0) & 0xFF),
333 (uint8_t)((val >> 8) & 0xFF)
335 break;
337 case 32:
338 cb_write_bytes(
341 (uint8_t)((val >> 0) & 0xFF),
342 (uint8_t)((val >> 8) & 0xFF),
343 (uint8_t)((val >> 16) & 0xFF),
344 (uint8_t)((val >> 24) & 0xFF)
346 break;
348 default:
350 // Compute the size in bytes
351 uint32_t num_bytes = num_bits / 8;
353 // Write out the bytes
354 for (uint32_t i = 0; i < num_bytes; ++i)
356 uint8_t byte_val = (uint8_t)(val & 0xFF);
357 cb_write_byte(cb, byte_val);
358 val >>= 8;
364 // Allocate a new label with a given name
365 uint32_t cb_new_label(codeblock_t *cb, const char *name)
367 //if (hasASM)
368 // writeString(to!string(label) ~ ":");
370 assert (cb->num_labels < MAX_LABELS);
372 // Allocate the new label
373 uint32_t label_idx = cb->num_labels++;
375 // This label doesn't have an address yet
376 cb->label_addrs[label_idx] = 0;
377 cb->label_names[label_idx] = name;
379 return label_idx;
382 // Write a label at the current address
383 void cb_write_label(codeblock_t *cb, uint32_t label_idx)
385 assert (label_idx < MAX_LABELS);
386 cb->label_addrs[label_idx] = cb->write_pos;
389 // Add a label reference at the current write position
390 void cb_label_ref(codeblock_t *cb, uint32_t label_idx)
392 assert (label_idx < MAX_LABELS);
393 assert (cb->num_refs < MAX_LABEL_REFS);
395 // Keep track of the reference
396 cb->label_refs[cb->num_refs] = (labelref_t){ cb->write_pos, label_idx };
397 cb->num_refs++;
400 // Link internal label references
401 void cb_link_labels(codeblock_t *cb)
403 uint32_t orig_pos = cb->write_pos;
405 // For each label reference
406 for (uint32_t i = 0; i < cb->num_refs; ++i)
408 uint32_t ref_pos = cb->label_refs[i].pos;
409 uint32_t label_idx = cb->label_refs[i].label_idx;
410 assert (ref_pos < cb->mem_size);
411 assert (label_idx < MAX_LABELS);
413 uint32_t label_addr = cb->label_addrs[label_idx];
414 assert (label_addr < cb->mem_size);
416 // Compute the offset from the reference's end to the label
417 int64_t offset = (int64_t)label_addr - (int64_t)(ref_pos + 4);
419 cb_set_pos(cb, ref_pos);
420 cb_write_int(cb, offset, 32);
423 cb->write_pos = orig_pos;
425 // Clear the label positions and references
426 cb->num_labels = 0;
427 cb->num_refs = 0;
430 // Check if an operand needs a REX byte to be encoded
431 static bool rex_needed(x86opnd_t opnd)
433 if (opnd.type == OPND_NONE || opnd.type == OPND_IMM)
435 return false;
438 if (opnd.type == OPND_REG)
440 return (
441 opnd.as.reg.reg_no > 7 ||
442 (opnd.num_bits == 8 && opnd.as.reg.reg_no >= 4 && opnd.as.reg.reg_no <= 7)
446 if (opnd.type == OPND_MEM)
448 return (opnd.as.mem.base_reg_no > 7) || (opnd.as.mem.has_idx && opnd.as.mem.idx_reg_no > 7);
451 rb_bug("unreachable");
454 // Check if an SIB byte is needed to encode this operand
455 static bool sib_needed(x86opnd_t opnd)
457 if (opnd.type != OPND_MEM)
458 return false;
460 return (
461 opnd.as.mem.has_idx ||
462 opnd.as.mem.base_reg_no == RSP.as.reg.reg_no ||
463 opnd.as.mem.base_reg_no == R12.as.reg.reg_no
467 // Compute the size of the displacement field needed for a memory operand
468 static uint32_t disp_size(x86opnd_t opnd)
470 assert (opnd.type == OPND_MEM);
472 // If using RIP as the base, use disp32
473 if (opnd.as.mem.is_iprel)
475 return 32;
478 // Compute the required displacement size
479 if (opnd.as.mem.disp != 0)
481 uint32_t num_bits = sig_imm_size(opnd.as.mem.disp);
482 assert (num_bits <= 32 && "displacement does not fit in 32 bits");
484 // x86 can only encode 8-bit and 32-bit displacements
485 if (num_bits == 16)
486 num_bits = 32;;
488 return num_bits;
491 // If EBP or RBP or R13 is used as the base, displacement must be encoded
492 if (opnd.as.mem.base_reg_no == RBP.as.reg.reg_no ||
493 opnd.as.mem.base_reg_no == R13.as.reg.reg_no)
495 return 8;
498 return 0;
501 // Write the REX byte
502 static void cb_write_rex(
503 codeblock_t *cb,
504 bool w_flag,
505 uint8_t reg_no,
506 uint8_t idx_reg_no,
507 uint8_t rm_reg_no
510 // 0 1 0 0 w r x b
511 // w - 64-bit operand size flag
512 // r - MODRM.reg extension
513 // x - SIB.index extension
514 // b - MODRM.rm or SIB.base extension
515 uint8_t w = w_flag? 1:0;
516 uint8_t r = (reg_no & 8)? 1:0;
517 uint8_t x = (idx_reg_no & 8)? 1:0;
518 uint8_t b = (rm_reg_no & 8)? 1:0;
520 // Encode and write the REX byte
521 uint8_t rexByte = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
522 cb_write_byte(cb, rexByte);
525 // Write an opcode byte with an embedded register operand
526 static void cb_write_opcode(codeblock_t *cb, uint8_t opcode, x86opnd_t reg)
528 // Write the reg field into the opcode byte
529 uint8_t op_byte = opcode | (reg.as.reg.reg_no & 7);
530 cb_write_byte(cb, op_byte);
533 // Encode an RM instruction
534 static void cb_write_rm(
535 codeblock_t *cb,
536 bool szPref,
537 bool rexW,
538 x86opnd_t r_opnd,
539 x86opnd_t rm_opnd,
540 uint8_t opExt,
541 uint32_t op_len,
542 ...)
544 assert (op_len > 0 && op_len <= 3);
545 assert (r_opnd.type == OPND_REG || r_opnd.type == OPND_NONE);
547 // Flag to indicate the REX prefix is needed
548 bool need_rex = rexW || rex_needed(r_opnd) || rex_needed(rm_opnd);
550 // Flag to indicate SIB byte is needed
551 bool need_sib = sib_needed(r_opnd) || sib_needed(rm_opnd);
553 // Add the operand-size prefix, if needed
554 if (szPref == true)
555 cb_write_byte(cb, 0x66);
557 // Add the REX prefix, if needed
558 if (need_rex)
560 // 0 1 0 0 w r x b
561 // w - 64-bit operand size flag
562 // r - MODRM.reg extension
563 // x - SIB.index extension
564 // b - MODRM.rm or SIB.base extension
566 uint8_t w = rexW? 1:0;
568 uint8_t r;
569 if (r_opnd.type != OPND_NONE)
570 r = (r_opnd.as.reg.reg_no & 8)? 1:0;
571 else
572 r = 0;
574 uint8_t x;
575 if (need_sib && rm_opnd.as.mem.has_idx)
576 x = (rm_opnd.as.mem.idx_reg_no & 8)? 1:0;
577 else
578 x = 0;
580 uint8_t b;
581 if (rm_opnd.type == OPND_REG)
582 b = (rm_opnd.as.reg.reg_no & 8)? 1:0;
583 else if (rm_opnd.type == OPND_MEM)
584 b = (rm_opnd.as.mem.base_reg_no & 8)? 1:0;
585 else
586 b = 0;
588 // Encode and write the REX byte
589 uint8_t rex_byte = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
590 cb_write_byte(cb, rex_byte);
593 // Write the opcode bytes to the code block
594 va_list va;
595 va_start(va, op_len);
596 for (uint32_t i = 0; i < op_len; ++i)
598 uint8_t byte = va_arg(va, int);
599 cb_write_byte(cb, byte);
601 va_end(va);
603 // MODRM.mod (2 bits)
604 // MODRM.reg (3 bits)
605 // MODRM.rm (3 bits)
607 assert (
608 !(opExt != 0xFF && r_opnd.type != OPND_NONE) &&
609 "opcode extension and register operand present"
612 // Encode the mod field
613 uint8_t mod;
614 if (rm_opnd.type == OPND_REG)
616 mod = 3;
618 else
620 uint32_t dsize = disp_size(rm_opnd);
621 if (dsize == 0 || rm_opnd.as.mem.is_iprel)
622 mod = 0;
623 else if (dsize == 8)
624 mod = 1;
625 else if (dsize == 32)
626 mod = 2;
627 else
628 rb_bug("unreachable");
631 // Encode the reg field
632 uint8_t reg;
633 if (opExt != 0xFF)
634 reg = opExt;
635 else if (r_opnd.type == OPND_REG)
636 reg = r_opnd.as.reg.reg_no & 7;
637 else
638 reg = 0;
640 // Encode the rm field
641 uint8_t rm;
642 if (rm_opnd.type == OPND_REG)
644 rm = rm_opnd.as.reg.reg_no & 7;
646 else
648 if (need_sib)
649 rm = 4;
650 else
651 rm = rm_opnd.as.mem.base_reg_no & 7;
654 // Encode and write the ModR/M byte
655 uint8_t rm_byte = (mod << 6) + (reg << 3) + (rm);
656 cb_write_byte(cb, rm_byte);
658 // Add the SIB byte, if needed
659 if (need_sib)
661 // SIB.scale (2 bits)
662 // SIB.index (3 bits)
663 // SIB.base (3 bits)
665 assert (rm_opnd.type == OPND_MEM);
667 // Encode the scale value
668 uint8_t scale = rm_opnd.as.mem.scale_exp;
670 // Encode the index value
671 uint8_t index;
672 if (!rm_opnd.as.mem.has_idx)
673 index = 4;
674 else
675 index = rm_opnd.as.mem.idx_reg_no & 7;
677 // Encode the base register
678 uint8_t base = rm_opnd.as.mem.base_reg_no & 7;
680 // Encode and write the SIB byte
681 uint8_t sib_byte = (scale << 6) + (index << 3) + (base);
682 cb_write_byte(cb, sib_byte);
685 // Add the displacement
686 if (rm_opnd.type == OPND_MEM)
688 uint32_t dsize = disp_size(rm_opnd);
689 if (dsize > 0)
690 cb_write_int(cb, rm_opnd.as.mem.disp, dsize);
694 // Encode a mul-like single-operand RM instruction
695 static void write_rm_unary(
696 codeblock_t *cb,
697 const char *mnem,
698 uint8_t opMemReg8,
699 uint8_t opMemRegPref,
700 uint8_t opExt,
701 x86opnd_t opnd)
703 // Write a disassembly string
704 //cb.writeASM(mnem, opnd);
706 // Check the size of opnd0
707 uint32_t opndSize;
708 if (opnd.type == OPND_REG || opnd.type == OPND_MEM)
709 opndSize = opnd.num_bits;
710 else
711 rb_bug("yjit: invalid operand");
713 assert (opndSize == 8 || opndSize == 16 || opndSize == 32 || opndSize == 64);
714 bool szPref = opndSize == 16;
715 bool rexW = opndSize == 64;
717 if (opndSize == 8)
718 cb_write_rm(cb, false, false, NO_OPND, opnd, opExt, 1, opMemReg8);
719 else
720 cb_write_rm(cb, szPref, rexW, NO_OPND, opnd, opExt, 1, opMemRegPref);
723 // Encode an add-like RM instruction with multiple possible encodings
724 static void cb_write_rm_multi(
725 codeblock_t *cb,
726 const char *mnem,
727 uint8_t opMemReg8,
728 uint8_t opMemRegPref,
729 uint8_t opRegMem8,
730 uint8_t opRegMemPref,
731 uint8_t opMemImm8,
732 uint8_t opMemImmSml,
733 uint8_t opMemImmLrg,
734 uint8_t opExtImm,
735 x86opnd_t opnd0,
736 x86opnd_t opnd1)
738 assert (opnd0.type == OPND_REG || opnd0.type == OPND_MEM);
741 // Write disassembly string
742 if (!opnd1.isNone)
743 cb.writeASM(mnem, opnd0, opnd1);
744 else
745 cb.writeASM(mnem, opnd0);
748 // Check the size of opnd0
749 uint32_t opndSize = opnd0.num_bits;
751 // Check the size of opnd1
752 if (opnd1.type == OPND_REG || opnd1.type == OPND_MEM)
754 assert (opnd1.num_bits == opndSize && "operand size mismatch");
756 else if (opnd1.type == OPND_IMM)
758 assert (opnd1.num_bits <= opndSize);
761 assert (opndSize == 8 || opndSize == 16 || opndSize == 32 || opndSize == 64);
762 bool szPref = opndSize == 16;
763 bool rexW = opndSize == 64;
765 // R/M + Reg
766 if ((opnd0.type == OPND_MEM && opnd1.type == OPND_REG) ||
767 (opnd0.type == OPND_REG && opnd1.type == OPND_REG))
769 // R/M is opnd0
770 if (opndSize == 8)
771 cb_write_rm(cb, false, false, opnd1, opnd0, 0xFF, 1, opMemReg8);
772 else
773 cb_write_rm(cb, szPref, rexW, opnd1, opnd0, 0xFF, 1, opMemRegPref);
776 // Reg + R/M
777 else if (opnd0.type == OPND_REG && opnd1.type == OPND_MEM)
779 // R/M is opnd1
780 if (opndSize == 8)
781 cb_write_rm(cb, false, false, opnd0, opnd1, 0xFF, 1, opRegMem8);
782 else
783 cb_write_rm(cb, szPref, rexW, opnd0, opnd1, 0xFF, 1, opRegMemPref);
786 // R/M + Imm
787 else if (opnd1.type == OPND_IMM)
789 // 8-bit immediate
790 if (opnd1.num_bits <= 8)
792 if (opndSize == 8)
793 cb_write_rm(cb, false, false, NO_OPND, opnd0, opExtImm, 1, opMemImm8);
794 else
795 cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExtImm, 1, opMemImmSml);
797 cb_write_int(cb, opnd1.as.imm, 8);
800 // 32-bit immediate
801 else if (opnd1.num_bits <= 32)
803 assert (opnd1.num_bits <= opndSize && "immediate too large for dst");
804 cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExtImm, 1, opMemImmLrg);
805 cb_write_int(cb, opnd1.as.imm, (opndSize > 32)? 32:opndSize);
808 // Immediate too large
809 else
811 assert (false && "immediate value too large");
815 // Invalid operands
816 else
818 assert (false && "invalid operand combination");
822 // Encode a single-operand shift instruction
823 static void cb_write_shift(
824 codeblock_t *cb,
825 const char *mnem,
826 uint8_t opMemOnePref,
827 uint8_t opMemClPref,
828 uint8_t opMemImmPref,
829 uint8_t opExt,
830 x86opnd_t opnd0,
831 x86opnd_t opnd1)
833 // Write a disassembly string
834 //cb.writeASM(mnem, opnd0, opnd1);
836 // Check the size of opnd0
837 uint32_t opndSize;
838 if (opnd0.type == OPND_REG || opnd0.type == OPND_MEM)
839 opndSize = opnd0.num_bits;
840 else
841 rb_bug("yjit: shift: invalid first operand");
843 assert (opndSize == 16 || opndSize == 32 || opndSize == 64);
844 bool szPref = opndSize == 16;
845 bool rexW = opndSize == 64;
847 if (opnd1.type == OPND_IMM)
849 if (opnd1.as.imm == 1)
851 cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExt, 1, opMemOnePref);
853 else
855 assert (opnd1.num_bits <= 8);
856 cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExt, 1, opMemImmPref);
857 cb_write_byte(cb, (uint8_t)opnd1.as.imm);
861 else if (opnd1.isReg && opnd1.reg == CL)
863 cb.writeRMInstr!('l', opExt, opMemClPref)(szPref, rexW, opnd0, X86Opnd.NONE);
866 else
868 assert (false);
872 // Encode a relative jump to a label (direct or conditional)
873 // Note: this always encodes a 32-bit offset
874 static void cb_write_jcc(codeblock_t *cb, const char *mnem, uint8_t op0, uint8_t op1, uint32_t label_idx)
876 //cb.writeASM(mnem, label);
878 // Write the opcode
879 if (op0 != 0xFF)
880 cb_write_byte(cb, op0);
881 cb_write_byte(cb, op1);
883 // Add a reference to the label
884 cb_label_ref(cb, label_idx);
886 // Relative 32-bit offset to be patched
887 cb_write_int(cb, 0, 32);
890 // Encode a relative jump to a pointer at a 32-bit offset (direct or conditional)
891 static void cb_write_jcc_ptr(codeblock_t *cb, const char *mnem, uint8_t op0, uint8_t op1, uint8_t *dst_ptr)
893 //cb.writeASM(mnem, label);
895 // Write the opcode
896 if (op0 != 0xFF)
897 cb_write_byte(cb, op0);
898 cb_write_byte(cb, op1);
900 // Pointer to the end of this jump instruction
901 uint8_t *end_ptr = cb_get_ptr(cb, cb->write_pos + 4);
903 // Compute the jump offset
904 int64_t rel64 = (int64_t)(dst_ptr - end_ptr);
905 if (rel64 >= INT32_MIN && rel64 <= INT32_MAX) {
906 // Write the relative 32-bit jump offset
907 cb_write_int(cb, (int32_t)rel64, 32);
909 else {
910 // Offset doesn't fit in 4 bytes. Report error.
911 cb->dropped_bytes = true;
915 // Encode a conditional move instruction
916 static void cb_write_cmov(codeblock_t *cb, const char *mnem, uint8_t opcode1, x86opnd_t dst, x86opnd_t src)
918 //cb.writeASM(mnem, dst, src);
920 assert (dst.type == OPND_REG);
921 assert (src.type == OPND_REG || src.type == OPND_MEM);
922 assert (dst.num_bits >= 16 && "invalid dst reg size in cmov");
924 bool szPref = dst.num_bits == 16;
925 bool rexW = dst.num_bits == 64;
927 cb_write_rm(cb, szPref, rexW, dst, src, 0xFF, 2, 0x0F, opcode1);
930 // add - Integer addition
931 void add(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
933 cb_write_rm_multi(
935 "add",
936 0x00, // opMemReg8
937 0x01, // opMemRegPref
938 0x02, // opRegMem8
939 0x03, // opRegMemPref
940 0x80, // opMemImm8
941 0x83, // opMemImmSml
942 0x81, // opMemImmLrg
943 0x00, // opExtImm
944 opnd0,
945 opnd1
949 /// and - Bitwise AND
950 void and(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
952 cb_write_rm_multi(
954 "and",
955 0x20, // opMemReg8
956 0x21, // opMemRegPref
957 0x22, // opRegMem8
958 0x23, // opRegMemPref
959 0x80, // opMemImm8
960 0x83, // opMemImmSml
961 0x81, // opMemImmLrg
962 0x04, // opExtImm
963 opnd0,
964 opnd1
968 // call - Call to a pointer with a 32-bit displacement offset
969 static void call_rel32(codeblock_t *cb, int32_t rel32)
971 //cb.writeASM("call", rel32);
973 // Write the opcode
974 cb_write_byte(cb, 0xE8);
976 // Write the relative 32-bit jump offset
977 cb_write_int(cb, (int32_t)rel32, 32);
980 // call - Call a pointer, encode with a 32-bit offset if possible
981 void call_ptr(codeblock_t *cb, x86opnd_t scratch_reg, uint8_t *dst_ptr)
983 assert (scratch_reg.type == OPND_REG);
985 // Pointer to the end of this call instruction
986 uint8_t *end_ptr = cb_get_ptr(cb, cb->write_pos + 5);
988 // Compute the jump offset
989 int64_t rel64 = (int64_t)(dst_ptr - end_ptr);
991 // If the offset fits in 32-bit
992 if (rel64 >= INT32_MIN && rel64 <= INT32_MAX) {
993 call_rel32(cb, (int32_t)rel64);
994 return;
997 // Move the pointer into the scratch register and call
998 mov(cb, scratch_reg, const_ptr_opnd(dst_ptr));
999 call(cb, scratch_reg);
1002 /// call - Call to label with 32-bit offset
1003 void call_label(codeblock_t *cb, uint32_t label_idx)
1005 //cb.writeASM("call", label);
1007 // Write the opcode
1008 cb_write_byte(cb, 0xE8);
1010 // Add a reference to the label
1011 cb_label_ref(cb, label_idx);
1013 // Relative 32-bit offset to be patched
1014 cb_write_int(cb, 0, 32);
1017 /// call - Indirect call with an R/M operand
1018 void call(codeblock_t *cb, x86opnd_t opnd)
1020 //cb.writeASM("call", opnd);
1021 cb_write_rm(cb, false, false, NO_OPND, opnd, 2, 1, 0xFF);
1024 /// cmovcc - Conditional move
1025 void cmova(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmova", 0x47, dst, src); }
1026 void cmovae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovae", 0x43, dst, src); }
1027 void cmovb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovb", 0x42, dst, src); }
1028 void cmovbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovbe", 0x46, dst, src); }
1029 void cmovc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovc", 0x42, dst, src); }
1030 void cmove(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmove", 0x44, dst, src); }
1031 void cmovg(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovg", 0x4F, dst, src); }
1032 void cmovge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovge", 0x4D, dst, src); }
1033 void cmovl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovl", 0x4C, dst, src); }
1034 void cmovle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovle", 0x4E, dst, src); }
1035 void cmovna(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovna", 0x46, dst, src); }
1036 void cmovnae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnae", 0x42, dst, src); }
1037 void cmovnb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnb", 0x43, dst, src); }
1038 void cmovnbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnbe", 0x47, dst, src); }
1039 void cmovnc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnc", 0x43, dst, src); }
1040 void cmovne(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovne", 0x45, dst, src); }
1041 void cmovng(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovng", 0x4E, dst, src); }
1042 void cmovnge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnge", 0x4C, dst, src); }
1043 void cmovnl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnl" , 0x4D, dst, src); }
1044 void cmovnle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnle", 0x4F, dst, src); }
1045 void cmovno(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovno", 0x41, dst, src); }
1046 void cmovnp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnp", 0x4B, dst, src); }
1047 void cmovns(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovns", 0x49, dst, src); }
1048 void cmovnz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnz", 0x45, dst, src); }
1049 void cmovo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovo", 0x40, dst, src); }
1050 void cmovp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovp", 0x4A, dst, src); }
1051 void cmovpe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovpe", 0x4A, dst, src); }
1052 void cmovpo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovpo", 0x4B, dst, src); }
1053 void cmovs(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovs", 0x48, dst, src); }
1054 void cmovz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovz", 0x44, dst, src); }
1056 /// cmp - Compare and set flags
1057 void cmp(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
1059 cb_write_rm_multi(
1061 "cmp",
1062 0x38, // opMemReg8
1063 0x39, // opMemRegPref
1064 0x3A, // opRegMem8
1065 0x3B, // opRegMemPref
1066 0x80, // opMemImm8
1067 0x83, // opMemImmSml
1068 0x81, // opMemImmLrg
1069 0x07, // opExtImm
1070 opnd0,
1071 opnd1
1075 /// cdq - Convert doubleword to quadword
1076 void cdq(codeblock_t *cb)
1078 //cb.writeASM("cdq");
1079 cb_write_byte(cb, 0x99);
1082 /// cqo - Convert quadword to octaword
1083 void cqo(codeblock_t *cb)
1085 //cb.writeASM("cqo");
1086 cb_write_bytes(cb, 2, 0x48, 0x99);
1089 /// Interrupt 3 - trap to debugger
1090 void int3(codeblock_t *cb)
1092 //cb.writeASM("INT 3");
1093 cb_write_byte(cb, 0xCC);
// The definitions below are leftovers from the original D-language code
// generator and are not valid C; they are kept disabled for reference
// until ported. NOTE(review): presumed intentionally unported — confirm
// against upstream before enabling.
/*
// div - Unsigned integer division
alias div = writeRMUnary!(
    "div",
    0xF6, // opMemReg8
    0xF7, // opMemRegPref
    0x06  // opExt
);

/// divsd - Divide scalar double
alias divsd = writeXMM64!(
    "divsd",
    0xF2, // prefix
    0x0F, // opRegMem0
    0x5E  // opRegMem1
);

// idiv - Signed integer division
alias idiv = writeRMUnary!(
    "idiv",
    0xF6, // opMemReg8
    0xF7, // opMemRegPref
    0x07  // opExt
);

/// imul - Signed integer multiplication with two operands
void imul(CodeBlock cb, X86Opnd opnd0, X86Opnd opnd1)
{
    cb.writeASM("imul", opnd0, opnd1);

    assert (opnd0.isReg, "invalid first operand");
    auto opndSize = opnd0.reg.size;

    // Check the size of opnd1
    if (opnd1.isReg)
        assert (opnd1.reg.size is opndSize, "operand size mismatch");
    else if (opnd1.isMem)
        assert (opnd1.mem.size is opndSize, "operand size mismatch");

    assert (opndSize is 16 || opndSize is 32 || opndSize is 64);
    auto szPref = opndSize is 16;
    auto rexW = opndSize is 64;

    cb.writeRMInstr!('r', 0xFF, 0x0F, 0xAF)(szPref, rexW, opnd0, opnd1);
}

/// imul - Signed integer multiplication with three operands (one immediate)
void imul(CodeBlock cb, X86Opnd opnd0, X86Opnd opnd1, X86Opnd opnd2)
{
    cb.writeASM("imul", opnd0, opnd1, opnd2);

    assert (opnd0.isReg, "invalid first operand");
    auto opndSize = opnd0.reg.size;

    // Check the size of opnd1
    if (opnd1.isReg)
        assert (opnd1.reg.size is opndSize, "operand size mismatch");
    else if (opnd1.isMem)
        assert (opnd1.mem.size is opndSize, "operand size mismatch");

    assert (opndSize is 16 || opndSize is 32 || opndSize is 64);
    auto szPref = opndSize is 16;
    auto rexW = opndSize is 64;

    assert (opnd2.isImm, "invalid third operand");
    auto imm = opnd2.imm;

    // 8-bit immediate
    if (imm.immSize <= 8)
    {
        cb.writeRMInstr!('r', 0xFF, 0x6B)(szPref, rexW, opnd0, opnd1);
        cb.writeInt(imm.imm, 8);
    }
    // 32-bit immediate
    else if (imm.immSize <= 32)
    {
        assert (imm.immSize <= opndSize, "immediate too large for dst");
        cb.writeRMInstr!('r', 0xFF, 0x69)(szPref, rexW, opnd0, opnd1);
        cb.writeInt(imm.imm, min(opndSize, 32));
    }
    // Immediate too large
    else
    {
        assert (false, "immediate value too large");
    }
}
*/
1194 /// jcc - relative jumps to a label
1195 void ja_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "ja" , 0x0F, 0x87, label_idx); }
1196 void jae_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jae" , 0x0F, 0x83, label_idx); }
1197 void jb_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jb" , 0x0F, 0x82, label_idx); }
1198 void jbe_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jbe" , 0x0F, 0x86, label_idx); }
1199 void jc_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jc" , 0x0F, 0x82, label_idx); }
1200 void je_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "je" , 0x0F, 0x84, label_idx); }
1201 void jg_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jg" , 0x0F, 0x8F, label_idx); }
1202 void jge_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jge" , 0x0F, 0x8D, label_idx); }
1203 void jl_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jl" , 0x0F, 0x8C, label_idx); }
1204 void jle_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jle" , 0x0F, 0x8E, label_idx); }
1205 void jna_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jna" , 0x0F, 0x86, label_idx); }
1206 void jnae_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnae", 0x0F, 0x82, label_idx); }
1207 void jnb_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnb" , 0x0F, 0x83, label_idx); }
1208 void jnbe_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnbe", 0x0F, 0x87, label_idx); }
1209 void jnc_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnc" , 0x0F, 0x83, label_idx); }
1210 void jne_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jne" , 0x0F, 0x85, label_idx); }
1211 void jng_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jng" , 0x0F, 0x8E, label_idx); }
1212 void jnge_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnge", 0x0F, 0x8C, label_idx); }
1213 void jnl_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnl" , 0x0F, 0x8D, label_idx); }
1214 void jnle_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnle", 0x0F, 0x8F, label_idx); }
1215 void jno_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jno" , 0x0F, 0x81, label_idx); }
1216 void jnp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnp" , 0x0F, 0x8b, label_idx); }
1217 void jns_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jns" , 0x0F, 0x89, label_idx); }
1218 void jnz_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnz" , 0x0F, 0x85, label_idx); }
1219 void jo_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jo" , 0x0F, 0x80, label_idx); }
1220 void jp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jp" , 0x0F, 0x8A, label_idx); }
1221 void jpe_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jpe" , 0x0F, 0x8A, label_idx); }
1222 void jpo_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jpo" , 0x0F, 0x8B, label_idx); }
1223 void js_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "js" , 0x0F, 0x88, label_idx); }
1224 void jz_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jz" , 0x0F, 0x84, label_idx); }
1225 void jmp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jmp" , 0xFF, 0xE9, label_idx); }
1227 /// jcc - relative jumps to a pointer (32-bit offset)
1228 void ja_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "ja" , 0x0F, 0x87, ptr); }
1229 void jae_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jae" , 0x0F, 0x83, ptr); }
1230 void jb_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jb" , 0x0F, 0x82, ptr); }
1231 void jbe_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jbe" , 0x0F, 0x86, ptr); }
1232 void jc_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jc" , 0x0F, 0x82, ptr); }
1233 void je_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "je" , 0x0F, 0x84, ptr); }
1234 void jg_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jg" , 0x0F, 0x8F, ptr); }
1235 void jge_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jge" , 0x0F, 0x8D, ptr); }
1236 void jl_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jl" , 0x0F, 0x8C, ptr); }
1237 void jle_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jle" , 0x0F, 0x8E, ptr); }
1238 void jna_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jna" , 0x0F, 0x86, ptr); }
1239 void jnae_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnae", 0x0F, 0x82, ptr); }
1240 void jnb_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnb" , 0x0F, 0x83, ptr); }
1241 void jnbe_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnbe", 0x0F, 0x87, ptr); }
1242 void jnc_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnc" , 0x0F, 0x83, ptr); }
1243 void jne_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jne" , 0x0F, 0x85, ptr); }
1244 void jng_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jng" , 0x0F, 0x8E, ptr); }
1245 void jnge_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnge", 0x0F, 0x8C, ptr); }
1246 void jnl_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnl" , 0x0F, 0x8D, ptr); }
1247 void jnle_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnle", 0x0F, 0x8F, ptr); }
1248 void jno_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jno" , 0x0F, 0x81, ptr); }
1249 void jnp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnp" , 0x0F, 0x8b, ptr); }
1250 void jns_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jns" , 0x0F, 0x89, ptr); }
1251 void jnz_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnz" , 0x0F, 0x85, ptr); }
1252 void jo_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jo" , 0x0F, 0x80, ptr); }
1253 void jp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jp" , 0x0F, 0x8A, ptr); }
1254 void jpe_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jpe" , 0x0F, 0x8A, ptr); }
1255 void jpo_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jpo" , 0x0F, 0x8B, ptr); }
1256 void js_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "js" , 0x0F, 0x88, ptr); }
1257 void jz_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jz" , 0x0F, 0x84, ptr); }
1258 void jmp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jmp" , 0xFF, 0xE9, ptr); }
/// jmp - Indirect jump near to an R/M operand
void jmp_rm(codeblock_t *cb, x86opnd_t opnd)
{
    //cb.writeASM("jmp", opnd);
    // Opcode 0xFF; the `4` argument is presumably the /4 ModRM opcode
    // extension (x86 JMP r/m is FF /4) — same call shape as push (FF /6)
    // and pop (8F /0) below.
    cb_write_rm(cb, false, false, NO_OPND, opnd, 4, 1, 0xFF);
}
// jmp - Jump with relative 32-bit offset
void jmp32(codeblock_t *cb, int32_t offset)
{
    //cb.writeASM("jmp", ((offset > 0)? "+":"-") ~ to!string(offset));
    // E9 cd: JMP rel32. The caller-computed 32-bit displacement is
    // written verbatim after the opcode byte; no REX prefix is emitted.
    cb_write_byte(cb, 0xE9);
    cb_write_int(cb, offset, 32);
}
/// lea - Load Effective Address
void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
{
    //cb.writeASM("lea", dst, src);
    // Only a 64-bit destination register is supported here (REX.W is
    // forced via the `true` rexW argument below).
    assert (dst.num_bits == 64);
    cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x8D);
}
// Does this number fit in 32 bits and stays the same if you zero extend it to 64 bit?
// If the sign bit is clear, sign extension and zero extension yield the same
// result.
static bool
zero_extendable_32bit(uint64_t number)
{
    // "Fits in 32 bits with bit 31 clear" is exactly "fits in 31 bits",
    // i.e. number < 2^31.
    return number < (1ull << 31);
}
/// mov - Data move operation
//
// Immediate sources are encoded inline below (register and memory
// destinations handled separately); every other dst/src combination is
// delegated to cb_write_rm_multi with the MOV opcode table.
void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
{
    // R/M + Imm
    if (src.type == OPND_IMM)
    {
        //cb.writeASM("mov", dst, src);

        // R + Imm
        if (dst.type == OPND_REG)
        {
            // The immediate must fit in the destination, either by its
            // declared size or by its unsigned encoding size.
            assert (
                src.num_bits <= dst.num_bits ||
                unsig_imm_size(src.as.imm) <= dst.num_bits
            );

            // In case the source immediate could be zero extended to be 64
            // bit, we can use the 32-bit operands version of the instruction.
            // For example, we can turn mov(rax, 0x34) into the equivalent
            // mov(eax, 0x34).
            if (dst.num_bits == 64 && zero_extendable_32bit(src.as.unsig_imm)) {
                if (rex_needed(dst))
                    cb_write_rex(cb, false, 0, 0, dst.as.reg.reg_no);
                cb_write_opcode(cb, 0xB8, dst);
                cb_write_int(cb, src.as.imm, 32);
            }
            else {
                // 16-bit operand-size override prefix
                if (dst.num_bits == 16)
                    cb_write_byte(cb, 0x66);

                if (rex_needed(dst) || dst.num_bits == 64)
                    cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);

                // B0+r for 8-bit, B8+r otherwise
                cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);

                cb_write_int(cb, src.as.imm, dst.num_bits);
            }
        }

        // M + Imm
        else if (dst.type == OPND_MEM)
        {
            assert (src.num_bits <= dst.num_bits);

            // C6 /0 for 8-bit stores, C7 /0 otherwise
            if (dst.num_bits == 8)
                cb_write_rm(cb, false, false, NO_OPND, dst, 0xFF, 1, 0xC6);
            else
                cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, NO_OPND, dst, 0, 1, 0xC7);

            // 64-bit stores still take at most a 32-bit immediate
            const uint32_t output_num_bits = (dst.num_bits > 32u) ? 32u : dst.num_bits;
            // assert that we can write whole immediate without loss of information
            assert (sig_imm_size(src.as.imm) <= output_num_bits);
            cb_write_int(cb, src.as.imm, output_num_bits);
        }

        else
        {
            assert (false);
        }
    }
    else
    {
        // Register/memory source: standard two-operand encoding table.
        // 0xFF entries mark forms that MOV does not provide here.
        cb_write_rm_multi(
            cb,
            "mov",
            0x88, // opMemReg8
            0x89, // opMemRegPref
            0x8A, // opRegMem8
            0x8B, // opRegMemPref
            0xC6, // opMemImm8
            0xFF, // opMemImmSml (not available)
            0xFF, // opMemImmLrg
            0xFF, // opExtImm
            dst,
            src
        );
    }
}
1371 /// movsx - Move with sign extension (signed integers)
1372 void movsx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
1374 assert (dst.type == OPND_REG);
1375 assert (src.type == OPND_REG || src.type == OPND_MEM);
1376 assert (src.num_bits < dst.num_bits);
1378 //cb.writeASM("movsx", dst, src);
1380 if (src.num_bits == 8)
1382 cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, dst, src, 0xFF, 2, 0x0F, 0xBE);
1384 else if (src.num_bits == 16)
1386 cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, dst, src, 0xFF, 2, 0x0F, 0xBF);
1388 else if (src.num_bits == 32)
1390 cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x63);
1392 else
1394 assert (false);
/// movzx - Move with zero extension (unsigned values)
// NOTE(review): leftover D-language source from the assembler this file
// was ported from (`cb.writeASM`, the `is` operator and `writeRMInstr!`
// template syntax are not C). It cannot compile as C; presumably this
// region is disabled (wrapped in a block comment) in the original file —
// confirm against upstream. Kept verbatim as a reference for a future C
// implementation of movzx.
void movzx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
    cb.writeASM("movzx", dst, src);
    uint32_t dstSize;
    if (dst.isReg)
        dstSize = dst.reg.size;
    else
        assert (false, "movzx dst must be a register");
    uint32_t srcSize;
    if (src.isReg)
        srcSize = src.reg.size;
    else if (src.isMem)
        srcSize = src.mem.size;
    else
        assert (false);
    assert (
        srcSize < dstSize,
        "movzx: srcSize >= dstSize"
    );
    if (srcSize is 8)
        cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB6)(dstSize is 16, dstSize is 64, dst, src);
    else if (srcSize is 16)
        cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB7)(dstSize is 16, dstSize is 64, dst, src);
    else
        assert (false, "invalid src operand size for movxz");
// neg - Integer negation (multiplication by -1)
void neg(codeblock_t *cb, x86opnd_t opnd)
{
    // Single-operand group: F6 (8-bit) / F7 (wider) with /3 as the
    // ModRM opcode extension.
    write_rm_unary(
        cb,
        "neg",
        0xF6, // opMemReg8
        0xF7, // opMemRegPref
        0x03, // opExt
        opnd
    );
}
1451 // nop - Noop, one or multiple bytes long
1452 void nop(codeblock_t *cb, uint32_t length)
1454 switch (length) {
1455 case 0:
1456 break;
1458 case 1:
1459 //cb.writeASM("nop1");
1460 cb_write_byte(cb, 0x90);
1461 break;
1463 case 2:
1464 //cb.writeASM("nop2");
1465 cb_write_bytes(cb, 2, 0x66,0x90);
1466 break;
1468 case 3:
1469 //cb.writeASM("nop3");
1470 cb_write_bytes(cb, 3, 0x0F,0x1F,0x00);
1471 break;
1473 case 4:
1474 //cb.writeASM("nop4");
1475 cb_write_bytes(cb, 4, 0x0F,0x1F,0x40,0x00);
1476 break;
1478 case 5:
1479 //cb.writeASM("nop5");
1480 cb_write_bytes(cb, 5, 0x0F,0x1F,0x44,0x00,0x00);
1481 break;
1483 case 6:
1484 //cb.writeASM("nop6");
1485 cb_write_bytes(cb, 6, 0x66,0x0F,0x1F,0x44,0x00,0x00);
1486 break;
1488 case 7:
1489 //cb.writeASM("nop7");
1490 cb_write_bytes(cb, 7, 0x0F,0x1F,0x80,0x00,0x00,0x00,0x00);
1491 break;
1493 case 8:
1494 //cb.writeASM("nop8");
1495 cb_write_bytes(cb, 8, 0x0F,0x1F,0x84,0x00,0x00,0x00,0x00,0x00);
1496 break;
1498 case 9:
1499 //cb.writeASM("nop9");
1500 cb_write_bytes(cb, 9, 0x66,0x0F,0x1F,0x84,0x00,0x00,0x00,0x00,0x00);
1501 break;
1503 default:
1505 uint32_t written = 0;
1506 while (written + 9 <= length)
1508 nop(cb, 9);
1509 written += 9;
1511 nop(cb, length - written);
1513 break;
// not - Bitwise NOT
void not(codeblock_t *cb, x86opnd_t opnd)
{
    // Single-operand group: F6 (8-bit) / F7 (wider) with /2 as the
    // ModRM opcode extension.
    write_rm_unary(
        cb,
        "not",
        0xF6, // opMemReg8
        0xF7, // opMemRegPref
        0x02, // opExt
        opnd
    );
}
/// or - Bitwise OR
void or(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Standard two-operand ALU group: one opcode per operand combination,
    // with /1 as the immediate-form opcode extension.
    cb_write_rm_multi(
        cb,
        "or",
        0x08, // opMemReg8
        0x09, // opMemRegPref
        0x0A, // opRegMem8
        0x0B, // opRegMemPref
        0x80, // opMemImm8
        0x83, // opMemImmSml
        0x81, // opMemImmLrg
        0x01, // opExtImm
        opnd0,
        opnd1
    );
}
1549 /// pop - Pop a register off the stack
1550 void pop(codeblock_t *cb, x86opnd_t opnd)
1552 assert (opnd.num_bits == 64);
1554 //cb.writeASM("pop", opnd);
1556 if (opnd.type == OPND_REG) {
1557 if (rex_needed(opnd))
1558 cb_write_rex(cb, false, 0, 0, opnd.as.reg.reg_no);
1559 cb_write_opcode(cb, 0x58, opnd);
1561 else if (opnd.type == OPND_MEM) {
1562 cb_write_rm(cb, false, false, NO_OPND, opnd, 0, 1, 0x8F);
1564 else {
1565 assert(false && "unexpected operand type");
/// popfq - Pop the flags register (64-bit)
void popfq(codeblock_t *cb)
{
    //cb.writeASM("popfq");

    // REX.W + 0x9D
    cb_write_bytes(cb, 2, 0x48, 0x9D);
}
1578 /// push - Push an operand on the stack
1579 void push(codeblock_t *cb, x86opnd_t opnd)
1581 assert (opnd.num_bits == 64);
1583 //cb.writeASM("push", opnd);
1585 if (opnd.type == OPND_REG) {
1586 if (rex_needed(opnd))
1587 cb_write_rex(cb, false, 0, 0, opnd.as.reg.reg_no);
1588 cb_write_opcode(cb, 0x50, opnd);
1590 else if (opnd.type == OPND_MEM) {
1591 cb_write_rm(cb, false, false, NO_OPND, opnd, 6, 1, 0xFF);
1593 else {
1594 assert(false && "unexpected operand type");
/// pushfq - Push the flags register (64-bit)
void pushfq(codeblock_t *cb)
{
    //cb.writeASM("pushfq");
    // 0x9C: PUSHFQ
    cb_write_byte(cb, 0x9C);
}
/// ret - Return from call, popping only the return address
void ret(codeblock_t *cb)
{
    //cb.writeASM("ret");
    // 0xC3: near return, no immediate stack adjustment
    cb_write_byte(cb, 0xC3);
}
// sal - Shift arithmetic left
void sal(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Shift group: D1 (shift-by-one form), D3 (shift-by-CL form),
    // C1 (shift-by-imm8 form); /4 selects SAL — the same extension as
    // shl below, since the two mnemonics share an encoding.
    cb_write_shift(
        cb,
        "sal",
        0xD1, // opMemOnePref,
        0xD3, // opMemClPref,
        0xC1, // opMemImmPref,
        0x04,
        opnd0,
        opnd1
    );
}
/// sar - Shift arithmetic right (signed)
void sar(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Shift group: D1 (shift-by-one form), D3 (shift-by-CL form),
    // C1 (shift-by-imm8 form); /7 selects SAR.
    cb_write_shift(
        cb,
        "sar",
        0xD1, // opMemOnePref,
        0xD3, // opMemClPref,
        0xC1, // opMemImmPref,
        0x07,
        opnd0,
        opnd1
    );
}
// shl - Shift logical left
void shl(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Shift group: D1 (shift-by-one form), D3 (shift-by-CL form),
    // C1 (shift-by-imm8 form); /4 — identical encoding to sal above.
    cb_write_shift(
        cb,
        "shl",
        0xD1, // opMemOnePref,
        0xD3, // opMemClPref,
        0xC1, // opMemImmPref,
        0x04,
        opnd0,
        opnd1
    );
}
/// shr - Shift logical right (unsigned)
void shr(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Shift group: D1 (shift-by-one form), D3 (shift-by-CL form),
    // C1 (shift-by-imm8 form); /5 selects SHR.
    cb_write_shift(
        cb,
        "shr",
        0xD1, // opMemOnePref,
        0xD3, // opMemClPref,
        0xC1, // opMemImmPref,
        0x05,
        opnd0,
        opnd1
    );
}
/// sub - Integer subtraction
void sub(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Standard two-operand ALU group: one opcode per operand combination,
    // with /5 as the immediate-form opcode extension.
    cb_write_rm_multi(
        cb,
        "sub",
        0x28, // opMemReg8
        0x29, // opMemRegPref
        0x2A, // opRegMem8
        0x2B, // opRegMemPref
        0x80, // opMemImm8
        0x83, // opMemImmSml
        0x81, // opMemImmLrg
        0x05, // opExtImm
        opnd0,
        opnd1
    );
}
/// test - Logical Compare
//
// Emits TEST with either a register or an immediate second operand.
// Non-negative immediates are shrunk to the smallest usable operand size;
// negative immediates are only supported as 32-bit values sign-extended
// against a 64-bit R/M operand.
void test(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t test_opnd)
{
    assert (rm_opnd.type == OPND_REG || rm_opnd.type == OPND_MEM);
    assert (test_opnd.type == OPND_REG || test_opnd.type == OPND_IMM);

    // If the second operand is an immediate
    if (test_opnd.type == OPND_IMM)
    {
        x86opnd_t imm_opnd = test_opnd;

        if (imm_opnd.as.imm >= 0)
        {
            assert (unsig_imm_size(imm_opnd.as.unsig_imm) <= 32);
            assert (unsig_imm_size(imm_opnd.as.unsig_imm) <= rm_opnd.num_bits);

            // Use the smallest operand size possible
            rm_opnd = resize_opnd(rm_opnd, unsig_imm_size(imm_opnd.as.unsig_imm));

            if (rm_opnd.num_bits == 8)
            {
                // F6 /0 ib
                cb_write_rm(cb, false, false, NO_OPND, rm_opnd, 0x00, 1, 0xF6);
                cb_write_int(cb, imm_opnd.as.imm, rm_opnd.num_bits);
            }
            else
            {
                // F7 /0 with a matching-width immediate
                cb_write_rm(cb, rm_opnd.num_bits == 16, false, NO_OPND, rm_opnd, 0x00, 1, 0xF7);
                cb_write_int(cb, imm_opnd.as.imm, rm_opnd.num_bits);
            }
        }
        else
        {
            // This mode only applies to 64-bit R/M operands with 32-bit signed immediates
            assert (imm_opnd.as.imm < 0);
            assert (sig_imm_size(imm_opnd.as.imm) <= 32);
            assert (rm_opnd.num_bits == 64);
            cb_write_rm(cb, false, true, NO_OPND, rm_opnd, 0x00, 1, 0xF7);
            cb_write_int(cb, imm_opnd.as.imm, 32);
        }
    }
    else
    {
        // Register second operand: widths must already agree.
        assert (test_opnd.num_bits == rm_opnd.num_bits);

        if (rm_opnd.num_bits == 8)
        {
            cb_write_rm(cb, false, false, test_opnd, rm_opnd, 0xFF, 1, 0x84);
        }
        else
        {
            cb_write_rm(cb, rm_opnd.num_bits == 16, rm_opnd.num_bits == 64, test_opnd, rm_opnd, 0xFF, 1, 0x85);
        }
    }
}
/// Undefined opcode
void ud2(codeblock_t *cb)
{
    // 0F 0B: UD2, the architecturally guaranteed invalid opcode
    // (raises an invalid-opcode exception when executed).
    cb_write_bytes(cb, 2, 0x0F, 0x0B);
}
/// xchg - Exchange Register/Memory with Register
//
// Only 64-bit register-register exchanges are supported by this emitter.
void xchg(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t r_opnd)
{
    assert (rm_opnd.num_bits == 64);
    assert (r_opnd.num_bits == 64);
    assert (rm_opnd.type == OPND_REG);
    assert (r_opnd.type == OPND_REG);

    // If we're exchanging with RAX
    // (the type check below is redundant with the assert above in debug
    // builds, but still guards the branch when asserts are compiled out)
    if (rm_opnd.type == OPND_REG && rm_opnd.as.reg.reg_no == RAX.as.reg.reg_no)
    {
        // Write the REX byte
        cb_write_rex(cb, rm_opnd.num_bits == 64, 0, 0, r_opnd.as.reg.reg_no);

        // Write the opcode and register number
        // (0x90-based short form: XCHG with the accumulator)
        cb_write_byte(cb, 0x90 + (r_opnd.as.reg.reg_no & 7));
    }
    else
    {
        // General form: 0x87 with a full ModRM byte
        cb_write_rm(cb, rm_opnd.num_bits == 16, rm_opnd.num_bits == 64, r_opnd, rm_opnd, 0xFF, 1, 0x87);
    }
}
/// xor - Exclusive bitwise OR
void xor(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
{
    // Standard two-operand ALU group: one opcode per operand combination,
    // with /6 as the immediate-form opcode extension.
    cb_write_rm_multi(
        cb,
        "xor",
        0x30, // opMemReg8
        0x31, // opMemRegPref
        0x32, // opRegMem8
        0x33, // opRegMemPref
        0x80, // opMemImm8
        0x83, // opMemImmSml
        0x81, // opMemImmLrg
        0x06, // opExtImm
        opnd0,
        opnd1
    );
}
// LOCK - lock prefix for atomic shared memory operations
void cb_write_lock_prefix(codeblock_t *cb)
{
    // 0xF0: LOCK prefix byte; applies to the next instruction emitted.
    cb_write_byte(cb, 0xF0);
}
1799 void cb_mark_all_writeable(codeblock_t * cb)
1801 if (mprotect(cb->mem_block_, cb->mem_size, PROT_READ | PROT_WRITE)) {
1802 fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", (void *)cb->mem_block_, strerror(errno));
1803 abort();
1807 void cb_mark_position_writeable(codeblock_t * cb, uint32_t write_pos)
1809 #ifdef _WIN32
1810 uint32_t pagesize = 0x1000; // 4KB
1811 #else
1812 uint32_t pagesize = (uint32_t)sysconf(_SC_PAGESIZE);
1813 #endif
1814 uint32_t aligned_position = (write_pos / pagesize) * pagesize;
1816 if (cb->current_aligned_write_pos != aligned_position) {
1817 cb->current_aligned_write_pos = aligned_position;
1818 void *const page_addr = cb_get_ptr(cb, aligned_position);
1819 if (mprotect(page_addr, pagesize, PROT_READ | PROT_WRITE)) {
1820 fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", page_addr, strerror(errno));
1821 abort();
1826 void cb_mark_all_executable(codeblock_t * cb)
1828 cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
1829 if (mprotect(cb->mem_block_, cb->mem_size, PROT_READ | PROT_EXEC)) {
1830 fprintf(stderr, "Couldn't make JIT page (%p) executable, errno: %s", (void *)cb->mem_block_, strerror(errno));
1831 abort();