Remove some prints and add speed test for rescalers
[jpcrr.git] / streamtools / dynamic / dynamic.cpp
blob915ff8108af5d163ecda966790087b3d0704fca9
1 #include "dynamic/dynamic.hpp"
2 #include <iostream>
3 #include <cstring>
4 #define _USE_BSD
5 #include <sys/mman.h>
//Copy the machine code in 'code' into freshly mapped memory and make that memory executable.
//
//Parameters:
//	code: The machine code bytes to commit.
//
//Returns: Pointer to the start of the executable copy. NULL if 'code' is empty, if mapping the memory
//	or changing its protection fails, or if not compiled for Linux.
//
//The mapping is rounded up to a whole number of pages. It is mapped read-write for the copy and then
//switched to read-execute. This function does not unmap a successfully committed mapping.
void* commit_machine_code(const std::vector<uint8_t>& code)
{
	uint8_t* cb = NULL;
	if(code.empty())
		return NULL;
#ifdef __linux__
	//Keep the length in size_t: a 32-bit length could be truncated below code.size(), and the memcpy
	//below would then run past the end of the mapping.
	const size_t pagesize = static_cast<size_t>(getpagesize());
	const size_t toalloc = (code.size() + pagesize - 1) / pagesize * pagesize;
	void* mapping = mmap(NULL, toalloc, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if(mapping == MAP_FAILED)
		return NULL;
	cb = static_cast<uint8_t*>(mapping);
	memcpy(cb, &code[0], code.size());
	if(mprotect(cb, toalloc, PROT_READ | PROT_EXEC) < 0) {
		//Could not make the pages executable; give them back instead of leaking them.
		munmap(cb, toalloc);
		return NULL;
	}
#endif
	return cb;
}
//Append four copies of 'byte' to 'code' if 'enable' is set; otherwise leave 'code' untouched.
//
//The callers in this file invoke it right after write32_le() with 'enable' set to the 64-bit flag, so
//that a 4-byte immediate becomes an 8-byte one.
void write_trailer_bytes_64(std::vector<uint8_t>& code, uint8_t byte, bool enable)
{
	if(enable)
		code.insert(code.end(), static_cast<std::vector<uint8_t>::size_type>(4), byte);
}
//Append 'value' to 'code' as four bytes, least significant byte first.
void write32_le(std::vector<uint8_t>& code, uint32_t value)
{
	for(unsigned shift = 0; shift < 32; shift += 8)
		code.push_back(static_cast<uint8_t>((value >> shift) & 0xFF));
}
44 //Trashes [ER]CX, [ER]SI and [ER]DI. Pops stuff from stack.
45 void write_line_intel(std::vector<uint8_t>& code, uint32_t* strip_widths, uint32_t swidth, uint32_t twidth,
46 bool bits64)
48 for(uint32_t i = 0; i < swidth; i++)
50 code.push_back(0xAD); //LODSD
51 for(uint32_t j = 0; j < strip_widths[i]; j++)
52 code.push_back(0xAB); //STOSD
54 code.push_back(0x59); //POP RCX / POP ECX
55 code.push_back(0x56); //PUSH RSI / PUSH ESI
56 if(bits64)
57 code.push_back(0x48); //Make next instruction 64-bit.
58 code.push_back(0x89); //MOV RSI, RDI / MOV ESI, EDI
59 code.push_back(0xFE);
60 if(bits64)
61 code.push_back(0x48); //Make next instruction 64-bit.
62 code.push_back(0x81); //SUB RSI, x / SUB ESI, x
63 code.push_back(0xEE);
64 write32_le(code, 4 * twidth);
65 code.push_back(0xF3); //REP MOVSD
66 code.push_back(0xA5);
67 code.push_back(0x5E); //POP RSI
70 void write_loop_intel(std::vector<uint8_t>& code, uint32_t* strip_widths, uint32_t swidth, uint32_t twidth,
71 uint32_t* strip_heights, uint32_t lines, bool bits64)
73 code.push_back(0x9C); //PUSHF
74 code.push_back(0xFC); //CLD
76 for(uint32_t i = lines - 1; i < lines; i--) {
77 if(bits64)
78 code.push_back(0x48); //Make next instruction 64-bit.
79 code.push_back(0xB8); //MOV EAX, imm / MOV RAX, imm.
80 write32_le(code, (strip_heights[i] - 1) * twidth);
81 write_trailer_bytes_64(code, 0, bits64);
82 code.push_back(0x50); //PUSH EAX.
85 if(bits64)
86 code.push_back(0x48); //Make next instruction 64-bit.
87 code.push_back(0xBA); //MOV RDX, imm / MOV EDX, imm.
88 write32_le(code, lines);
89 write_trailer_bytes_64(code, 0, bits64);
91 uint32_t osize = code.size();
92 write_line_intel(code, strip_widths, swidth, twidth, bits64);
93 if(bits64)
94 code.push_back(0x48); //Make next instruction 64-bit.
95 code.push_back(0x83); //SUB RDX, 1 / SUB EDX, 1.
96 code.push_back(0xEA);
97 code.push_back(0x01);
98 code.push_back(0x0F); //JNZ NEAR.
99 code.push_back(0x85);
100 uint32_t jumpoffset = osize - (code.size() + 4); //Yes, this is negative.
101 write32_le(code, jumpoffset);
103 code.push_back(0x9D); //POPF
//Emit the function entry sequence for Linux on x86.
//
//Parameters:
//	code: Machine code buffer to append to.
//	bits64: Emit the 64-bit form.
//
//The emitted code sets up a frame pointer and then saves [ER]DI and [ER]SI on the stack. The 32-bit
//System V calling convention requires a function to preserve EDI and ESI, and the code that follows
//the prologue overwrites both. The 64-bit convention does not require this; the registers are saved
//there too so that postlogue_linux_intel() can restore them without knowing the mode.
//
//In 32-bit mode the first stack argument ([EBP + 8]) is then loaded into EDI and the second
//([EBP + 12]) into ESI. In 64-bit mode no loads are emitted; presumably the caller already passes the
//first two arguments in RDI and RSI.
void prologue_linux_intel(std::vector<uint8_t>& code, bool bits64)
{
	code.push_back(0x55);		//PUSH RBP / PUSH EBP
	if(bits64)
		code.push_back(0x48);	//Make next instruction 64-bit.
	code.push_back(0x89);		//MOV EBP, ESP / MOV RBP, RSP
	code.push_back(0xE5);

	//Saved below the frame pointer; postlogue_linux_intel() pops them in the opposite order.
	code.push_back(0x57);		//PUSH RDI / PUSH EDI
	code.push_back(0x56);		//PUSH RSI / PUSH ESI

	if(!bits64) {
		code.push_back(0x8B);	//MOV EDI, [EBP + 8]
		code.push_back(0x7D);
		code.push_back(0x08);
		code.push_back(0x8B);	//MOV ESI, [EBP + 12]
		code.push_back(0x75);
		code.push_back(0x0C);
	}
}

//Emit the function exit sequence matching prologue_linux_intel().
//
//Restores [ER]SI and [ER]DI from the stack, tears down the frame and returns. The code emitted between
//prologue and postlogue has to leave the stack pointer where the prologue left it.
void postlogue_linux_intel(std::vector<uint8_t>& code)
{
	code.push_back(0x5E);	//POP RSI / POP ESI
	code.push_back(0x5F);	//POP RDI / POP EDI
	code.push_back(0xC9);	//LEAVE
	code.push_back(0xC3);	//RET
}
132 void generate_hdscaler(std::vector<uint8_t>& code, uint32_t* strip_widths, uint32_t* strip_heights, uint32_t swidth,
133 uint32_t sheight, uint32_t twidth)
135 #if defined(__x86_64__)
136 #if defined(__linux__)
137 prologue_linux_intel(code, true);
138 write_loop_intel(code, strip_widths, swidth, twidth, strip_heights, sheight, true);
139 postlogue_linux_intel(code);
140 #else
141 return;
142 #endif
143 #else
144 #if defined(__i386__)
145 #if defined(__linux__)
146 prologue_linux_intel(code, false);
147 write_loop_intel(code, strip_widths, swidth, twidth, strip_heights, sheight, false);
148 postlogue_linux_intel(code);
149 #else
150 return;
151 #endif
152 #else
153 return;
154 #endif
155 #endif