target/i386: emulate 64-bit ring 0 for linux-user if LM feature is set
linux-user/i386/cpu_loop.c

/*
 *  qemu user cpu loop
 *
 *  Copyright (c) 2003-2008 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu.h"
#include "qemu/timer.h"
#include "user-internals.h"
#include "cpu_loop-common.h"
#include "signal-common.h"
#include "user-mmap.h"

/***********************************************************/
/* CPUX86 core interface */

uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpu_get_host_ticks();
}
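
/*
 * Fill in an 8-byte segment descriptor: the 32-bit base and 20-bit limit
 * are scattered over the two descriptor words as the architecture demands,
 * then the access and flag bits are ORed on top.
 */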
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    unsigned int e1, e2;
    uint32_t *p;
    e1 = (addr << 16) | (limit & 0xffff);
    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
    e2 |= flags;
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

static uint64_t *idt_table;
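
/*
 * Fill in a 16-byte long-mode IDT gate: handler address bits 0-31 plus the
 * selector go in the first two words, address bits 32-63 in the third, and
 * the fourth word is reserved; 0x8000 in the second word is the Present bit.
 */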
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
    p[2] = tswap32(addr >> 32);
    p[3] = 0;
}

#ifdef TARGET_X86_64
/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl, bool is64)
{
    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
}
#else
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl, bool is64)
{
    if (is64) {
        set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
    } else {
        set_gate(idt_table + n, 0, dpl, 0, 0);
    }
}
#endif

#ifdef TARGET_X86_64
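/*
 * Check that a vsyscall output pointer is writable, delivering SIGSEGV
 * with a page-fault style error code (user-mode write) if it is not.
 */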
static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
{
    /*
     * For all the vsyscalls, NULL means "don't write anything" not
     * "write it at address 0".
     */
    if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
        return true;
    }

    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
    return false;
}

/*
 * Since v3.1, the kernel traps and emulates the vsyscall page.
 * Entry points other than the official ones generate SIGSEGV.
 */
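/*
 * The vsyscall page is a single fixed page whose three entry points sit
 * at offsets 0x000, 0x400 and 0x800, so the page offset of EIP alone
 * identifies the requested syscall.
 */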
static void emulate_vsyscall(CPUX86State *env)
{
    int syscall;
    abi_ulong ret;
    uint64_t caller;

    /*
     * Validate the entry point.  We have already validated the page
     * during translation to get here; now verify the offset.
     */
    switch (env->eip & ~TARGET_PAGE_MASK) {
    case 0x000:
        syscall = TARGET_NR_gettimeofday;
        break;
    case 0x400:
        syscall = TARGET_NR_time;
        break;
    case 0x800:
        syscall = TARGET_NR_getcpu;
        break;
    default:
        goto sigsegv;
    }

    /*
     * Validate the return address.
     * Note that the kernel treats this the same as an invalid entry point.
     */
    if (get_user_u64(caller, env->regs[R_ESP])) {
        goto sigsegv;
    }

    /*
     * Validate the pointer arguments.
     */
    switch (syscall) {
    case TARGET_NR_gettimeofday:
        if (!write_ok_or_segv(env, env->regs[R_EDI],
                              sizeof(struct target_timeval)) ||
            !write_ok_or_segv(env, env->regs[R_ESI],
                              sizeof(struct target_timezone))) {
            return;
        }
        break;
    case TARGET_NR_time:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
            return;
        }
        break;
    case TARGET_NR_getcpu:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Perform the syscall.  None of the vsyscalls should need restarting.
     */
    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
                     env->regs[R_EDX], env->regs[10], env->regs[8],
                     env->regs[9], 0, 0);
    g_assert(ret != -QEMU_ERESTARTSYS);
    g_assert(ret != -QEMU_ESIGRETURN);
    if (ret == -TARGET_EFAULT) {
        goto sigsegv;
    }
    env->regs[R_EAX] = ret;

    /* Emulate a ret instruction to leave the vsyscall page. */
    env->eip = caller;
    env->regs[R_ESP] += 8;
    return;

 sigsegv:
    force_sig(TARGET_SIGSEGV);
}
#endif
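
/*
 * vm86 mode exists only on 32-bit targets; while it is active, traps must
 * be routed through the vm86 emulation rather than delivered as signals.
 */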
static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
{
#ifndef TARGET_X86_64
    if (env->eflags & VM_MASK) {
        handle_vm86_trap(env, trapnr);
        return true;
    }
#endif
    return false;
}

void cpu_loop(CPUX86State *env)
{
    CPUState *cs = env_cpu(env);
    int trapnr;
    abi_ulong ret;

    for(;;) {
        cpu_exec_start(cs);
        trapnr = cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

        switch(trapnr) {
        case 0x80:
#ifndef TARGET_X86_64
        case EXCP_SYSCALL:
#endif
            /* linux syscall from int $0x80 */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EBX],
                             env->regs[R_ECX],
                             env->regs[R_EDX],
                             env->regs[R_ESI],
                             env->regs[R_EDI],
                             env->regs[R_EBP],
                             0, 0);
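            /*
             * int $0x80 and syscall are both two-byte instructions, so
             * stepping EIP back by two re-executes the trapping
             * instruction when the syscall must be restarted.
             */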
            if (ret == -QEMU_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#ifdef TARGET_X86_64
        case EXCP_SYSCALL:
            /* linux syscall from syscall instruction. */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EDI],
                             env->regs[R_ESI],
                             env->regs[R_EDX],
                             env->regs[10],
                             env->regs[8],
                             env->regs[9],
                             0, 0);
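            /*
             * Like int $0x80, the syscall instruction (0x0f 0x05) is two
             * bytes, so restarting steps EIP back by two.
             */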
            if (ret == -QEMU_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
        case EXCP_VSYSCALL:
            emulate_vsyscall(env);
            break;
#endif
        case EXCP0B_NOSEG:
        case EXCP0C_STACK:
            force_sig(TARGET_SIGBUS);
            break;
        case EXCP0D_GPF:
            /* XXX: potential problem if ABI32 */
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP0E_PAGE:
            force_sig_fault(TARGET_SIGSEGV,
                            (env->error_code & PG_ERROR_P_MASK ?
                             TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
                            env->cr[2]);
            break;
        case EXCP00_DIVZ:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
            break;
        case EXCP01_DB:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP03_INT3:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGTRAP);
            break;
        case EXCP04_INTO:
        case EXCP05_BOUND:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP06_ILLOP:
            force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
            break;
        case EXCP_INTERRUPT:
            /* just indicate that signals should be handled asap */
            break;
        case EXCP_DEBUG:
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP_ATOMIC:
            cpu_exec_step_atomic(cs);
            break;
        default:
            EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
                      trapnr);
            abort();
        }
        process_pending_signals(env);
    }
}

static void target_cpu_free(void *obj)
{
    CPUArchState *env = ((CPUState *)obj)->env_ptr;
    target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES);
    g_free(obj);
}

void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
{
    CPUState *cpu = env_cpu(env);
    bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0;
    int i;

    OBJECT(cpu)->free = target_cpu_free;
    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
        env->cr[4] |= CR4_OSFXSR_MASK;
        env->hflags |= HF_OSFXSR_MASK;
    }
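
    /*
     * Pretend that ring 0 has already switched the CPU into long mode when
     * the CPU advertises LM: PAE in CR4 plus LME/LMA in EFER is the state
     * a 64-bit kernel would have left behind.
     */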
    /* enable 64 bit mode if possible */
    if (is64) {
        env->cr[4] |= CR4_PAE_MASK;
        env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
        env->hflags |= HF_LMA_MASK;
    }
#ifndef TARGET_ABI32
    else {
        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
        exit(EXIT_FAILURE);
    }
#endif

    /* flags setup : we activate the IRQs by default as in user mode */
    env->eflags |= IF_MASK;

    /* linux register setup */
#ifndef TARGET_ABI32
    env->regs[R_EAX] = regs->rax;
    env->regs[R_EBX] = regs->rbx;
    env->regs[R_ECX] = regs->rcx;
    env->regs[R_EDX] = regs->rdx;
    env->regs[R_ESI] = regs->rsi;
    env->regs[R_EDI] = regs->rdi;
    env->regs[R_EBP] = regs->rbp;
    env->regs[R_ESP] = regs->rsp;
    env->eip = regs->rip;
#else
    env->regs[R_EAX] = regs->eax;
    env->regs[R_EBX] = regs->ebx;
    env->regs[R_ECX] = regs->ecx;
    env->regs[R_EDX] = regs->edx;
    env->regs[R_ESI] = regs->esi;
    env->regs[R_EDI] = regs->edi;
    env->regs[R_EBP] = regs->ebp;
    env->regs[R_ESP] = regs->esp;
    env->eip = regs->eip;
#endif

    /* linux interrupt setup */
#ifndef TARGET_ABI32
    env->idt.limit = 511;
#else
    env->idt.limit = 255;
#endif
    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
                                PROT_READ|PROT_WRITE,
                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    idt_table = g2h_untagged(env->idt.base);
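    /*
     * Vectors 0-19 are the architectural exceptions and stay at DPL 0, so
     * a guest "int $n" cannot raise them directly; #BP (3), #OF (4) and
     * the int $0x80 syscall vector are opened up to DPL 3.
     */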
    for (i = 0; i < 20; i++) {
        set_idt(i, 0, is64);
    }
    set_idt(3, 3, is64);
    set_idt(4, 3, is64);
    set_idt(0x80, 3, is64);

    /* linux segment setup */
    {
        uint64_t *gdt_table;
        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
                                    PROT_READ|PROT_WRITE,
                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
        gdt_table = g2h_untagged(env->gdt.base);
#ifdef TARGET_ABI32
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#else
        /* 64 bit code segment */
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 DESC_L_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#endif
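        /*
         * Base 0 with a 4 KiB-granular limit of 0xfffff makes both
         * segments flat over the whole 32-bit address space; type 0xa is
         * execute/read code, type 0x2 below is read/write data, both at
         * DPL 3.
         */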
        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
    }
    cpu_x86_load_seg(env, R_CS, __USER_CS);
    cpu_x86_load_seg(env, R_SS, __USER_DS);
#ifdef TARGET_ABI32
    cpu_x86_load_seg(env, R_DS, __USER_DS);
    cpu_x86_load_seg(env, R_ES, __USER_DS);
    cpu_x86_load_seg(env, R_FS, __USER_DS);
    cpu_x86_load_seg(env, R_GS, __USER_DS);
    /* This hack makes Wine work... */
    env->segs[R_FS].selector = 0;
#else
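    /*
     * In 64-bit mode the data segment selectors are not used for
     * addressing, and the FS/GS bases are installed separately (via
     * arch_prctl), so null selectors are sufficient here.
     */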
    cpu_x86_load_seg(env, R_DS, 0);
    cpu_x86_load_seg(env, R_ES, 0);
    cpu_x86_load_seg(env, R_FS, 0);
    cpu_x86_load_seg(env, R_GS, 0);
#endif
}