hw/xen_disk: aio_inflight not released in handling ioreq when nr_segments==0
[qemu.git] / target-i386 / helper.c
blob f0c546df5cff18a51ace6485fa3f821e56039020
/*
 *  i386 helpers (without register variable usage)
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include <stdarg.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <inttypes.h>
24 #include <signal.h>
26 #include "cpu.h"
27 #include "exec-all.h"
28 #include "qemu-common.h"
29 #include "kvm.h"
30 #include "kvm_x86.h"
32 //#define DEBUG_MMU
34 /* NOTE: must be called outside the CPU execute loop */
35 void cpu_reset(CPUX86State *env)
37 int i;
39 if (qemu_loglevel_mask(CPU_LOG_RESET)) {
40 qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
41 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
44 memset(env, 0, offsetof(CPUX86State, breakpoints));
46 tlb_flush(env, 1);
48 env->old_exception = -1;
50 /* init to reset state */
52 #ifdef CONFIG_SOFTMMU
53 env->hflags |= HF_SOFTMMU_MASK;
54 #endif
55 env->hflags2 |= HF2_GIF_MASK;
57 cpu_x86_update_cr0(env, 0x60000010);
58 env->a20_mask = ~0x0;
59 env->smbase = 0x30000;
61 env->idt.limit = 0xffff;
62 env->gdt.limit = 0xffff;
63 env->ldt.limit = 0xffff;
64 env->ldt.flags = DESC_P_MASK | (2 << DESC_TYPE_SHIFT);
65 env->tr.limit = 0xffff;
66 env->tr.flags = DESC_P_MASK | (11 << DESC_TYPE_SHIFT);
68 cpu_x86_load_seg_cache(env, R_CS, 0xf000, 0xffff0000, 0xffff,
69 DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK |
70 DESC_R_MASK | DESC_A_MASK);
71 cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffff,
72 DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
73 DESC_A_MASK);
74 cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffff,
75 DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
76 DESC_A_MASK);
77 cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffff,
78 DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
79 DESC_A_MASK);
80 cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffff,
81 DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
82 DESC_A_MASK);
83 cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffff,
84 DESC_P_MASK | DESC_S_MASK | DESC_W_MASK |
85 DESC_A_MASK);
87 env->eip = 0xfff0;
88 env->regs[R_EDX] = env->cpuid_version;
90 env->eflags = 0x2;
92 /* FPU init */
93 for(i = 0;i < 8; i++)
94 env->fptags[i] = 1;
95 env->fpuc = 0x37f;
97 env->mxcsr = 0x1f80;
99 memset(env->dr, 0, sizeof(env->dr));
100 env->dr[6] = DR6_FIXED_1;
101 env->dr[7] = DR7_FIXED_1;
102 cpu_breakpoint_remove_all(env, BP_CPU);
103 cpu_watchpoint_remove_all(env, BP_CPU);
105 env->mcg_status = 0;
108 void cpu_x86_close(CPUX86State *env)
110 qemu_free(env);
113 static void cpu_x86_version(CPUState *env, int *family, int *model)
115 int cpuver = env->cpuid_version;
117 if (family == NULL || model == NULL) {
118 return;
121 *family = (cpuver >> 8) & 0x0f;
122 *model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0x0f);
125 /* Broadcast MCA signal for processor version 06H_EH and above */
126 int cpu_x86_support_mca_broadcast(CPUState *env)
128 int family = 0;
129 int model = 0;
131 cpu_x86_version(env, &family, &model);
132 if ((family == 6 && model >= 14) || family > 6) {
133 return 1;
136 return 0;
/***********************************************************/
/* x86 debug */

/*
 * Printable names for condition-code operations, indexed by env->cc_op
 * (see the bounds check against CC_OP_NB in cpu_dump_state()).
 * After the two special entries, each operation appears in four size
 * variants: B, W, L, Q.
 */
static const char * const cc_op_str[] = {
    "DYNAMIC",
    "EFLAGS",

    "MULB",
    "MULW",
    "MULL",
    "MULQ",

    "ADDB",
    "ADDW",
    "ADDL",
    "ADDQ",

    "ADCB",
    "ADCW",
    "ADCL",
    "ADCQ",

    "SUBB",
    "SUBW",
    "SUBL",
    "SUBQ",

    "SBBB",
    "SBBW",
    "SBBL",
    "SBBQ",

    "LOGICB",
    "LOGICW",
    "LOGICL",
    "LOGICQ",

    "INCB",
    "INCW",
    "INCL",
    "INCQ",

    "DECB",
    "DECW",
    "DECL",
    "DECQ",

    "SHLB",
    "SHLW",
    "SHLL",
    "SHLQ",

    "SARB",
    "SARW",
    "SARL",
    "SARQ",
};
197 static void
198 cpu_x86_dump_seg_cache(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
199 const char *name, struct SegmentCache *sc)
201 #ifdef TARGET_X86_64
202 if (env->hflags & HF_CS64_MASK) {
203 cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name,
204 sc->selector, sc->base, sc->limit, sc->flags & 0x00ffff00);
205 } else
206 #endif
208 cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector,
209 (uint32_t)sc->base, sc->limit, sc->flags & 0x00ffff00);
212 if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK))
213 goto done;
215 cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT);
216 if (sc->flags & DESC_S_MASK) {
217 if (sc->flags & DESC_CS_MASK) {
218 cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" :
219 ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16"));
220 cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-',
221 (sc->flags & DESC_R_MASK) ? 'R' : '-');
222 } else {
223 cpu_fprintf(f, (sc->flags & DESC_B_MASK) ? "DS " : "DS16");
224 cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-',
225 (sc->flags & DESC_W_MASK) ? 'W' : '-');
227 cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 'A' : '-');
228 } else {
229 static const char *sys_type_name[2][16] = {
230 { /* 32 bit mode */
231 "Reserved", "TSS16-avl", "LDT", "TSS16-busy",
232 "CallGate16", "TaskGate", "IntGate16", "TrapGate16",
233 "Reserved", "TSS32-avl", "Reserved", "TSS32-busy",
234 "CallGate32", "Reserved", "IntGate32", "TrapGate32"
236 { /* 64 bit mode */
237 "<hiword>", "Reserved", "LDT", "Reserved", "Reserved",
238 "Reserved", "Reserved", "Reserved", "Reserved",
239 "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64",
240 "Reserved", "IntGate64", "TrapGate64"
243 cpu_fprintf(f, "%s",
244 sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0]
245 [(sc->flags & DESC_TYPE_MASK)
246 >> DESC_TYPE_SHIFT]);
248 done:
249 cpu_fprintf(f, "\n");
252 #define DUMP_CODE_BYTES_TOTAL 50
253 #define DUMP_CODE_BYTES_BACKWARD 20
255 void cpu_dump_state(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
256 int flags)
258 int eflags, i, nb;
259 char cc_op_name[32];
260 static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" };
262 cpu_synchronize_state(env);
264 eflags = env->eflags;
265 #ifdef TARGET_X86_64
266 if (env->hflags & HF_CS64_MASK) {
267 cpu_fprintf(f,
268 "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n"
269 "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n"
270 "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n"
271 "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n"
272 "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
273 env->regs[R_EAX],
274 env->regs[R_EBX],
275 env->regs[R_ECX],
276 env->regs[R_EDX],
277 env->regs[R_ESI],
278 env->regs[R_EDI],
279 env->regs[R_EBP],
280 env->regs[R_ESP],
281 env->regs[8],
282 env->regs[9],
283 env->regs[10],
284 env->regs[11],
285 env->regs[12],
286 env->regs[13],
287 env->regs[14],
288 env->regs[15],
289 env->eip, eflags,
290 eflags & DF_MASK ? 'D' : '-',
291 eflags & CC_O ? 'O' : '-',
292 eflags & CC_S ? 'S' : '-',
293 eflags & CC_Z ? 'Z' : '-',
294 eflags & CC_A ? 'A' : '-',
295 eflags & CC_P ? 'P' : '-',
296 eflags & CC_C ? 'C' : '-',
297 env->hflags & HF_CPL_MASK,
298 (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
299 (env->a20_mask >> 20) & 1,
300 (env->hflags >> HF_SMM_SHIFT) & 1,
301 env->halted);
302 } else
303 #endif
305 cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n"
306 "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n"
307 "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n",
308 (uint32_t)env->regs[R_EAX],
309 (uint32_t)env->regs[R_EBX],
310 (uint32_t)env->regs[R_ECX],
311 (uint32_t)env->regs[R_EDX],
312 (uint32_t)env->regs[R_ESI],
313 (uint32_t)env->regs[R_EDI],
314 (uint32_t)env->regs[R_EBP],
315 (uint32_t)env->regs[R_ESP],
316 (uint32_t)env->eip, eflags,
317 eflags & DF_MASK ? 'D' : '-',
318 eflags & CC_O ? 'O' : '-',
319 eflags & CC_S ? 'S' : '-',
320 eflags & CC_Z ? 'Z' : '-',
321 eflags & CC_A ? 'A' : '-',
322 eflags & CC_P ? 'P' : '-',
323 eflags & CC_C ? 'C' : '-',
324 env->hflags & HF_CPL_MASK,
325 (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1,
326 (env->a20_mask >> 20) & 1,
327 (env->hflags >> HF_SMM_SHIFT) & 1,
328 env->halted);
331 for(i = 0; i < 6; i++) {
332 cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i],
333 &env->segs[i]);
335 cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt);
336 cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr);
338 #ifdef TARGET_X86_64
339 if (env->hflags & HF_LMA_MASK) {
340 cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n",
341 env->gdt.base, env->gdt.limit);
342 cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n",
343 env->idt.base, env->idt.limit);
344 cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n",
345 (uint32_t)env->cr[0],
346 env->cr[2],
347 env->cr[3],
348 (uint32_t)env->cr[4]);
349 for(i = 0; i < 4; i++)
350 cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]);
351 cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n",
352 env->dr[6], env->dr[7]);
353 } else
354 #endif
356 cpu_fprintf(f, "GDT= %08x %08x\n",
357 (uint32_t)env->gdt.base, env->gdt.limit);
358 cpu_fprintf(f, "IDT= %08x %08x\n",
359 (uint32_t)env->idt.base, env->idt.limit);
360 cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n",
361 (uint32_t)env->cr[0],
362 (uint32_t)env->cr[2],
363 (uint32_t)env->cr[3],
364 (uint32_t)env->cr[4]);
365 for(i = 0; i < 4; i++) {
366 cpu_fprintf(f, "DR%d=" TARGET_FMT_lx " ", i, env->dr[i]);
368 cpu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n",
369 env->dr[6], env->dr[7]);
371 if (flags & X86_DUMP_CCOP) {
372 if ((unsigned)env->cc_op < CC_OP_NB)
373 snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
374 else
375 snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
376 #ifdef TARGET_X86_64
377 if (env->hflags & HF_CS64_MASK) {
378 cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n",
379 env->cc_src, env->cc_dst,
380 cc_op_name);
381 } else
382 #endif
384 cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
385 (uint32_t)env->cc_src, (uint32_t)env->cc_dst,
386 cc_op_name);
389 cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
390 if (flags & X86_DUMP_FPU) {
391 int fptag;
392 fptag = 0;
393 for(i = 0; i < 8; i++) {
394 fptag |= ((!env->fptags[i]) << i);
396 cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n",
397 env->fpuc,
398 (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11,
399 env->fpstt,
400 fptag,
401 env->mxcsr);
402 for(i=0;i<8;i++) {
403 #if defined(USE_X86LDOUBLE)
404 union {
405 long double d;
406 struct {
407 uint64_t lower;
408 uint16_t upper;
409 } l;
410 } tmp;
411 tmp.d = env->fpregs[i].d;
412 cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x",
413 i, tmp.l.lower, tmp.l.upper);
414 #else
415 cpu_fprintf(f, "FPR%d=%016" PRIx64,
416 i, env->fpregs[i].mmx.q);
417 #endif
418 if ((i & 1) == 1)
419 cpu_fprintf(f, "\n");
420 else
421 cpu_fprintf(f, " ");
423 if (env->hflags & HF_CS64_MASK)
424 nb = 16;
425 else
426 nb = 8;
427 for(i=0;i<nb;i++) {
428 cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
430 env->xmm_regs[i].XMM_L(3),
431 env->xmm_regs[i].XMM_L(2),
432 env->xmm_regs[i].XMM_L(1),
433 env->xmm_regs[i].XMM_L(0));
434 if ((i & 1) == 1)
435 cpu_fprintf(f, "\n");
436 else
437 cpu_fprintf(f, " ");
440 if (flags & CPU_DUMP_CODE) {
441 target_ulong base = env->segs[R_CS].base + env->eip;
442 target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD);
443 uint8_t code;
444 char codestr[3];
446 cpu_fprintf(f, "Code=");
447 for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) {
448 if (cpu_memory_rw_debug(env, base - offs + i, &code, 1, 0) == 0) {
449 snprintf(codestr, sizeof(codestr), "%02x", code);
450 } else {
451 snprintf(codestr, sizeof(codestr), "??");
453 cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "",
454 i == offs ? "<" : "", codestr, i == offs ? ">" : "");
456 cpu_fprintf(f, "\n");
460 /***********************************************************/
461 /* x86 mmu */
462 /* XXX: add PGE support */
464 void cpu_x86_set_a20(CPUX86State *env, int a20_state)
466 a20_state = (a20_state != 0);
467 if (a20_state != ((env->a20_mask >> 20) & 1)) {
468 #if defined(DEBUG_MMU)
469 printf("A20 update: a20=%d\n", a20_state);
470 #endif
471 /* if the cpu is currently executing code, we must unlink it and
472 all the potentially executing TB */
473 cpu_interrupt(env, CPU_INTERRUPT_EXITTB);
475 /* when a20 is changed, all the MMU mappings are invalid, so
476 we must flush everything */
477 tlb_flush(env, 1);
478 env->a20_mask = ~(1 << 20) | (a20_state << 20);
482 void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0)
484 int pe_state;
486 #if defined(DEBUG_MMU)
487 printf("CR0 update: CR0=0x%08x\n", new_cr0);
488 #endif
489 if ((new_cr0 & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK)) !=
490 (env->cr[0] & (CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK))) {
491 tlb_flush(env, 1);
494 #ifdef TARGET_X86_64
495 if (!(env->cr[0] & CR0_PG_MASK) && (new_cr0 & CR0_PG_MASK) &&
496 (env->efer & MSR_EFER_LME)) {
497 /* enter in long mode */
498 /* XXX: generate an exception */
499 if (!(env->cr[4] & CR4_PAE_MASK))
500 return;
501 env->efer |= MSR_EFER_LMA;
502 env->hflags |= HF_LMA_MASK;
503 } else if ((env->cr[0] & CR0_PG_MASK) && !(new_cr0 & CR0_PG_MASK) &&
504 (env->efer & MSR_EFER_LMA)) {
505 /* exit long mode */
506 env->efer &= ~MSR_EFER_LMA;
507 env->hflags &= ~(HF_LMA_MASK | HF_CS64_MASK);
508 env->eip &= 0xffffffff;
510 #endif
511 env->cr[0] = new_cr0 | CR0_ET_MASK;
513 /* update PE flag in hidden flags */
514 pe_state = (env->cr[0] & CR0_PE_MASK);
515 env->hflags = (env->hflags & ~HF_PE_MASK) | (pe_state << HF_PE_SHIFT);
516 /* ensure that ADDSEG is always set in real mode */
517 env->hflags |= ((pe_state ^ 1) << HF_ADDSEG_SHIFT);
518 /* update FPU flags */
519 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
520 ((new_cr0 << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
523 /* XXX: in legacy PAE mode, generate a GPF if reserved bits are set in
524 the PDPT */
525 void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
527 env->cr[3] = new_cr3;
528 if (env->cr[0] & CR0_PG_MASK) {
529 #if defined(DEBUG_MMU)
530 printf("CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
531 #endif
532 tlb_flush(env, 0);
536 void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
538 #if defined(DEBUG_MMU)
539 printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
540 #endif
541 if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
542 (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
543 tlb_flush(env, 1);
545 /* SSE handling */
546 if (!(env->cpuid_features & CPUID_SSE))
547 new_cr4 &= ~CR4_OSFXSR_MASK;
548 if (new_cr4 & CR4_OSFXSR_MASK)
549 env->hflags |= HF_OSFXSR_MASK;
550 else
551 env->hflags &= ~HF_OSFXSR_MASK;
553 env->cr[4] = new_cr4;
556 #if defined(CONFIG_USER_ONLY)
558 int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
559 int is_write, int mmu_idx, int is_softmmu)
561 /* user mode only emulation */
562 is_write &= 1;
563 env->cr[2] = addr;
564 env->error_code = (is_write << PG_ERROR_W_BIT);
565 env->error_code |= PG_ERROR_U_MASK;
566 env->exception_index = EXCP0E_PAGE;
567 return 1;
570 #else
572 /* XXX: This value should match the one returned by CPUID
573 * and in exec.c */
574 # if defined(TARGET_X86_64)
575 # define PHYS_ADDR_MASK 0xfffffff000LL
576 # else
577 # define PHYS_ADDR_MASK 0xffffff000LL
578 # endif
580 /* return value:
581 -1 = cannot handle fault
582 0 = nothing more to do
583 1 = generate PF fault
585 int cpu_x86_handle_mmu_fault(CPUX86State *env, target_ulong addr,
586 int is_write1, int mmu_idx, int is_softmmu)
588 uint64_t ptep, pte;
589 target_ulong pde_addr, pte_addr;
590 int error_code, is_dirty, prot, page_size, is_write, is_user;
591 target_phys_addr_t paddr;
592 uint32_t page_offset;
593 target_ulong vaddr, virt_addr;
595 is_user = mmu_idx == MMU_USER_IDX;
596 #if defined(DEBUG_MMU)
597 printf("MMU fault: addr=" TARGET_FMT_lx " w=%d u=%d eip=" TARGET_FMT_lx "\n",
598 addr, is_write1, is_user, env->eip);
599 #endif
600 is_write = is_write1 & 1;
602 if (!(env->cr[0] & CR0_PG_MASK)) {
603 pte = addr;
604 virt_addr = addr & TARGET_PAGE_MASK;
605 prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
606 page_size = 4096;
607 goto do_mapping;
610 if (env->cr[4] & CR4_PAE_MASK) {
611 uint64_t pde, pdpe;
612 target_ulong pdpe_addr;
614 #ifdef TARGET_X86_64
615 if (env->hflags & HF_LMA_MASK) {
616 uint64_t pml4e_addr, pml4e;
617 int32_t sext;
619 /* test virtual address sign extension */
620 sext = (int64_t)addr >> 47;
621 if (sext != 0 && sext != -1) {
622 env->error_code = 0;
623 env->exception_index = EXCP0D_GPF;
624 return 1;
627 pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
628 env->a20_mask;
629 pml4e = ldq_phys(pml4e_addr);
630 if (!(pml4e & PG_PRESENT_MASK)) {
631 error_code = 0;
632 goto do_fault;
634 if (!(env->efer & MSR_EFER_NXE) && (pml4e & PG_NX_MASK)) {
635 error_code = PG_ERROR_RSVD_MASK;
636 goto do_fault;
638 if (!(pml4e & PG_ACCESSED_MASK)) {
639 pml4e |= PG_ACCESSED_MASK;
640 stl_phys_notdirty(pml4e_addr, pml4e);
642 ptep = pml4e ^ PG_NX_MASK;
643 pdpe_addr = ((pml4e & PHYS_ADDR_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
644 env->a20_mask;
645 pdpe = ldq_phys(pdpe_addr);
646 if (!(pdpe & PG_PRESENT_MASK)) {
647 error_code = 0;
648 goto do_fault;
650 if (!(env->efer & MSR_EFER_NXE) && (pdpe & PG_NX_MASK)) {
651 error_code = PG_ERROR_RSVD_MASK;
652 goto do_fault;
654 ptep &= pdpe ^ PG_NX_MASK;
655 if (!(pdpe & PG_ACCESSED_MASK)) {
656 pdpe |= PG_ACCESSED_MASK;
657 stl_phys_notdirty(pdpe_addr, pdpe);
659 } else
660 #endif
662 /* XXX: load them when cr3 is loaded ? */
663 pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
664 env->a20_mask;
665 pdpe = ldq_phys(pdpe_addr);
666 if (!(pdpe & PG_PRESENT_MASK)) {
667 error_code = 0;
668 goto do_fault;
670 ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
673 pde_addr = ((pdpe & PHYS_ADDR_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
674 env->a20_mask;
675 pde = ldq_phys(pde_addr);
676 if (!(pde & PG_PRESENT_MASK)) {
677 error_code = 0;
678 goto do_fault;
680 if (!(env->efer & MSR_EFER_NXE) && (pde & PG_NX_MASK)) {
681 error_code = PG_ERROR_RSVD_MASK;
682 goto do_fault;
684 ptep &= pde ^ PG_NX_MASK;
685 if (pde & PG_PSE_MASK) {
686 /* 2 MB page */
687 page_size = 2048 * 1024;
688 ptep ^= PG_NX_MASK;
689 if ((ptep & PG_NX_MASK) && is_write1 == 2)
690 goto do_fault_protect;
691 if (is_user) {
692 if (!(ptep & PG_USER_MASK))
693 goto do_fault_protect;
694 if (is_write && !(ptep & PG_RW_MASK))
695 goto do_fault_protect;
696 } else {
697 if ((env->cr[0] & CR0_WP_MASK) &&
698 is_write && !(ptep & PG_RW_MASK))
699 goto do_fault_protect;
701 is_dirty = is_write && !(pde & PG_DIRTY_MASK);
702 if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
703 pde |= PG_ACCESSED_MASK;
704 if (is_dirty)
705 pde |= PG_DIRTY_MASK;
706 stl_phys_notdirty(pde_addr, pde);
708 /* align to page_size */
709 pte = pde & ((PHYS_ADDR_MASK & ~(page_size - 1)) | 0xfff);
710 virt_addr = addr & ~(page_size - 1);
711 } else {
712 /* 4 KB page */
713 if (!(pde & PG_ACCESSED_MASK)) {
714 pde |= PG_ACCESSED_MASK;
715 stl_phys_notdirty(pde_addr, pde);
717 pte_addr = ((pde & PHYS_ADDR_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
718 env->a20_mask;
719 pte = ldq_phys(pte_addr);
720 if (!(pte & PG_PRESENT_MASK)) {
721 error_code = 0;
722 goto do_fault;
724 if (!(env->efer & MSR_EFER_NXE) && (pte & PG_NX_MASK)) {
725 error_code = PG_ERROR_RSVD_MASK;
726 goto do_fault;
728 /* combine pde and pte nx, user and rw protections */
729 ptep &= pte ^ PG_NX_MASK;
730 ptep ^= PG_NX_MASK;
731 if ((ptep & PG_NX_MASK) && is_write1 == 2)
732 goto do_fault_protect;
733 if (is_user) {
734 if (!(ptep & PG_USER_MASK))
735 goto do_fault_protect;
736 if (is_write && !(ptep & PG_RW_MASK))
737 goto do_fault_protect;
738 } else {
739 if ((env->cr[0] & CR0_WP_MASK) &&
740 is_write && !(ptep & PG_RW_MASK))
741 goto do_fault_protect;
743 is_dirty = is_write && !(pte & PG_DIRTY_MASK);
744 if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
745 pte |= PG_ACCESSED_MASK;
746 if (is_dirty)
747 pte |= PG_DIRTY_MASK;
748 stl_phys_notdirty(pte_addr, pte);
750 page_size = 4096;
751 virt_addr = addr & ~0xfff;
752 pte = pte & (PHYS_ADDR_MASK | 0xfff);
754 } else {
755 uint32_t pde;
757 /* page directory entry */
758 pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) &
759 env->a20_mask;
760 pde = ldl_phys(pde_addr);
761 if (!(pde & PG_PRESENT_MASK)) {
762 error_code = 0;
763 goto do_fault;
765 /* if PSE bit is set, then we use a 4MB page */
766 if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
767 page_size = 4096 * 1024;
768 if (is_user) {
769 if (!(pde & PG_USER_MASK))
770 goto do_fault_protect;
771 if (is_write && !(pde & PG_RW_MASK))
772 goto do_fault_protect;
773 } else {
774 if ((env->cr[0] & CR0_WP_MASK) &&
775 is_write && !(pde & PG_RW_MASK))
776 goto do_fault_protect;
778 is_dirty = is_write && !(pde & PG_DIRTY_MASK);
779 if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
780 pde |= PG_ACCESSED_MASK;
781 if (is_dirty)
782 pde |= PG_DIRTY_MASK;
783 stl_phys_notdirty(pde_addr, pde);
786 pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
787 ptep = pte;
788 virt_addr = addr & ~(page_size - 1);
789 } else {
790 if (!(pde & PG_ACCESSED_MASK)) {
791 pde |= PG_ACCESSED_MASK;
792 stl_phys_notdirty(pde_addr, pde);
795 /* page directory entry */
796 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
797 env->a20_mask;
798 pte = ldl_phys(pte_addr);
799 if (!(pte & PG_PRESENT_MASK)) {
800 error_code = 0;
801 goto do_fault;
803 /* combine pde and pte user and rw protections */
804 ptep = pte & pde;
805 if (is_user) {
806 if (!(ptep & PG_USER_MASK))
807 goto do_fault_protect;
808 if (is_write && !(ptep & PG_RW_MASK))
809 goto do_fault_protect;
810 } else {
811 if ((env->cr[0] & CR0_WP_MASK) &&
812 is_write && !(ptep & PG_RW_MASK))
813 goto do_fault_protect;
815 is_dirty = is_write && !(pte & PG_DIRTY_MASK);
816 if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
817 pte |= PG_ACCESSED_MASK;
818 if (is_dirty)
819 pte |= PG_DIRTY_MASK;
820 stl_phys_notdirty(pte_addr, pte);
822 page_size = 4096;
823 virt_addr = addr & ~0xfff;
826 /* the page can be put in the TLB */
827 prot = PAGE_READ;
828 if (!(ptep & PG_NX_MASK))
829 prot |= PAGE_EXEC;
830 if (pte & PG_DIRTY_MASK) {
831 /* only set write access if already dirty... otherwise wait
832 for dirty access */
833 if (is_user) {
834 if (ptep & PG_RW_MASK)
835 prot |= PAGE_WRITE;
836 } else {
837 if (!(env->cr[0] & CR0_WP_MASK) ||
838 (ptep & PG_RW_MASK))
839 prot |= PAGE_WRITE;
842 do_mapping:
843 pte = pte & env->a20_mask;
845 /* Even if 4MB pages, we map only one 4KB page in the cache to
846 avoid filling it too fast */
847 page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
848 paddr = (pte & TARGET_PAGE_MASK) + page_offset;
849 vaddr = virt_addr + page_offset;
851 tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
852 return 0;
853 do_fault_protect:
854 error_code = PG_ERROR_P_MASK;
855 do_fault:
856 error_code |= (is_write << PG_ERROR_W_BIT);
857 if (is_user)
858 error_code |= PG_ERROR_U_MASK;
859 if (is_write1 == 2 &&
860 (env->efer & MSR_EFER_NXE) &&
861 (env->cr[4] & CR4_PAE_MASK))
862 error_code |= PG_ERROR_I_D_MASK;
863 if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
864 /* cr2 is not modified in case of exceptions */
865 stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
866 addr);
867 } else {
868 env->cr[2] = addr;
870 env->error_code = error_code;
871 env->exception_index = EXCP0E_PAGE;
872 return 1;
875 target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
877 target_ulong pde_addr, pte_addr;
878 uint64_t pte;
879 target_phys_addr_t paddr;
880 uint32_t page_offset;
881 int page_size;
883 if (env->cr[4] & CR4_PAE_MASK) {
884 target_ulong pdpe_addr;
885 uint64_t pde, pdpe;
887 #ifdef TARGET_X86_64
888 if (env->hflags & HF_LMA_MASK) {
889 uint64_t pml4e_addr, pml4e;
890 int32_t sext;
892 /* test virtual address sign extension */
893 sext = (int64_t)addr >> 47;
894 if (sext != 0 && sext != -1)
895 return -1;
897 pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
898 env->a20_mask;
899 pml4e = ldq_phys(pml4e_addr);
900 if (!(pml4e & PG_PRESENT_MASK))
901 return -1;
903 pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
904 env->a20_mask;
905 pdpe = ldq_phys(pdpe_addr);
906 if (!(pdpe & PG_PRESENT_MASK))
907 return -1;
908 } else
909 #endif
911 pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) &
912 env->a20_mask;
913 pdpe = ldq_phys(pdpe_addr);
914 if (!(pdpe & PG_PRESENT_MASK))
915 return -1;
918 pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
919 env->a20_mask;
920 pde = ldq_phys(pde_addr);
921 if (!(pde & PG_PRESENT_MASK)) {
922 return -1;
924 if (pde & PG_PSE_MASK) {
925 /* 2 MB page */
926 page_size = 2048 * 1024;
927 pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
928 } else {
929 /* 4 KB page */
930 pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
931 env->a20_mask;
932 page_size = 4096;
933 pte = ldq_phys(pte_addr);
935 if (!(pte & PG_PRESENT_MASK))
936 return -1;
937 } else {
938 uint32_t pde;
940 if (!(env->cr[0] & CR0_PG_MASK)) {
941 pte = addr;
942 page_size = 4096;
943 } else {
944 /* page directory entry */
945 pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask;
946 pde = ldl_phys(pde_addr);
947 if (!(pde & PG_PRESENT_MASK))
948 return -1;
949 if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
950 pte = pde & ~0x003ff000; /* align to 4MB */
951 page_size = 4096 * 1024;
952 } else {
953 /* page directory entry */
954 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask;
955 pte = ldl_phys(pte_addr);
956 if (!(pte & PG_PRESENT_MASK))
957 return -1;
958 page_size = 4096;
961 pte = pte & env->a20_mask;
964 page_offset = (addr & TARGET_PAGE_MASK) & (page_size - 1);
965 paddr = (pte & TARGET_PAGE_MASK) + page_offset;
966 return paddr;
969 void hw_breakpoint_insert(CPUState *env, int index)
971 int type, err = 0;
973 switch (hw_breakpoint_type(env->dr[7], index)) {
974 case 0:
975 if (hw_breakpoint_enabled(env->dr[7], index))
976 err = cpu_breakpoint_insert(env, env->dr[index], BP_CPU,
977 &env->cpu_breakpoint[index]);
978 break;
979 case 1:
980 type = BP_CPU | BP_MEM_WRITE;
981 goto insert_wp;
982 case 2:
983 /* No support for I/O watchpoints yet */
984 break;
985 case 3:
986 type = BP_CPU | BP_MEM_ACCESS;
987 insert_wp:
988 err = cpu_watchpoint_insert(env, env->dr[index],
989 hw_breakpoint_len(env->dr[7], index),
990 type, &env->cpu_watchpoint[index]);
991 break;
993 if (err)
994 env->cpu_breakpoint[index] = NULL;
997 void hw_breakpoint_remove(CPUState *env, int index)
999 if (!env->cpu_breakpoint[index])
1000 return;
1001 switch (hw_breakpoint_type(env->dr[7], index)) {
1002 case 0:
1003 if (hw_breakpoint_enabled(env->dr[7], index))
1004 cpu_breakpoint_remove_by_ref(env, env->cpu_breakpoint[index]);
1005 break;
1006 case 1:
1007 case 3:
1008 cpu_watchpoint_remove_by_ref(env, env->cpu_watchpoint[index]);
1009 break;
1010 case 2:
1011 /* No support for I/O watchpoints yet */
1012 break;
1016 int check_hw_breakpoints(CPUState *env, int force_dr6_update)
1018 target_ulong dr6;
1019 int reg, type;
1020 int hit_enabled = 0;
1022 dr6 = env->dr[6] & ~0xf;
1023 for (reg = 0; reg < 4; reg++) {
1024 type = hw_breakpoint_type(env->dr[7], reg);
1025 if ((type == 0 && env->dr[reg] == env->eip) ||
1026 ((type & 1) && env->cpu_watchpoint[reg] &&
1027 (env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT))) {
1028 dr6 |= 1 << reg;
1029 if (hw_breakpoint_enabled(env->dr[7], reg))
1030 hit_enabled = 1;
1033 if (hit_enabled || force_dr6_update)
1034 env->dr[6] = dr6;
1035 return hit_enabled;
1038 static CPUDebugExcpHandler *prev_debug_excp_handler;
1040 void raise_exception_env(int exception_index, CPUState *env);
1042 static void breakpoint_handler(CPUState *env)
1044 CPUBreakpoint *bp;
1046 if (env->watchpoint_hit) {
1047 if (env->watchpoint_hit->flags & BP_CPU) {
1048 env->watchpoint_hit = NULL;
1049 if (check_hw_breakpoints(env, 0))
1050 raise_exception_env(EXCP01_DB, env);
1051 else
1052 cpu_resume_from_signal(env, NULL);
1054 } else {
1055 QTAILQ_FOREACH(bp, &env->breakpoints, entry)
1056 if (bp->pc == env->eip) {
1057 if (bp->flags & BP_CPU) {
1058 check_hw_breakpoints(env, 1);
1059 raise_exception_env(EXCP01_DB, env);
1061 break;
1064 if (prev_debug_excp_handler)
1065 prev_debug_excp_handler(env);
1068 /* This should come from sysemu.h - if we could include it here... */
1069 void qemu_system_reset_request(void);
1071 static void qemu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
1072 uint64_t mcg_status, uint64_t addr, uint64_t misc)
1074 uint64_t mcg_cap = cenv->mcg_cap;
1075 uint64_t *banks = cenv->mce_banks;
1078 * if MSR_MCG_CTL is not all 1s, the uncorrected error
1079 * reporting is disabled
1081 if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
1082 cenv->mcg_ctl != ~(uint64_t)0)
1083 return;
1084 banks += 4 * bank;
1086 * if MSR_MCi_CTL is not all 1s, the uncorrected error
1087 * reporting is disabled for the bank
1089 if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
1090 return;
1091 if (status & MCI_STATUS_UC) {
1092 if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
1093 !(cenv->cr[4] & CR4_MCE_MASK)) {
1094 fprintf(stderr, "injects mce exception while previous "
1095 "one is in progress!\n");
1096 qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
1097 qemu_system_reset_request();
1098 return;
1100 if (banks[1] & MCI_STATUS_VAL)
1101 status |= MCI_STATUS_OVER;
1102 banks[2] = addr;
1103 banks[3] = misc;
1104 cenv->mcg_status = mcg_status;
1105 banks[1] = status;
1106 cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
1107 } else if (!(banks[1] & MCI_STATUS_VAL)
1108 || !(banks[1] & MCI_STATUS_UC)) {
1109 if (banks[1] & MCI_STATUS_VAL)
1110 status |= MCI_STATUS_OVER;
1111 banks[2] = addr;
1112 banks[3] = misc;
1113 banks[1] = status;
1114 } else
1115 banks[1] |= MCI_STATUS_OVER;
1118 void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
1119 uint64_t mcg_status, uint64_t addr, uint64_t misc,
1120 int broadcast)
1122 unsigned bank_num = cenv->mcg_cap & 0xff;
1123 CPUState *env;
1124 int flag = 0;
1126 if (bank >= bank_num || !(status & MCI_STATUS_VAL)) {
1127 return;
1130 if (broadcast) {
1131 if (!cpu_x86_support_mca_broadcast(cenv)) {
1132 fprintf(stderr, "Current CPU does not support broadcast\n");
1133 return;
1137 if (kvm_enabled()) {
1138 if (broadcast) {
1139 flag |= MCE_BROADCAST;
1142 kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag);
1143 } else {
1144 qemu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
1145 if (broadcast) {
1146 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1147 if (cenv == env) {
1148 continue;
1150 qemu_inject_x86_mce(env, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
1151 MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0);
1156 #endif /* !CONFIG_USER_ONLY */
1158 static void mce_init(CPUX86State *cenv)
1160 unsigned int bank, bank_num;
1162 if (((cenv->cpuid_version >> 8)&0xf) >= 6
1163 && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) {
1164 cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
1165 cenv->mcg_ctl = ~(uint64_t)0;
1166 bank_num = MCE_BANKS_DEF;
1167 for (bank = 0; bank < bank_num; bank++)
1168 cenv->mce_banks[bank*4] = ~(uint64_t)0;
1172 int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
1173 target_ulong *base, unsigned int *limit,
1174 unsigned int *flags)
1176 SegmentCache *dt;
1177 target_ulong ptr;
1178 uint32_t e1, e2;
1179 int index;
1181 if (selector & 0x4)
1182 dt = &env->ldt;
1183 else
1184 dt = &env->gdt;
1185 index = selector & ~7;
1186 ptr = dt->base + index;
1187 if ((index + 7) > dt->limit
1188 || cpu_memory_rw_debug(env, ptr, (uint8_t *)&e1, sizeof(e1), 0) != 0
1189 || cpu_memory_rw_debug(env, ptr+4, (uint8_t *)&e2, sizeof(e2), 0) != 0)
1190 return 0;
1192 *base = ((e1 >> 16) | ((e2 & 0xff) << 16) | (e2 & 0xff000000));
1193 *limit = (e1 & 0xffff) | (e2 & 0x000f0000);
1194 if (e2 & DESC_G_MASK)
1195 *limit = (*limit << 12) | 0xfff;
1196 *flags = e2;
1198 return 1;
1201 CPUX86State *cpu_x86_init(const char *cpu_model)
1203 CPUX86State *env;
1204 static int inited;
1206 env = qemu_mallocz(sizeof(CPUX86State));
1207 cpu_exec_init(env);
1208 env->cpu_model_str = cpu_model;
1210 /* init various static tables */
1211 if (!inited) {
1212 inited = 1;
1213 optimize_flags_init();
1214 #ifndef CONFIG_USER_ONLY
1215 prev_debug_excp_handler =
1216 cpu_set_debug_excp_handler(breakpoint_handler);
1217 #endif
1219 if (cpu_x86_register(env, cpu_model) < 0) {
1220 cpu_x86_close(env);
1221 return NULL;
1223 mce_init(env);
1225 qemu_init_vcpu(env);
1227 return env;
1230 #if !defined(CONFIG_USER_ONLY)
1231 void do_cpu_init(CPUState *env)
1233 int sipi = env->interrupt_request & CPU_INTERRUPT_SIPI;
1234 cpu_reset(env);
1235 env->interrupt_request = sipi;
1236 apic_init_reset(env->apic_state);
1237 env->halted = !cpu_is_bsp(env);
1240 void do_cpu_sipi(CPUState *env)
1242 apic_sipi(env->apic_state);
1244 #else
1245 void do_cpu_init(CPUState *env)
1248 void do_cpu_sipi(CPUState *env)
1251 #endif