1 /*
2 * KQEMU
4 * Copyright (C) 2004-2008 Fabrice Bellard
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19 #include "kqemu_int.h"
21 //#define DEBUG_TLB
22 //#define DEBUG_MMU
23 //#define DEBUG_PHYS_LOAD_STORE
24 //#define DEBUG_RAM
25 //#define DEBUG_LOCK
26 //#define DEBUG_SOFT_TLB
27 //#define DEBUG_INVALIDATE
29 //#define PROFILE_SOFTMMU
30 //#define DEBUG_DT_CACHE
32 static void mon_set_pte(struct kqemu_state *s,
33 int as_index, unsigned long vaddr,
34 unsigned long paddr, int pte_flags);
35 static void unmap_ram_page(struct kqemu_state *s,
36 struct kqemu_ram_page *rp);
37 static void unlock_ram_page(struct kqemu_state *s,
38 struct kqemu_ram_page *rp);
39 static void *mon_alloc_page(struct kqemu_state *s,
40 unsigned long *ppage_index);
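/* The #define/#include pair below pulls common.c in textually, so the
   shared helpers in common.c are compiled directly into the monitor;
   IN_MONITOR presumably lets that file select its monitor-specific
   code paths. */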
42 #define IN_MONITOR
43 #include "common.c"
45 /*
46 * Segment state in monitor code:
48 * If CPL = 3 or not USE_SEG_GP:
49 * FS, GS are stored in %fs, %gs.
50 * CS, SS, DS, ES are stored in s->reg1.xx_sel
51 * the contents of the CPU seg desc caches are consistent with the dt_table
53 * If CPL != 3 and USE_SEG_GP:
55 * FS, GS are stored in %fs, %gs. If not null and different from
56 * s->reg1.cs_sel and s->reg1.ss_sel, then the contents of the CPU
57 * seg desc caches are consistent with s->seg_desc_cache[R_xx]
59 * DS, ES are stored in s->reg1.xx_sel. Same remark as for FS and GS
60 * for CPU seg desc cache consistency.
62 * CS, SS are stored in s->reg1.xx_sel. The contents of the CPU seg
63 * desc caches are consistent with the dt_table
65 * If seg_cache_loaded is true, then s->cpu_state.segs[].base is
66 * updated. For CS and SS, s->cpu_state.segs[].flags is updated too.
67 */
70 static inline void save_segs(struct kqemu_state *s)
72 struct kqemu_cpu_state *env = &s->cpu_state;
74 asm volatile ("movw %%fs, %0" : "=m" (env->segs[R_FS].selector));
75 asm volatile ("movw %%gs, %0" : "=m" (env->segs[R_GS].selector));
76 #ifdef __x86_64__
77 rdmsrl(MSR_FSBASE, env->segs[R_FS].base);
78 rdmsrl(MSR_GSBASE, env->segs[R_GS].base);
80 asm volatile ("movw %%ds, %0" : "=m" (env->segs[R_DS].selector));
81 asm volatile ("movw %%es, %0" : "=m" (env->segs[R_ES].selector));
82 #endif
85 static inline void reload_segs(struct kqemu_state *s)
87 struct kqemu_cpu_state *env = &s->cpu_state;
89 #ifdef USE_SEG_GP
90 if (s->cpu_state.cpl != 3) {
91 set_cpu_seg_cache(s, R_FS, env->segs[R_FS].selector);
92 set_cpu_seg_cache(s, R_GS, env->segs[R_GS].selector);
93 #ifdef __x86_64__
94 set_cpu_seg_cache(s, R_DS, env->segs[R_DS].selector);
95 set_cpu_seg_cache(s, R_ES, env->segs[R_ES].selector);
96 #endif
97 } else
98 #endif
100 LOAD_SEG(fs, env->segs[R_FS].selector);
101 LOAD_SEG(gs, env->segs[R_GS].selector);
102 #ifdef __x86_64__
103 LOAD_SEG(ds, env->segs[R_DS].selector);
104 LOAD_SEG(es, env->segs[R_ES].selector);
105 #endif
107 #ifdef __x86_64__
108 wrmsrl(MSR_FSBASE, env->segs[R_FS].base);
109 wrmsrl(MSR_GSBASE, env->segs[R_GS].base);
110 #endif
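/* update_host_cr0() merges the guest CR0 bits that matter for trap
   behaviour into the host CR0: TS is forced whenever the guest has TS
   or EM set, so the first FPU instruction traps and can be handled in
   kqemu_exception_07(); MP is copied as is; AM is only kept when the
   guest enabled it and runs at CPL 3, since alignment checks are not
   wanted while guest kernel code is emulated. */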
113 void update_host_cr0(struct kqemu_state *s)
115 unsigned long guest_cr0, host_cr0;
117 guest_cr0 = s->cpu_state.cr0;
118 host_cr0 = s->kernel_cr0;
119 if (guest_cr0 & (CR0_TS_MASK | CR0_EM_MASK)) {
120 host_cr0 |= CR0_TS_MASK;
122 host_cr0 = (host_cr0 & ~(CR0_MP_MASK)) | (guest_cr0 & CR0_MP_MASK);
123 host_cr0 &= ~CR0_AM_MASK;
124 if ((guest_cr0 & CR0_AM_MASK) && s->cpu_state.cpl == 3)
125 host_cr0 |= CR0_AM_MASK;
126 asm volatile ("mov %0, %%cr0" : : "r" (host_cr0));
129 void update_host_cr4(struct kqemu_state *s)
131 unsigned long guest_cr4, host_cr4, mask;
132 asm volatile("mov %%cr4, %0" : "=r" (host_cr4));
133 mask = 0;
134 if (s->cpuid_features & CPUID_FXSR)
135 mask |= CR4_OSFXSR_MASK;
136 if (s->cpuid_features & CPUID_SSE)
137 mask |= CR4_OSXMMEXCPT_MASK;
138 guest_cr4 = s->cpu_state.cr4;
139 host_cr4 = (guest_cr4 & mask) | (host_cr4 & ~mask);
140 if (s->cpu_state.cpl == 0) {
141 host_cr4 &= ~CR4_TSD_MASK; /* rdtsc is enabled */
142 } else {
143 host_cr4 = (guest_cr4 & CR4_TSD_MASK) | (host_cr4 & ~CR4_TSD_MASK);
145 asm volatile ("mov %0, %%cr4" : : "r" (host_cr4));
148 static inline void restore_monitor_nexus_mapping(struct kqemu_state *s)
150 int is_user;
151 /* restore the original mapping */
152 is_user = (s->cpu_state.cpl == 3);
153 if (USE_PAE(s)) {
154 uint64_t *ptep;
155 ptep = s->nexus_kaddr_vptep[is_user];
156 *ptep = s->nexus_orig_pte;
157 } else {
158 uint32_t *ptep;
159 ptep = s->nexus_kaddr_vptep[is_user];
160 *ptep = s->nexus_orig_pte;
162 asm volatile ("invlpg (%0)" : : "r" (s->nexus_kaddr));
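/* monitor2kernel1() is the generic "call the host" path.  The guest
   segment registers are saved if a register frame is active, the
   nexus page (presumably the page holding the low-level switch code
   and data shared by both address spaces) is temporarily mapped at
   its kernel virtual address so it stays valid across the CR3 change,
   and monitor2kernel() performs the actual transition.  On return,
   CR0/CR4 are re-derived from the guest state and the original
   mapping of the nexus kernel address is restored. */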
165 static void monitor2kernel1(struct kqemu_state *s)
167 struct kqemu_exception_regs *r;
168 int is_user;
170 r = s->regs;
171 if (r) {
172 save_segs(s);
175 /* map the nexus page to its kernel address */
176 is_user = (s->cpu_state.cpl == 3);
177 if (USE_PAE(s)) {
178 uint64_t *ptep;
179 ptep = s->nexus_kaddr_vptep[is_user];
180 s->nexus_orig_pte = *ptep;
181 *ptep = s->nexus_pte;
182 } else {
183 uint32_t *ptep;
184 ptep = s->nexus_kaddr_vptep[is_user];
185 s->nexus_orig_pte = *ptep;
186 *ptep = s->nexus_pte;
188 asm volatile ("invlpg (%0)" : : "r" (s->nexus_kaddr));
190 monitor2kernel(s);
192 update_host_cr0(s);
194 update_host_cr4(s);
196 restore_monitor_nexus_mapping(s);
198 if (r) {
199 reload_segs(s);
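/* Host service requests: the monitor cannot call the host OS
   directly, so it stores a request code in s->mon_req (arguments in
   s->arg0 and/or s->log_buf), crosses over with monitor2kernel1() and
   reads the results back from s->ret/s->ret2.  The helpers below wrap
   the individual request types. */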
203 void monitor_log(struct kqemu_state *s, const char *fmt, ...)
205 va_list ap;
206 va_start(ap, fmt);
207 mon_vsnprintf(s->log_buf, sizeof(s->log_buf), fmt, ap);
208 s->mon_req = MON_REQ_LOG;
209 monitor2kernel1(s);
210 va_end(ap);
213 void monitor_panic(struct kqemu_state *s, const char *fmt, ...)
215 va_list ap;
216 va_start(ap, fmt);
217 mon_vsnprintf(s->log_buf, sizeof(s->log_buf), fmt, ap);
218 s->mon_req = MON_REQ_ABORT;
219 monitor2kernel1(s);
220 /* should never be reached */
221 while (1);
224 void __attribute__((noreturn, format (printf, 3, 4)))
225 monitor_panic_regs(struct kqemu_state *s, struct kqemu_exception_regs *r,
226 const char *fmt, ...)
228 va_list ap;
229 int len;
230 va_start(ap, fmt);
231 mon_vsnprintf(s->log_buf, sizeof(s->log_buf), fmt, ap);
232 len = strlen(s->log_buf);
233 mon_snprintf(s->log_buf + len, sizeof(s->log_buf) - len,
234 "err=%04x CS:EIP=%04x:" FMT_lx " SS:SP=%04x:" FMT_lx "\n",
235 (int)r->error_code, r->cs_sel, (long)r->eip,
236 r->ss_sel, (long)r->esp);
237 s->mon_req = MON_REQ_ABORT;
238 monitor2kernel1(s);
240 /* should never be reached */
240 while (1);
243 struct kqemu_page *monitor_alloc_page(struct kqemu_state *s,
244 unsigned long *ppage_index)
246 s->mon_req = MON_REQ_ALLOC_PAGE;
247 monitor2kernel1(s);
248 *ppage_index = s->ret2;
249 return (void *)s->ret;
252 static struct kqemu_user_page *monitor_lock_user_page(struct kqemu_state *s,
253 unsigned long *ppage_index,
254 unsigned long uaddr)
256 s->mon_req = MON_REQ_LOCK_USER_PAGE;
257 s->arg0 = uaddr;
258 monitor2kernel1(s);
259 *ppage_index = s->ret2;
260 return (void *)s->ret;
263 static void monitor_unlock_user_page(struct kqemu_state *s,
264 struct kqemu_user_page *page)
266 s->mon_req = MON_REQ_UNLOCK_USER_PAGE;
267 s->arg0 = (long)page;
268 monitor2kernel1(s);
271 /* return NULL if error */
272 static void *mon_alloc_page(struct kqemu_state *s,
273 unsigned long *ppage_index)
275 unsigned long vaddr, page_index;
276 struct kqemu_page *host_page;
278 host_page = monitor_alloc_page(s, &page_index);
279 if (!host_page) {
280 return NULL;
282 vaddr = get_vaddr(s);
283 /* XXX: check error */
284 set_vaddr_page_index(s, vaddr, page_index, host_page, 0);
285 mon_set_pte(s, 0, vaddr, page_index,
286 PG_PRESENT_MASK | PG_GLOBAL(s) | PG_RW_MASK);
287 if (ppage_index)
288 *ppage_index = page_index;
289 return (void *)vaddr;
292 static void mon_set_pte(struct kqemu_state *s,
293 int as_index, unsigned long vaddr,
294 unsigned long page_index, int pte_flags)
296 if (USE_PAE(s)) {
297 uint64_t *ptep;
298 ptep = mon_get_ptep_l3(s, as_index, vaddr, 1);
299 *ptep = ((uint64_t)page_index << PAGE_SHIFT) | pte_flags;
300 } else {
301 uint32_t *ptep;
302 ptep = mon_get_ptep_l2(s, as_index, vaddr, 1);
303 *ptep = (page_index << PAGE_SHIFT) | pte_flags;
305 asm volatile("invlpg %0" : : "m" (*(uint8_t *)vaddr));
308 static uint32_t phys_page_find(struct kqemu_state *s,
309 unsigned long page_index)
311 uint32_t *ptr, pd;
313 ptr = phys_page_findp(s, page_index, 0);
314 if (!ptr)
315 return KQEMU_IO_MEM_UNASSIGNED;
316 pd = *ptr;
317 #ifdef DEBUG_TLB
318 monitor_log(s, "pd=%08x\n", pd);
319 #endif
320 return pd;
323 /* return the ram page only if it is already locked */
324 static struct kqemu_ram_page *get_locked_ram_page(struct kqemu_state *s,
325 unsigned long ram_addr)
327 int ram_page_index;
328 struct kqemu_ram_page *rp;
329 ram_page_index = ram_addr >> PAGE_SHIFT;
330 rp = &s->ram_pages[ram_page_index];
331 if (rp->paddr == -1)
332 return NULL;
333 return rp;
336 /* unlock some pages to be able to allocate at least one page */
337 static void unlock_pages(struct kqemu_state *s)
339 while (s->nb_locked_ram_pages >= s->max_locked_ram_pages) {
340 /* unlock the least recently used pages */
341 unlock_ram_page(s, s->locked_page_head.lock_prev);
345 static struct kqemu_ram_page *lock_ram_page(struct kqemu_state *s,
346 unsigned long ram_addr)
348 int ram_page_index;
349 struct kqemu_ram_page *rp, **p, *rp_prev, *rp_next;
350 unsigned long uaddr, page_index;
351 struct kqemu_user_page *host_page;
353 ram_page_index = ram_addr >> PAGE_SHIFT;
354 rp = &s->ram_pages[ram_page_index];
355 if (rp->paddr == -1) {
357 unlock_pages(s);
359 uaddr = ram_addr + s->ram_base_uaddr;
360 host_page = monitor_lock_user_page(s, &page_index, uaddr);
361 if (!host_page)
362 monitor_panic(s, "Could not lock user page %p", (void *)uaddr);
363 rp->paddr = page_index;
364 rp->host_page = host_page;
366 /* insert in hash table */
367 p = &s->ram_page_hash[ram_page_hash_func(page_index)];
368 rp->hash_next = *p;
369 *p = rp;
371 /* insert at lock list head */
372 rp_prev = &s->locked_page_head;
373 rp_next = s->locked_page_head.lock_next;
374 rp_next->lock_prev = rp;
375 rp->lock_next = rp_next;
376 rp_prev->lock_next = rp;
377 rp->lock_prev = rp_prev;
378 s->nb_locked_ram_pages++;
379 #ifdef DEBUG_LOCK
380 monitor_log(s, "lock_ram_page: %p rp=%p\n", (void *)ram_addr, rp);
381 #endif
383 return rp;
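/* Locked ram pages are kept on a doubly linked LRU list rooted at
   s->locked_page_head: lock_ram_page() and map_ram_page() insert or
   move pages to the head, so unlock_pages() can evict from the tail
   (lock_prev of the head) when max_locked_ram_pages is reached.  A
   separate hash table indexed by host page frame number allows
   lookups by physical address. */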
386 static void unlock_ram_page(struct kqemu_state *s,
387 struct kqemu_ram_page *rp)
389 struct kqemu_ram_page **prp;
391 if (rp->paddr == -1)
392 return;
393 #ifdef DEBUG_LOCK
394 monitor_log(s, "unlock_ram_page: rp=%p\n", rp);
395 #endif
396 unmap_ram_page(s, rp);
398 /* remove it from the hash list */
399 prp = &s->ram_page_hash[ram_page_hash_func(rp->paddr)];
400 for(;;) {
401 if (*prp == NULL)
402 break;
403 if (*prp == rp) {
404 *prp = rp->hash_next;
405 break;
407 prp = &(*prp)->hash_next;
410 /* unlock it in the kernel */
411 monitor_unlock_user_page(s, rp->host_page);
413 rp->paddr = -1;
415 /* remove from lock list */
416 rp->lock_prev->lock_next = rp->lock_next;
417 rp->lock_next->lock_prev = rp->lock_prev;
418 s->nb_locked_ram_pages--;
421 static void map_ram_page(struct kqemu_state *s,
422 int as_index, unsigned long vaddr,
423 struct kqemu_ram_page *rp, int pte_flags)
425 unsigned long *rptep;
426 struct kqemu_ram_page *rp_prev, *rp_next;
428 #ifdef DEBUG_RAM
429 monitor_log(s, "map_ram_page: vaddr=%p rp=%p pte_flags=0x%x\n",
430 (void *)vaddr, rp, pte_flags);
431 #endif
432 unmap_virtual_ram_page(s, as_index, vaddr);
434 mon_set_pte(s, as_index, vaddr, rp->paddr, pte_flags);
436 if (rp->vaddr == -1) {
437 /* most common case */
438 rp->vaddr = vaddr | (as_index << 1);
440 /* add in mapping list */
441 rp_prev = s->mapped_page_head.map_prev;
442 rp_next = &s->mapped_page_head;
443 rp_next->map_prev = rp;
444 rp->map_next = rp_next;
445 rp_prev->map_next = rp;
446 rp->map_prev = rp_prev;
447 } else {
448 /* add a new mapping (there is already at least one mapping) */
449 rptep = get_ram_page_next_mapping_alloc(s, as_index, vaddr, 1);
450 if (!rptep)
451 monitor_panic(s, "next_mapping: could not alloc page");
452 *rptep = rp->vaddr;
453 rp->vaddr = vaddr | (as_index << 1) | 1;
456 /* move to head in locked list */
457 rp_prev = &s->locked_page_head;
458 if (rp != rp_prev->lock_next) {
459 /* delete */
460 rp->lock_prev->lock_next = rp->lock_next;
461 rp->lock_next->lock_prev = rp->lock_prev;
463 /* insert at head */
464 rp_next = s->locked_page_head.lock_next;
465 rp_next->lock_prev = rp;
466 rp->lock_next = rp_next;
467 rp_prev->lock_next = rp;
468 rp->lock_prev = rp_prev;
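/* rp->vaddr appears to encode a whole mapping chain in one word: the
   page-aligned monitor virtual address in the upper bits, the address
   space index in bit 1 (as_index << 1) and a "more mappings follow"
   flag in bit 0, with further mappings chained through the
   next-mapping table; a value of -1 means the ram page is currently
   not mapped anywhere. */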
472 static unsigned long ram_ptr_to_ram_addr(struct kqemu_state *s, void *ptr)
474 int slot;
475 slot = ((unsigned long)ptr - s->ram_page_cache_base) >> PAGE_SHIFT;
476 return s->slot_to_ram_addr[slot];
479 static void *get_ram_ptr_slow(struct kqemu_state *s, int slot,
480 unsigned long ram_addr)
482 struct kqemu_ram_page *rp;
483 unsigned long vaddr;
484 void *ptr;
486 #ifdef PROFILE_INTERP2
487 s->ram_map_miss_count++;
488 #endif
489 rp = lock_ram_page(s, ram_addr);
490 vaddr = (slot << PAGE_SHIFT) + s->ram_page_cache_base;
491 /* map the ram page */
492 map_ram_page(s, 0, vaddr, rp,
493 PG_PRESENT_MASK | PG_GLOBAL(s) |
494 PG_ACCESSED_MASK | PG_DIRTY_MASK |
495 PG_RW_MASK);
496 s->slot_to_ram_addr[slot] = ram_addr;
497 ptr = (void *)vaddr;
498 #if defined(DEBUG_SOFT_TLB)
499 monitor_log(s, "get_ram_ptr: slot=%d ram_addr=%p ptr=%p\n",
500 slot, (void *)ram_addr, ptr);
501 #endif
502 return ptr;
505 static inline void *get_ram_ptr(struct kqemu_state *s, int slot,
506 unsigned long ram_addr)
508 unsigned long vaddr;
509 #ifdef PROFILE_INTERP2
510 s->ram_map_count++;
511 #endif
512 if (likely(s->slot_to_ram_addr[slot] == ram_addr)) {
513 vaddr = (slot << PAGE_SHIFT) + s->ram_page_cache_base;
514 return (void *)vaddr;
515 } else {
516 return get_ram_ptr_slow(s, slot, ram_addr);
520 static inline int ram_is_dirty(struct kqemu_state *s, unsigned long ram_addr)
522 return s->ram_dirty[ram_addr >> PAGE_SHIFT] == 0xff;
525 static inline int ram_get_dirty(struct kqemu_state *s, unsigned long ram_addr,
526 int dirty_flags)
528 return s->ram_dirty[ram_addr >> PAGE_SHIFT] & dirty_flags;
531 static void ram_set_read_only(struct kqemu_state *s,
532 unsigned long ram_addr)
534 struct kqemu_ram_page *rp;
535 unsigned long addr, vaddr;
536 unsigned long *nptep;
537 uint32_t *ptep;
539 rp = get_locked_ram_page(s, ram_addr);
540 if (rp) {
541 vaddr = rp->vaddr;
542 if (vaddr == -1)
543 return;
544 for(;;) {
545 addr = vaddr & ~0xfff;
546 if ((addr - s->ram_page_cache_base) < SOFT_TLB_SIZE * PAGE_SIZE) {
547 /* XXX: do it too */
548 } else {
549 if (USE_PAE(s))
550 ptep = (uint32_t *)mon_get_ptep_l3(s,
551 GET_AS(vaddr), addr, 0);
552 else
553 ptep = mon_get_ptep_l2(s, GET_AS(vaddr), addr, 0);
554 *ptep &= ~PG_RW_MASK;
555 asm volatile("invlpg %0" : : "m" (*(uint8_t *)addr));
557 if (IS_LAST_VADDR(vaddr))
558 break;
559 nptep = get_ram_page_next_mapping(s, GET_AS(vaddr), addr);
560 vaddr = *nptep;
565 /* XXX: need to reset user space structures too */
566 static void ram_reset_dirty(struct kqemu_state *s,
567 unsigned long ram_addr, int dirty_flag)
570 /* we must modify the protection of all the user pages if this has
571 not already been done */
572 if (ram_is_dirty(s, ram_addr)) {
573 ram_set_read_only(s, ram_addr);
574 /* signal QEMU that it needs to update its TLB info */
575 s->cpu_state.nb_ram_pages_to_update = 1;
577 s->ram_dirty[ram_addr >> PAGE_SHIFT] &= ~dirty_flag;
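/* get_phys_mem_ptr() translates a guest physical address into a
   pointer the monitor can dereference.  phys_page_find() returns a
   descriptor whose low bits give the memory type (RAM, ROM,
   unassigned, I/O, COMM) and whose page-aligned part is the offset
   into guest RAM; RAM and ROM pages are then mapped through a small
   direct-mapped cache of slots above the soft TLB window, the slot
   being chosen by folding the ram page index:
       slot = (idx ^ (idx >> PHYS_SLOT_BITS) ^ (idx >> 2 * PHYS_SLOT_BITS))
              & (PHYS_NB_SLOTS - 1), plus SOFT_TLB_SIZE.
   NULL is returned for I/O pages and for writes to ROM. */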
580 static inline void *get_phys_mem_ptr(struct kqemu_state *s,
581 unsigned long paddr, int write)
583 int io_index, slot;
584 unsigned long pd, ram_addr;
585 uint8_t *ptr;
587 pd = phys_page_find(s, paddr >> PAGE_SHIFT);
588 io_index = (pd & ~PAGE_MASK);
589 if (unlikely(io_index != KQEMU_IO_MEM_RAM)) {
590 if (io_index != KQEMU_IO_MEM_ROM)
591 return NULL;
592 if (write)
593 return NULL;
595 ram_addr = pd & PAGE_MASK;
596 slot = (ram_addr >> PAGE_SHIFT);
597 slot = slot ^ (slot >> PHYS_SLOT_BITS) ^ (slot >> (2 * PHYS_SLOT_BITS));
598 slot = (slot & (PHYS_NB_SLOTS - 1)) + SOFT_TLB_SIZE;
599 ptr = get_ram_ptr(s, slot, ram_addr);
600 #if defined(DEBUG_TLB)
601 monitor_log(s, "get_phys_mem_ptr: paddr=%p ram_addr=%p ptr=%p\n",
602 (void *)paddr,
603 (void *)ram_addr,
604 (void *)ptr);
605 #endif
606 return ptr + (paddr & ~PAGE_MASK);
609 static uint32_t ldl_phys_mmu(struct kqemu_state *s, unsigned long addr)
611 uint32_t *ptr;
612 uint32_t val;
613 ptr = get_phys_mem_ptr(s, addr, 0);
614 if (!ptr)
615 val = 0;
616 else
617 val = *ptr;
618 #ifdef DEBUG_PHYS_LOAD_STORE
619 monitor_log(s, "ldl_phys_mmu: %p = 0x%08x\n", (void *)addr, val);
620 #endif
621 return val;
624 /* NOTE: we do not update the dirty bits. This function is only used
625 to update the D and A bits, so it is not critical */
626 static void stl_phys_mmu(struct kqemu_state *s, unsigned long addr,
627 uint32_t val)
629 uint32_t *ptr;
630 #ifdef DEBUG_PHYS_LOAD_STORE
631 monitor_log(s, "st_phys_mmu: %p = 0x%08x\n", (void *)addr, val);
632 #endif
633 ptr = get_phys_mem_ptr(s, addr, 1);
634 if (ptr)
635 *ptr = val;
638 /* return 0 if OK, 2 if the mapping could not be done because the page
639 is in an I/O memory region or in the monitor memory area */
640 static long tlb_set_page(struct kqemu_state *s,
641 unsigned long vaddr, unsigned long paddr,
642 int prot, int is_softmmu)
644 unsigned long pd;
645 int pte_flags, mask, is_user;
646 long ret;
647 struct kqemu_ram_page *rp;
649 #ifdef DEBUG_RAM
650 monitor_log(s, "tlb_set_page: vaddr=%p paddr=%p prot=0x%02x s=%d\n",
651 (void *)vaddr, (void *)paddr, prot, is_softmmu);
652 #endif
653 pd = phys_page_find(s, paddr >> PAGE_SHIFT);
655 if ((pd & ~PAGE_MASK) > KQEMU_IO_MEM_ROM) {
656 if ((pd & ~PAGE_MASK) == KQEMU_IO_MEM_COMM) {
657 /* special case: mapping of the kqemu communication page */
658 pte_flags = PG_PRESENT_MASK | PG_USER_MASK |
659 PG_ACCESSED_MASK | PG_DIRTY_MASK;
660 is_user = (s->cpu_state.cpl == 3);
661 if (is_user)
662 mask = PAGE_UWRITE;
663 else
664 mask = PAGE_KWRITE;
665 if (prot & mask)
666 pte_flags |= PG_ORIG_RW_MASK | PG_RW_MASK;
667 mon_set_pte(s, is_user, vaddr, s->comm_page_index, pte_flags);
668 ret = 0;
669 } else {
670 /* IO access: no mapping is done as it will be handled by the
671 soft MMU */
672 ret = 2;
674 } else {
675 if (is_softmmu) {
676 /* XXX: dirty ram support */
677 /* XXX: rom support */
678 TLBEntry *e;
679 unsigned long vaddr1;
680 int slot;
681 void *ptr;
682 slot = (vaddr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1);
683 e = &s->soft_tlb[slot];
684 vaddr1 = vaddr & PAGE_MASK;
685 if (prot & PAGE_KREAD)
686 e->vaddr[0] = vaddr1;
687 else
688 e->vaddr[0] = -1;
689 if (prot & PAGE_KWRITE)
690 e->vaddr[1] = vaddr1;
691 else
692 e->vaddr[1] = -1;
693 if (prot & PAGE_UREAD)
694 e->vaddr[2] = vaddr1;
695 else
696 e->vaddr[2] = -1;
697 if (prot & PAGE_UWRITE)
698 e->vaddr[3] = vaddr1;
699 else
700 e->vaddr[3] = -1;
701 ptr = get_ram_ptr(s, slot, pd & PAGE_MASK);
702 e->addend = (unsigned long)ptr - vaddr1;
703 #ifdef DEBUG_SOFT_TLB
704 monitor_log(s, "tlb_set_page: vaddr=%p paddr=%p prot=0x%02x s=%d\n",
705 (void *)vaddr, (void *)paddr, prot, is_softmmu);
706 #endif
707 ret = 0;
708 } else if ((vaddr - s->monitor_vaddr) < MONITOR_MEM_SIZE) {
709 ret = 2;
710 } else {
711 pte_flags = PG_PRESENT_MASK | PG_USER_MASK |
712 PG_ACCESSED_MASK | PG_DIRTY_MASK;
713 #ifdef USE_USER_PG_GLOBAL
714 /* user pages are marked as global to stay in TLB when
715 switching to kernel mode */
716 /* XXX: check WP bit or ensure once that WP is set in
717 kqemu */
718 if (prot & PAGE_UREAD)
719 pte_flags |= PG_GLOBAL(s);
720 #endif
721 is_user = (s->cpu_state.cpl == 3);
722 if (is_user)
723 mask = PAGE_UWRITE;
724 else
725 mask = PAGE_KWRITE;
726 if (prot & mask) {
727 pte_flags |= PG_ORIG_RW_MASK | PG_RW_MASK;
728 if ((pd & ~PAGE_MASK) == KQEMU_IO_MEM_ROM ||
729 ((pd & ~PAGE_MASK) == KQEMU_IO_MEM_RAM &&
730 !ram_is_dirty(s, pd))) {
731 pte_flags &= ~PG_RW_MASK;
734 rp = lock_ram_page(s, pd & PAGE_MASK);
735 map_ram_page(s, is_user, vaddr, rp, pte_flags);
736 ret = 0;
739 return ret;
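/* To summarize tlb_set_page(): the kqemu communication page is wired
   directly into the current page table; I/O pages are left to the
   soft MMU (return 2); in soft MMU mode one entry of s->soft_tlb is
   filled with the tagged virtual address and the host/guest address
   delta; otherwise the ram page is locked and mapped into the user or
   kernel address space (as_index = is_user), with PG_RW_MASK cleared
   but PG_ORIG_RW_MASK kept when the page must stay write-protected
   for dirty tracking. */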
742 /* return value:
743 0 = nothing more to do
744 1 = generate PF fault
745 2 = soft MMU activation required for this block
747 long cpu_x86_handle_mmu_fault(struct kqemu_state *s, unsigned long addr,
748 int is_write, int is_user, int is_softmmu)
750 struct kqemu_cpu_state *env = &s->cpu_state;
751 uint32_t pdpe_addr, pde_addr, pte_addr;
752 uint32_t pde, pte, ptep, pdpe;
753 int error_code, is_dirty, prot, page_size;
754 unsigned long paddr, page_offset;
755 unsigned long vaddr, virt_addr;
756 long ret;
758 #ifdef DEBUG_MMU
759 monitor_log(s, "mmu_fault: addr=%08lx w=%d u=%d s=%d\n",
760 addr, is_write, is_user, is_softmmu);
761 #endif
763 is_write &= 1;
765 if (!(env->cr0 & CR0_PG_MASK)) {
766 pte = addr;
767 virt_addr = addr & PAGE_MASK;
768 prot = PAGE_KREAD | PAGE_KWRITE | PAGE_UREAD | PAGE_UWRITE;
769 page_size = 4096;
770 goto do_mapping;
774 if (env->cr4 & CR4_PAE_MASK) {
775 /* XXX: we only use 32 bit physical addresses */
776 #ifdef __x86_64__
777 if (env->efer & MSR_EFER_LMA) {
778 uint32_t pml4e_addr, pml4e;
779 int32_t sext;
781 /* XXX: handle user + rw rights */
782 /* XXX: handle NX flag */
783 /* test virtual address sign extension */
784 sext = (int64_t)addr >> 47;
785 if (sext != 0 && sext != -1) {
786 error_code = 0;
787 goto do_fault;
790 pml4e_addr = ((env->cr3 & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) &
791 env->a20_mask;
792 pml4e = ldl_phys_mmu(s, pml4e_addr);
793 if (!(pml4e & PG_PRESENT_MASK)) {
794 error_code = 0;
795 goto do_fault;
797 if (!(pml4e & PG_ACCESSED_MASK)) {
798 pml4e |= PG_ACCESSED_MASK;
799 stl_phys_mmu(s, pml4e_addr, pml4e);
802 pdpe_addr = ((pml4e & ~0xfff) + (((addr >> 30) & 0x1ff) << 3)) &
803 env->a20_mask;
804 pdpe = ldl_phys_mmu(s, pdpe_addr);
805 if (!(pdpe & PG_PRESENT_MASK)) {
806 error_code = 0;
807 goto do_fault;
809 if (!(pdpe & PG_ACCESSED_MASK)) {
810 pdpe |= PG_ACCESSED_MASK;
811 stl_phys_mmu(s, pdpe_addr, pdpe);
813 } else
814 #endif
816 pdpe_addr = ((env->cr3 & ~0x1f) + ((addr >> 30) << 3)) &
817 env->a20_mask;
818 pdpe = ldl_phys_mmu(s, pdpe_addr);
819 if (!(pdpe & PG_PRESENT_MASK)) {
820 error_code = 0;
821 goto do_fault;
825 pde_addr = ((pdpe & ~0xfff) + (((addr >> 21) & 0x1ff) << 3)) &
826 env->a20_mask;
827 pde = ldl_phys_mmu(s, pde_addr);
828 if (!(pde & PG_PRESENT_MASK)) {
829 error_code = 0;
830 goto do_fault;
832 if (pde & PG_PSE_MASK) {
833 /* 2 MB page */
834 page_size = 2048 * 1024;
835 goto handle_big_page;
836 } else {
837 /* 4 KB page */
838 if (!(pde & PG_ACCESSED_MASK)) {
839 pde |= PG_ACCESSED_MASK;
840 stl_phys_mmu(s, pde_addr, pde);
842 pte_addr = ((pde & ~0xfff) + (((addr >> 12) & 0x1ff) << 3)) &
843 env->a20_mask;
844 goto handle_4k_page;
846 } else {
847 /* page directory entry */
848 pde_addr = ((env->cr3 & ~0xfff) + ((addr >> 20) & ~3)) &
849 env->a20_mask;
850 pde = ldl_phys_mmu(s, pde_addr);
851 if (!(pde & PG_PRESENT_MASK)) {
852 error_code = 0;
853 goto do_fault;
855 /* if PSE bit is set, then we use a 4MB page */
856 if ((pde & PG_PSE_MASK) && (env->cr4 & CR4_PSE_MASK)) {
857 page_size = 4096 * 1024;
858 handle_big_page:
859 if (is_user) {
860 if (!(pde & PG_USER_MASK))
861 goto do_fault_protect;
862 if (is_write && !(pde & PG_RW_MASK))
863 goto do_fault_protect;
864 } else {
865 if ((env->cr0 & CR0_WP_MASK) &&
866 is_write && !(pde & PG_RW_MASK))
867 goto do_fault_protect;
869 is_dirty = is_write && !(pde & PG_DIRTY_MASK);
870 if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
871 pde |= PG_ACCESSED_MASK;
872 if (is_dirty)
873 pde |= PG_DIRTY_MASK;
874 stl_phys_mmu(s, pde_addr, pde);
877 pte = pde & ~( (page_size - 1) & ~0xfff); /* align to page_size */
878 ptep = pte;
879 virt_addr = addr & ~(page_size - 1);
880 } else {
881 if (!(pde & PG_ACCESSED_MASK)) {
882 pde |= PG_ACCESSED_MASK;
883 stl_phys_mmu(s, pde_addr, pde);
886 /* page table entry */
887 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
888 env->a20_mask;
889 handle_4k_page:
890 pte = ldl_phys_mmu(s, pte_addr);
891 if (!(pte & PG_PRESENT_MASK)) {
892 error_code = 0;
893 goto do_fault;
895 /* combine pde and pte user and rw protections */
896 ptep = pte & pde;
897 if (is_user) {
898 if (!(ptep & PG_USER_MASK))
899 goto do_fault_protect;
900 if (is_write && !(ptep & PG_RW_MASK))
901 goto do_fault_protect;
902 } else {
903 if ((env->cr0 & CR0_WP_MASK) &&
904 is_write && !(ptep & PG_RW_MASK))
905 goto do_fault_protect;
907 is_dirty = is_write && !(pte & PG_DIRTY_MASK);
908 if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
909 pte |= PG_ACCESSED_MASK;
910 if (is_dirty)
911 pte |= PG_DIRTY_MASK;
912 stl_phys_mmu(s, pte_addr, pte);
914 page_size = 4096;
915 virt_addr = addr & ~0xfff;
918 /* the page can be put in the TLB */
919 prot = PAGE_KREAD;
920 if (ptep & PG_USER_MASK)
921 prot |= PAGE_UREAD;
922 if (pte & PG_DIRTY_MASK) {
923 /* only set write access if already dirty... otherwise wait
924 for dirty access */
925 if (ptep & PG_USER_MASK) {
926 if (ptep & PG_RW_MASK)
927 prot |= PAGE_UWRITE;
929 if (!(env->cr0 & CR0_WP_MASK) ||
930 (ptep & PG_RW_MASK))
931 prot |= PAGE_KWRITE;
934 do_mapping:
935 pte = pte & env->a20_mask;
937 /* Even for 4MB pages, we map only one 4KB page in the cache to
938 avoid filling it too fast */
939 page_offset = (addr & PAGE_MASK) & (page_size - 1);
940 paddr = (pte & PAGE_MASK) + page_offset;
941 vaddr = virt_addr + page_offset;
943 ret = tlb_set_page(s, vaddr, paddr, prot, is_softmmu);
944 return ret;
946 do_fault_protect:
947 error_code = PG_ERROR_P_MASK;
948 do_fault:
949 env->cr2 = addr;
950 env->error_code = (is_write << PG_ERROR_W_BIT) | error_code;
951 if (is_user)
952 env->error_code |= PG_ERROR_U_MASK;
953 return 1;
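/* The walk above mirrors QEMU's cpu_x86_handle_mmu_fault(): it
   follows the guest page tables in software (legacy 2-level, PAE and,
   on x86_64, long mode, including 4MB/2MB large pages), sets the
   accessed and dirty bits through stl_phys_mmu(), converts the
   combined pde/pte protections into PAGE_[KU]{READ,WRITE} flags
   (write access is only granted once the guest PTE is already dirty)
   and finally asks tlb_set_page() to install the mapping.  Only
   32-bit guest physical addresses are handled here. */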
956 static void soft_tlb_fill(struct kqemu_state *s, unsigned long vaddr,
957 int is_write, int is_user)
959 long ret;
960 #ifdef PROFILE_SOFTMMU
961 int ti;
962 ti = getclock();
963 #endif
964 ret = cpu_x86_handle_mmu_fault(s, vaddr, is_write, is_user, 1);
965 #ifdef PROFILE_SOFTMMU
966 ti = getclock() - ti;
967 monitor_log(s, "soft_tlb_fill: w=%d u=%d addr=%p cycle=%d\n",
968 is_write, is_user, (void *)vaddr, ti);
969 #endif
970 if (ret == 1)
971 raise_exception(s, EXCP0E_PAGE);
972 else if (ret == 2)
973 raise_exception(s, KQEMU_RET_SOFTMMU);
976 static void *map_vaddr(struct kqemu_state *s, unsigned long addr,
977 int is_write, int is_user)
979 TLBEntry *e;
980 unsigned long taddr;
982 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
983 redo:
984 if (e->vaddr[(is_user << 1) + is_write] != (addr & PAGE_MASK)) {
985 soft_tlb_fill(s, addr, is_write, is_user);
986 goto redo;
987 } else {
988 taddr = e->addend + addr;
990 return (void *)taddr;
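/* Soft TLB layout: each TLBEntry caches one page, e->vaddr[] is
   indexed by (is_user << 1) + is_write and holds the page-aligned
   virtual address valid for that access type, and e->addend is the
   difference between the mapped host pointer and the guest virtual
   address.  The wider accessors below compare against
   addr & (PAGE_MASK | (size - 1)) so that one test catches both a TLB
   miss and a misaligned address: for a 32-bit load at an address
   whose low two bits are not zero, the comparison fails even when the
   page tag matches, which routes the access through the
   unaligned/page-crossing slow path. */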
993 uint32_t ldub_slow(struct kqemu_state *s, unsigned long addr,
994 int is_user)
996 TLBEntry *e;
997 uint32_t val;
998 unsigned long taddr;
1000 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1001 redo:
1002 if (unlikely(e->vaddr[(is_user << 1)] != (addr & PAGE_MASK))) {
1003 soft_tlb_fill(s, addr, 0, is_user);
1004 goto redo;
1005 } else {
1006 taddr = e->addend + addr;
1007 val = *(uint8_t *)taddr;
1009 return val;
1012 uint32_t lduw_slow(struct kqemu_state *s, unsigned long addr,
1013 int is_user)
1015 TLBEntry *e;
1016 uint32_t val;
1017 unsigned long taddr;
1019 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1020 redo:
1021 if (unlikely(e->vaddr[(is_user << 1)] != (addr & (PAGE_MASK | 1)))) {
1022 if (e->vaddr[(is_user << 1)] == (addr & PAGE_MASK)) {
1023 /* unaligned access */
1024 if (((addr + 1) & PAGE_MASK) == (addr & PAGE_MASK)) {
1025 goto access_ok;
1026 } else {
1027 uint32_t v0, v1;
1028 /* access spans two pages (rare case) */
1029 v0 = ldub_slow(s, addr, is_user);
1030 v1 = ldub_slow(s, addr + 1, is_user);
1031 val = v0 | (v1 << 8);
1033 } else {
1034 soft_tlb_fill(s, addr, 0, is_user);
1035 goto redo;
1037 } else {
1038 access_ok:
1039 taddr = e->addend + addr;
1040 val = *(uint16_t *)taddr;
1042 return val;
1045 uint32_t ldl_slow(struct kqemu_state *s, unsigned long addr,
1046 int is_user)
1048 TLBEntry *e;
1049 uint32_t val;
1050 unsigned long taddr;
1052 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1053 redo:
1054 if (unlikely(e->vaddr[(is_user << 1)] != (addr & (PAGE_MASK | 3)))) {
1055 if (e->vaddr[(is_user << 1)] == (addr & PAGE_MASK)) {
1056 /* unaligned access */
1057 if (((addr + 3) & PAGE_MASK) == (addr & PAGE_MASK)) {
1058 goto access_ok;
1059 } else {
1060 uint32_t v0, v1;
1061 int shift;
1062 /* access spans two pages (rare case) */
1063 shift = (addr & 3) * 8;
1064 addr &= ~3;
1065 v0 = ldl_slow(s, addr, is_user);
1066 v1 = ldl_slow(s, addr + 4, is_user);
1067 val = (v0 >> shift) | (v1 << (32 - shift));
1069 } else {
1070 soft_tlb_fill(s, addr, 0, is_user);
1071 goto redo;
1073 } else {
1074 access_ok:
1075 taddr = e->addend + addr;
1076 val = *(uint32_t *)taddr;
1078 return val;
1081 uint64_t ldq_slow(struct kqemu_state *s, unsigned long addr,
1082 int is_user)
1084 TLBEntry *e;
1085 uint64_t val;
1086 unsigned long taddr;
1088 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1089 redo:
1090 if (unlikely(e->vaddr[(is_user << 1)] != (addr & (PAGE_MASK | 7)))) {
1091 if (e->vaddr[(is_user << 1)] == (addr & PAGE_MASK)) {
1092 /* unaligned access */
1093 if (((addr + 7) & PAGE_MASK) == (addr & PAGE_MASK)) {
1094 goto access_ok;
1095 } else {
1096 uint64_t v0, v1;
1097 int shift;
1098 /* access spans two pages (rare case) */
1099 shift = (addr & 7) * 8;
1100 addr &= ~7;
1101 v0 = ldq_slow(s, addr, is_user);
1102 v1 = ldq_slow(s, addr + 8, is_user);
1103 val = (v0 >> shift) | (v1 << (64 - shift));
1105 } else {
1106 soft_tlb_fill(s, addr, 0, is_user);
1107 goto redo;
1109 } else {
1110 access_ok:
1111 taddr = e->addend + addr;
1112 val = *(uint64_t *)taddr;
1114 return val;
1117 void stb_slow(struct kqemu_state *s, unsigned long addr,
1118 uint32_t val, int is_user)
1120 TLBEntry *e;
1121 unsigned long taddr;
1123 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1124 redo:
1125 if (unlikely(e->vaddr[(is_user << 1) + 1] != (addr & PAGE_MASK))) {
1126 soft_tlb_fill(s, addr, 1, is_user);
1127 goto redo;
1128 } else {
1129 taddr = e->addend + addr;
1130 *(uint8_t *)taddr = val;
1134 void stw_slow(struct kqemu_state *s, unsigned long addr,
1135 uint32_t val, int is_user)
1137 TLBEntry *e;
1138 unsigned long taddr;
1140 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1141 redo:
1142 if (unlikely(e->vaddr[(is_user << 1) + 1] != (addr & (PAGE_MASK | 1)))) {
1143 if (e->vaddr[(is_user << 1) + 1] == (addr & PAGE_MASK)) {
1144 /* unaligned access */
1145 if (((addr + 1) & PAGE_MASK) == (addr & PAGE_MASK)) {
1146 goto access_ok;
1147 } else {
1148 /* access spans two pages (rare case) */
1149 stb_slow(s, addr, val, is_user);
1150 stb_slow(s, addr + 1, val >> 8, is_user);
1152 } else {
1153 soft_tlb_fill(s, addr, 1, is_user);
1154 goto redo;
1156 } else {
1157 access_ok:
1158 taddr = e->addend + addr;
1159 *(uint16_t *)taddr = val;
1163 void stl_slow(struct kqemu_state *s, unsigned long addr,
1164 uint32_t val, int is_user)
1166 TLBEntry *e;
1167 unsigned long taddr;
1169 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1170 redo:
1171 if (unlikely(e->vaddr[(is_user << 1) + 1] != (addr & (PAGE_MASK | 3)))) {
1172 if (e->vaddr[(is_user << 1) + 1] == (addr & PAGE_MASK)) {
1173 /* unaligned access */
1174 if (((addr + 3) & PAGE_MASK) == (addr & PAGE_MASK)) {
1175 goto access_ok;
1176 } else {
1177 /* access spans two pages (rare case) */
1178 stb_slow(s, addr, val, is_user);
1179 stb_slow(s, addr + 1, val >> 8, is_user);
1180 stb_slow(s, addr + 2, val >> 16, is_user);
1181 stb_slow(s, addr + 3, val >> 24, is_user);
1183 } else {
1184 soft_tlb_fill(s, addr, 1, is_user);
1185 goto redo;
1187 } else {
1188 access_ok:
1189 taddr = e->addend + addr;
1190 *(uint32_t *)taddr = val;
1194 void stq_slow(struct kqemu_state *s, unsigned long addr,
1195 uint64_t val, int is_user)
1197 TLBEntry *e;
1198 unsigned long taddr;
1200 e = &s->soft_tlb[(addr >> PAGE_SHIFT) & (SOFT_TLB_SIZE - 1)];
1201 redo:
1202 if (unlikely(e->vaddr[(is_user << 1) + 1] != (addr & (PAGE_MASK | 7)))) {
1203 if (e->vaddr[(is_user << 1) + 1] == (addr & PAGE_MASK)) {
1204 /* unaligned access */
1205 if (((addr + 7) & PAGE_MASK) == (addr & PAGE_MASK)) {
1206 goto access_ok;
1207 } else {
1208 /* access spans two pages (rare case) */
1209 stb_slow(s, addr, val, is_user);
1210 stb_slow(s, addr + 1, val >> 8, is_user);
1211 stb_slow(s, addr + 2, val >> 16, is_user);
1212 stb_slow(s, addr + 3, val >> 24, is_user);
1213 stb_slow(s, addr + 4, val >> 32, is_user);
1214 stb_slow(s, addr + 5, val >> 40, is_user);
1215 stb_slow(s, addr + 6, val >> 48, is_user);
1216 stb_slow(s, addr + 7, val >> 56, is_user);
1218 } else {
1219 soft_tlb_fill(s, addr, 1, is_user);
1220 goto redo;
1222 } else {
1223 access_ok:
1224 taddr = e->addend + addr;
1225 *(uint64_t *)taddr = val;
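/* Monitor instructions that are allowed to fault (the soft MMU
   accessors above, for instance) presumably record their PC in the
   mmu_ex_table section; __start_mmu_ex_table and __stop_mmu_ex_table
   are the linker-provided bounds of that section.  The table is
   sorted lazily on first use and then binary-searched in
   expected_monitor_exception(). */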
1229 extern unsigned long __start_mmu_ex_table;
1230 extern unsigned long __stop_mmu_ex_table;
1231 int sorted = 0;
1233 void lsort(unsigned long *tab, int n)
1235 int i, j;
1236 unsigned long tmp;
1238 for(i = 0; i < n - 1; i++) {
1239 for(j = i + 1; j < n;j++) {
1240 if (tab[i] > tab[j]) {
1241 tmp = tab[i];
1242 tab[i] = tab[j];
1243 tab[j] = tmp;
1247 #if 0
1248 for(i = 0; i < n - 1; i++) {
1249 if (tab[i] > tab[i + 1])
1250 asm volatile("ud2");
1252 #endif
1255 static int expected_monitor_exception(unsigned long pc)
1257 unsigned long *tab, v;
1258 int a, b, m;
1259 if (unlikely(!sorted)) {
1260 lsort(&__start_mmu_ex_table,
1261 &__stop_mmu_ex_table - &__start_mmu_ex_table);
1262 sorted = 1;
1265 tab = &__start_mmu_ex_table;
1266 a = 0;
1267 b = &__stop_mmu_ex_table - &__start_mmu_ex_table - 1;
1268 while (a <= b) {
1269 m = (a + b) >> 1;
1270 v = tab[m];
1271 if (v == pc)
1272 return 1;
1273 else if (v > pc) {
1274 b = m - 1;
1275 } else {
1276 a = m + 1;
1279 return 0;
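/* The page fault handler below distinguishes three situations: faults
   taken inside monitor code itself (only tolerated at the expected
   PCs listed above), write faults on pages that kqemu write-protected
   for dirty tracking (PG_ORIG_RW_MASK set but PG_RW_MASK clear, fixed
   up by marking the ram page dirty and restoring write access), and
   genuine guest MMU faults which go through
   cpu_x86_handle_mmu_fault(). */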
1282 /* page fault */
1283 void kqemu_exception_0e(struct kqemu_state *s,
1284 struct kqemu_exception_regs regs)
1286 unsigned long address;
1287 int is_write, is_user;
1288 long ret;
1289 #ifdef PROFILE_INTERP2
1290 int64_t ti;
1291 #endif
1292 asm volatile ("mov %%cr2, %0" : "=r" (address));
1293 #ifdef PROFILE_INTERP2
1294 ti = getclock();
1295 #endif
1297 if ((regs.cs_sel & 3) != 3) {
1298 if (!expected_monitor_exception(regs.eip)) {
1299 /* exception in monitor space - we may accept it someday if it
1300 is a user access indicated as such */
1301 monitor_panic_regs(s, &regs,
1302 "Paging exception in monitor address space. CR2=%p\n",
1303 (void *)address);
1305 /* do not reload s->regs because we are already in interpreter */
1306 s->seg_cache_loaded = 1;
1307 } else {
1308 s->regs = &regs;
1309 s->seg_cache_loaded = 0;
1311 is_write = (regs.error_code >> 1) & 1;
1312 #ifdef PROFILE_INTERP2
1313 s->total_page_fault_count++;
1314 #endif
1315 /* see if the page is write protected -> mark it dirty if needed */
1316 is_user = (s->cpu_state.cpl == 3);
1317 if (is_write && (regs.error_code & 1)) {
1318 uint32_t ram_index, *ptep;
1319 struct kqemu_ram_page *rp;
1320 int dirty_mask;
1322 /* get the original writable flag */
1323 if (USE_PAE(s)) {
1324 uint64_t pte;
1325 ptep = (uint32_t *)mon_get_ptep_l3(s, is_user, address, 0);
1326 if (!ptep)
1327 goto fail;
1328 pte = *(uint64_t *)ptep;
1329 if (!(pte & PG_PRESENT_MASK))
1330 goto fail;
1331 if (!(pte & PG_ORIG_RW_MASK))
1332 goto fail;
1333 rp = find_ram_page_from_paddr(s, pte >> PAGE_SHIFT);
1334 } else {
1335 uint32_t pte;
1336 ptep = mon_get_ptep_l2(s, is_user, address, 0);
1337 if (!ptep)
1338 goto fail;
1339 pte = *ptep;
1340 if (!(pte & PG_PRESENT_MASK))
1341 goto fail;
1342 if (!(pte & PG_ORIG_RW_MASK))
1343 goto fail;
1344 rp = find_ram_page_from_paddr(s, pte >> PAGE_SHIFT);
1346 if (!rp)
1347 goto fail;
1348 ram_index = rp - s->ram_pages;
1349 /* cannot write directly on GDT/LDT pages or in pages where
1350 code was translated */
1351 /* XXX: should revalidate or interpret the code to go faster */
1352 #ifdef USE_SEG_GP
1353 dirty_mask = 0;
1354 if (s->cpu_state.cpl == 3)
1355 dirty_mask |= DT_DIRTY_FLAG;
1356 #else
1357 dirty_mask = DT_DIRTY_FLAG;
1358 #endif
1359 if ((s->ram_dirty[ram_index] & dirty_mask) != dirty_mask) {
1360 raise_exception(s, KQEMU_RET_SOFTMMU);
1362 /* code updates need to be signaled */
1363 if ((s->ram_dirty[ram_index] & CODE_DIRTY_FLAG) !=
1364 CODE_DIRTY_FLAG) {
1365 s->modified_ram_pages[s->cpu_state.nb_modified_ram_pages++] =
1366 ram_index << PAGE_SHIFT;
1367 /* too many modified pages: exit */
1368 if (s->cpu_state.nb_modified_ram_pages >=
1369 KQEMU_MAX_MODIFIED_RAM_PAGES)
1370 raise_exception(s, KQEMU_RET_SOFTMMU);
1373 /* set the page as RW and mark the corresponding ram page as
1374 dirty */
1375 s->ram_dirty[ram_index] = 0xff;
1376 *ptep |= PG_RW_MASK;
1377 asm volatile("invlpg %0" : : "m" (*(uint8_t *)address));
1378 return;
1379 fail: ;
1382 #ifdef PROFILE_INTERP2
1383 s->mmu_page_fault_count++;
1384 #endif
1385 /* see if it is an MMU fault */
1386 ret = cpu_x86_handle_mmu_fault(s, address, is_write, is_user, 0);
1387 switch(ret) {
1388 case 0:
1389 #ifdef PROFILE_INTERP2
1390 if ((regs.cs_sel & 3) != 3)
1391 s->tlb_interp_page_fault_count++;
1392 s->tlb_page_fault_count++;
1393 s->tlb_page_fault_cycles += (getclock() - ti);
1394 #endif
1395 break;
1396 case 1:
1397 #ifdef PROFILE_INTERP2
1398 s->mmu_page_fault_cycles += (getclock() - ti);
1399 #endif
1400 /* real MMU fault */
1401 raise_exception(s, EXCP0E_PAGE);
1402 case 2:
1403 default:
1404 #ifdef PROFILE_INTERP2
1405 s->mmu_page_fault_cycles += (getclock() - ti);
1406 #endif
1407 /* cannot map: I/O */
1408 raise_exception(s, KQEMU_RET_SOFTMMU);
1412 /* exit the virtual cpu by raising an exception */
1413 void raise_exception(struct kqemu_state *s, int intno)
1415 /* XXX: the exclusion of exception GPF is needed for correct
1416 Windows XP boot. I don't know the precise explanation yet. */
1417 if (s->cpu_state.user_only || (unsigned int)intno >= 0x20 ||
1418 intno == 0x0d) {
1419 /* exit the monitor if user only */
1420 profile_record(s);
1421 s->mon_req = MON_REQ_EXIT;
1422 s->arg0 = intno;
1423 profile_record(s);
1424 monitor2kernel1(s);
1425 } else {
1426 s->arg0 = intno;
1427 start_func(raise_exception_interp, s,
1428 s->stack_end - sizeof(struct kqemu_exception_regs));
1430 /* never returns */
1431 while (1);
1434 void __raise_exception_err(struct kqemu_state *s,
1435 int intno, int error_code)
1437 s->cpu_state.error_code = error_code;
1438 raise_exception(s, intno);
1441 void do_update_cr3(struct kqemu_state *s, unsigned long new_cr3)
1443 if (s->cpu_state.cr0 & CR0_PG_MASK) {
1444 tlb_flush(s, 1);
1445 /* indicate that all the pages must be flushed in user space */
1446 s->cpu_state.nb_pages_to_flush = KQEMU_FLUSH_ALL;
1448 s->cpu_state.cr3 = new_cr3;
1451 #define CR0_UPDATE_MASK (CR0_TS_MASK | CR0_MP_MASK | CR0_EM_MASK | CR0_AM_MASK)
1453 void do_update_cr0(struct kqemu_state *s, unsigned long new_cr0)
1455 if ((new_cr0 & ~CR0_UPDATE_MASK) !=
1456 (s->cpu_state.cr0 & ~CR0_UPDATE_MASK))
1457 raise_exception(s, KQEMU_RET_SOFTMMU);
1458 if ((new_cr0 & CR0_UPDATE_MASK) !=
1459 (s->cpu_state.cr0 & CR0_UPDATE_MASK)) {
1460 s->cpu_state.cr0 = new_cr0;
1461 update_host_cr0(s);
1465 #define CR4_UPDATE_MASK (CR4_TSD_MASK | CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK)
1467 void do_update_cr4(struct kqemu_state *s, unsigned long new_cr4)
1469 if ((new_cr4 & ~CR4_UPDATE_MASK) !=
1470 (s->cpu_state.cr4 & ~CR4_UPDATE_MASK))
1471 raise_exception(s, KQEMU_RET_SOFTMMU);
1472 if ((new_cr4 & CR4_UPDATE_MASK) !=
1473 (s->cpu_state.cr4 & CR4_UPDATE_MASK)) {
1474 s->cpu_state.cr4 = new_cr4;
1475 update_host_cr4(s);
1479 void do_invlpg(struct kqemu_state *s, unsigned long vaddr)
1481 tlb_flush_page(s, vaddr);
1482 if (s->cpu_state.nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH) {
1483 s->cpu_state.nb_pages_to_flush = KQEMU_FLUSH_ALL;
1484 } else {
1485 s->pages_to_flush[s->cpu_state.nb_pages_to_flush++] = vaddr;
1489 extern unsigned long __start_seg_ex_table;
1490 extern unsigned long __stop_seg_ex_table;
1492 static void handle_mon_exception(struct kqemu_state *s,
1493 struct kqemu_exception_regs *regs,
1494 int intno)
1496 unsigned long pc, *p;
1498 pc = regs->eip;
1499 for(p = &__start_seg_ex_table; p != &__stop_seg_ex_table; p++) {
1500 if (*p == pc) goto found;
1502 monitor_panic_regs(s, regs,
1503 "Unexpected exception 0x%02x in monitor space\n",
1504 intno);
1505 found:
1506 if (intno == 0x00) {
1507 /* division exception from interp */
1508 /* XXX: verify for fxsave/fxrstor */
1509 s->regs = &s->regs1;
1510 } else {
1511 /* Note: the exception state is reliable only for goto_user
1512 handling */
1513 s->regs = NULL;
1515 raise_exception_err(s, intno, regs->error_code);
1518 #ifdef PROFILE_INTERP_PC
1519 static void profile_interp_add(struct kqemu_state *s,
1520 unsigned long eip,
1521 int64_t cycles,
1522 int insn_count)
1524 int h, idx;
1525 ProfileInterpEntry *pe;
1527 h = (eip ^ (eip >> PROFILE_INTERP_PC_HASH_BITS) ^
1528 (eip >> (2 * PROFILE_INTERP_PC_HASH_BITS))) &
1529 (PROFILE_INTERP_PC_HASH_SIZE - 1);
1530 idx = s->profile_interp_hash_table[h];
1531 while (idx != 0) {
1532 pe = &s->profile_interp_entries[idx - 1];
1533 if (pe->eip == eip)
1534 goto found;
1535 idx = pe->next;
1537 /* not found */
1538 if (s->nb_profile_interp_entries >= (PROFILE_INTERP_PC_NB_ENTRIES - 1)) {
1539 /* too many entries : use last entry */
1540 if (s->nb_profile_interp_entries < PROFILE_INTERP_PC_NB_ENTRIES)
1541 s->nb_profile_interp_entries++;
1542 pe = &s->profile_interp_entries[PROFILE_INTERP_PC_NB_ENTRIES - 1];
1543 } else {
1544 /* add one more entry */
1545 pe = &s->profile_interp_entries[s->nb_profile_interp_entries++];
1546 pe->next = s->profile_interp_hash_table[h];
1547 s->profile_interp_hash_table[h] = s->nb_profile_interp_entries;
1548 pe->eip = eip;
1550 found:
1551 pe->count++;
1552 pe->cycles += cycles;
1553 pe->insn_count += insn_count;
1555 #endif
1557 static inline void kqemu_exception_interp(struct kqemu_state *s, int intno,
1558 struct kqemu_exception_regs *regs)
1560 #ifdef PROFILE_INTERP2
1561 int64_t ti0, ti1, ti2;
1562 int c1;
1563 unsigned long start_eip;
1564 ti0 = getclock();
1565 #endif
1566 if ((regs->cs_sel & 3) != 3)
1567 handle_mon_exception(s, regs, intno);
1569 profile_record(s);
1571 s->regs = regs;
1573 profile_record(s);
1574 update_seg_cache(s);
1575 #ifdef PROFILE_INTERP2
1576 ti1 = getclock();
1577 c1 = s->insn_count;
1578 start_eip = s->regs1.eip;
1579 #endif
1581 insn_interp(s);
1582 #ifdef PROFILE_INTERP2
1583 ti2 = getclock();
1584 s->exc_interp_count++;
1585 s->exc_seg_cycles += ti1 - ti0;
1586 s->exc_interp_cycles += ti2 - ti1;
1587 c1 -= s->insn_count;
1588 s->exc_insn_count += c1;
1589 if (c1 > s->exc_insn_count_max) {
1590 s->exc_insn_count_max = c1;
1591 s->exc_start_eip_max = start_eip;
1593 #ifdef PROFILE_INTERP_PC
1594 profile_interp_add(s, start_eip, ti2 - ti0, c1 + 1);
1595 #endif
1596 #endif
1599 /* XXX: remove L bit on x86_64 in legacy emulation ? */
1600 static void check_dt_entries(uint8_t *d, const uint8_t *s, int n)
1602 int i;
1603 uint32_t e1, e2;
1604 for(i = 0; i < n; i++) {
1605 e1 = ((uint32_t *)s)[0];
1606 e2 = ((uint32_t *)s)[1];
1607 if (!(e2 & DESC_S_MASK)) {
1608 /* not a segment: reset DPL to ensure it cannot be used
1609 from user space */
1610 e2 &= ~(3 << DESC_DPL_SHIFT);
1611 #ifndef USE_SEG_GP
1612 ((uint32_t *)d)[32768 * 0 + 0] = e1; /* CPL = 0 */
1613 ((uint32_t *)d)[32768 * 0 + 1] = e2;
1614 ((uint32_t *)d)[32768 * 1 + 0] = e1; /* CPL = 1 */
1615 ((uint32_t *)d)[32768 * 1 + 1] = e2;
1616 ((uint32_t *)d)[32768 * 2 + 0] = e1; /* CPL = 2 */
1617 ((uint32_t *)d)[32768 * 2 + 1] = e2;
1618 #endif
1619 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 0] = e1; /* CPL = 3 */
1620 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 1] = e2;
1621 } else if (unlikely(((e2 & (DESC_CS_MASK | DESC_C_MASK)) ==
1622 (DESC_CS_MASK | DESC_C_MASK)))) {
1623 /* conforming segment : no need to modify */
1624 #ifndef USE_SEG_GP
1625 ((uint32_t *)d)[32768 * 0 + 0] = e1; /* CPL = 0 */
1626 ((uint32_t *)d)[32768 * 0 + 1] = e2;
1627 ((uint32_t *)d)[32768 * 1 + 0] = e1; /* CPL = 1 */
1628 ((uint32_t *)d)[32768 * 1 + 1] = e2;
1629 ((uint32_t *)d)[32768 * 2 + 0] = e1; /* CPL = 2 */
1630 ((uint32_t *)d)[32768 * 2 + 1] = e2;
1631 #endif
1632 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 0] = e1; /* CPL = 3 */
1633 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 1] = e2;
1634 } else {
1635 #ifndef USE_SEG_GP
1636 int dpl;
1637 uint32_t e2tmp, e2dpl3;
1639 dpl = (e2 >> DESC_DPL_SHIFT) & 3;
1640 /* standard segment: need to patch the DPL so that
1641 if (DPL >= CPL) then DPL = 3
1643 e2dpl3 = e2 | (3 << DESC_DPL_SHIFT);
1644 ((uint32_t *)d)[32768 * 0 + 0] = e1; /* CPL = 0 */
1645 ((uint32_t *)d)[32768 * 0 + 1] = e2dpl3;
1647 e2tmp = e2;
1648 if (dpl >= 1)
1649 e2tmp = e2dpl3;
1650 ((uint32_t *)d)[32768 * 1 + 0] = e1; /* CPL = 1 */
1651 ((uint32_t *)d)[32768 * 1 + 1] = e2tmp;
1653 e2tmp = e2;
1654 if (dpl >= 2)
1655 e2tmp = e2dpl3;
1656 ((uint32_t *)d)[32768 * 2 + 0] = e1; /* CPL = 2 */
1657 ((uint32_t *)d)[32768 * 2 + 1] = e2tmp;
1658 #endif
1659 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 0] = e1; /* CPL = 3 */
1660 ((uint32_t *)d)[32768 * (NB_DT_TABLES - 1) + 1] = e2;
1663 s += 8;
1664 d += 8;
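/* s->dt_table holds sanitized shadow copies of the guest GDT and LDT;
   the 32768 * n indexing above apparently steps between the
   per-privilege-level copies kept when USE_SEG_GP is not defined
   (only the CPL 3 copy is maintained otherwise).  While copying,
   system descriptors get DPL 0 so user code cannot load them,
   conforming code segments are left unchanged, and for ordinary
   segments the DPL is raised to 3 in each copy whose CPL is <= DPL,
   so the guest can still load its own segments while physically
   running at ring 3. */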
1668 static void check_dt_entries_page(struct kqemu_state *s, int dt_type,
1669 int sel, int sel_end, const uint8_t *src)
1671 uint8_t *dt;
1672 int mon_sel_start, mon_sel_end, sel1, sel2;
1674 dt = (uint8_t *)(s->dt_table + (dt_type * 8192));
1675 if (dt_type == 0) {
1676 mon_sel_start = s->monitor_selector_base;
1677 mon_sel_end = s->monitor_selector_base + MONITOR_SEL_RANGE;
1678 sel1 = sel;
1679 while (sel1 < sel_end) {
1680 if (sel1 >= mon_sel_start && sel1 < mon_sel_end)
1681 sel1 = mon_sel_end;
1682 if (sel1 < mon_sel_start) {
1683 sel2 = mon_sel_start;
1684 if (sel2 > sel_end)
1685 sel2 = sel_end;
1686 } else {
1687 sel2 = sel_end;
1689 if (sel1 >= sel2)
1690 break;
1691 #ifdef DEBUG_DT_CACHE
1692 monitor_log(s, "check_dt: type=%d sel=%d-%d\n",
1693 dt_type, sel1, sel2);
1694 #endif
1695 check_dt_entries(dt + sel1,
1696 src + sel1 - sel, (sel2 - sel1) >> 3);
1697 sel1 = sel2;
1699 } else {
1700 #ifdef DEBUG_DT_CACHE
1701 monitor_log(s, "check_dt: type=%d sel=%d-%d\n",
1702 dt_type, sel, sel_end);
1703 #endif
1704 check_dt_entries(dt + sel, src, (sel_end - sel) >> 3);
1708 static void reset_dt_entries2(void *dt1, int n)
1710 uint32_t *dt = dt1;
1711 #ifndef USE_SEG_GP
1712 memset(dt + 32768 * 0, 0, n);
1713 memset(dt + 32768 * 1, 0, n);
1714 memset(dt + 32768 * 2, 0, n);
1715 #endif
1716 memset(dt + 32768 * (NB_DT_TABLES - 1), 0, n);
1719 static void reset_dt_entries(struct kqemu_state *s, int dt_type,
1720 int sel, int sel_end)
1722 uint8_t *dt;
1723 int mon_sel_start, mon_sel_end, sel1, sel2;
1725 dt = (uint8_t *)(s->dt_table + (dt_type * 8192));
1726 if (dt_type == 0) {
1727 mon_sel_start = s->monitor_selector_base;
1728 mon_sel_end = s->monitor_selector_base + MONITOR_SEL_RANGE;
1729 sel1 = sel;
1730 while (sel1 < sel_end) {
1731 if (sel1 >= mon_sel_start && sel1 < mon_sel_end)
1732 sel1 = mon_sel_end;
1733 if (sel1 < mon_sel_start) {
1734 sel2 = mon_sel_start;
1735 if (sel2 > sel_end)
1736 sel2 = sel_end;
1737 } else {
1738 sel2 = sel_end;
1740 if (sel1 >= sel2)
1741 break;
1742 #ifdef DEBUG_DT_CACHE
1743 monitor_log(s, "reset_dt: type=%d sel=%d-%d\n",
1744 dt_type, sel1, sel2);
1745 #endif
1746 reset_dt_entries2(dt + sel1, sel2 - sel1);
1747 sel1 = sel2;
1749 } else {
1750 #ifdef DEBUG_DT_CACHE
1751 monitor_log(s, "reset_dt: type=%d sel=%d-%d\n",
1752 dt_type, sel, sel_end);
1753 #endif
1754 reset_dt_entries2(dt + sel, sel_end - sel);
1758 /* Note: this function can raise an exception in case of MMU fault or
1759 unaligned DT table */
1760 static void update_dt_cache(struct kqemu_state *s, int dt_type)
1762 unsigned long base, dt_end, page_end, dt_ptr, ram_addr;
1763 uint32_t limit;
1764 uint8_t *ptr;
1765 int pindex, sel, sel_end, dt_changed, sel2;
1767 if (dt_type) {
1768 /* XXX: check the exact behaviour of zero LDT */
1769 if ((s->cpu_state.ldt.selector & 0xfffc) == 0) {
1770 base = 0;
1771 limit = 0;
1772 } else {
1773 base = s->cpu_state.ldt.base;
1774 limit = s->cpu_state.ldt.limit;
1776 } else {
1777 base = s->cpu_state.gdt.base;
1778 limit = s->cpu_state.gdt.limit;
1780 dt_changed = (base != s->dt_base[dt_type] ||
1781 limit != s->dt_limit[dt_type]);
1783 sel_end = (limit + 1) & ~7;
1784 dt_end = base + sel_end;
1785 if (dt_end < base || (base & 7) != 0)
1786 raise_exception(s, KQEMU_RET_SOFTMMU);
1788 pindex = 0;
1789 sel = 0;
1790 while (sel < sel_end) {
1791 dt_ptr = base + sel;
1792 page_end = (dt_ptr & PAGE_MASK) + PAGE_SIZE;
1793 if (page_end > dt_end)
1794 page_end = dt_end;
1795 sel2 = sel + (page_end - dt_ptr);
1796 ptr = map_vaddr(s, dt_ptr, 0, 0);
1797 ram_addr = ram_ptr_to_ram_addr(s, ptr);
1798 if (dt_changed ||
1799 s->dt_ram_addr[dt_type][pindex] != ram_addr ||
1800 ram_get_dirty(s, ram_addr, DT_DIRTY_FLAG)) {
1801 s->dt_ram_addr[dt_type][pindex] = ram_addr;
1802 check_dt_entries_page(s, dt_type, sel, sel2, ptr);
1803 ram_reset_dirty(s, ram_addr, DT_DIRTY_FLAG);
1805 sel = sel2;
1806 pindex++;
1809 /* reset the remaining DT entries up to the last limit */
1810 sel_end = (s->dt_limit[dt_type] + 1) & ~7;
1811 if (sel < sel_end)
1812 reset_dt_entries(s, dt_type, sel, sel_end);
1814 s->dt_base[dt_type] = base;
1815 s->dt_limit[dt_type] = limit;
1818 void update_gdt_ldt_cache(struct kqemu_state *s)
1820 update_dt_cache(s, 0);
1821 update_dt_cache(s, 1);
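/* monitor_exec() is run each time the host side re-enters the
   monitor: it resynchronizes CR0/CR4 and the nexus mapping, applies
   the TLB flushes and ram page updates queued by QEMU in the shared
   CPU state, refreshes the GDT/LDT shadow tables, rebuilds the
   exception register frame from s->cpu_state and drops to guest code
   with goto_user(); if virtual interrupts are masked it starts the
   interpreter instead. */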
1824 void monitor_exec(struct kqemu_state *s)
1826 struct kqemu_cpu_state *env = &s->cpu_state;
1827 struct kqemu_exception_regs *r =
1828 (void *)(s->stack_end - sizeof(struct kqemu_exception_regs));
1829 #ifdef PROFILE_INTERP2
1830 int64_t ti = getclock();
1831 #endif
1832 update_host_cr0(s);
1834 update_host_cr4(s);
1836 restore_monitor_nexus_mapping(s);
1838 s->regs = NULL;
1840 /* if max_locked_ram_pages was modified because some instances
1841 were added, we unlock some pages here */
1842 unlock_pages(s);
1844 /* first we flush the pages if needed */
1845 if (env->nb_pages_to_flush != 0) {
1846 if (env->nb_pages_to_flush > KQEMU_MAX_PAGES_TO_FLUSH) {
1847 tlb_flush(s, 1);
1848 } else {
1849 int i;
1850 for(i = 0; i < env->nb_pages_to_flush; i++) {
1851 tlb_flush_page(s, s->pages_to_flush[i]);
1854 env->nb_pages_to_flush = 0;
1857 /* XXX: invalidate modified ram pages */
1858 env->nb_modified_ram_pages = 0;
1860 /* unmap pages corresponding to notdirty ram pages */
1861 if (env->nb_ram_pages_to_update != 0) {
1862 unsigned long ram_addr;
1863 int i;
1865 if (env->nb_ram_pages_to_update > KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
1866 for(ram_addr = 0; ram_addr < s->ram_size; ram_addr += PAGE_SIZE) {
1867 if (!ram_is_dirty(s, ram_addr)) {
1868 ram_set_read_only(s, ram_addr);
1871 } else {
1872 for(i = 0; i < env->nb_ram_pages_to_update; i++) {
1873 ram_addr = s->ram_pages_to_update[i];
1874 if (ram_addr < s->ram_size &&
1875 !ram_is_dirty(s, ram_addr)) {
1876 ram_set_read_only(s, ram_addr);
1880 env->nb_ram_pages_to_update = 0;
1883 #ifdef USE_SEG_GP
1884 if (s->cpu_state.cpl == 3)
1885 update_gdt_ldt_cache(s);
1886 #else
1887 update_gdt_ldt_cache(s);
1888 #endif
1890 #ifdef PROFILE_INTERP2
1891 s->exec_init_cycles += (getclock() - ti);
1892 s->exec_init_count++;
1893 #endif
1895 /* since this is not costly, we ensure here that the CPU state is
1896 consistent with what we can handle */
1897 if (!(env->cr0 & CR0_PE_MASK) ||
1898 (env->eflags & VM_MASK)) {
1899 raise_exception(s, KQEMU_RET_SOFTMMU);
1902 r->eip = env->eip;
1903 r->eflags = compute_eflags_user(s, env->eflags);
1904 s->comm_page.virt_eflags = env->eflags & EFLAGS_MASK;
1905 r->cs_sel = env->segs[R_CS].selector | 3;
1906 r->ss_sel = env->segs[R_SS].selector | 3;
1908 r->eax = env->regs[R_EAX];
1909 r->ecx = env->regs[R_ECX];
1910 r->edx = env->regs[R_EDX];
1911 r->ebx = env->regs[R_EBX];
1912 r->esp = env->regs[R_ESP];
1913 r->ebp = env->regs[R_EBP];
1914 r->esi = env->regs[R_ESI];
1915 r->edi = env->regs[R_EDI];
1916 #ifdef __x86_64__
1917 r->r8 = env->regs[8];
1918 r->r9 = env->regs[9];
1919 r->r10 = env->regs[10];
1920 r->r11 = env->regs[11];
1921 r->r12 = env->regs[12];
1922 r->r13 = env->regs[13];
1923 r->r14 = env->regs[14];
1924 r->r15 = env->regs[15];
1925 #else
1926 r->ds_sel = env->segs[R_DS].selector;
1927 r->es_sel = env->segs[R_ES].selector;
1928 #endif
1930 update_seg_desc_caches(s);
1932 /* NOTE: exceptions can occur here */
1933 reload_segs(s);
1935 /* for consistency, we allow starting the interpreter here if
1936 needed */
1937 if (!(s->comm_page.virt_eflags & IF_MASK)) {
1938 s->regs = r;
1939 s->seg_cache_loaded = 1;
1940 s->insn_count = MAX_INSN_COUNT;
1941 insn_interp(s);
1944 goto_user(s, r);
1947 /* General Protection Fault. In all cases we need to interpret the
1948 code to know more */
1949 void kqemu_exception_0d(struct kqemu_state *s,
1950 struct kqemu_exception_regs regs)
1952 kqemu_exception_interp(s, 0x0d, &regs);
1955 /* illegal instruction. We only need to interpret it for the syscall case */
1956 void kqemu_exception_06(struct kqemu_state *s,
1957 struct kqemu_exception_regs regs)
1959 kqemu_exception_interp(s, 0x06, &regs);
1962 /* Coprocessor emulation fault. We handle here the fact that the
1963 FPU state can be temporarily stored in the host OS */
1964 void kqemu_exception_07(struct kqemu_state *s,
1965 struct kqemu_exception_regs regs)
1967 if ((regs.cs_sel & 3) != 3) {
1968 if (!expected_monitor_exception(regs.eip)) {
1969 monitor_panic_regs(s, &regs, "Unexpected exception 0x%02x in monitor space\n", 0x07);
1971 /* this can happen for fxsave/fxrstor instructions in the
1972 interpreter */
1973 s->seg_cache_loaded = 1;
1974 } else {
1975 s->seg_cache_loaded = 0;
1977 s->regs = &s->regs1;
1978 if (s->cpu_state.cr0 & (CR0_TS_MASK | CR0_EM_MASK)) {
1979 /* real FPU fault needed */
1980 raise_exception_err(s, EXCP07_PREX, 0);
1981 } else {
1982 /* the host needs to restore the FPU state for us */
1983 s->mon_req = MON_REQ_EXCEPTION;
1984 s->arg0 = 0x07;
1985 monitor2kernel1(s);
1989 /* single step/debug */
1990 void kqemu_exception_01(struct kqemu_state *s,
1991 struct kqemu_exception_regs regs)
1993 unsigned long dr6, val;
1995 asm volatile ("mov %%dr6, %0" : "=r" (dr6));
1996 /* Linux uses lazy dr7 clearing, so we must verify we are in this
1997 case */
1998 /* XXX: check this, because TF should have priority */
1999 if ((dr6 & 0xf) != 0 && !s->monitor_dr7)
2000 goto clear_dr7;
2002 if ((regs.cs_sel & 3) != 3)
2003 monitor_panic_regs(s, &regs, "Unexpected exception 0x%02x in monitor space\n", 0x01);
2005 s->regs = &regs;
2006 s->seg_cache_loaded = 0;
2007 /* update DR6 register */
2008 s->cpu_state.dr6 = dr6;
2009 raise_exception_err(s, EXCP01_SSTP, 0);
2010 clear_dr7:
2011 val = 0;
2012 asm volatile ("mov %0, %%dr7" : : "r" (val));
2015 #define DEFAULT_EXCEPTION(n) \
2016 void kqemu_exception_ ## n (struct kqemu_state *s, \
2017 struct kqemu_exception_regs regs) \
2019 if ((regs.cs_sel & 3) != 3)\
2020 handle_mon_exception(s, &regs, 0x ## n);\
2021 s->regs = &regs;\
2022 s->seg_cache_loaded = 0;\
2023 s->cpu_state.error_code = regs.error_code;\
2024 raise_exception(s, 0x ## n);\
2027 DEFAULT_EXCEPTION(00)
2028 DEFAULT_EXCEPTION(02)
2029 DEFAULT_EXCEPTION(03)
2030 DEFAULT_EXCEPTION(04)
2031 DEFAULT_EXCEPTION(05)
2032 DEFAULT_EXCEPTION(08)
2033 DEFAULT_EXCEPTION(09)
2034 DEFAULT_EXCEPTION(0a)
2035 DEFAULT_EXCEPTION(0b)
2036 DEFAULT_EXCEPTION(0c)
2037 DEFAULT_EXCEPTION(0f)
2038 DEFAULT_EXCEPTION(10)
2039 DEFAULT_EXCEPTION(11)
2040 DEFAULT_EXCEPTION(12)
2041 DEFAULT_EXCEPTION(13)
2043 void monitor_interrupt(struct kqemu_state *s, struct kqemu_exception_regs regs)
2045 int intno;
2046 #ifdef PROFILE_INTERP2
2047 int64_t ti = getclock();
2048 s->hw_interrupt_start_count++;
2049 #endif
2051 intno = regs.error_code;
2053 if ((regs.cs_sel & 3) != 3) {
2054 monitor_panic_regs(s, &regs, "Interrupt 0x%02x in monitor space\n",
2055 intno);
2058 s->regs = &regs;
2059 s->seg_cache_loaded = 0;
2060 /* execute the irq code in kernel space */
2061 s->mon_req = MON_REQ_IRQ;
2062 s->arg0 = intno;
2063 /* NOTE: if interrupting user code, the host kernel will schedule
2064 and eventually exit from the monitor_exec loop */
2065 monitor2kernel1(s);
2066 /* ... and come back to monitor space */
2068 #ifdef PROFILE_INTERP2
2069 s->hw_interrupt_count++;
2070 s->hw_interrupt_cycles += (getclock() - ti);
2071 #endif