4 * Copyright (c) 2005 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <sys/types.h>
27 #include <sys/ioctl.h>
47 #include "kqemu/kqemu.h"
49 /* compatibility stuff */
50 #ifndef KQEMU_RET_SYSCALL
51 #define KQEMU_RET_SYSCALL 0x0300 /* syscall insn */
53 #ifndef KQEMU_MAX_RAM_PAGES_TO_UPDATE
54 #define KQEMU_MAX_RAM_PAGES_TO_UPDATE 512
55 #define KQEMU_RAM_PAGES_UPDATE_ALL (KQEMU_MAX_RAM_PAGES_TO_UPDATE + 1)
59 #define KQEMU_DEVICE "\\\\.\\kqemu"
61 #define KQEMU_DEVICE "/dev/kqemu"
65 #define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
66 HANDLE kqemu_fd
= KQEMU_INVALID_FD
;
67 #define kqemu_closefd(x) CloseHandle(x)
69 #define KQEMU_INVALID_FD -1
70 int kqemu_fd
= KQEMU_INVALID_FD
;
71 #define kqemu_closefd(x) close(x)
74 int kqemu_allowed
= 1;
75 unsigned long *pages_to_flush
;
76 unsigned int nb_pages_to_flush
;
77 unsigned long *ram_pages_to_update
;
78 unsigned int nb_ram_pages_to_update
;
79 extern uint32_t **l1_phys_map
;
81 #define cpuid(index, eax, ebx, ecx, edx) \
82 asm volatile ("cpuid" \
83 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
#ifdef __x86_64__
/* CPUID is architecturally guaranteed on every x86_64 CPU. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* Probe for CPUID support on i386: the ID bit (bit 21) of EFLAGS can
   only be toggled if the CPU implements CPUID. */
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif
110 static void kqemu_update_cpuid(CPUState
*env
)
112 int critical_features_mask
, features
;
113 uint32_t eax
, ebx
, ecx
, edx
;
115 /* the following features are kept identical on the host and
116 target cpus because they are important for user code. Strictly
117 speaking, only SSE really matters because the OS must support
118 it if the user code uses it. */
119 critical_features_mask
=
120 CPUID_CMOV
| CPUID_CX8
|
121 CPUID_FXSR
| CPUID_MMX
| CPUID_SSE
|
122 CPUID_SSE2
| CPUID_SEP
;
123 if (!is_cpuid_supported()) {
126 cpuid(1, eax
, ebx
, ecx
, edx
);
130 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
131 compatibility mode, so in order to have the best performances
132 it is better not to use it */
133 features
&= ~CPUID_SEP
;
135 env
->cpuid_features
= (env
->cpuid_features
& ~critical_features_mask
) |
136 (features
& critical_features_mask
);
137 /* XXX: we could update more of the target CPUID state so that the
138 non accelerated code sees exactly the same CPU features as the
142 int kqemu_init(CPUState
*env
)
144 struct kqemu_init init
;
154 kqemu_fd
= CreateFile(KQEMU_DEVICE
, GENERIC_WRITE
| GENERIC_READ
,
155 FILE_SHARE_READ
| FILE_SHARE_WRITE
,
156 NULL
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL
,
159 kqemu_fd
= open(KQEMU_DEVICE
, O_RDWR
);
161 if (kqemu_fd
== KQEMU_INVALID_FD
) {
162 fprintf(stderr
, "Could not open '%s' - QEMU acceleration layer not activated\n", KQEMU_DEVICE
);
167 DeviceIoControl(kqemu_fd
, KQEMU_GET_VERSION
, NULL
, 0,
168 &version
, sizeof(version
), &temp
, NULL
);
170 ioctl(kqemu_fd
, KQEMU_GET_VERSION
, &version
);
172 if (version
!= KQEMU_VERSION
) {
173 fprintf(stderr
, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
174 version
, KQEMU_VERSION
);
178 pages_to_flush
= qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH
*
179 sizeof(unsigned long));
183 ram_pages_to_update
= qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE
*
184 sizeof(unsigned long));
185 if (!ram_pages_to_update
)
188 init
.ram_base
= phys_ram_base
;
189 init
.ram_size
= phys_ram_size
;
190 init
.ram_dirty
= phys_ram_dirty
;
191 init
.phys_to_ram_map
= l1_phys_map
;
192 init
.pages_to_flush
= pages_to_flush
;
193 #if KQEMU_VERSION >= 0x010200
194 init
.ram_pages_to_update
= ram_pages_to_update
;
197 ret
= DeviceIoControl(kqemu_fd
, KQEMU_INIT
, &init
, sizeof(init
),
198 NULL
, 0, &temp
, NULL
) == TRUE
? 0 : -1;
200 ret
= ioctl(kqemu_fd
, KQEMU_INIT
, &init
);
203 fprintf(stderr
, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret
);
205 kqemu_closefd(kqemu_fd
);
206 kqemu_fd
= KQEMU_INVALID_FD
;
209 kqemu_update_cpuid(env
);
210 env
->kqemu_enabled
= 1;
211 nb_pages_to_flush
= 0;
212 nb_ram_pages_to_update
= 0;
216 void kqemu_flush_page(CPUState
*env
, target_ulong addr
)
219 if (loglevel
& CPU_LOG_INT
) {
220 fprintf(logfile
, "kqemu_flush_page: addr=" TARGET_FMT_lx
"\n", addr
);
223 if (nb_pages_to_flush
>= KQEMU_MAX_PAGES_TO_FLUSH
)
224 nb_pages_to_flush
= KQEMU_FLUSH_ALL
;
226 pages_to_flush
[nb_pages_to_flush
++] = addr
;
229 void kqemu_flush(CPUState
*env
, int global
)
232 if (loglevel
& CPU_LOG_INT
) {
233 fprintf(logfile
, "kqemu_flush:\n");
236 nb_pages_to_flush
= KQEMU_FLUSH_ALL
;
239 void kqemu_set_notdirty(CPUState
*env
, ram_addr_t ram_addr
)
242 if (loglevel
& CPU_LOG_INT
) {
243 fprintf(logfile
, "kqemu_set_notdirty: addr=%08lx\n", ram_addr
);
246 /* we only track transitions to dirty state */
247 if (phys_ram_dirty
[ram_addr
>> TARGET_PAGE_BITS
] != 0xff)
249 if (nb_ram_pages_to_update
>= KQEMU_MAX_RAM_PAGES_TO_UPDATE
)
250 nb_ram_pages_to_update
= KQEMU_RAM_PAGES_UPDATE_ALL
;
252 ram_pages_to_update
[nb_ram_pages_to_update
++] = ram_addr
;
267 uint8_t fpregs1
[8 * 10];
283 uint8_t fpregs1
[8 * 16];
284 uint8_t xmm_regs
[16 * 16];
288 static struct fpxstate fpx1
__attribute__((aligned(16)));
290 static void restore_native_fp_frstor(CPUState
*env
)
293 struct fpstate fp1
, *fp
= &fp1
;
295 fp
->fpuc
= env
->fpuc
;
296 fp
->fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
298 for (i
=7; i
>=0; i
--) {
300 if (env
->fptags
[i
]) {
303 /* the FPU automatically computes it */
308 for(i
= 0;i
< 8; i
++) {
309 memcpy(&fp
->fpregs1
[i
* 10], &env
->fpregs
[j
].d
, 10);
312 asm volatile ("frstor %0" : "=m" (*fp
));
315 static void save_native_fp_fsave(CPUState
*env
)
319 struct fpstate fp1
, *fp
= &fp1
;
321 asm volatile ("fsave %0" : : "m" (*fp
));
322 env
->fpuc
= fp
->fpuc
;
323 env
->fpstt
= (fp
->fpus
>> 11) & 7;
324 env
->fpus
= fp
->fpus
& ~0x3800;
326 for(i
= 0;i
< 8; i
++) {
327 env
->fptags
[i
] = ((fptag
& 3) == 3);
331 for(i
= 0;i
< 8; i
++) {
332 memcpy(&env
->fpregs
[j
].d
, &fp
->fpregs1
[i
* 10], 10);
335 /* we must restore the default rounding state */
336 fpuc
= 0x037f | (env
->fpuc
& (3 << 10));
337 asm volatile("fldcw %0" : : "m" (fpuc
));
340 static void restore_native_fp_fxrstor(CPUState
*env
)
342 struct fpxstate
*fp
= &fpx1
;
345 fp
->fpuc
= env
->fpuc
;
346 fp
->fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
348 for(i
= 0; i
< 8; i
++)
349 fptag
|= (env
->fptags
[i
] << i
);
350 fp
->fptag
= fptag
^ 0xff;
353 for(i
= 0;i
< 8; i
++) {
354 memcpy(&fp
->fpregs1
[i
* 16], &env
->fpregs
[j
].d
, 10);
357 if (env
->cpuid_features
& CPUID_SSE
) {
358 fp
->mxcsr
= env
->mxcsr
;
359 /* XXX: check if DAZ is not available */
360 fp
->mxcsr_mask
= 0xffff;
361 memcpy(fp
->xmm_regs
, env
->xmm_regs
, CPU_NB_REGS
* 16);
363 asm volatile ("fxrstor %0" : "=m" (*fp
));
366 static void save_native_fp_fxsave(CPUState
*env
)
368 struct fpxstate
*fp
= &fpx1
;
372 asm volatile ("fxsave %0" : : "m" (*fp
));
373 env
->fpuc
= fp
->fpuc
;
374 env
->fpstt
= (fp
->fpus
>> 11) & 7;
375 env
->fpus
= fp
->fpus
& ~0x3800;
376 fptag
= fp
->fptag
^ 0xff;
377 for(i
= 0;i
< 8; i
++) {
378 env
->fptags
[i
] = (fptag
>> i
) & 1;
381 for(i
= 0;i
< 8; i
++) {
382 memcpy(&env
->fpregs
[j
].d
, &fp
->fpregs1
[i
* 16], 10);
385 if (env
->cpuid_features
& CPUID_SSE
) {
386 env
->mxcsr
= fp
->mxcsr
;
387 memcpy(env
->xmm_regs
, fp
->xmm_regs
, CPU_NB_REGS
* 16);
390 /* we must restore the default rounding state */
391 asm volatile ("fninit");
392 fpuc
= 0x037f | (env
->fpuc
& (3 << 10));
393 asm volatile("fldcw %0" : : "m" (fpuc
));
396 static int do_syscall(CPUState
*env
,
397 struct kqemu_cpu_state
*kenv
)
401 selector
= (env
->star
>> 32) & 0xffff;
403 if (env
->hflags
& HF_LMA_MASK
) {
404 env
->regs
[R_ECX
] = kenv
->next_eip
;
405 env
->regs
[11] = env
->eflags
;
407 cpu_x86_set_cpl(env
, 0);
408 cpu_x86_load_seg_cache(env
, R_CS
, selector
& 0xfffc,
410 DESC_G_MASK
| DESC_B_MASK
| DESC_P_MASK
|
412 DESC_CS_MASK
| DESC_R_MASK
| DESC_A_MASK
| DESC_L_MASK
);
413 cpu_x86_load_seg_cache(env
, R_SS
, (selector
+ 8) & 0xfffc,
415 DESC_G_MASK
| DESC_B_MASK
| DESC_P_MASK
|
417 DESC_W_MASK
| DESC_A_MASK
);
418 env
->eflags
&= ~env
->fmask
;
419 if (env
->hflags
& HF_CS64_MASK
)
420 env
->eip
= env
->lstar
;
422 env
->eip
= env
->cstar
;
426 env
->regs
[R_ECX
] = (uint32_t)kenv
->next_eip
;
428 cpu_x86_set_cpl(env
, 0);
429 cpu_x86_load_seg_cache(env
, R_CS
, selector
& 0xfffc,
431 DESC_G_MASK
| DESC_B_MASK
| DESC_P_MASK
|
433 DESC_CS_MASK
| DESC_R_MASK
| DESC_A_MASK
);
434 cpu_x86_load_seg_cache(env
, R_SS
, (selector
+ 8) & 0xfffc,
436 DESC_G_MASK
| DESC_B_MASK
| DESC_P_MASK
|
438 DESC_W_MASK
| DESC_A_MASK
);
439 env
->eflags
&= ~(IF_MASK
| RF_MASK
| VM_MASK
);
440 env
->eip
= (uint32_t)env
->star
;
447 #define PC_REC_SIZE 1
448 #define PC_REC_HASH_BITS 16
449 #define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)
451 typedef struct PCRecord
{
454 struct PCRecord
*next
;
457 PCRecord
*pc_rec_hash
[PC_REC_HASH_SIZE
];
460 void kqemu_record_pc(unsigned long pc
)
465 h
= pc
/ PC_REC_SIZE
;
466 h
= h
^ (h
>> PC_REC_HASH_BITS
);
467 h
&= (PC_REC_HASH_SIZE
- 1);
468 pr
= &pc_rec_hash
[h
];
479 r
= malloc(sizeof(PCRecord
));
487 int pc_rec_cmp(const void *p1
, const void *p2
)
489 PCRecord
*r1
= *(PCRecord
**)p1
;
490 PCRecord
*r2
= *(PCRecord
**)p2
;
491 if (r1
->count
< r2
->count
)
493 else if (r1
->count
== r2
->count
)
499 void kqemu_record_dump(void)
506 pr
= malloc(sizeof(PCRecord
*) * nb_pc_records
);
509 for(h
= 0; h
< PC_REC_HASH_SIZE
; h
++) {
510 for(r
= pc_rec_hash
[h
]; r
!= NULL
; r
= r
->next
) {
515 qsort(pr
, nb_pc_records
, sizeof(PCRecord
*), pc_rec_cmp
);
517 f
= fopen("/tmp/kqemu.stats", "w");
519 perror("/tmp/kqemu.stats");
522 fprintf(f
, "total: %lld\n", total
);
524 for(i
= 0; i
< nb_pc_records
; i
++) {
527 fprintf(f
, "%08lx: %lld %0.2f%% %0.2f%%\n",
530 (double)r
->count
/ (double)total
* 100.0,
531 (double)sum
/ (double)total
* 100.0);
537 void kqemu_record_dump(void)
542 int kqemu_cpu_exec(CPUState
*env
)
544 struct kqemu_cpu_state kcpu_state
, *kenv
= &kcpu_state
;
551 if (loglevel
& CPU_LOG_INT
) {
552 fprintf(logfile
, "kqemu: cpu_exec: enter\n");
553 cpu_dump_state(env
, logfile
, fprintf
, 0);
556 memcpy(kenv
->regs
, env
->regs
, sizeof(kenv
->regs
));
557 kenv
->eip
= env
->eip
;
558 kenv
->eflags
= env
->eflags
;
559 memcpy(&kenv
->segs
, &env
->segs
, sizeof(env
->segs
));
560 memcpy(&kenv
->ldt
, &env
->ldt
, sizeof(env
->ldt
));
561 memcpy(&kenv
->tr
, &env
->tr
, sizeof(env
->tr
));
562 memcpy(&kenv
->gdt
, &env
->gdt
, sizeof(env
->gdt
));
563 memcpy(&kenv
->idt
, &env
->idt
, sizeof(env
->idt
));
564 kenv
->cr0
= env
->cr
[0];
565 kenv
->cr2
= env
->cr
[2];
566 kenv
->cr3
= env
->cr
[3];
567 kenv
->cr4
= env
->cr
[4];
568 kenv
->a20_mask
= env
->a20_mask
;
569 #if KQEMU_VERSION >= 0x010100
570 kenv
->efer
= env
->efer
;
572 if (env
->dr
[7] & 0xff) {
573 kenv
->dr7
= env
->dr
[7];
574 kenv
->dr0
= env
->dr
[0];
575 kenv
->dr1
= env
->dr
[1];
576 kenv
->dr2
= env
->dr
[2];
577 kenv
->dr3
= env
->dr
[3];
581 kenv
->dr6
= env
->dr
[6];
583 kenv
->nb_pages_to_flush
= nb_pages_to_flush
;
584 nb_pages_to_flush
= 0;
585 #if KQEMU_VERSION >= 0x010200
587 kenv
->nb_ram_pages_to_update
= nb_ram_pages_to_update
;
589 nb_ram_pages_to_update
= 0;
591 if (!(kenv
->cr0
& CR0_TS_MASK
)) {
592 if (env
->cpuid_features
& CPUID_FXSR
)
593 restore_native_fp_fxrstor(env
);
595 restore_native_fp_frstor(env
);
599 if (DeviceIoControl(kqemu_fd
, KQEMU_EXEC
,
600 kenv
, sizeof(struct kqemu_cpu_state
),
601 kenv
, sizeof(struct kqemu_cpu_state
),
608 #if KQEMU_VERSION >= 0x010100
609 ioctl(kqemu_fd
, KQEMU_EXEC
, kenv
);
612 ret
= ioctl(kqemu_fd
, KQEMU_EXEC
, kenv
);
615 if (!(kenv
->cr0
& CR0_TS_MASK
)) {
616 if (env
->cpuid_features
& CPUID_FXSR
)
617 save_native_fp_fxsave(env
);
619 save_native_fp_fsave(env
);
622 memcpy(env
->regs
, kenv
->regs
, sizeof(env
->regs
));
623 env
->eip
= kenv
->eip
;
624 env
->eflags
= kenv
->eflags
;
625 memcpy(env
->segs
, kenv
->segs
, sizeof(env
->segs
));
627 /* no need to restore that */
628 memcpy(env
->ldt
, kenv
->ldt
, sizeof(env
->ldt
));
629 memcpy(env
->tr
, kenv
->tr
, sizeof(env
->tr
));
630 memcpy(env
->gdt
, kenv
->gdt
, sizeof(env
->gdt
));
631 memcpy(env
->idt
, kenv
->idt
, sizeof(env
->idt
));
632 env
->cr
[0] = kenv
->cr0
;
633 env
->cr
[3] = kenv
->cr3
;
634 env
->cr
[4] = kenv
->cr4
;
635 env
->a20_mask
= kenv
->a20_mask
;
637 env
->cr
[2] = kenv
->cr2
;
638 env
->dr
[6] = kenv
->dr6
;
640 #if KQEMU_VERSION >= 0x010200
641 if (kenv
->nb_ram_pages_to_update
> 0) {
642 cpu_tlb_update_dirty(env
);
646 /* restore the hidden flags */
648 unsigned int new_hflags
;
650 if ((env
->hflags
& HF_LMA_MASK
) &&
651 (env
->segs
[R_CS
].flags
& DESC_L_MASK
)) {
653 new_hflags
= HF_CS32_MASK
| HF_SS32_MASK
| HF_CS64_MASK
;
657 /* legacy / compatibility case */
658 new_hflags
= (env
->segs
[R_CS
].flags
& DESC_B_MASK
)
659 >> (DESC_B_SHIFT
- HF_CS32_SHIFT
);
660 new_hflags
|= (env
->segs
[R_SS
].flags
& DESC_B_MASK
)
661 >> (DESC_B_SHIFT
- HF_SS32_SHIFT
);
662 if (!(env
->cr
[0] & CR0_PE_MASK
) ||
663 (env
->eflags
& VM_MASK
) ||
664 !(env
->hflags
& HF_CS32_MASK
)) {
665 /* XXX: try to avoid this test. The problem comes from the
666 fact that is real mode or vm86 mode we only modify the
667 'base' and 'selector' fields of the segment cache to go
668 faster. A solution may be to force addseg to one in
670 new_hflags
|= HF_ADDSEG_MASK
;
672 new_hflags
|= ((env
->segs
[R_DS
].base
|
673 env
->segs
[R_ES
].base
|
674 env
->segs
[R_SS
].base
) != 0) <<
678 env
->hflags
= (env
->hflags
&
679 ~(HF_CS32_MASK
| HF_SS32_MASK
| HF_CS64_MASK
| HF_ADDSEG_MASK
)) |
684 if (loglevel
& CPU_LOG_INT
) {
685 fprintf(logfile
, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret
);
688 if (ret
== KQEMU_RET_SYSCALL
) {
689 /* syscall instruction */
690 return do_syscall(env
, kenv
);
692 if ((ret
& 0xff00) == KQEMU_RET_INT
) {
693 env
->exception_index
= ret
& 0xff;
695 env
->exception_is_int
= 1;
696 env
->exception_next_eip
= kenv
->next_eip
;
698 if (loglevel
& CPU_LOG_INT
) {
699 fprintf(logfile
, "kqemu: interrupt v=%02x:\n",
700 env
->exception_index
);
701 cpu_dump_state(env
, logfile
, fprintf
, 0);
705 } else if ((ret
& 0xff00) == KQEMU_RET_EXCEPTION
) {
706 env
->exception_index
= ret
& 0xff;
707 env
->error_code
= kenv
->error_code
;
708 env
->exception_is_int
= 0;
709 env
->exception_next_eip
= 0;
711 if (loglevel
& CPU_LOG_INT
) {
712 fprintf(logfile
, "kqemu: exception v=%02x e=%04x:\n",
713 env
->exception_index
, env
->error_code
);
714 cpu_dump_state(env
, logfile
, fprintf
, 0);
718 } else if (ret
== KQEMU_RET_INTR
) {
720 if (loglevel
& CPU_LOG_INT
) {
721 cpu_dump_state(env
, logfile
, fprintf
, 0);
725 } else if (ret
== KQEMU_RET_SOFTMMU
) {
727 kqemu_record_pc(env
->eip
+ env
->segs
[R_CS
].base
);
730 if (loglevel
& CPU_LOG_INT
) {
731 cpu_dump_state(env
, logfile
, fprintf
, 0);
736 cpu_dump_state(env
, stderr
, fprintf
, 0);
737 fprintf(stderr
, "Unsupported return value: 0x%x\n", ret
);
743 void kqemu_cpu_interrupt(CPUState
*env
)
745 #if defined(_WIN32) && KQEMU_VERSION >= 0x010101
746 /* cancelling the I/O request causes KQEMU to finish executing the
747 current block and successfully returning. */