2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * VCPU_STAT(x) expands to two comma-separated values: the byte offset of
 * stat.x inside struct kvm_vcpu, followed by the KVM_STAT_VCPU constant.
 * It is used to fill in the entries of the debugfs_entries table.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of debugfs statistics: each entry pairs a name string with the
 * VCPU_STAT() expansion for a counter in the vcpu stat structure.
 * Most names match the counter field; a few differ, for example
 * "userspace_handled" maps to exit_userspace,
 * "exit_instr_and_program_int" maps to exit_instr_and_program,
 * "deliver_program_interruption" maps to deliver_program_int, and the
 * "instruction_sigp_set_arch" / "instruction_sigp_set_prefix" names map to
 * instruction_sigp_arch / instruction_sigp_prefix.
 * NOTE(review): the terminating entry and the end of the initializer are
 * not visible in this listing - confirm against the full source.
 */
32 struct kvm_stats_debugfs_item debugfs_entries
[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace
) },
34 { "exit_null", VCPU_STAT(exit_null
) },
35 { "exit_validity", VCPU_STAT(exit_validity
) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request
) },
37 { "exit_external_request", VCPU_STAT(exit_external_request
) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt
) },
39 { "exit_instruction", VCPU_STAT(exit_instruction
) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption
) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program
) },
42 { "instruction_lctg", VCPU_STAT(instruction_lctg
) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl
) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal
) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal
) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt
) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal
) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal
) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal
) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int
) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state
) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp
) },
53 { "instruction_spx", VCPU_STAT(instruction_spx
) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx
) },
55 { "instruction_stap", VCPU_STAT(instruction_stap
) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key
) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch
) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc
) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi
) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl
) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense
) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency
) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop
) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch
) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix
) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart
) },
67 { "diagnose_44", VCPU_STAT(diagnose_44
) },
72 /* Section: not file related */
/*
 * Arch hook that takes a single opaque pointer (named garbage). No
 * statements are visible in its body; the existing comment below explains
 * that nothing has to be enabled on s390.
 */
73 void kvm_arch_hardware_enable(void *garbage
)
75 /* every s390 is virtualization enabled ;-) */
/*
 * Counterpart of kvm_arch_hardware_enable(); takes the same opaque pointer.
 * NOTE(review): no body statements are visible in this listing.
 */
78 void kvm_arch_hardware_disable(void *garbage
)
/*
 * Arch hardware setup hook; takes no arguments and returns an int.
 * NOTE(review): the body (and therefore the returned value) is not visible
 * in this listing - confirm against the full source.
 */
82 int kvm_arch_hardware_setup(void)
/*
 * Arch hardware teardown hook; takes no arguments and returns nothing.
 * NOTE(review): no body statements are visible in this listing.
 */
87 void kvm_arch_hardware_unsetup(void)
/*
 * Processor compatibility hook; receives an opaque pointer named rtn.
 * NOTE(review): presumably rtn is an out-parameter for the result, but no
 * body statements are visible in this listing - confirm.
 */
91 void kvm_arch_check_processor_compat(void *rtn
)
/*
 * Arch init hook; receives an opaque pointer and returns an int.
 * NOTE(review): the body (and the returned value) is not visible in this
 * listing - confirm against the full source.
 */
95 int kvm_arch_init(void *opaque
)
/*
 * Arch exit hook; takes no arguments and returns nothing.
 * NOTE(review): no body statements are visible in this listing.
 */
100 void kvm_arch_exit(void)
104 /* Section: device related */
/*
 * Device-level ioctl handler.
 *
 * When the request code equals KVM_S390_ENABLE_SIE the result of
 * s390_enable_sie() is returned directly. The filp and arg parameters are
 * not used by the visible statements.
 * NOTE(review): the return path for every other request code is not
 * visible in this listing - confirm against the full source.
 */
105 long kvm_arch_dev_ioctl(struct file
*filp
,
106 unsigned int ioctl
, unsigned long arg
)
108 if (ioctl
== KVM_S390_ENABLE_SIE
)
109 return s390_enable_sie();
/*
 * Extension query hook; receives the extension number in ext and returns
 * an int.
 * NOTE(review): the body is not visible in this listing, so the set of
 * supported extensions cannot be determined from here.
 */
113 int kvm_dev_ioctl_check_extension(long ext
)
118 /* Section: vm related */
120 * Get (and clear) the dirty memory log for a memory slot.
/*
 * Dirty-log ioctl entry point: receives the VM and a struct kvm_dirty_log
 * request and returns an int.
 * NOTE(review): the body is not visible in this listing - confirm what is
 * returned and whether the log is actually produced.
 */
122 int kvm_vm_ioctl_get_dirty_log(struct kvm
*kvm
,
123 struct kvm_dirty_log
*log
)
/*
 * VM-level ioctl handler.
 *
 * The VM is taken from filp->private_data and arg is reinterpreted as a
 * user-space pointer. For KVM_S390_INTERRUPT a struct kvm_s390_interrupt is
 * copied in from user space and handed to kvm_s390_inject_vm(), whose
 * result is stored in r.
 * NOTE(review): the switch statement, the declaration of r, the handling
 * of a failed copy_from_user() and the default case are not visible in this
 * listing - confirm against the full source.
 */
128 long kvm_arch_vm_ioctl(struct file
*filp
,
129 unsigned int ioctl
, unsigned long arg
)
131 struct kvm
*kvm
= filp
->private_data
;
132 void __user
*argp
= (void __user
*)arg
;
136 case KVM_S390_INTERRUPT
: {
137 struct kvm_s390_interrupt s390int
;
/* copy_from_user() returns non-zero when not all bytes could be copied */
140 if (copy_from_user(&s390int
, argp
, sizeof(s390int
)))
142 r
= kvm_s390_inject_vm(kvm
, &s390int
);
/*
 * Allocate and initialise a new VM.
 *
 * Visible steps, in order:
 *  - call s390_enable_sie() and keep its result in rc;
 *  - allocate a zeroed struct kvm with kzalloc();
 *  - allocate one zeroed page and use it as the arch.sca block;
 *  - build the debug name "kvm-<pid>" from current->pid and register a
 *    debug area under that name, stored in arch.dbf;
 *  - initialise the floating interrupt lock and list;
 *  - attach the sprintf debug view and log "vm created";
 *  - take a module reference.
 * The final free_page() of the sca page is presumably part of an
 * error-unwind path - the labels and gotos are not visible in this listing.
 * NOTE(review): the result checks after each allocation, the declaration
 * and size of debug_name, and the return statements are not visible here.
 */
152 struct kvm
*kvm_arch_create_vm(void)
158 rc
= s390_enable_sie();
163 kvm
= kzalloc(sizeof(struct kvm
), GFP_KERNEL
);
167 kvm
->arch
.sca
= (struct sca_block
*) get_zeroed_page(GFP_KERNEL
);
/* NOTE(review): unbounded sprintf; confirm debug_name is large enough */
171 sprintf(debug_name
, "kvm-%u", current
->pid
);
173 kvm
->arch
.dbf
= debug_register(debug_name
, 8, 2, 8 * sizeof(long));
177 spin_lock_init(&kvm
->arch
.float_int
.lock
);
178 INIT_LIST_HEAD(&kvm
->arch
.float_int
.list
);
180 debug_register_view(kvm
->arch
.dbf
, &debug_sprintf_view
);
181 VM_EVENT(kvm
, 3, "%s", "vm created");
/* NOTE(review): the return value of try_module_get() is not checked */
183 try_module_get(THIS_MODULE
);
187 free_page((unsigned long)(kvm
->arch
.sca
));
/*
 * Tear down a VM: unregister its debug area, release guest physical memory
 * through kvm_free_physmem(), free the sca page and drop the module
 * reference.
 * NOTE(review): no release of the struct kvm itself is visible in this
 * listing (a short line appears to be missing before module_put) - confirm
 * against the full source.
 */
194 void kvm_arch_destroy_vm(struct kvm
*kvm
)
196 debug_unregister(kvm
->arch
.dbf
);
197 kvm_free_physmem(kvm
);
198 free_page((unsigned long)(kvm
->arch
.sca
));
200 module_put(THIS_MODULE
);
203 /* Section: vcpu related */
/*
 * Per-vcpu arch init hook; receives the vcpu and returns an int.
 * NOTE(review): the body (and the returned value) is not visible in this
 * listing - confirm against the full source.
 */
204 int kvm_arch_vcpu_init(struct kvm_vcpu
*vcpu
)
/*
 * Per-vcpu arch uninit hook. No statements are visible in its body; the
 * existing comment below explains why it only needs to exist.
 */
209 void kvm_arch_vcpu_uninit(struct kvm_vcpu
*vcpu
)
211 /* kvm common code refers to this, but does not call it */
/*
 * Switch register context from host to guest.
 *
 * The host floating point and access registers are saved into the vcpu,
 * the guest fpc value is reduced to the bits in FPC_VALID_MASK, and then
 * the guest floating point and access registers are restored. The cpu
 * parameter is not used by the visible statements.
 */
215 void kvm_arch_vcpu_load(struct kvm_vcpu
*vcpu
, int cpu
)
217 save_fp_regs(&vcpu
->arch
.host_fpregs
);
218 save_access_regs(vcpu
->arch
.host_acrs
);
/* clear every fpc bit outside FPC_VALID_MASK before it is restored */
219 vcpu
->arch
.guest_fpregs
.fpc
&= FPC_VALID_MASK
;
220 restore_fp_regs(&vcpu
->arch
.guest_fpregs
);
221 restore_access_regs(vcpu
->arch
.guest_acrs
);
/*
 * Switch register context from guest back to host: the mirror image of
 * kvm_arch_vcpu_load(). The guest floating point and access registers are
 * saved into the vcpu, then the previously saved host values are restored.
 */
224 void kvm_arch_vcpu_put(struct kvm_vcpu
*vcpu
)
226 save_fp_regs(&vcpu
->arch
.guest_fpregs
);
227 save_access_regs(vcpu
->arch
.guest_acrs
);
228 restore_fp_regs(&vcpu
->arch
.host_fpregs
);
229 restore_access_regs(vcpu
->arch
.host_acrs
);
/*
 * Put the vcpu into its initial reset state.
 *
 * In the SIE control block: the guest PSW mask and address, the prefix,
 * the cpu timer (cputm), the clock comparator (ckc) and todpr are zeroed;
 * ihcpu is set to 0xffff; all 16 guest control registers are cleared and
 * then gcr[0] and gcr[14] receive the values 0xE0 and 0xC2000000; gbea is
 * set to 1. The guest fpc is zeroed and an lfpc instruction is executed
 * with that value as its operand.
 */
232 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu
*vcpu
)
234 /* this equals initial cpu reset in pop, but we don't switch to ESA */
235 vcpu
->arch
.sie_block
->gpsw
.mask
= 0UL;
236 vcpu
->arch
.sie_block
->gpsw
.addr
= 0UL;
237 vcpu
->arch
.sie_block
->prefix
= 0UL;
238 vcpu
->arch
.sie_block
->ihcpu
= 0xffff;
239 vcpu
->arch
.sie_block
->cputm
= 0UL;
240 vcpu
->arch
.sie_block
->ckc
= 0UL;
241 vcpu
->arch
.sie_block
->todpr
= 0;
/* wipe all 16 control registers, then set the two non-zero defaults */
242 memset(vcpu
->arch
.sie_block
->gcr
, 0, 16 * sizeof(__u64
));
243 vcpu
->arch
.sie_block
->gcr
[0] = 0xE0UL
;
244 vcpu
->arch
.sie_block
->gcr
[14] = 0xC2000000UL
;
245 vcpu
->arch
.guest_fpregs
.fpc
= 0;
246 asm volatile("lfpc %0" : : "Q" (vcpu
->arch
.guest_fpregs
.fpc
));
247 vcpu
->arch
.sie_block
->gbea
= 1;
/*
 * One-time setup of a vcpu SIE control block and timer.
 *
 * Sets cpuflags to CPUSTAT_ZARCH, gmslm to 0xffffffffff, gmsor to 0, ecb to
 * 2 and eca to 0xC1002001. Arms the ckc_timer structure so that
 * kvm_s390_idle_wakeup() is called with the vcpu pointer (cast to unsigned
 * long) as its argument. Reads the cpu id into arch.cpu_id via get_cpu_id()
 * and then overrides its version field with 0xfe.
 * NOTE(review): the return statement is not visible in this listing.
 */
250 int kvm_arch_vcpu_setup(struct kvm_vcpu
*vcpu
)
252 atomic_set(&vcpu
->arch
.sie_block
->cpuflags
, CPUSTAT_ZARCH
);
253 vcpu
->arch
.sie_block
->gmslm
= 0xffffffffffUL
;
254 vcpu
->arch
.sie_block
->gmsor
= 0x000000000000;
255 vcpu
->arch
.sie_block
->ecb
= 2;
256 vcpu
->arch
.sie_block
->eca
= 0xC1002001U
;
257 setup_timer(&vcpu
->arch
.ckc_timer
, kvm_s390_idle_wakeup
,
258 (unsigned long) vcpu
);
259 get_cpu_id(&vcpu
->arch
.cpu_id
);
260 vcpu
->arch
.cpu_id
.version
= 0xfe;
/*
 * Allocate a vcpu and wire it into the VM.
 *
 * Visible steps, in order:
 *  - allocate a zeroed struct kvm_vcpu and a zeroed page for its SIE block;
 *  - store id in the SIE block (icpua);
 *  - assert that the VM has an sca block and that slot id in it is unused,
 *    then publish the SIE block address in that slot;
 *  - store the upper and lower 32 bits of the sca address in scaoh/scaol;
 *  - initialise the local interrupt lock and list and point the vcpu at
 *    the VM floating interrupt structure;
 *  - under the floating interrupt lock, register the local interrupt
 *    structure in slot id, initialise its wait queue and point its cpuflags
 *    at the SIE block cpuflags;
 *  - call kvm_vcpu_init(), log the creation and take a module reference.
 * NOTE(review): the second parameter line (declaring id), the checks after
 * the allocations and after kvm_vcpu_init(), the error-unwind path and the
 * return statements are not visible in this listing. No range check of id
 * is visible before it is used as an array index - confirm.
 */
264 struct kvm_vcpu
*kvm_arch_vcpu_create(struct kvm
*kvm
,
267 struct kvm_vcpu
*vcpu
= kzalloc(sizeof(struct kvm_vcpu
), GFP_KERNEL
);
273 vcpu
->arch
.sie_block
= (struct sie_block
*) get_zeroed_page(GFP_KERNEL
);
275 if (!vcpu
->arch
.sie_block
)
278 vcpu
->arch
.sie_block
->icpua
= id
;
279 BUG_ON(!kvm
->arch
.sca
);
280 BUG_ON(kvm
->arch
.sca
->cpu
[id
].sda
);
281 kvm
->arch
.sca
->cpu
[id
].sda
= (__u64
) vcpu
->arch
.sie_block
;
/* the sca address is split into its high and low 32-bit halves */
282 vcpu
->arch
.sie_block
->scaoh
= (__u32
)(((__u64
)kvm
->arch
.sca
) >> 32);
283 vcpu
->arch
.sie_block
->scaol
= (__u32
)(__u64
)kvm
->arch
.sca
;
285 spin_lock_init(&vcpu
->arch
.local_int
.lock
);
286 INIT_LIST_HEAD(&vcpu
->arch
.local_int
.list
);
287 vcpu
->arch
.local_int
.float_int
= &kvm
->arch
.float_int
;
/* registration in the floating interrupt table happens under its lock */
288 spin_lock_bh(&kvm
->arch
.float_int
.lock
);
289 kvm
->arch
.float_int
.local_int
[id
] = &vcpu
->arch
.local_int
;
290 init_waitqueue_head(&vcpu
->arch
.local_int
.wq
);
291 vcpu
->arch
.local_int
.cpuflags
= &vcpu
->arch
.sie_block
->cpuflags
;
292 spin_unlock_bh(&kvm
->arch
.float_int
.lock
);
294 rc
= kvm_vcpu_init(vcpu
, kvm
, id
);
297 VM_EVENT(kvm
, 3, "create cpu %d at %p, sie block at %p", id
, vcpu
,
298 vcpu
->arch
.sie_block
);
/* NOTE(review): the return value of try_module_get() is not checked */
300 try_module_get(THIS_MODULE
);
/*
 * Destroy a vcpu: log the event, free the page holding its SIE block and
 * drop the module reference.
 * NOTE(review): no release of the struct kvm_vcpu itself is visible in
 * this listing (a short line appears to be missing before module_put) -
 * confirm against the full source.
 */
309 void kvm_arch_vcpu_destroy(struct kvm_vcpu
*vcpu
)
311 VCPU_EVENT(vcpu
, 3, "%s", "destroy cpu");
312 free_page((unsigned long)(vcpu
->arch
.sie_block
));
314 module_put(THIS_MODULE
);
/*
 * Runnable query hook; receives the vcpu and returns an int. According to
 * the existing comment below it is referenced but never called.
 * NOTE(review): the remaining body statements are not visible in this
 * listing.
 */
317 int kvm_arch_vcpu_runnable(struct kvm_vcpu
*vcpu
)
319 /* kvm common code refers to this, but never calls it */
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset() for the given vcpu.
 * NOTE(review): any statements surrounding the call and the return value
 * are not visible in this listing.
 */
324 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu
*vcpu
)
327 kvm_s390_vcpu_initial_reset(vcpu
);
/*
 * Copy the general purpose registers supplied in regs into the vcpu
 * (arch.guest_gprs); the copy length is sizeof(regs->gprs).
 * NOTE(review): the source argument of the memcpy below contains a
 * registered-trademark character followed by "s"; this looks like a
 * mis-encoded ampersand followed by "regs" (an HTML entity artifact) -
 * confirm and repair. Statements around the memcpy and the return value
 * are not visible in this listing.
 */
332 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu
*vcpu
, struct kvm_regs
*regs
)
335 memcpy(&vcpu
->arch
.guest_gprs
, ®s
->gprs
, sizeof(regs
->gprs
));
/*
 * Copy the vcpu general purpose registers (arch.guest_gprs) out into regs;
 * the copy length is sizeof(regs->gprs).
 * NOTE(review): the destination argument of the memcpy below contains a
 * registered-trademark character followed by "s"; this looks like a
 * mis-encoded ampersand followed by "regs" (an HTML entity artifact) -
 * confirm and repair. Statements around the memcpy and the return value
 * are not visible in this listing.
 */
340 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu
*vcpu
, struct kvm_regs
*regs
)
343 memcpy(®s
->gprs
, &vcpu
->arch
.guest_gprs
, sizeof(regs
->gprs
));
/*
 * Load special registers from sregs into the vcpu: the access registers go
 * to arch.guest_acrs and the control registers go to the gcr array of the
 * SIE block. Copy lengths are taken from the sregs fields.
 * NOTE(review): statements around the copies and the return value are not
 * visible in this listing.
 */
348 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu
*vcpu
,
349 struct kvm_sregs
*sregs
)
352 memcpy(&vcpu
->arch
.guest_acrs
, &sregs
->acrs
, sizeof(sregs
->acrs
));
353 memcpy(&vcpu
->arch
.sie_block
->gcr
, &sregs
->crs
, sizeof(sregs
->crs
));
/*
 * Store the vcpu special registers into sregs: the access registers come
 * from arch.guest_acrs and the control registers come from the gcr array
 * of the SIE block. Copy lengths are taken from the sregs fields.
 * NOTE(review): statements around the copies and the return value are not
 * visible in this listing.
 */
358 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu
*vcpu
,
359 struct kvm_sregs
*sregs
)
362 memcpy(&sregs
->acrs
, &vcpu
->arch
.guest_acrs
, sizeof(sregs
->acrs
));
363 memcpy(&sregs
->crs
, &vcpu
->arch
.sie_block
->gcr
, sizeof(sregs
->crs
));
/*
 * Load the guest floating point state from fpu: the fprs array is copied
 * with memcpy (length sizeof(fpu->fprs)) and fpc is assigned directly.
 * The fpc value is stored as given here; masking with FPC_VALID_MASK is
 * done in kvm_arch_vcpu_load().
 * NOTE(review): statements around these lines and the return value are not
 * visible in this listing.
 */
368 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu
*vcpu
, struct kvm_fpu
*fpu
)
371 memcpy(&vcpu
->arch
.guest_fpregs
.fprs
, &fpu
->fprs
, sizeof(fpu
->fprs
));
372 vcpu
->arch
.guest_fpregs
.fpc
= fpu
->fpc
;
/*
 * Store the guest floating point state into fpu: the fprs array is copied
 * with memcpy (length sizeof(fpu->fprs)) and fpc is assigned directly.
 * NOTE(review): statements around these lines and the return value are not
 * visible in this listing.
 */
377 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu
*vcpu
, struct kvm_fpu
*fpu
)
380 memcpy(&fpu
->fprs
, &vcpu
->arch
.guest_fpregs
.fprs
, sizeof(fpu
->fprs
));
381 fpu
->fpc
= vcpu
->arch
.guest_fpregs
.fpc
;
/*
 * Set the guest PSW in the SIE block to the psw passed by value.
 *
 * The CPUSTAT_RUNNING bit of cpuflags is tested first.
 * NOTE(review): what happens when that bit is set (presumably an error is
 * reported and the PSW is left alone) and the return value are not visible
 * in this listing - confirm against the full source.
 */
386 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu
*vcpu
, psw_t psw
)
391 if (atomic_read(&vcpu
->arch
.sie_block
->cpuflags
) & CPUSTAT_RUNNING
)
394 vcpu
->arch
.sie_block
->gpsw
= psw
;
/*
 * Address translation ioctl. Not implemented: always returns -EINVAL and
 * does not touch vcpu or tr.
 */
399 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu
*vcpu
,
400 struct kvm_translation
*tr
)
402 return -EINVAL
; /* not implemented yet */
/*
 * Guest debug ioctl. Not implemented: always returns -EINVAL and does not
 * touch vcpu or dbg.
 */
405 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu
*vcpu
,
406 struct kvm_debug_guest
*dbg
)
408 return -EINVAL
; /* not implemented yet */
/*
 * Get-mp-state ioctl. Not implemented: always returns -EINVAL and does not
 * touch vcpu or mp_state.
 */
411 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu
*vcpu
,
412 struct kvm_mp_state
*mp_state
)
414 return -EINVAL
; /* not implemented yet */
/*
 * Set-mp-state ioctl. Not implemented: always returns -EINVAL and does not
 * touch vcpu or mp_state.
 */
417 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu
*vcpu
,
418 struct kvm_mp_state
*mp_state
)
420 return -EINVAL
; /* not implemented yet */
423 extern void s390_handle_mcck(void);
/*
 * Run the guest once through sie64a().
 *
 * Before entry, 16 bytes starting at guest_gprs[14] are copied into the
 * gg14 field of the SIE block (presumably guest registers 14 and 15 -
 * confirm the element size), the TIF_MCCK_PENDING thread flag is tested,
 * pending interrupts are delivered and icptcode is cleared. A non-zero
 * result from sie64a() is logged as a fault and answered by injecting a
 * PGM_ADDRESSING program interrupt into the guest. After exit the same 16
 * bytes are copied back from gg14 into guest_gprs[14].
 * NOTE(review): the action taken when TIF_MCCK_PENDING is set (presumably
 * a call to s390_handle_mcck) and any other short statements are not
 * visible in this listing.
 */
425 static void __vcpu_run(struct kvm_vcpu
*vcpu
)
427 memcpy(&vcpu
->arch
.sie_block
->gg14
, &vcpu
->arch
.guest_gprs
[14], 16);
432 if (test_thread_flag(TIF_MCCK_PENDING
))
435 kvm_s390_deliver_pending_interrupts(vcpu
);
/* start from a clean intercept code so the exit reason is unambiguous */
437 vcpu
->arch
.sie_block
->icptcode
= 0;
441 VCPU_EVENT(vcpu
, 6, "entering sie flags %x",
442 atomic_read(&vcpu
->arch
.sie_block
->cpuflags
));
443 if (sie64a(vcpu
->arch
.sie_block
, vcpu
->arch
.guest_gprs
)) {
444 VCPU_EVENT(vcpu
, 3, "%s", "fault in sie instruction");
445 kvm_s390_inject_program_int(vcpu
, PGM_ADDRESSING
);
447 VCPU_EVENT(vcpu
, 6, "exit sie icptcode %d",
448 vcpu
->arch
.sie_block
->icptcode
);
453 memcpy(&vcpu
->arch
.guest_gprs
[14], &vcpu
->arch
.sie_block
->gg14
, 16);
/*
 * KVM run ioctl for one vcpu.
 *
 * Visible flow:
 *  - if the vcpu has an active signal set, install it and remember the
 *    previous mask in sigsaved;
 *  - set CPUSTAT_RUNNING in the SIE block cpuflags;
 *  - assert that the local interrupt slot for this vcpu is registered;
 *  - depending on the exit reason left in kvm_run by the previous exit:
 *    for KVM_EXIT_S390_SIEIC the guest PSW mask and address are taken from
 *    kvm_run; KVM_EXIT_UNKNOWN and KVM_EXIT_S390_RESET are also accepted;
 *  - loop, handling intercepts in-kernel via kvm_handle_sie_intercept(),
 *    until a signal is pending or the handler returns non-zero;
 *  - for -ENOTSUPP, fill kvm_run with the intercept code, PSW, ipa and ipb
 *    so user space can handle the intercept;
 *  - restore the saved signal mask and count the exit to user space.
 * NOTE(review): the loop head (presumably calling __vcpu_run), the bodies
 * of several branches (signal pending, -EREMOTE, default case), the
 * vcpu load/put calls and the return statement are not visible in this
 * listing - confirm against the full source.
 */
456 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu
*vcpu
, struct kvm_run
*kvm_run
)
463 if (vcpu
->sigset_active
)
464 sigprocmask(SIG_SETMASK
, &vcpu
->sigset
, &sigsaved
);
466 atomic_set_mask(CPUSTAT_RUNNING
, &vcpu
->arch
.sie_block
->cpuflags
);
468 BUG_ON(vcpu
->kvm
->arch
.float_int
.local_int
[vcpu
->vcpu_id
] == NULL
);
470 switch (kvm_run
->exit_reason
) {
471 case KVM_EXIT_S390_SIEIC
:
/* user space may have changed the PSW while handling the intercept */
472 vcpu
->arch
.sie_block
->gpsw
.mask
= kvm_run
->s390_sieic
.mask
;
473 vcpu
->arch
.sie_block
->gpsw
.addr
= kvm_run
->s390_sieic
.addr
;
475 case KVM_EXIT_UNKNOWN
:
476 case KVM_EXIT_S390_RESET
:
486 rc
= kvm_handle_sie_intercept(vcpu
);
487 } while (!signal_pending(current
) && !rc
);
489 if (signal_pending(current
) && !rc
)
492 if (rc
== -ENOTSUPP
) {
493 /* intercept cannot be handled in-kernel, prepare kvm-run */
494 kvm_run
->exit_reason
= KVM_EXIT_S390_SIEIC
;
495 kvm_run
->s390_sieic
.icptcode
= vcpu
->arch
.sie_block
->icptcode
;
496 kvm_run
->s390_sieic
.mask
= vcpu
->arch
.sie_block
->gpsw
.mask
;
497 kvm_run
->s390_sieic
.addr
= vcpu
->arch
.sie_block
->gpsw
.addr
;
498 kvm_run
->s390_sieic
.ipa
= vcpu
->arch
.sie_block
->ipa
;
499 kvm_run
->s390_sieic
.ipb
= vcpu
->arch
.sie_block
->ipb
;
503 if (rc
== -EREMOTE
) {
504 /* intercept was handled, but userspace support is needed
505 * kvm_run has been prepared by the handler */
509 if (vcpu
->sigset_active
)
510 sigprocmask(SIG_SETMASK
, &sigsaved
, NULL
);
514 vcpu
->stat
.exit_userspace
++;
/*
 * Copy n bytes from the kernel buffer from to guest address guestdest,
 * using either copy_to_guest() or copy_to_guest_absolute(), and return the
 * result of whichever helper was called.
 * NOTE(review): the condition selecting between the two helpers is not
 * visible in this listing; presumably a non-zero prefix selects
 * copy_to_guest() - confirm against the full source.
 */
518 static int __guestcopy(struct kvm_vcpu
*vcpu
, u64 guestdest
, const void *from
,
519 unsigned long n
, int prefix
)
522 return copy_to_guest(vcpu
, guestdest
, from
, n
);
524 return copy_to_guest_absolute(vcpu
, guestdest
, from
, n
);
528 * store status at address
529 * we have two special cases:
530 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
531 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu register state into guest memory at addr.
 *
 * Two special addr values are recognised. For
 * KVM_S390_STORE_STATUS_NOADDR the archmode byte (value 1) is written to
 * guest address 163 with copy_to_guest_absolute(); for
 * KVM_S390_STORE_STATUS_PREFIXED it is written to address 163 with
 * copy_to_guest(). In both cases addr is then replaced by SAVE_AREA_BASE.
 *
 * After that each register group is copied through __guestcopy() to
 * addr plus the offset of the matching struct save_area_s390x field, with
 * a fixed length: floating point regs (128), general purpose regs (128),
 * PSW (16), prefix (4), fpc (4), todpr (4), cpu timer (8), clock
 * comparator (8), access regs (64) and control regs (128).
 * NOTE(review): the declaration and assignment of prefix, the action taken
 * when a copy fails and the return value are not visible in this listing -
 * confirm against the full source.
 */
533 int __kvm_s390_vcpu_store_status(struct kvm_vcpu
*vcpu
, unsigned long addr
)
535 const unsigned char archmode
= 1;
538 if (addr
== KVM_S390_STORE_STATUS_NOADDR
) {
539 if (copy_to_guest_absolute(vcpu
, 163ul, &archmode
, 1))
541 addr
= SAVE_AREA_BASE
;
543 } else if (addr
== KVM_S390_STORE_STATUS_PREFIXED
) {
544 if (copy_to_guest(vcpu
, 163ul, &archmode
, 1))
546 addr
= SAVE_AREA_BASE
;
551 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, fp_regs
),
552 vcpu
->arch
.guest_fpregs
.fprs
, 128, prefix
))
555 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, gp_regs
),
556 vcpu
->arch
.guest_gprs
, 128, prefix
))
559 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, psw
),
560 &vcpu
->arch
.sie_block
->gpsw
, 16, prefix
))
563 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, pref_reg
),
564 &vcpu
->arch
.sie_block
->prefix
, 4, prefix
))
567 if (__guestcopy(vcpu
,
568 addr
+ offsetof(struct save_area_s390x
, fp_ctrl_reg
),
569 &vcpu
->arch
.guest_fpregs
.fpc
, 4, prefix
))
572 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, tod_reg
),
573 &vcpu
->arch
.sie_block
->todpr
, 4, prefix
))
576 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, timer
),
577 &vcpu
->arch
.sie_block
->cputm
, 8, prefix
))
580 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, clk_cmp
),
581 &vcpu
->arch
.sie_block
->ckc
, 8, prefix
))
584 if (__guestcopy(vcpu
, addr
+ offsetof(struct save_area_s390x
, acc_regs
),
585 &vcpu
->arch
.guest_acrs
, 64, prefix
))
588 if (__guestcopy(vcpu
,
589 addr
+ offsetof(struct save_area_s390x
, ctrl_regs
),
590 &vcpu
->arch
.sie_block
->gcr
, 128, prefix
))
/*
 * File-local wrapper that calls __kvm_s390_vcpu_store_status() with the
 * same vcpu and addr and keeps the result in rc.
 * NOTE(review): the statements around the call (presumably loading and
 * putting the vcpu) and the return statement are not visible in this
 * listing - confirm against the full source.
 */
595 static int kvm_s390_vcpu_store_status(struct kvm_vcpu
*vcpu
, unsigned long addr
)
600 rc
= __kvm_s390_vcpu_store_status(vcpu
, addr
);
/*
 * vcpu-level ioctl dispatcher.
 *
 * The vcpu is taken from filp->private_data and arg is reinterpreted as a
 * user-space pointer. Visible request codes:
 *  - KVM_S390_INTERRUPT: copy a struct kvm_s390_interrupt from user space
 *    and return the result of kvm_s390_inject_vcpu();
 *  - KVM_S390_STORE_STATUS: pass arg itself (not a pointer) as the address
 *    to kvm_s390_vcpu_store_status();
 *  - KVM_S390_SET_INITIAL_PSW: copy a psw from user space and return the
 *    result of kvm_arch_vcpu_ioctl_set_initial_psw();
 *  - KVM_S390_INITIAL_RESET: return the result of
 *    kvm_arch_vcpu_ioctl_initial_reset().
 * NOTE(review): the switch statement, the declaration of psw, the handling
 * of a failed copy_from_user() and the default case are not visible in this
 * listing - confirm against the full source.
 */
605 long kvm_arch_vcpu_ioctl(struct file
*filp
,
606 unsigned int ioctl
, unsigned long arg
)
608 struct kvm_vcpu
*vcpu
= filp
->private_data
;
609 void __user
*argp
= (void __user
*)arg
;
612 case KVM_S390_INTERRUPT
: {
613 struct kvm_s390_interrupt s390int
;
615 if (copy_from_user(&s390int
, argp
, sizeof(s390int
)))
617 return kvm_s390_inject_vcpu(vcpu
, &s390int
);
619 case KVM_S390_STORE_STATUS
:
620 return kvm_s390_vcpu_store_status(vcpu
, arg
);
621 case KVM_S390_SET_INITIAL_PSW
: {
624 if (copy_from_user(&psw
, argp
, sizeof(psw
)))
626 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu
, psw
);
628 case KVM_S390_INITIAL_RESET
:
629 return kvm_arch_vcpu_ioctl_initial_reset(vcpu
);
636 /* Section: memory related */
/*
 * Install the guest memory region described by mem.
 *
 * Three properties of mem are tested before it is accepted: the guest
 * physical address (tested for non-zero), and the user-space address and
 * the size (each tested for bits below PAGE_SIZE, i.e. for not being page
 * aligned). The user-space address and size are then recorded in
 * arch.guest_origin and arch.guest_memsize.
 * NOTE(review): the last parameter line, the test on the slot number, the
 * action taken when a check fails (presumably an error return) and the
 * final return statement are not visible in this listing - confirm against
 * the full source.
 */
637 int kvm_arch_set_memory_region(struct kvm
*kvm
,
638 struct kvm_userspace_memory_region
*mem
,
639 struct kvm_memory_slot old
,
642 /* A few sanity checks. We can have exactly one memory slot which has
643 to start at guest virtual zero and which has to be located at a
644 page boundary in userland and which has to end at a page boundary.
645 The memory in userland is ok to be fragmented into various different
646 vmas. It is okay to mmap() and munmap() stuff in this slot after
647 doing this call at any time */
652 if (mem
->guest_phys_addr
)
655 if (mem
->userspace_addr
& (PAGE_SIZE
- 1))
658 if (mem
->memory_size
& (PAGE_SIZE
- 1))
661 kvm
->arch
.guest_origin
= mem
->userspace_addr
;
662 kvm
->arch
.guest_memsize
= mem
->memory_size
;
664 /* FIXME: we do want to interrupt running CPUs and update their memory
665 configuration now to avoid race conditions. But hey, changing the
666 memory layout while virtual CPUs are running is usually bad
667 programming practice. */
/*
 * Guest frame number alias hook; receives the VM and a gfn and returns a
 * gfn_t.
 * NOTE(review): the body is not visible in this listing; presumably gfn is
 * returned unchanged - confirm against the full source.
 */
672 gfn_t
unalias_gfn(struct kvm
*kvm
, gfn_t gfn
)
/*
 * Module init: registers with the common KVM code by calling kvm_init()
 * with a NULL opaque pointer, the size of struct kvm_vcpu and this module,
 * and returns its result.
 */
677 static int __init
kvm_s390_init(void)
679 return kvm_init(NULL
, sizeof(struct kvm_vcpu
), THIS_MODULE
);
/*
 * Module exit handler.
 * NOTE(review): the body is not visible in this listing; presumably it
 * undoes the kvm_init() call made in kvm_s390_init() - confirm.
 */
682 static void __exit
kvm_s390_exit(void)
/* Register kvm_s390_init and kvm_s390_exit as the module entry points. */
687 module_init(kvm_s390_init
);
688 module_exit(kvm_s390_exit
);