4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
26 * sun4v domain services PRI driver
29 #include <sys/types.h>
31 #include <sys/errno.h>
36 #include <sys/ksynch.h>
37 #include <sys/modctl.h>
39 #include <sys/devops.h>
40 #include <sys/debug.h>
41 #include <sys/cmn_err.h>
43 #include <sys/sunddi.h>
45 #include <sys/hypervisor_api.h>
46 #include <sys/machsystm.h>
47 #include <sys/sysmacros.h>
49 #include <sys/bitmap.h>
50 #include <sys/ds_pri.h>
/*
 * Debug switch for this driver. It is initialised to 0 here and nothing
 * in view assigns to it, so it is presumably patched by hand (for
 * example from a debugger) -- confirm.
 */
52 static uint_t ds_pri_debug
= 0;
/*
 * Debug print helper: expands to an unbraced "if (ds_pri_debug) printf",
 * so the caller's parenthesised argument list completes the printf call.
 * NOTE(review): because the expansion is a bare "if", using DS_PRI_DBG
 * as the body of an if/else would capture the following "else".
 */
53 #define DS_PRI_DBG if (ds_pri_debug) printf
/* Name of the minor node and prefix of the attach-time warnings. */
55 #define DS_PRI_NAME "ds_pri"
/* 64 KiB. No use of this limit is visible in this part of the file. */
59 #define DS_PRI_MAX_PRI_SIZE (64 * 1024)
/*
 * Test-only ioctl command numbers and their argument structures.
 * NOTE(review): only the closing "#endif" for TEST_HARNESS is visible
 * here; presumably the matching "#ifdef" precedes these defines and the
 * structure members sit between the braces -- confirm.
 */
61 #define DSIOC_TEST_REG 97
62 #define DSIOC_TEST_UNREG 98
63 #define DSIOC_TEST_DATA 99
65 struct ds_pri_test_data
{
70 struct ds_pri_test_data32
{
74 #endif /* TEST_HARNESS */
91 * The following are bit field flags. No service implies no DS PRI and
92 * no outstanding request.
/*
 * Bit flags kept in ds_pri_state.state. The driver tests them with "&",
 * sets them with "|=" and clears them with "&= ~".
 * DS_PRI_HAS_PRI is used the same way elsewhere in the file, but its
 * enumerator is not visible in this list.
 */
95 DS_PRI_NO_SERVICE
= 0x0,
/* Set by the registration callback, cleared by the unregister callback. */
96 DS_PRI_HAS_SERVICE
= 0x1,
/* Set by request_pri(); cleared when data arrives and on last close. */
97 DS_PRI_REQUESTED
= 0x2,
/*
 * Per-instance soft state. It is sized with sizeof (ds_pri_state_t) for
 * ddi_soft_state_init() and allocated with ddi_soft_state_zalloc() in
 * attach. The code also uses members named lock, cv, ds_pri, ds_pri_len,
 * req_id, gencount and num_opens; their declarations are not visible in
 * this part of the file.
 */
101 struct ds_pri_state
{
/* Combination of the DS_PRI_* bit flags. */
109 ds_pri_flags_t state
;
/* Service handle; DS_INVALID_HDL while the service is not registered. */
111 ds_svc_hdl_t ds_pri_handle
;
/* req_id of the latest request; replies must carry it as seq_num. */
115 uint64_t last_req_id
;
119 typedef struct ds_pri_state ds_pri_state_t
;
/*
 * Soft-state anchor handed to ddi_soft_state_init(), _zalloc(),
 * ddi_get_soft_state(), _free() and _fini() throughout this driver.
 */
121 static void *ds_pri_statep
;
/* Internal helpers. */
123 static void request_pri(ds_pri_state_t
*sp
);
124 static uint64_t ds_get_hv_pri(ds_pri_state_t
*sp
);
/* Entry points installed in ds_pri_dev_ops. */
126 static int ds_pri_getinfo(dev_info_t
*, ddi_info_cmd_t
, void *, void **);
127 static int ds_pri_attach(dev_info_t
*, ddi_attach_cmd_t
);
128 static int ds_pri_detach(dev_info_t
*, ddi_detach_cmd_t
);
/* Entry points installed in ds_pri_cb_ops. */
129 static int ds_pri_open(dev_t
*, int, int, cred_t
*);
130 static int ds_pri_close(dev_t
, int, int, cred_t
*);
131 static int ds_pri_read(dev_t
, struct uio
*, cred_t
*);
132 static int ds_pri_ioctl(dev_t
, int, intptr_t, int, cred_t
*, int *);
/* Domain Services callbacks installed in ds_pri_ops. */
137 static void ds_pri_reg_handler(ds_cb_arg_t
, ds_ver_t
*, ds_svc_hdl_t
);
138 static void ds_pri_unreg_handler(ds_cb_arg_t arg
);
139 static void ds_pri_data_handler(ds_cb_arg_t arg
, void *buf
, size_t buflen
);
142 * PRI DS capability registration
/*
 * Version offered for the PRI capability.
 * NOTE(review): presumably { major, minor } == 1.0, going by the variable
 * name -- confirm against the ds_ver_t declaration.
 */
145 static ds_ver_t ds_pri_ver_1_0
= { 1, 0 };
/*
 * Capability descriptor passed to ds_cap_init() in attach and to
 * ds_cap_fini() in detach. Its initialiser is not visible here.
 */
147 static ds_capability_t ds_pri_cap
= {
154 * PRI DS Client callback vector
/*
 * Callback vector handed to ds_cap_init(). Besides the three callbacks
 * listed here it has a cb_arg member, which attach sets to the dip and
 * detach resets to NULL; each callback receives it as its "arg".
 */
156 static ds_clnt_ops_t ds_pri_ops
= {
157 ds_pri_reg_handler
, /* ds_reg_cb */
158 ds_pri_unreg_handler
, /* ds_unreg_cb */
159 ds_pri_data_handler
, /* ds_data_cb */
164 * DS PRI driver Ops Vector
/*
 * Character-device entry points. Of the slots shown, only open, close,
 * read and ioctl are implemented by this driver; the rest are nodev,
 * nochpoll or ddi_prop_op, and there is no streamtab.
 */
166 static struct cb_ops ds_pri_cb_ops
= {
167 ds_pri_open
, /* cb_open */
168 ds_pri_close
, /* cb_close */
169 nodev
, /* cb_strategy */
170 nodev
, /* cb_print */
172 ds_pri_read
, /* cb_read */
173 nodev
, /* cb_write */
174 ds_pri_ioctl
, /* cb_ioctl */
175 nodev
, /* cb_devmap */
177 nodev
, /* cb_segmap */
178 nochpoll
, /* cb_chpoll */
179 ddi_prop_op
, /* cb_prop_op */
180 (struct streamtab
*)NULL
, /* cb_str */
181 D_MP
| D_64BIT
, /* cb_flag */
183 nodev
, /* cb_aread */
184 nodev
/* cb_awrite */
/*
 * Device operations vector. It supplies getinfo, attach and detach,
 * points at ds_pri_cb_ops for the character-device entry points, has no
 * bus_ops, and uses ddi_quiesce_not_needed for quiesce.
 */
187 static struct dev_ops ds_pri_dev_ops
= {
188 DEVO_REV
, /* devo_rev */
190 ds_pri_getinfo
, /* devo_getinfo */
191 nulldev
, /* devo_identify */
192 nulldev
, /* devo_probe */
193 ds_pri_attach
, /* devo_attach */
194 ds_pri_detach
, /* devo_detach */
195 nodev
, /* devo_reset */
196 &ds_pri_cb_ops
, /* devo_cb_ops */
197 (struct bus_ops
*)NULL
, /* devo_bus_ops */
198 nulldev
, /* devo_power */
199 ddi_quiesce_not_needed
, /* devo_quiesce */
/*
 * Loadable-module descriptors. Only the driver description string of
 * modldrv is visible here. modlinkage is the object later passed to
 * mod_install(), mod_info() and mod_remove().
 */
202 static struct modldrv modldrv
= {
204 "Domain Services PRI Driver",
208 static struct modlinkage modlinkage
= {
/*
 * B_TRUE once hsvc_register() has returned H_EOK for pboot_hsvc.
 * ds_get_hv_pri() resets it to B_FALSE when hv_mach_pri() reports
 * H_ENOTSUPPORTED or H_ENOACCESS. It gates both the static-PRI fetch in
 * attach and the hsvc_unregister() call on the unload path.
 */
214 static boolean_t hsvc_pboot_available
= B_FALSE
;
/*
 * Hypervisor service descriptor for the HSVC_GROUP_PBOOT API group.
 * NOTE(review): presumably the "1, 0" pair is the requested major/minor
 * version -- confirm against the hsvc_info_t declaration.
 */
215 static hsvc_info_t pboot_hsvc
= {
216 HSVC_REV_1
, NULL
, HSVC_GROUP_PBOOT
, 1, 0, NULL
/*
 * Module load path.
 * NOTE(review): the function header is not visible here; presumably this
 * is the body of _init() -- confirm.
 *
 * Visible steps: negotiate the pboot hypervisor API group, initialise the
 * soft-state anchor, then install the module.
 */
223 uint64_t hsvc_pboot_minor
;
/* Record whether the pboot group was negotiated; other code tests this. */
226 status
= hsvc_register(&pboot_hsvc
, &hsvc_pboot_minor
);
227 if (status
== H_EOK
) {
228 hsvc_pboot_available
= B_TRUE
;
230 DS_PRI_DBG("hypervisor services not negotiated "
231 "for group number: 0x%lx errorno: 0x%lx\n",
232 pboot_hsvc
.hsvc_group
, status
);
/* Third argument 0: no soft-state items are requested up front. */
235 retval
= ddi_soft_state_init(&ds_pri_statep
,
236 sizeof (ds_pri_state_t
), 0);
240 retval
= mod_install(&modlinkage
);
/*
 * NOTE(review): presumably this undo runs only when mod_install()
 * failed; the controlling test is not visible here -- confirm.
 */
242 ddi_soft_state_fini(&ds_pri_statep
);
/*
 * Module information entry point: returns whatever mod_info() reports
 * for this module's modlinkage, filling in *modinfop.
 */
251 _info(struct modinfo
*modinfop
)
253 return (mod_info(&modlinkage
, modinfop
));
/*
 * Module unload path.
 * NOTE(review): the function header is not visible here; presumably this
 * is the body of _fini() -- confirm.
 *
 * Visible steps: try mod_remove(); the statement taken when it returns
 * non-zero is not visible. Then release the soft-state anchor and, if the
 * pboot group is still registered, unregister it.
 */
262 if ((retval
= mod_remove(&modlinkage
)) != 0)
265 ddi_soft_state_fini(&ds_pri_statep
);
/* ds_get_hv_pri() may already have unregistered and cleared the flag. */
267 if (hsvc_pboot_available
)
268 (void) hsvc_unregister(&pboot_hsvc
);
/*
 * getinfo entry point (installed as devo_getinfo).
 *
 * dip     - not referenced in the lines visible here
 * cmd     - DDI_INFO_DEVT2DEVINFO or DDI_INFO_DEVT2INSTANCE
 * arg     - a dev_t passed as a pointer-sized value
 * resultp - out parameter; must not be NULL (asserted)
 *
 * retval starts as DDI_FAILURE and is set to DDI_SUCCESS on the paths
 * shown. The minor number doubles as the instance number, matching the
 * minor node created in attach.
 */
276 ds_pri_getinfo(dev_info_t
*dip
, ddi_info_cmd_t cmd
, void *arg
, void **resultp
)
279 int retval
= DDI_FAILURE
;
281 ASSERT(resultp
!= NULL
);
284 case DDI_INFO_DEVT2DEVINFO
:
/* What is stored in *resultp for this case is not visible here. */
285 sp
= ddi_get_soft_state(ds_pri_statep
, getminor((dev_t
)arg
));
288 retval
= DDI_SUCCESS
;
293 case DDI_INFO_DEVT2INSTANCE
:
/* The instance is returned by value, encoded in the pointer. */
294 *resultp
= (void *)(uintptr_t)getminor((dev_t
)arg
);
295 retval
= DDI_SUCCESS
;
/*
 * attach entry point (installed as devo_attach).
 *
 * From the lines visible here: allocates and fetches the per-instance
 * soft state, initialises its mutex and condition variable, creates the
 * "ds_pri" character minor node with minor number == instance, claims
 * the single ds_pri_ops.cb_arg slot for this dip, optionally loads the
 * static hypervisor PRI, and registers the capability via ds_cap_init().
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
307 ds_pri_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
/* The cmd cases that select these two early returns are not visible. */
319 return (DDI_SUCCESS
);
322 return (DDI_FAILURE
);
325 instance
= ddi_get_instance(dip
);
/* Nothing else has been set up yet, so this failure returns directly. */
327 if (ddi_soft_state_zalloc(ds_pri_statep
, instance
) !=
329 cmn_err(CE_WARN
, "%s@%d: Unable to allocate state",
330 DS_PRI_NAME
, instance
);
331 return (DDI_FAILURE
);
333 sp
= ddi_get_soft_state(ds_pri_statep
, instance
);
335 mutex_init(&sp
->lock
, NULL
, MUTEX_DEFAULT
, NULL
);
336 cv_init(&sp
->cv
, NULL
, CV_DEFAULT
, NULL
);
338 if (ddi_create_minor_node(dip
, DS_PRI_NAME
, S_IFCHR
, instance
,
339 DDI_PSEUDO
, 0) != DDI_SUCCESS
) {
340 cmn_err(CE_WARN
, "%s@%d: Unable to create minor node",
341 DS_PRI_NAME
, instance
);
/*
 * cb_arg is one file-scope slot shared by all instances; non-NULL means
 * some dip already claimed it. The statement controlled by this test is
 * not visible here.
 */
345 if (ds_pri_ops
.cb_arg
!= NULL
)
347 ds_pri_ops
.cb_arg
= dip
;
349 sp
->state
= DS_PRI_NO_SERVICE
;
351 /* Until the service registers the handle is invalid */
352 sp
->ds_pri_handle
= DS_INVALID_HDL
;
360 * See if we can get the static hv pri data. Static pri data
361 * is only available for privileged domains.
/* A non-zero status from ds_get_hv_pri() is reported at CE_NOTE. */
363 if (hsvc_pboot_available
) {
364 if ((status
= ds_get_hv_pri(sp
)) != 0) {
365 cmn_err(CE_NOTE
, "ds_get_hv_pri failed: 0x%lx", status
);
/* From here on the DS framework may invoke the ds_pri_ops callbacks. */
369 if ((rv
= ds_cap_init(&ds_pri_cap
, &ds_pri_ops
)) != 0) {
370 cmn_err(CE_NOTE
, "ds_cap_init failed: %d", rv
);
376 return (DDI_SUCCESS
);
/*
 * Cleanup sequence ending in DDI_FAILURE.
 * NOTE(review): presumably the shared failure path reached from the
 * error branches above; the label and any guard around the kmem_free()
 * are not visible here -- confirm.
 */
380 kmem_free(sp
->ds_pri
, sp
->ds_pri_len
);
381 ddi_remove_minor_node(dip
, NULL
);
383 mutex_destroy(&sp
->lock
);
384 ddi_soft_state_free(ds_pri_statep
, instance
);
385 return (DDI_FAILURE
);
/*
 * detach entry point (installed as devo_detach).
 *
 * From the lines visible here: unregisters the capability, frees any PRI
 * buffer still held, releases the ds_pri_ops.cb_arg slot, removes the
 * minor node, destroys the mutex and frees the soft state.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
392 ds_pri_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
398 instance
= ddi_get_instance(dip
);
399 sp
= ddi_get_soft_state(ds_pri_statep
, instance
);
/* The cmd cases that select these two early returns are not visible. */
406 return (DDI_SUCCESS
);
409 return (DDI_FAILURE
);
412 /* This really shouldn't fail - but check anyway */
413 if ((rv
= ds_cap_fini(&ds_pri_cap
)) != 0) {
414 cmn_err(CE_WARN
, "ds_cap_fini failed: %d", rv
);
/*
 * NOTE(review): sp is NULL-checked here but dereferenced unconditionally
 * by the mutex_destroy() below. Presumably sp cannot be NULL for an
 * attached instance -- confirm, or make the two sites consistent.
 */
417 if (sp
!= NULL
&& sp
->ds_pri_len
!= 0)
418 kmem_free(sp
->ds_pri
, sp
->ds_pri_len
);
/* Release the slot so that a later attach can claim it again. */
420 ds_pri_ops
.cb_arg
= NULL
;
422 ddi_remove_minor_node(dip
, NULL
);
424 mutex_destroy(&sp
->lock
);
425 ddi_soft_state_free(ds_pri_statep
, instance
);
427 return (DDI_SUCCESS
);
/*
 * open entry point (installed as cb_open).
 *
 * devp  - device being opened; its minor number selects the instance
 * flag  - not referenced in the lines visible here
 * otyp  - compared against OTYP_CHR
 * credp - not referenced in the lines visible here
 *
 * If no PRI is cached, the caller waits on sp->cv until the service has
 * registered (DS_PRI_HAS_SERVICE). The return values are not visible in
 * this part of the file.
 */
433 ds_pri_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*credp
)
438 if (otyp
!= OTYP_CHR
)
441 instance
= getminor(*devp
);
442 sp
= ddi_get_soft_state(ds_pri_statep
, instance
);
446 mutex_enter(&sp
->lock
);
449 * Proceed if we have PRI data (possibly obtained from
450 * static HV PRI or last pushed DS PRI data update).
451 * If no PRI data and we have no DS PRI service then this
452 * means that PRI DS has never called the registration callback.
453 * A while loop is necessary as we might have been woken up
454 * prematurely, e.g., due to a debugger or "pstack" etc.
455 * Wait here and the callback will signal us when it has completed
458 if (!(sp
->state
& DS_PRI_HAS_PRI
)) {
459 while (!(sp
->state
& DS_PRI_HAS_SERVICE
)) {
/* cv_wait_sig() returning 0: interrupted by a signal; drop the lock. */
460 if (cv_wait_sig(&sp
->cv
, &sp
->lock
) == 0) {
461 mutex_exit(&sp
->lock
);
468 mutex_exit(&sp
->lock
);
/* Debug-only read of sp->state, made after the lock was dropped. */
470 DS_PRI_DBG("ds_pri_open: state = 0x%x\n", sp
->state
);
/*
 * close entry point (installed as cb_close).
 *
 * dev   - device being closed; its minor number selects the instance
 * flag  - not referenced in the lines visible here
 * otyp  - compared against OTYP_CHR
 * credp - not referenced in the lines visible here
 *
 * With the lock held: does nothing further if the service is down;
 * otherwise decrements num_opens and, when it is no longer positive,
 * clears DS_PRI_REQUESTED. The return values are not visible here.
 */
478 ds_pri_close(dev_t dev
, int flag
, int otyp
, cred_t
*credp
)
483 if (otyp
!= OTYP_CHR
)
486 DS_PRI_DBG("ds_pri_close\n");
488 instance
= getminor(dev
);
489 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
492 mutex_enter(&sp
->lock
);
/* No service: the lock is dropped without touching num_opens. */
493 if (!(sp
->state
& DS_PRI_HAS_SERVICE
)) {
494 mutex_exit(&sp
->lock
);
/* Other opens remain: only the counter changes. */
498 if (--sp
->num_opens
> 0) {
499 mutex_exit(&sp
->lock
);
/* No opens left: forget any outstanding request. */
503 sp
->state
&= ~DS_PRI_REQUESTED
;
504 mutex_exit(&sp
->lock
);
/*
 * read entry point (installed as cb_read).
 *
 * dev   - device being read; its minor number selects the instance
 * uiop  - destination; uio_resid bounds the transfer length
 * credp - not referenced in the lines visible here
 *
 * Copies up to uio_resid bytes from the start of the cached PRI. With no
 * PRI cached, a caller with FNDELAY or FNONBLOCK set does not wait; any
 * other caller waits on sp->cv until DS_PRI_HAS_PRI is set or a signal
 * arrives. uio_offset is restored afterwards, so every read starts at
 * byte 0 of the PRI. The error codes returned on the early-exit paths
 * are not visible in this part of the file.
 */
511 ds_pri_read(dev_t dev
, struct uio
*uiop
, cred_t
*credp
)
/* Saved so that it can be put back after uiomove() advances it. */
518 offset_t off
= uiop
->uio_offset
;
520 instance
= getminor(dev
);
521 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
524 len
= uiop
->uio_resid
;
529 mutex_enter(&sp
->lock
);
531 DS_PRI_DBG("ds_pri_read: state = 0x%x\n", sp
->state
);
533 /* block or bail if there is no current PRI */
534 if (!(sp
->state
& DS_PRI_HAS_PRI
)) {
535 DS_PRI_DBG("ds_pri_read: no PRI held\n");
537 if (uiop
->uio_fmode
& (FNDELAY
| FNONBLOCK
)) {
538 mutex_exit(&sp
->lock
);
542 while (!(sp
->state
& DS_PRI_HAS_PRI
)) {
543 DS_PRI_DBG("ds_pri_read: state = 0x%x\n", sp
->state
);
/* cv_wait_sig() returning 0: interrupted by a signal; drop the lock. */
545 if (cv_wait_sig(&sp
->cv
, &sp
->lock
) == 0) {
546 mutex_exit(&sp
->lock
);
/* Never hand out more than the cached PRI holds. */
552 if (len
> sp
->ds_pri_len
)
553 len
= sp
->ds_pri_len
;
/*
 * NOTE(review): presumably this unlock belongs to an early-exit branch
 * whose test is not visible (the lock is released again after the
 * bcopy below) -- confirm.
 */
556 mutex_exit(&sp
->lock
);
561 * We're supposed to move the data out to userland, but
562 * that can suspend because of page faults etc., and meanwhile
563 * other parts of this driver want to update the PRI buffer ...
564 * we could hold the data buffer locked with a flag etc.,
565 * but that's still a lock ... a simpler mechanism - if not quite
566 * as performance efficient is to simply clone here the part of
567 * the buffer we care about and then the original can be released
568 * for further updates while the uiomove continues.
/* Snapshot under the lock, then copy out to the user without it. */
571 tmpbufp
= kmem_alloc(len
, KM_SLEEP
);
572 bcopy(((caddr_t
)sp
->ds_pri
), tmpbufp
, len
);
573 mutex_exit(&sp
->lock
);
575 retval
= uiomove(tmpbufp
, len
, UIO_READ
, uiop
);
577 kmem_free(tmpbufp
, len
);
580 * restore uio_offset after uiomove since the driver
581 * does not support the concept of position.
583 uiop
->uio_offset
= off
;
/*
 * ioctl entry point (installed as cb_ioctl).
 *
 * dev  - device; its minor number selects the instance
 * cmd  - DSPRI_GETINFO is the only case label visible here; the second
 *        code path logs itself as "DSPRI_WAIT"
 * arg  - user address of the command's argument
 * mode - passed through to ddi_copyout()
 *
 * GETINFO copies out a struct dspri_info holding the current PRI size
 * and generation token. The second path copies in a generation count and
 * waits until a PRI is present whose generation differs from it. The
 * return values are not visible in this part of the file.
 */
591 ds_pri_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int mode
, cred_t
*credp
,
597 instance
= getminor(dev
);
598 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
602 case DSPRI_GETINFO
: {
603 struct dspri_info info
;
609 * We are not guaranteed that ddi_copyout(9F) will read
610 * atomically anything larger than a byte. Therefore we
611 * must duplicate the size before copying it out to the user.
613 mutex_enter(&sp
->lock
);
616 if (sp
->state
& DS_PRI_HAS_PRI
) {
617 /* If we have a PRI simply return the info */
618 info
.size
= sp
->ds_pri_len
;
619 info
.token
= sp
->gencount
;
621 if (!(sp
->state
& DS_PRI_HAS_SERVICE
)) {
622 /* If we have no service return a nil response */
627 /* wait for something & check again */
/* cv_wait_sig() returning 0: interrupted by a signal; drop the lock. */
628 if (cv_wait_sig(&sp
->cv
, &sp
->lock
) == 0) {
629 mutex_exit(&sp
->lock
);
634 DS_PRI_DBG("ds_pri_ioctl: DSPRI_GETINFO sz=0x%lx tok=0x%lx\n",
635 info
.size
, info
.token
);
636 mutex_exit(&sp
->lock
);
/* The local snapshot is copied out after the lock has been dropped. */
638 if (ddi_copyout(&info
, (void *)arg
, sizeof (info
), mode
) != 0)
/* Second command: fetch the generation count the caller last saw. */
646 if (ddi_copyin((void *)arg
, &gencount
, sizeof (gencount
),
650 mutex_enter(&sp
->lock
);
652 DS_PRI_DBG("ds_pri_ioctl: DSPRI_WAIT gen=0x%lx sp->gen=0x%lx\n",
653 gencount
, sp
->gencount
);
/* Keep waiting while there is no PRI or its generation is unchanged. */
655 while ((sp
->state
& DS_PRI_HAS_PRI
) == 0 ||
656 gencount
== sp
->gencount
) {
/* The statement controlled by this test is not visible here. */
657 if ((sp
->state
& DS_PRI_HAS_PRI
) == 0)
659 if (cv_wait_sig(&sp
->cv
, &sp
->lock
) == 0) {
660 mutex_exit(&sp
->lock
);
664 mutex_exit(&sp
->lock
);
675 /* assumes sp->lock is held when called */
/*
 * Ask the PRI domain service for a fresh copy of the PRI.
 *
 * sp - per-instance state; sp->lock must be held (asserted)
 *
 * Any cached PRI is freed and DS_PRI_HAS_PRI cleared before a
 * header-only DS_PRI_REQUEST is sent with a newly incremented req_id.
 * DS_PRI_REQUESTED is then set and last_req_id records the id, so that
 * the data handler can match the reply.
 */
677 request_pri(ds_pri_state_t
*sp
)
681 ASSERT(MUTEX_HELD(&sp
->lock
));
683 /* If a request is already pending we're done */
/* The statements controlled by the next two tests are not visible. */
684 if (!(sp
->state
& DS_PRI_HAS_SERVICE
))
686 if (sp
->state
& DS_PRI_REQUESTED
)
689 /* If we have an old PRI - remove it */
690 if (sp
->state
& DS_PRI_HAS_PRI
) {
691 ASSERT(sp
->ds_pri_len
!= 0);
692 ASSERT(sp
->ds_pri
!= NULL
);
694 /* remove the old data if we have an outstanding request */
695 kmem_free(sp
->ds_pri
, sp
->ds_pri_len
);
698 sp
->state
&= ~DS_PRI_HAS_PRI
;
/* After this point no PRI buffer may remain cached. */
700 ASSERT(sp
->ds_pri
== NULL
);
701 ASSERT(sp
->ds_pri_len
== 0);
/* Pre-increment: every request carries a new sequence number. */
704 reqmsg
.hdr
.seq_num
= ++(sp
->req_id
);
705 reqmsg
.hdr
.type
= DS_PRI_REQUEST
;
707 DS_PRI_DBG("request_pri: request id 0x%lx\n", sp
->req_id
);
710 * Request consists of header only.
711 * We don't care about fail status for ds_send;
712 * if it does fail we will get an unregister callback
713 * from the DS framework and we handle the state change
716 (void) ds_cap_send(sp
->ds_pri_handle
, &reqmsg
, sizeof (reqmsg
.hdr
));
718 sp
->state
|= DS_PRI_REQUESTED
;
719 sp
->last_req_id
= sp
->req_id
;
/*
 * Registration callback (ds_reg_cb slot of ds_pri_ops).
 *
 * arg - the dip stored in ds_pri_ops.cb_arg by attach
 * ver - negotiated version; only used in the debug message
 * hdl - service handle to use for later ds_cap_send() calls
 *
 * Under sp->lock, records the handle, sets DS_PRI_HAS_SERVICE and wakes
 * every thread waiting on sp->cv.
 */
727 ds_pri_reg_handler(ds_cb_arg_t arg
, ds_ver_t
*ver
, ds_svc_hdl_t hdl
)
729 dev_info_t
*dip
= arg
;
733 instance
= ddi_get_instance(dip
);
734 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
737 DS_PRI_DBG("ds_pri_reg_handler: registering handle 0x%lx for version "
738 "0x%x:0x%x\n", (uint64_t)hdl
, ver
->major
, ver
->minor
);
740 /* When the domain service comes up automatically update the state */
741 mutex_enter(&sp
->lock
);
/* Registration is expected only while no handle is currently held. */
743 ASSERT(sp
->ds_pri_handle
== DS_INVALID_HDL
);
744 sp
->ds_pri_handle
= hdl
;
746 ASSERT(!(sp
->state
& DS_PRI_HAS_SERVICE
));
747 sp
->state
|= DS_PRI_HAS_SERVICE
;
750 * Cannot request a PRI here, because the reg handler cannot
751 * do a DS send operation - we take care of this later.
752 * Static hv pri data might be available.
755 /* Wake up anyone waiting in open() */
756 cv_broadcast(&sp
->cv
);
758 mutex_exit(&sp
->lock
);
/*
 * Unregister callback (ds_unreg_cb slot of ds_pri_ops).
 *
 * arg - the dip stored in ds_pri_ops.cb_arg by attach
 *
 * Under sp->lock, invalidates the service handle and clears
 * DS_PRI_HAS_SERVICE. The lines visible here neither free the cached PRI
 * nor signal sp->cv.
 */
763 ds_pri_unreg_handler(ds_cb_arg_t arg
)
765 dev_info_t
*dip
= arg
;
769 instance
= ddi_get_instance(dip
);
770 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
773 DS_PRI_DBG("ds_pri_unreg_handler: un-registering ds_pri service\n");
775 mutex_enter(&sp
->lock
);
778 * Note that if the service goes offline, we don't
779 * free up the current PRI data at hand. It is assumed
780 * that PRI DS service will only push new update when
781 * it comes online. We mark the state to indicate no
782 * DS PRI service is available. The current PRI data if
783 * available is provided to the consumers.
785 sp
->ds_pri_handle
= DS_INVALID_HDL
;
786 sp
->state
&= ~DS_PRI_HAS_SERVICE
;
788 mutex_exit(&sp
->lock
);
/*
 * Data callback (ds_data_cb slot of ds_pri_ops).
 *
 * arg    - the dip stored in ds_pri_ops.cb_arg by attach
 * buf    - incoming message, interpreted as a ds_pri_msg_t
 * buflen - total message length, header included
 *
 * Dispatches on the message type. For PRI data answering an outstanding
 * request, the payload after the header is copied into a fresh kernel
 * buffer, DS_PRI_REQUESTED is replaced by DS_PRI_HAS_PRI and waiters on
 * sp->cv are woken. The bodies of several branches are not visible in
 * this part of the file.
 */
793 ds_pri_data_handler(ds_cb_arg_t arg
, void *buf
, size_t buflen
)
795 dev_info_t
*dip
= arg
;
802 msgp
= (ds_pri_msg_t
*)buf
;
804 /* make sure the header is at least valid */
805 if (buflen
< sizeof (msgp
->hdr
))
808 DS_PRI_DBG("ds_pri_data_handler: msg buf len 0x%lx : type 0x%lx, "
809 "seqn 0x%lx\n", buflen
, msgp
->hdr
.type
, msgp
->hdr
.seq_num
);
811 instance
= ddi_get_instance(dip
);
812 if ((sp
= ddi_get_soft_state(ds_pri_statep
, instance
)) == NULL
)
815 mutex_enter(&sp
->lock
);
/* Data is only expected while the service is registered. */
817 ASSERT(sp
->state
& DS_PRI_HAS_SERVICE
);
819 switch (msgp
->hdr
.type
) {
820 case DS_PRI_DATA
: /* in response to a request from us */
822 case DS_PRI_UPDATE
: /* asynch notification */
823 /* our default response to this is to request the PRI */
824 /* simply issue a request for the new PRI */
827 default: /* ignore garbage or unknown message types */
832 * If there is no pending PRI request, then we've received a
833 * bogus data message ... so ignore it.
836 if (!(sp
->state
& DS_PRI_REQUESTED
)) {
837 cmn_err(CE_WARN
, "Received DS pri data without request");
841 /* response to a request therefore old PRI must be gone */
842 ASSERT(!(sp
->state
& DS_PRI_HAS_PRI
));
843 ASSERT(sp
->ds_pri_len
== 0);
844 ASSERT(sp
->ds_pri
== NULL
);
846 /* response seq_num should match our request seq_num */
847 if (msgp
->hdr
.seq_num
!= sp
->last_req_id
) {
848 cmn_err(CE_WARN
, "Received DS pri data out of sequence with "
/* Cannot underflow: buflen >= sizeof (msgp->hdr) was tested on entry. */
853 pri_size
= buflen
- sizeof (msgp
->hdr
);
855 cmn_err(CE_WARN
, "Received DS pri data of size 0");
858 data
= kmem_alloc(pri_size
, KM_SLEEP
);
/* The store of "data" into sp->ds_pri is not visible in these lines. */
860 sp
->ds_pri_len
= pri_size
;
861 bcopy(msgp
->data
, data
, sp
->ds_pri_len
);
/* The request is satisfied: swap REQUESTED for HAS_PRI. */
862 sp
->state
&= ~DS_PRI_REQUESTED
;
863 sp
->state
|= DS_PRI_HAS_PRI
;
/* Wake the threads waiting on sp->cv in open, read and ioctl. */
866 cv_broadcast(&sp
->cv
);
869 mutex_exit(&sp
->lock
);
873 * Routine to get static PRI data from the Hypervisor.
874 * If successful, this PRI data is the last known PRI
875 * data generated since the last poweron reset.
/*
 * Fetch the static PRI from the hypervisor into sp.
 *
 * sp - per-instance state that receives ds_pri, ds_pri_len and the
 *      DS_PRI_HAS_PRI flag on success
 *
 * hv_mach_pri() is called twice: first with address 0 to learn the size,
 * then with the physical address of a contiguous bounce buffer. On H_EOK
 * the data is copied into a kmem buffer owned by sp. The bounce buffer
 * is released with contig_mem_free(). The prototype declares a uint64_t
 * return; the return statements are not visible in this part of the
 * file.
 */
878 ds_get_hv_pri(ds_pri_state_t
*sp
)
884 caddr_t buf_va
= NULL
;
888 * Get pri buffer size by calling hcall with buffer size 0.
891 status
= hv_mach_pri((uint64_t)0, &pri_size
);
892 if (status
== H_ENOTSUPPORTED
|| status
== H_ENOACCESS
) {
894 * hv_mach_pri() is not supported on a guest domain.
895 * Unregister pboot API group to prevent failures.
/* Clearing the flag also stops the unload path unregistering again. */
897 (void) hsvc_unregister(&pboot_hsvc
);
898 hsvc_pboot_available
= B_FALSE
;
899 DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri service is not "
900 "available. errorno: 0x%lx\n", status
);
902 } else if (pri_size
== 0) {
905 DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri pri size: 0x%lx\n",
910 * contig_mem_alloc requires size to be a power of 2.
911 * Increase size to next power of 2 if necessary.
/*
 * NOTE(review): the literal 1 is an int, so the shift is evaluated in
 * int width -- confirm pri_size is bounded well below 2^31. Also, if
 * highbit() returns a 1-based bit index, a pri_size that is already a
 * power of two is doubled rather than kept -- confirm this is intended.
 */
914 buf_size
= 1 << highbit(pri_size
);
915 DS_PRI_DBG("ds_get_hv_pri: buf_size = 0x%lx\n", buf_size
);
917 buf_va
= contig_mem_alloc(buf_size
);
/* The hypervisor call takes the buffer's physical address. */
921 buf_pa
= va_to_pa(buf_va
);
922 DS_PRI_DBG("ds_get_hv_pri: buf_pa 0x%lx\n", buf_pa
);
923 status
= hv_mach_pri(buf_pa
, &pri_size
);
924 DS_PRI_DBG("ds_get_hv_pri: hv_mach_pri status = 0x%lx\n", status
);
/*
 * NOTE(review): the second call may rewrite pri_size, which is then used
 * as the copy length. No comparison against buf_size is visible here;
 * presumably the hypervisor never reports more than the buffer holds on
 * H_EOK -- confirm.
 */
926 if (status
== H_EOK
) {
927 pri_data
= kmem_alloc(pri_size
, KM_SLEEP
);
928 sp
->ds_pri
= pri_data
;
929 sp
->ds_pri_len
= pri_size
;
930 bcopy(buf_va
, pri_data
, sp
->ds_pri_len
);
931 sp
->state
|= DS_PRI_HAS_PRI
;
935 contig_mem_free(buf_va
, buf_size
);