/*
 * (c) 2017 Stefano Stabellini <stefano@aporeto.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include <linux/inet.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/module.h>
#include <linux/semaphore.h>
#include <linux/wait.h>

#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/request_sock.h>

#include <xen/events.h>
#include <xen/grant_table.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/interface/io/pvcalls.h>
#define PVCALLS_VERSIONS "1"
#define MAX_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER
struct pvcalls_back_global {
	struct list_head frontends;
	struct semaphore frontends_lock;
} pvcalls_back_global;
/*
 * Per-frontend data structure. It contains pointers to the command
 * ring, its event channel, a list of active sockets and a tree of
 * passive sockets.
 */
struct pvcalls_fedata {
	struct list_head list;
	struct xenbus_device *dev;
	struct xen_pvcalls_sring *sring;
	struct xen_pvcalls_back_ring ring;
	int irq;

	struct list_head socket_mappings;
	struct radix_tree_root socketpass_mappings;
	struct semaphore socket_lock;
};
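
/*
 * socket_lock protects both socket_mappings (the list of active
 * sockets) and socketpass_mappings (the tree of passive sockets);
 * take it before walking or modifying either.
 */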
struct pvcalls_ioworker {
	struct work_struct register_work;
	struct workqueue_struct *wq;
};

/* Active socket: a connected stream with a mapped data ring. */
struct sock_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *sockpass;
	struct socket *sock;
	uint64_t id;
	grant_ref_t ref;
	struct pvcalls_data_intf *ring;
	void *bytes;
	struct pvcalls_data data;
	uint32_t ring_order;
	int irq;
	atomic_t read;
	atomic_t write;
	atomic_t io;
	atomic_t release;
	void (*saved_data_ready)(struct sock *sk);
	struct pvcalls_ioworker ioworker;
};
struct sockpass_mapping {
	struct list_head list;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	uint64_t id;
	struct xen_pvcalls_request reqcopy;
	spinlock_t copy_lock;
	struct workqueue_struct *wq;
	struct work_struct register_work;
	void (*saved_data_ready)(struct sock *sk);
};
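
/*
 * reqcopy holds the single in-flight accept or poll request for this
 * passive socket; copy_lock serializes writers against the readers in
 * __pvcalls_back_accept and pvcalls_pass_sk_data_ready. A zero cmd
 * means no request is pending.
 */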
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map);
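
/*
 * Data path. Each active socket has an "in" and an "out" array of
 * XEN_FLEX_RING_SIZE(ring_order) bytes, with producer/consumer indexes
 * kept in the shared pvcalls_data_intf. Reads and writes follow the
 * usual lockless ring discipline: read the indexes, barrier, touch the
 * data, barrier, publish the new indexes, then kick the other end.
 */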
static void pvcalls_conn_back_read(void *opaque)
{
	struct sock_mapping *map = (struct sock_mapping *)opaque;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
	int32_t error;
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	unsigned long flags;
	int ret;

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	cons = intf->in_cons;
	prod = intf->in_prod;
	error = intf->in_error;
	/* read the indexes first, then deal with the data */
	virt_mb();

	if (error)
		return;

	size = pvcalls_queued(prod, cons, array_size);
	if (size >= array_size)
		return;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
		atomic_set(&map->read, 0);
		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
				flags);
		return;
	}
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
	wanted = array_size - size;
	masked_prod = pvcalls_mask(prod, array_size);
	masked_cons = pvcalls_mask(cons, array_size);

	memset(&msg, 0, sizeof(msg));
	msg.msg_iter.type = ITER_KVEC|WRITE;
	msg.msg_iter.count = wanted;
	if (masked_prod < masked_cons) {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = wanted;
		msg.msg_iter.kvec = vec;
		msg.msg_iter.nr_segs = 1;
	} else {
		vec[0].iov_base = data->in + masked_prod;
		vec[0].iov_len = array_size - masked_prod;
		vec[1].iov_base = data->in;
		vec[1].iov_len = wanted - vec[0].iov_len;
		msg.msg_iter.kvec = vec;
		msg.msg_iter.nr_segs = 2;
	}

	atomic_set(&map->read, 0);
	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
	WARN_ON(ret > wanted);
	if (ret == -EAGAIN) /* shouldn't happen */
		return;
	if (!ret)
		ret = -ENOTCONN;
	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
		atomic_inc(&map->read);
	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);

	/* write the data, then modify the indexes */
	virt_wmb();
	if (ret < 0)
		intf->in_error = ret;
	else
		intf->in_prod = prod + ret;
	/* update the indexes, then notify the other end */
	virt_wmb();
	notify_remote_via_irq(map->irq);
}
static void pvcalls_conn_back_write(struct sock_mapping *map)
{
	struct pvcalls_data_intf *intf = map->ring;
	struct pvcalls_data *data = &map->data;
	struct msghdr msg;
	struct kvec vec[2];
	RING_IDX cons, prod, size, array_size;
	int ret;

	cons = intf->out_cons;
	prod = intf->out_prod;
	/* read the indexes before dealing with the data */
	virt_mb();

	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
	size = pvcalls_queued(prod, cons, array_size);
	if (size == 0)
		return;

	memset(&msg, 0, sizeof(msg));
	msg.msg_flags |= MSG_DONTWAIT;
	msg.msg_iter.type = ITER_KVEC|READ;
	msg.msg_iter.count = size;
	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = size;
		msg.msg_iter.kvec = vec;
		msg.msg_iter.nr_segs = 1;
	} else {
		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
		vec[1].iov_base = data->out;
		vec[1].iov_len = size - vec[0].iov_len;
		msg.msg_iter.kvec = vec;
		msg.msg_iter.nr_segs = 2;
	}

	atomic_set(&map->write, 0);
	ret = inet_sendmsg(map->sock, &msg, size);
	if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
		atomic_inc(&map->write);
		atomic_inc(&map->io);
	}
	if (ret == -EAGAIN)
		return;

	/* write the data, then update the indexes */
	virt_wmb();
	if (ret < 0) {
		intf->out_error = ret;
	} else {
		intf->out_error = 0;
		intf->out_cons = cons + ret;
		prod = intf->out_prod;
	}
	/* update the indexes, then notify the other end */
	virt_wmb();
	if (prod != cons + ret)
		atomic_inc(&map->write);
	notify_remote_via_irq(map->irq);
}
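
/*
 * The read/write atomics are request counters bumped by the
 * per-connection interrupt and the sk_data_ready callback; io counts
 * outstanding work for the loop below, and release tells the worker
 * to stop on shutdown.
 */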
static void pvcalls_back_ioworker(struct work_struct *work)
{
	struct pvcalls_ioworker *ioworker = container_of(work,
		struct pvcalls_ioworker, register_work);
	struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
		ioworker);

	while (atomic_read(&map->io) > 0) {
		if (atomic_read(&map->release) > 0) {
			atomic_set(&map->release, 0);
			return;
		}

		if (atomic_read(&map->read) > 0)
			pvcalls_conn_back_read(map);
		if (atomic_read(&map->write) > 0)
			pvcalls_conn_back_write(map);

		atomic_dec(&map->io);
	}
}
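
/*
 * Command handlers. Each one writes a response on the command ring
 * and returns 0, except accept and poll, which may return -1 to tell
 * pvcalls_back_work that the response will be sent later.
 */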
static int pvcalls_back_socket(struct xenbus_device *dev,
		struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.socket.domain != AF_INET ||
	    req->u.socket.type != SOCK_STREAM ||
	    (req->u.socket.protocol != IPPROTO_IP &&
	     req->u.socket.protocol != AF_INET))
		ret = -EAFNOSUPPORT;
	else
		ret = 0;

	/* leave the actual socket allocation for later */

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.socket.id = req->u.socket.id;
	rsp->ret = ret;

	return 0;
}
static void pvcalls_sk_state_change(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;
	struct pvcalls_data_intf *intf;

	if (map == NULL)
		return;

	intf = map->ring;
	intf->in_error = -ENOTCONN;
	notify_remote_via_irq(map->irq);
}
static void pvcalls_sk_data_ready(struct sock *sock)
{
	struct sock_mapping *map = sock->sk_user_data;
	struct pvcalls_ioworker *iow;

	if (map == NULL)
		return;

	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);
}
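
/*
 * Set up everything an active socket needs: map the indexes page and
 * data ring from the frontend's grants, bind the per-connection event
 * channel, allocate the ioworker and install the sk callbacks.
 */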
static struct sock_mapping *pvcalls_new_active_socket(
		struct pvcalls_fedata *fedata,
		uint64_t id,
		grant_ref_t ref,
		uint32_t evtchn,
		struct socket *sock)
{
	int ret;
	struct sock_mapping *map;
	void *page;

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL)
		return NULL;

	map->fedata = fedata;
	map->sock = sock;
	map->id = id;
	map->ref = ref;

	ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
	if (ret < 0)
		goto out;
	map->ring = page;
	map->ring_order = map->ring->ring_order;
	/* first read the order, then map the data ring */
	virt_rmb();
	if (map->ring_order > MAX_RING_ORDER) {
		pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
			__func__, map->ring_order, MAX_RING_ORDER);
		goto out;
	}
	ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
				     (1 << map->ring_order), &page);
	if (ret < 0)
		goto out;
	map->bytes = page;

	ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
						    evtchn,
						    pvcalls_back_conn_event,
						    0,
						    "pvcalls-backend",
						    map);
	if (ret < 0)
		goto out;
	map->irq = ret;

	map->data.in = map->bytes;
	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);

	map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
	if (!map->ioworker.wq)
		goto out;
	atomic_set(&map->io, 1);
	INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);

	down(&fedata->socket_lock);
	list_add_tail(&map->list, &fedata->socket_mappings);
	up(&fedata->socket_lock);

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
	map->sock->sk->sk_state_change = pvcalls_sk_state_change;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

	return map;
out:
	down(&fedata->socket_lock);
	list_del(&map->list);
	pvcalls_back_release_active(fedata->dev, fedata, map);
	up(&fedata->socket_lock);
	return NULL;
}
static int pvcalls_back_connect(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct socket *sock;
	struct sock_mapping *map;
	struct xen_pvcalls_response *rsp;
	struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;

	fedata = dev_get_drvdata(&dev->dev);

	if (req->u.connect.len < sizeof(sa->sa_family) ||
	    req->u.connect.len > sizeof(req->u.connect.addr) ||
	    sa->sa_family != AF_INET)
		goto out;

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
	if (ret < 0)
		goto out;
	ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);
	if (ret < 0) {
		sock_release(sock);
		goto out;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.connect.id,
					req->u.connect.ref,
					req->u.connect.evtchn,
					sock);
	if (!map) {
		ret = -EFAULT;
		sock_release(sock);
	}

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.connect.id = req->u.connect.id;
	rsp->ret = ret;

	return 0;
}
static int pvcalls_back_release_active(struct xenbus_device *dev,
				       struct pvcalls_fedata *fedata,
				       struct sock_mapping *map)
{
	disable_irq(map->irq);
	if (map->sock->sk != NULL) {
		write_lock_bh(&map->sock->sk->sk_callback_lock);
		map->sock->sk->sk_user_data = NULL;
		map->sock->sk->sk_data_ready = map->saved_data_ready;
		write_unlock_bh(&map->sock->sk->sk_callback_lock);
	}

	atomic_set(&map->release, 1);
	flush_work(&map->ioworker.register_work);

	xenbus_unmap_ring_vfree(dev, map->bytes);
	xenbus_unmap_ring_vfree(dev, (void *)map->ring);
	unbind_from_irqhandler(map->irq, map);

	sock_release(map->sock);
	kfree(map);

	return 0;
}
static int pvcalls_back_release_passive(struct xenbus_device *dev,
					struct pvcalls_fedata *fedata,
					struct sockpass_mapping *mappass)
{
	if (mappass->sock->sk != NULL) {
		write_lock_bh(&mappass->sock->sk->sk_callback_lock);
		mappass->sock->sk->sk_user_data = NULL;
		mappass->sock->sk->sk_data_ready = mappass->saved_data_ready;
		write_unlock_bh(&mappass->sock->sk->sk_callback_lock);
	}
	sock_release(mappass->sock);
	flush_workqueue(mappass->wq);
	destroy_workqueue(mappass->wq);
	kfree(mappass);

	return 0;
}
static int pvcalls_back_release(struct xenbus_device *dev,
				struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	int ret = 0;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		if (map->id == req->u.release.id) {
			list_del(&map->list);
			up(&fedata->socket_lock);
			ret = pvcalls_back_release_active(dev, fedata, map);
			goto out;
		}
	}
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.release.id);
	if (mappass != NULL) {
		radix_tree_delete(&fedata->socketpass_mappings, mappass->id);
		up(&fedata->socket_lock);
		ret = pvcalls_back_release_passive(dev, fedata, mappass);
	} else
		up(&fedata->socket_lock);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->u.release.id = req->u.release.id;
	rsp->cmd = req->cmd;
	rsp->ret = ret;
	return 0;
}
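
/*
 * Deferred accept: pvcalls_back_accept only queues the request; this
 * work function runs from the sockpass workqueue, performs the actual
 * inet_accept() and sends the response.
 */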
static void __pvcalls_back_accept(struct work_struct *work)
{
	struct sockpass_mapping *mappass = container_of(
		work, struct sockpass_mapping, register_work);
	struct sock_mapping *map;
	struct pvcalls_ioworker *iow;
	struct pvcalls_fedata *fedata;
	struct socket *sock;
	struct xen_pvcalls_response *rsp;
	struct xen_pvcalls_request *req;
	int notify;
	int ret = -EINVAL;
	unsigned long flags;

	fedata = mappass->fedata;
	/*
	 * __pvcalls_back_accept can race against pvcalls_back_accept.
	 * We only need to check the value of "cmd" on read. It could be
	 * done atomically, but to simplify the code on the write side, we
	 * use a spinlock.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	req = &mappass->reqcopy;
	if (req->cmd != PVCALLS_ACCEPT) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		return;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	sock = sock_alloc();
	if (sock == NULL)
		goto out_error;
	sock->type = mappass->sock->type;
	sock->ops = mappass->sock->ops;

	ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true);
	if (ret == -EAGAIN) {
		sock_release(sock);
		goto out_error;
	}

	map = pvcalls_new_active_socket(fedata,
					req->u.accept.id_new,
					req->u.accept.ref,
					req->u.accept.evtchn,
					sock);
	if (!map) {
		ret = -EFAULT;
		sock_release(sock);
		goto out_error;
	}

	map->sockpass = mappass;
	iow = &map->ioworker;
	atomic_inc(&map->read);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
	if (notify)
		notify_remote_via_irq(fedata->irq);

	mappass->reqcopy.cmd = 0;
}
static void pvcalls_pass_sk_data_ready(struct sock *sock)
{
	struct sockpass_mapping *mappass = sock->sk_user_data;
	struct pvcalls_fedata *fedata;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;
	int notify;

	if (mappass == NULL)
		return;

	fedata = mappass->fedata;
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd == PVCALLS_POLL) {
		rsp = RING_GET_RESPONSE(&fedata->ring,
					fedata->ring.rsp_prod_pvt++);
		rsp->req_id = mappass->reqcopy.req_id;
		rsp->u.poll.id = mappass->reqcopy.u.poll.id;
		rsp->cmd = mappass->reqcopy.cmd;
		rsp->ret = 0;

		mappass->reqcopy.cmd = 0;
		spin_unlock_irqrestore(&mappass->copy_lock, flags);

		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&fedata->ring, notify);
		if (notify)
			notify_remote_via_irq(mappass->fedata->irq);
	} else {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		queue_work(mappass->wq, &mappass->register_work);
	}
}
static int pvcalls_back_bind(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	map = kzalloc(sizeof(*map), GFP_KERNEL);
	if (map == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&map->register_work, __pvcalls_back_accept);
	spin_lock_init(&map->copy_lock);
	map->wq = alloc_workqueue("pvcalls_wq", WQ_UNBOUND, 1);
	if (!map->wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &map->sock);
	if (ret < 0)
		goto out;

	ret = inet_bind(map->sock, (struct sockaddr *)&req->u.bind.addr,
			req->u.bind.len);
	if (ret < 0)
		goto out;

	map->fedata = fedata;
	map->id = req->u.bind.id;

	down(&fedata->socket_lock);
	ret = radix_tree_insert(&fedata->socketpass_mappings, map->id,
				map);
	up(&fedata->socket_lock);
	if (ret)
		goto out;

	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->saved_data_ready = map->sock->sk->sk_data_ready;
	map->sock->sk->sk_user_data = map;
	map->sock->sk->sk_data_ready = pvcalls_pass_sk_data_ready;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);

out:
	if (ret) {
		if (map && map->sock)
			sock_release(map->sock);
		if (map && map->wq)
			destroy_workqueue(map->wq);
		kfree(map);
	}

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.bind.id = req->u.bind.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_listen(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	int ret = -EINVAL;
	struct sockpass_mapping *map;
	struct xen_pvcalls_response *rsp;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	map = radix_tree_lookup(&fedata->socketpass_mappings, req->u.listen.id);
	up(&fedata->socket_lock);
	if (map == NULL)
		goto out;

	ret = inet_listen(map->sock, req->u.listen.backlog);

out:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.listen.id = req->u.listen.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_accept(struct xenbus_device *dev,
			       struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	int ret = -EINVAL;
	struct xen_pvcalls_response *rsp;
	unsigned long flags;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.accept.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		goto out_error;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		spin_unlock_irqrestore(&mappass->copy_lock, flags);
		ret = -EINTR;
		goto out_error;
	}

	mappass->reqcopy = *req;
	spin_unlock_irqrestore(&mappass->copy_lock, flags);
	queue_work(mappass->wq, &mappass->register_work);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out_error:
	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.accept.id = req->u.accept.id;
	rsp->ret = ret;
	return 0;
}
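
/*
 * Poll on a passive socket: if a connection is already queued, respond
 * immediately; otherwise stash the request in reqcopy and let
 * pvcalls_pass_sk_data_ready send the response when one arrives.
 */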
static int pvcalls_back_poll(struct xenbus_device *dev,
			     struct xen_pvcalls_request *req)
{
	struct pvcalls_fedata *fedata;
	struct sockpass_mapping *mappass;
	struct xen_pvcalls_response *rsp;
	struct inet_connection_sock *icsk;
	struct request_sock_queue *queue;
	unsigned long flags;
	int ret;
	bool data;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	mappass = radix_tree_lookup(&fedata->socketpass_mappings,
				    req->u.poll.id);
	up(&fedata->socket_lock);
	if (mappass == NULL)
		return -EINVAL;

	/*
	 * Limitation of the current implementation: only support one
	 * concurrent accept or poll call on one socket.
	 */
	spin_lock_irqsave(&mappass->copy_lock, flags);
	if (mappass->reqcopy.cmd != 0) {
		ret = -EINTR;
		goto out;
	}

	mappass->reqcopy = *req;
	icsk = inet_csk(mappass->sock->sk);
	queue = &icsk->icsk_accept_queue;
	data = queue->rskq_accept_head != NULL;
	if (data) {
		mappass->reqcopy.cmd = 0;
		ret = 0;
		goto out;
	}
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	/* Tell the caller we don't need to send back a notification yet */
	return -1;

out:
	spin_unlock_irqrestore(&mappass->copy_lock, flags);

	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
	rsp->req_id = req->req_id;
	rsp->cmd = req->cmd;
	rsp->u.poll.id = req->u.poll.id;
	rsp->ret = ret;
	return 0;
}
static int pvcalls_back_handle_cmd(struct xenbus_device *dev,
				   struct xen_pvcalls_request *req)
{
	int ret = 0;

	switch (req->cmd) {
	case PVCALLS_SOCKET:
		ret = pvcalls_back_socket(dev, req);
		break;
	case PVCALLS_CONNECT:
		ret = pvcalls_back_connect(dev, req);
		break;
	case PVCALLS_RELEASE:
		ret = pvcalls_back_release(dev, req);
		break;
	case PVCALLS_BIND:
		ret = pvcalls_back_bind(dev, req);
		break;
	case PVCALLS_LISTEN:
		ret = pvcalls_back_listen(dev, req);
		break;
	case PVCALLS_ACCEPT:
		ret = pvcalls_back_accept(dev, req);
		break;
	case PVCALLS_POLL:
		ret = pvcalls_back_poll(dev, req);
		break;
	default:
	{
		struct pvcalls_fedata *fedata;
		struct xen_pvcalls_response *rsp;

		fedata = dev_get_drvdata(&dev->dev);
		rsp = RING_GET_RESPONSE(
			&fedata->ring, fedata->ring.rsp_prod_pvt++);
		rsp->req_id = req->req_id;
		rsp->cmd = req->cmd;
		rsp->ret = -ENOTSUPP;
		break;
	}
	}
	return ret;
}
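
/*
 * Consume requests from the command ring. RING_COPY_REQUEST snapshots
 * each request so the frontend cannot change it while it is being
 * handled.
 */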
static void pvcalls_back_work(struct pvcalls_fedata *fedata)
{
	int notify, notify_all = 0, more = 1;
	struct xen_pvcalls_request req;
	struct xenbus_device *dev = fedata->dev;

	while (more) {
		while (RING_HAS_UNCONSUMED_REQUESTS(&fedata->ring)) {
			RING_COPY_REQUEST(&fedata->ring,
					  fedata->ring.req_cons++,
					  &req);

			if (!pvcalls_back_handle_cmd(dev, &req)) {
				RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(
					&fedata->ring, notify);
				notify_all += notify;
			}
		}

		if (notify_all) {
			notify_remote_via_irq(fedata->irq);
			notify_all = 0;
		}

		RING_FINAL_CHECK_FOR_REQUESTS(&fedata->ring, more);
	}
}
static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
	struct xenbus_device *dev = dev_id;
	struct pvcalls_fedata *fedata = NULL;

	if (dev == NULL)
		return IRQ_HANDLED;

	fedata = dev_get_drvdata(&dev->dev);
	if (fedata == NULL)
		return IRQ_HANDLED;

	pvcalls_back_work(fedata);
	return IRQ_HANDLED;
}
static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
{
	struct sock_mapping *map = sock_map;
	struct pvcalls_ioworker *iow;

	if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
		map->sock->sk->sk_user_data != map)
		return IRQ_HANDLED;

	iow = &map->ioworker;

	atomic_inc(&map->write);
	atomic_inc(&map->io);
	queue_work(iow->wq, &iow->register_work);

	return IRQ_HANDLED;
}
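
/*
 * Connect to a new frontend: read the command ring grant reference
 * and event channel from the frontend's xenstore directory, map the
 * ring and register the per-frontend structure.
 */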
static int backend_connect(struct xenbus_device *dev)
{
	int err, evtchn;
	grant_ref_t ring_ref;
	struct pvcalls_fedata *fedata = NULL;

	fedata = kzalloc(sizeof(struct pvcalls_fedata), GFP_KERNEL);
	if (!fedata)
		return -ENOMEM;

	fedata->irq = -1;
	err = xenbus_scanf(XBT_NIL, dev->otherend, "port", "%u",
			   &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		goto error;
	}

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", "%u", &ring_ref);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
				 dev->otherend);
		goto error;
	}

	err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
	if (err < 0)
		goto error;
	fedata->irq = err;

	err = request_threaded_irq(fedata->irq, NULL, pvcalls_back_event,
				   IRQF_ONESHOT, "pvcalls-back", dev);
	if (err < 0)
		goto error;

	err = xenbus_map_ring_valloc(dev, &ring_ref, 1,
				     (void **)&fedata->sring);
	if (err < 0)
		goto error;

	BACK_RING_INIT(&fedata->ring, fedata->sring, XEN_PAGE_SIZE * 1);
	fedata->dev = dev;

	INIT_LIST_HEAD(&fedata->socket_mappings);
	INIT_RADIX_TREE(&fedata->socketpass_mappings, GFP_KERNEL);
	sema_init(&fedata->socket_lock, 1);
	dev_set_drvdata(&dev->dev, fedata);

	down(&pvcalls_back_global.frontends_lock);
	list_add_tail(&fedata->list, &pvcalls_back_global.frontends);
	up(&pvcalls_back_global.frontends_lock);

	return 0;

 error:
	if (fedata->irq >= 0)
		unbind_from_irqhandler(fedata->irq, dev);
	if (fedata->sring != NULL)
		xenbus_unmap_ring_vfree(dev, fedata->sring);
	kfree(fedata);
	return err;
}
static int backend_disconnect(struct xenbus_device *dev)
{
	struct pvcalls_fedata *fedata;
	struct sock_mapping *map, *n;
	struct sockpass_mapping *mappass;
	struct radix_tree_iter iter;
	void **slot;

	fedata = dev_get_drvdata(&dev->dev);

	down(&fedata->socket_lock);
	list_for_each_entry_safe(map, n, &fedata->socket_mappings, list) {
		list_del(&map->list);
		pvcalls_back_release_active(dev, fedata, map);
	}

	radix_tree_for_each_slot(slot, &fedata->socketpass_mappings, &iter, 0) {
		mappass = radix_tree_deref_slot(slot);
		if (!mappass)
			continue;
		if (radix_tree_exception(mappass)) {
			if (radix_tree_deref_retry(mappass))
				slot = radix_tree_iter_retry(&iter);
		} else {
			radix_tree_delete(&fedata->socketpass_mappings,
					  mappass->id);
			pvcalls_back_release_passive(dev, fedata, mappass);
		}
	}
	up(&fedata->socket_lock);

	unbind_from_irqhandler(fedata->irq, dev);
	xenbus_unmap_ring_vfree(dev, fedata->sring);

	list_del(&fedata->list);
	kfree(fedata);
	dev_set_drvdata(&dev->dev, NULL);

	return 0;
}
static int pvcalls_back_probe(struct xenbus_device *dev,
			      const struct xenbus_device_id *id)
{
	int err, abort;
	struct xenbus_transaction xbt;

again:
	abort = 1;

	err = xenbus_transaction_start(&xbt);
	if (err) {
		pr_warn("%s cannot create xenstore transaction\n", __func__);
		return err;
	}

	err = xenbus_printf(xbt, dev->nodename, "versions", "%s",
			    PVCALLS_VERSIONS);
	if (err) {
		pr_warn("%s write out 'versions' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "max-page-order", "%u",
			    MAX_RING_ORDER);
	if (err) {
		pr_warn("%s write out 'max-page-order' failed\n", __func__);
		goto abort;
	}

	err = xenbus_printf(xbt, dev->nodename, "function-calls",
			    XENBUS_FUNCTIONS_CALLS);
	if (err) {
		pr_warn("%s write out 'function-calls' failed\n", __func__);
		goto abort;
	}

	abort = 0;
abort:
	err = xenbus_transaction_end(xbt, abort);
	if (err) {
		if (err == -EAGAIN && !abort)
			goto again;
		pr_warn("%s cannot complete xenstore transaction\n", __func__);
		return err;
	}

	if (abort)
		return -EFAULT;

	xenbus_switch_state(dev, XenbusStateInitWait);

	return 0;
}
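
/*
 * Walk the xenbus state machine one step at a time until the backend
 * reaches the requested state, connecting or disconnecting the
 * frontend along the way.
 */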
static void set_backend_state(struct xenbus_device *dev,
			      enum xenbus_state state)
{
	while (dev->state != state) {
		switch (dev->state) {
		case XenbusStateClosed:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
				xenbus_switch_state(dev, XenbusStateInitWait);
				break;
			case XenbusStateClosing:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateInitWait:
		case XenbusStateInitialised:
			switch (state) {
			case XenbusStateConnected:
				backend_connect(dev);
				xenbus_switch_state(dev, XenbusStateConnected);
				break;
			case XenbusStateClosing:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateConnected:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateClosing:
			case XenbusStateClosed:
				down(&pvcalls_back_global.frontends_lock);
				backend_disconnect(dev);
				up(&pvcalls_back_global.frontends_lock);
				xenbus_switch_state(dev, XenbusStateClosing);
				break;
			default:
				WARN_ON(1);
			}
			break;
		case XenbusStateClosing:
			switch (state) {
			case XenbusStateInitWait:
			case XenbusStateConnected:
			case XenbusStateClosed:
				xenbus_switch_state(dev, XenbusStateClosed);
				break;
			default:
				WARN_ON(1);
			}
			break;
		default:
			WARN_ON(1);
		}
	}
}
static void pvcalls_back_changed(struct xenbus_device *dev,
				 enum xenbus_state frontend_state)
{
	switch (frontend_state) {
	case XenbusStateInitialising:
		set_backend_state(dev, XenbusStateInitWait);
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		set_backend_state(dev, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		set_backend_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		set_backend_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		device_unregister(&dev->dev);
		break;
	case XenbusStateUnknown:
		set_backend_state(dev, XenbusStateClosed);
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}
static int pvcalls_back_remove(struct xenbus_device *dev)
{
	return 0;
}
static int pvcalls_back_uevent(struct xenbus_device *xdev,
			       struct kobj_uevent_env *env)
{
	return 0;
}
static const struct xenbus_device_id pvcalls_back_ids[] = {
	{ "pvcalls" },
	{ "" }
};
static struct xenbus_driver pvcalls_back_driver = {
	.ids = pvcalls_back_ids,
	.probe = pvcalls_back_probe,
	.remove = pvcalls_back_remove,
	.uevent = pvcalls_back_uevent,
	.otherend_changed = pvcalls_back_changed,
};
static int __init pvcalls_back_init(void)
{
	int ret;

	if (!xen_domain())
		return -ENODEV;

	ret = xenbus_register_backend(&pvcalls_back_driver);
	if (ret < 0)
		return ret;

	sema_init(&pvcalls_back_global.frontends_lock, 1);
	INIT_LIST_HEAD(&pvcalls_back_global.frontends);
	return 0;
}
module_init(pvcalls_back_init);
static void __exit pvcalls_back_fin(void)
{
	struct pvcalls_fedata *fedata, *nfedata;

	down(&pvcalls_back_global.frontends_lock);
	list_for_each_entry_safe(fedata, nfedata,
				 &pvcalls_back_global.frontends, list) {
		backend_disconnect(fedata->dev);
	}
	up(&pvcalls_back_global.frontends_lock);

	xenbus_unregister_driver(&pvcalls_back_driver);
}
module_exit(pvcalls_back_fin);