/*
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/net/xen-netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "common.h"

#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>

#include <net/tcp.h>

#include <xen/events.h>
#include <xen/interface/memory.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
/* Provide an option to disable split event channels at load time as
 * event channels are limited resource. Split event channels are
 * enabled by default.
 */
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
/*
 * This is the maximum slots a skb can have. If a guest sends a skb
 * which exceeds this limit it is considered malicious.
 */
#define FATAL_SKB_SLOTS_DEFAULT 20
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
/*
 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
 * the maximum slots a valid packet can use. Now this value is defined
 * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
 * frontend.
 */
#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
typedef unsigned int pending_ring_idx_t;
#define INVALID_PENDING_RING_IDX (~0U)

struct pending_tx_info {
    struct xen_netif_tx_request req; /* coalesced tx request */
    struct xenvif *vif;
    pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
                              * if it is head of one or more tx
                              * reqs
                              */
};
struct netbk_rx_meta {
    int id;
    int size;
    int gso_size;
};
#define MAX_PENDING_REQS 256

/* Discriminate from any valid pending_idx value. */
#define INVALID_PENDING_IDX 0xFFFF

#define MAX_BUFFER_OFFSET PAGE_SIZE
/* extra field used in struct page */
union page_ext {
    struct {
#if BITS_PER_LONG < 64
#define IDX_WIDTH   8
#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
        unsigned int group:GROUP_WIDTH;
        unsigned int idx:IDX_WIDTH;
#else
        unsigned int group, idx;
#endif
    } e;
    void *mapping;
};
struct xen_netbk {
    wait_queue_head_t wq;
    struct task_struct *task;

    struct sk_buff_head rx_queue;
    struct sk_buff_head tx_queue;

    struct timer_list net_timer;

    struct page *mmap_pages[MAX_PENDING_REQS];

    pending_ring_idx_t pending_prod;
    pending_ring_idx_t pending_cons;
    struct list_head net_schedule_list;

    /* Protect the net_schedule_list in netif. */
    spinlock_t net_schedule_list_lock;

    atomic_t netfront_count;

    struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
    /* Coalescing tx requests before copying makes number of grant
     * copy ops greater or equal to number of slots required. In
     * worst case a tx request consumes 2 gnttab_copy.
     */
    struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];

    u16 pending_ring[MAX_PENDING_REQS];

    /*
     * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
     * head/fragment page uses 2 copy operations because it
     * straddles two buffers in the frontend.
     */
    struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
    struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
};
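/*
 * Sizing example for the two RX arrays above (illustrative, not from
 * the original source): with MAX_BUFFER_OFFSET = PAGE_SIZE = 4096, a
 * 4096-byte frag arriving when the current RX buffer already holds
 * 2048 bytes is split -- 2048 bytes complete the current buffer and
 * 2048 go into the next one. That is two gnttab_copy ops for one meta
 * slot, hence the 2*XEN_NETIF_RX_RING_SIZE sizing.
 */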
static struct xen_netbk *xen_netbk;
static int xen_netbk_group_nr;
/*
 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
 * one or more merged tx requests, otherwise it is the continuation of
 * previous tx request.
 */
static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
{
    return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
}
void xen_netbk_add_xenvif(struct xenvif *vif)
{
    int i;
    int min_netfront_count;
    int min_group = 0;
    struct xen_netbk *netbk;

    min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
    for (i = 0; i < xen_netbk_group_nr; i++) {
        int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
        if (netfront_count < min_netfront_count) {
            min_group = i;
            min_netfront_count = netfront_count;
        }
    }

    netbk = &xen_netbk[min_group];
    vif->netbk = netbk;
    atomic_inc(&netbk->netfront_count);
}
void xen_netbk_remove_xenvif(struct xenvif *vif)
{
    struct xen_netbk *netbk = vif->netbk;
    vif->netbk = NULL;
    atomic_dec(&netbk->netfront_count);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
                                  u8 status);
static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st);
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags);
static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
                                       u16 idx)
{
    return page_to_pfn(netbk->mmap_pages[idx]);
}

static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
                                         u16 idx)
{
    return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
}
/* extra field used in struct page */
static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
                                unsigned int idx)
{
    unsigned int group = netbk - xen_netbk;
    union page_ext ext = { .e = { .group = group + 1, .idx = idx } };

    BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
    pg->mapping = ext.mapping;
}
static int get_page_ext(struct page *pg,
                        unsigned int *pgroup, unsigned int *pidx)
{
    union page_ext ext = { .mapping = pg->mapping };
    struct xen_netbk *netbk;
    unsigned int group, idx;

    group = ext.e.group - 1;

    if (group < 0 || group >= xen_netbk_group_nr)
        return 0;

    netbk = &xen_netbk[group];

    idx = ext.e.idx;

    if ((idx < 0) || (idx >= MAX_PENDING_REQS))
        return 0;

    if (netbk->mmap_pages[idx] != pg)
        return 0;

    *pgroup = group;
    *pidx = idx;

    return 1;
}
/*
 * This is the amount of packet we copy rather than map, so that the
 * guest can't fiddle with the contents of the headers while we do
 * packet processing on them (netfilter, routing, etc).
 */
#define PKT_PROT_LEN    (ETH_HLEN + \
                         VLAN_HLEN + \
                         sizeof(struct iphdr) + MAX_IPOPTLEN + \
                         sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
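/*
 * Worked size (illustrative, assuming the VLAN_HLEN term above and
 * typical header sizes): 14 (ETH_HLEN) + 4 (VLAN_HLEN) + 20 (iphdr) +
 * 40 (MAX_IPOPTLEN) + 20 (tcphdr) + 40 (MAX_TCP_OPTION_SPACE) = 138
 * bytes copied into backend-owned memory, so the guest can never
 * rewrite the headers mid-processing.
 */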
static u16 frag_get_pending_idx(skb_frag_t *frag)
{
    return (u16)frag->page_offset;
}

static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
{
    frag->page_offset = pending_idx;
}
static inline pending_ring_idx_t pending_index(unsigned i)
{
    return i & (MAX_PENDING_REQS-1);
}

static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
{
    return MAX_PENDING_REQS -
        netbk->pending_prod + netbk->pending_cons;
}
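/*
 * Worked example (illustrative, not from the original source): with
 * MAX_PENDING_REQS = 256, pending_prod = 300 and pending_cons = 60,
 * nr_pending_reqs() = 256 - 300 + 60 = 16 requests in flight and
 * 240 ring entries free; pending_index() masks with 255, so both
 * counters may wrap freely without special-casing.
 */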
static void xen_netbk_kick_thread(struct xen_netbk *netbk)
{
    wake_up(&netbk->wq);
}
static int max_required_rx_slots(struct xenvif *vif)
{
    int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);

    /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
    if (vif->can_sg || vif->gso || vif->gso_prefix)
        max += MAX_SKB_FRAGS + 1; /* extra_info + frags */

    return max;
}
int xen_netbk_rx_ring_full(struct xenvif *vif)
{
    RING_IDX peek   = vif->rx_req_cons_peek;
    RING_IDX needed = max_required_rx_slots(vif);

    return ((vif->rx.sring->req_prod - peek) < needed) ||
           ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
}
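/*
 * Illustrative reading of the two conditions above: the first is true
 * when the frontend has not yet posted 'needed' free RX requests
 * beyond our peek point; the second is true when producing 'needed'
 * more responses would leave more than one full ring
 * (XEN_NETIF_RX_RING_SIZE) of responses outstanding.
 */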
int xen_netbk_must_stop_queue(struct xenvif *vif)
{
    if (!xen_netbk_rx_ring_full(vif))
        return 0;

    vif->rx.sring->req_event = vif->rx_req_cons_peek +
        max_required_rx_slots(vif);
    mb(); /* request notification /then/ check the queue */

    return xen_netbk_rx_ring_full(vif);
}
/*
 * Returns true if we should start a new receive buffer instead of
 * adding 'size' bytes to a buffer which currently contains 'offset'
 * bytes.
 */
static bool start_new_rx_buffer(int offset, unsigned long size, int head)
{
    /* simple case: we have completely filled the current buffer. */
    if (offset == MAX_BUFFER_OFFSET)
        return true;

    /*
     * complex case: start a fresh buffer if the current frag
     * would overflow the current buffer but only if:
     *     (i) this frag would fit completely in the next buffer
     * and (ii) there is already some data in the current buffer
     * and (iii) this is not the head buffer.
     *
     * Where:
     * - (i) stops us splitting a frag into two copies
     *   unless the frag is too large for a single buffer.
     * - (ii) stops us from leaving a buffer pointlessly empty.
     * - (iii) stops us leaving the first buffer
     *   empty. Strictly speaking this is already covered
     *   by (ii) but is explicitly checked because
     *   netfront relies on the first buffer being
     *   non-empty and can crash otherwise.
     *
     * This means we will effectively linearise small
     * frags but do not needlessly split large buffers
     * into multiple copies; we tend to give large frags
     * their own buffers as before.
     */
    if ((offset + size > MAX_BUFFER_OFFSET) &&
        (size <= MAX_BUFFER_OFFSET) && offset && !head)
        return true;

    return false;
}
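/*
 * Worked example (illustrative, not from the original source): with
 * MAX_BUFFER_OFFSET = 4096, a 2000-byte frag arriving at offset 3000
 * overflows the current buffer (3000 + 2000 > 4096) yet fits in a
 * fresh one (2000 <= 4096), so a new buffer is started. A 5000-byte
 * frag fails test (i) (5000 > 4096) and is instead split across
 * buffers by the copy_off clamp in the callers.
 */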
/*
 * Figure out how many ring slots we're going to need to send @skb to
 * the guest. This function is essentially a dry run of
 * netbk_gop_frag_copy.
 */
unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
{
    unsigned int count;
    int i, copy_off;

    count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);

    copy_off = skb_headlen(skb) % PAGE_SIZE;

    if (skb_shinfo(skb)->gso_size)
        count++;

    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
        unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
        unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
        unsigned long bytes;

        offset &= ~PAGE_MASK;

        while (size > 0) {
            BUG_ON(offset >= PAGE_SIZE);
            BUG_ON(copy_off > MAX_BUFFER_OFFSET);

            bytes = PAGE_SIZE - offset;

            if (bytes > size)
                bytes = size;

            if (start_new_rx_buffer(copy_off, bytes, 0)) {
                count++;
                copy_off = 0;
            }

            if (copy_off + bytes > MAX_BUFFER_OFFSET)
                bytes = MAX_BUFFER_OFFSET - copy_off;

            copy_off += bytes;

            offset += bytes;
            size -= bytes;

            if (offset == PAGE_SIZE)
                offset = 0;
        }
    }
    return count;
}
struct netrx_pending_operations {
    unsigned copy_prod, copy_cons;
    unsigned meta_prod, meta_cons;
    struct gnttab_copy *copy;
    struct netbk_rx_meta *meta;
    int copy_off;
    grant_ref_t copy_gref;
};
static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
                                                struct netrx_pending_operations *npo)
{
    struct netbk_rx_meta *meta;
    struct xen_netif_rx_request *req;

    req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);

    meta = npo->meta + npo->meta_prod++;
    meta->gso_size = 0;
    meta->size = 0;
    meta->id = req->id;

    npo->copy_off = 0;
    npo->copy_gref = req->gref;

    return meta;
}
/*
 * Set up the grant operations for this fragment. If it's a flipping
 * interface, we also set up the unmap request from here.
 */
static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
                                struct netrx_pending_operations *npo,
                                struct page *page, unsigned long size,
                                unsigned long offset, int *head)
{
    struct gnttab_copy *copy_gop;
    struct netbk_rx_meta *meta;
    /*
     * These variables are used iff get_page_ext returns true,
     * in which case they are guaranteed to be initialized.
     */
    unsigned int uninitialized_var(group), uninitialized_var(idx);
    int foreign = get_page_ext(page, &group, &idx);
    unsigned long bytes;

    /* Data must not cross a page boundary. */
    BUG_ON(size + offset > (PAGE_SIZE << compound_order(page)));

    meta = npo->meta + npo->meta_prod - 1;

    /* Skip unused frames from start of page */
    page += offset >> PAGE_SHIFT;
    offset &= ~PAGE_MASK;

    while (size > 0) {
        BUG_ON(offset >= PAGE_SIZE);
        BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);

        bytes = PAGE_SIZE - offset;

        if (bytes > size)
            bytes = size;

        if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
            /*
             * Netfront requires there to be some data in the head
             * buffer.
             */
            BUG_ON(*head);

            meta = get_next_rx_buffer(vif, npo);
        }

        if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
            bytes = MAX_BUFFER_OFFSET - npo->copy_off;

        copy_gop = npo->copy + npo->copy_prod++;
        copy_gop->flags = GNTCOPY_dest_gref;
        if (foreign) {
            struct xen_netbk *netbk = &xen_netbk[group];
            struct pending_tx_info *src_pend;

            src_pend = &netbk->pending_tx_info[idx];

            copy_gop->source.domid = src_pend->vif->domid;
            copy_gop->source.u.ref = src_pend->req.gref;
            copy_gop->flags |= GNTCOPY_source_gref;
        } else {
            void *vaddr = page_address(page);
            copy_gop->source.domid = DOMID_SELF;
            copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
        }
        copy_gop->source.offset = offset;
        copy_gop->dest.domid = vif->domid;

        copy_gop->dest.offset = npo->copy_off;
        copy_gop->dest.u.ref = npo->copy_gref;
        copy_gop->len = bytes;

        npo->copy_off += bytes;
        meta->size += bytes;

        offset += bytes;
        size -= bytes;

        /* Next frame */
        if (offset == PAGE_SIZE && size) {
            BUG_ON(!PageCompound(page));
            page++;
            offset = 0;
        }

        /* Leave a gap for the GSO descriptor. */
        if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
            vif->rx.req_cons++;

        *head = 0; /* There must be something in this buffer now. */
    }
}
/*
 * Prepare an SKB to be transmitted to the frontend.
 *
 * This function is responsible for allocating grant operations, meta
 * structures, etc.
 *
 * It returns the number of meta structures consumed. The number of
 * ring slots used is always equal to the number of meta slots used
 * plus the number of GSO descriptors used. Currently, we use either
 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 * frontend-side LRO).
 */
static int netbk_gop_skb(struct sk_buff *skb,
                         struct netrx_pending_operations *npo)
{
    struct xenvif *vif = netdev_priv(skb->dev);
    int nr_frags = skb_shinfo(skb)->nr_frags;
    int i;
    struct xen_netif_rx_request *req;
    struct netbk_rx_meta *meta;
    unsigned char *data;
    int head = 1;
    int old_meta_prod;

    old_meta_prod = npo->meta_prod;

    /* Set up a GSO prefix descriptor, if necessary */
    if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
        meta = npo->meta + npo->meta_prod++;
        meta->gso_size = skb_shinfo(skb)->gso_size;
        meta->size = 0;
        meta->id = req->id;
    }

    req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
    meta = npo->meta + npo->meta_prod++;

    if (!vif->gso_prefix)
        meta->gso_size = skb_shinfo(skb)->gso_size;
    else
        meta->gso_size = 0;

    meta->size = 0;
    meta->id = req->id;
    npo->copy_off = 0;
    npo->copy_gref = req->gref;

    data = skb->data;
    while (data < skb_tail_pointer(skb)) {
        unsigned int offset = offset_in_page(data);
        unsigned int len = PAGE_SIZE - offset;

        if (data + len > skb_tail_pointer(skb))
            len = skb_tail_pointer(skb) - data;

        netbk_gop_frag_copy(vif, skb, npo,
                            virt_to_page(data), len, offset, &head);
        data += len;
    }

    for (i = 0; i < nr_frags; i++) {
        netbk_gop_frag_copy(vif, skb, npo,
                            skb_frag_page(&skb_shinfo(skb)->frags[i]),
                            skb_frag_size(&skb_shinfo(skb)->frags[i]),
                            skb_shinfo(skb)->frags[i].page_offset,
                            &head);
    }

    return npo->meta_prod - old_meta_prod;
}
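/*
 * Accounting sketch (illustrative, not from the original source): a
 * GSO skb for a frontend without gso_prefix consumes one meta slot
 * per data buffer, and the return value counts only those; the extra
 * ring slot for the XEN_NETIF_EXTRA_TYPE_GSO descriptor is accounted
 * for by the req_cons++ gap left in netbk_gop_frag_copy().
 */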
/*
 * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
 * used to set up the operations on the top of
 * netrx_pending_operations, which have since been done. Check that
 * they didn't give any errors and advance over them.
 */
static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
                           struct netrx_pending_operations *npo)
{
    struct gnttab_copy *copy_op;
    int status = XEN_NETIF_RSP_OKAY;
    int i;

    for (i = 0; i < nr_meta_slots; i++) {
        copy_op = npo->copy + npo->copy_cons++;
        if (copy_op->status != GNTST_okay) {
            netdev_dbg(vif->dev,
                       "Bad status %d from copy to DOM%d.\n",
                       copy_op->status, vif->domid);
            status = XEN_NETIF_RSP_ERROR;
        }
    }

    return status;
}
static void netbk_add_frag_responses(struct xenvif *vif, int status,
                                     struct netbk_rx_meta *meta,
                                     int nr_meta_slots)
{
    int i;
    unsigned long offset;

    /* No fragments used */
    if (nr_meta_slots <= 1)
        return;

    nr_meta_slots--;

    for (i = 0; i < nr_meta_slots; i++) {
        int flags;
        if (i == nr_meta_slots - 1)
            flags = 0;
        else
            flags = XEN_NETRXF_more_data;

        offset = 0;
        make_rx_response(vif, meta[i].id, status, offset,
                         meta[i].size, flags);
    }
}
struct skb_cb_overlay {
    int meta_slots_used;
};
static void xen_netbk_rx_action(struct xen_netbk *netbk)
{
    struct xenvif *vif = NULL, *tmp;
    s8 status;
    u16 flags;
    struct xen_netif_rx_response *resp;
    struct sk_buff_head rxq;
    struct sk_buff *skb;
    LIST_HEAD(notify);
    int ret;
    int nr_frags;
    int count;
    unsigned long offset;
    struct skb_cb_overlay *sco;

    struct netrx_pending_operations npo = {
        .copy  = netbk->grant_copy_op,
        .meta  = netbk->meta,
    };

    skb_queue_head_init(&rxq);

    count = 0;

    while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
        vif = netdev_priv(skb->dev);
        nr_frags = skb_shinfo(skb)->nr_frags;

        sco = (struct skb_cb_overlay *)skb->cb;
        sco->meta_slots_used = netbk_gop_skb(skb, &npo);

        count += nr_frags + 1;

        __skb_queue_tail(&rxq, skb);

        /* Filled the batch queue? */
        /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
        if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
            break;
    }

    BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));

    if (!npo.copy_prod)
        return;

    BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
    gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);

    while ((skb = __skb_dequeue(&rxq)) != NULL) {
        sco = (struct skb_cb_overlay *)skb->cb;

        vif = netdev_priv(skb->dev);

        if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
            resp = RING_GET_RESPONSE(&vif->rx,
                                     vif->rx.rsp_prod_pvt++);

            resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;

            resp->offset = netbk->meta[npo.meta_cons].gso_size;
            resp->id = netbk->meta[npo.meta_cons].id;
            resp->status = sco->meta_slots_used;

            npo.meta_cons++;
            sco->meta_slots_used--;
        }

        vif->dev->stats.tx_bytes += skb->len;
        vif->dev->stats.tx_packets++;

        status = netbk_check_gop(vif, sco->meta_slots_used, &npo);

        if (sco->meta_slots_used == 1)
            flags = 0;
        else
            flags = XEN_NETRXF_more_data;

        if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
            flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
            /* remote but checksummed. */
            flags |= XEN_NETRXF_data_validated;

        offset = 0;
        resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
                                status, offset,
                                netbk->meta[npo.meta_cons].size,
                                flags);

        if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
            struct xen_netif_extra_info *gso =
                (struct xen_netif_extra_info *)
                RING_GET_RESPONSE(&vif->rx,
                                  vif->rx.rsp_prod_pvt++);

            resp->flags |= XEN_NETRXF_extra_info;

            gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
            gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
            gso->u.gso.pad = 0;
            gso->u.gso.features = 0;

            gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
            gso->flags = 0;
        }

        netbk_add_frag_responses(vif, status,
                                 netbk->meta + npo.meta_cons + 1,
                                 sco->meta_slots_used);

        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);

        xenvif_notify_tx_completion(vif);

        if (ret && list_empty(&vif->notify_list))
            list_add_tail(&vif->notify_list, &notify);

        npo.meta_cons += sco->meta_slots_used;
    }

    list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
        notify_remote_via_irq(vif->rx_irq);
        list_del_init(&vif->notify_list);
    }

    /* More work to do? */
    if (!skb_queue_empty(&netbk->rx_queue) &&
        !timer_pending(&netbk->net_timer))
        xen_netbk_kick_thread(netbk);
}
void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
{
    struct xen_netbk *netbk = vif->netbk;

    skb_queue_tail(&netbk->rx_queue, skb);

    xen_netbk_kick_thread(netbk);
}
static void xen_netbk_alarm(unsigned long data)
{
    struct xen_netbk *netbk = (struct xen_netbk *)data;
    xen_netbk_kick_thread(netbk);
}
static int __on_net_schedule_list(struct xenvif *vif)
{
    return !list_empty(&vif->schedule_list);
}
/* Must be called with net_schedule_list_lock held */
static void remove_from_net_schedule_list(struct xenvif *vif)
{
    if (likely(__on_net_schedule_list(vif))) {
        list_del_init(&vif->schedule_list);
        xenvif_put(vif);
    }
}
static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
{
    struct xenvif *vif = NULL;

    spin_lock_irq(&netbk->net_schedule_list_lock);
    if (list_empty(&netbk->net_schedule_list))
        goto out;

    vif = list_first_entry(&netbk->net_schedule_list,
                           struct xenvif, schedule_list);
    if (!vif)
        goto out;

    xenvif_get(vif);

    remove_from_net_schedule_list(vif);
out:
    spin_unlock_irq(&netbk->net_schedule_list_lock);
    return vif;
}
void xen_netbk_schedule_xenvif(struct xenvif *vif)
{
    unsigned long flags;
    struct xen_netbk *netbk = vif->netbk;

    if (__on_net_schedule_list(vif))
        goto kick;

    spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
    if (!__on_net_schedule_list(vif) &&
        likely(xenvif_schedulable(vif))) {
        list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
        xenvif_get(vif);
    }
    spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);

kick:
    smp_mb();
    if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
        !list_empty(&netbk->net_schedule_list))
        xen_netbk_kick_thread(netbk);
}
void xen_netbk_deschedule_xenvif(struct xenvif *vif)
{
    struct xen_netbk *netbk = vif->netbk;
    spin_lock_irq(&netbk->net_schedule_list_lock);
    remove_from_net_schedule_list(vif);
    spin_unlock_irq(&netbk->net_schedule_list_lock);
}
void xen_netbk_check_rx_xenvif(struct xenvif *vif)
{
    int more_to_do;

    RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);

    if (more_to_do)
        xen_netbk_schedule_xenvif(vif);
}
static void tx_add_credit(struct xenvif *vif)
{
    unsigned long max_burst, max_credit;

    /*
     * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
     * Otherwise the interface can seize up due to insufficient credit.
     */
    max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
    max_burst = min(max_burst, 131072UL);
    max_burst = max(max_burst, vif->credit_bytes);

    /* Take care that adding a new chunk of credit doesn't wrap to zero. */
    max_credit = vif->remaining_credit + vif->credit_bytes;
    if (max_credit < vif->remaining_credit)
        max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */

    vif->remaining_credit = min(max_credit, max_burst);
}
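/*
 * Worked example (illustrative, not from the original source): with
 * credit_bytes = 10000 and remaining_credit = 4000, a pending request
 * of size 60000 gives max_burst = max(min(60000, 131072), 10000) =
 * 60000 and max_credit = 14000, so remaining_credit becomes
 * min(14000, 60000) = 14000; the burst cap only bites once the
 * accumulated credit exceeds the packet size.
 */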
static void tx_credit_callback(unsigned long data)
{
    struct xenvif *vif = (struct xenvif *)data;
    tx_add_credit(vif);
    xen_netbk_check_rx_xenvif(vif);
}
static void netbk_tx_err(struct xenvif *vif,
                         struct xen_netif_tx_request *txp, RING_IDX end)
{
    RING_IDX cons = vif->tx.req_cons;

    do {
        make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
        if (cons == end)
            break;
        txp = RING_GET_REQUEST(&vif->tx, cons++);
    } while (1);
    vif->tx.req_cons = cons;
    xen_netbk_check_rx_xenvif(vif);
    xenvif_put(vif);
}
static void netbk_fatal_tx_err(struct xenvif *vif)
{
    netdev_err(vif->dev, "fatal error; disabling device\n");
    xenvif_carrier_off(vif);
    xenvif_put(vif);
}
static int netbk_count_requests(struct xenvif *vif,
                                struct xen_netif_tx_request *first,
                                struct xen_netif_tx_request *txp,
                                int work_to_do)
{
    RING_IDX cons = vif->tx.req_cons;
    int slots = 0;
    int drop_err = 0;
    int more_data;

    if (!(first->flags & XEN_NETTXF_more_data))
        return 0;

    do {
        struct xen_netif_tx_request dropped_tx = { 0 };

        if (slots >= work_to_do) {
            netdev_err(vif->dev,
                       "Asked for %d slots but exceeds this limit\n",
                       work_to_do);
            netbk_fatal_tx_err(vif);
            return -ENODATA;
        }

        /* This guest is really using too many slots and
         * considered malicious.
         */
        if (unlikely(slots >= fatal_skb_slots)) {
            netdev_err(vif->dev,
                       "Malicious frontend using %d slots, threshold %u\n",
                       slots, fatal_skb_slots);
            netbk_fatal_tx_err(vif);
            return -E2BIG;
        }

        /* Xen network protocol had implicit dependency on
         * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
         * the historical MAX_SKB_FRAGS value 18 to honor the
         * same behavior as before. Any packet using more than
         * 18 slots but less than fatal_skb_slots slots is
         * dropped.
         */
        if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
            if (net_ratelimit())
                netdev_dbg(vif->dev,
                           "Too many slots (%d) exceeding limit (%d), dropping packet\n",
                           slots, XEN_NETBK_LEGACY_SLOTS_MAX);
            drop_err = -E2BIG;
        }

        if (drop_err)
            txp = &dropped_tx;

        memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
               sizeof(*txp));

        /* If the guest submitted a frame >= 64 KiB then
         * first->size overflowed and following slots will
         * appear to be larger than the frame.
         *
         * This cannot be fatal error as there are buggy
         * frontends that do this.
         *
         * Consume all slots and drop the packet.
         */
        if (!drop_err && txp->size > first->size) {
            if (net_ratelimit())
                netdev_dbg(vif->dev,
                           "Invalid tx request, slot size %u > remaining size %u\n",
                           txp->size, first->size);
            drop_err = -EIO;
        }

        first->size -= txp->size;
        slots++;

        if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
            netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
                       txp->offset, txp->size);
            netbk_fatal_tx_err(vif);
            return -EINVAL;
        }

        more_data = txp->flags & XEN_NETTXF_more_data;

        if (!drop_err)
            txp++;

    } while (more_data);

    if (drop_err) {
        netbk_tx_err(vif, first, cons + slots);
        return drop_err;
    }

    return slots;
}
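/*
 * Worked example of the 64 KiB overflow check above (illustrative):
 * the 'size' field of a tx request is a u16, so a guest frame of
 * 65600 bytes arrives with first->size = 65600 % 65536 = 64. The
 * first 1000-byte continuation slot then satisfies
 * txp->size > first->size, and the whole packet is consumed and
 * dropped rather than treated as a fatal error.
 */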
static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
                                         u16 pending_idx)
{
    struct page *page;
    page = alloc_page(GFP_KERNEL|__GFP_COLD);
    if (!page)
        return NULL;
    set_page_ext(page, netbk, pending_idx);
    netbk->mmap_pages[pending_idx] = page;
    return page;
}
static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
                                                  struct xenvif *vif,
                                                  struct sk_buff *skb,
                                                  struct xen_netif_tx_request *txp,
                                                  struct gnttab_copy *gop)
{
    struct skb_shared_info *shinfo = skb_shinfo(skb);
    skb_frag_t *frags = shinfo->frags;
    u16 pending_idx = *((u16 *)skb->data);
    u16 head_idx = 0;
    int slot, start;
    struct page *page;
    pending_ring_idx_t index, start_idx = 0;
    uint16_t dst_offset;
    unsigned int nr_slots;
    struct pending_tx_info *first = NULL;

    /* At this point shinfo->nr_frags is in fact the number of
     * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
     */
    nr_slots = shinfo->nr_frags;

    /* Skip first skb fragment if it is on same page as header fragment. */
    start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

    /* Coalesce tx requests, at this point the packet passed in
     * should be <= 64K. Any packets larger than 64K have been
     * handled in netbk_count_requests().
     */
    for (shinfo->nr_frags = slot = start; slot < nr_slots;
         shinfo->nr_frags++) {
        struct pending_tx_info *pending_tx_info =
            netbk->pending_tx_info;

        page = alloc_page(GFP_KERNEL|__GFP_COLD);
        if (!page)
            goto err;

        dst_offset = 0;
        first = NULL;
        while (dst_offset < PAGE_SIZE && slot < nr_slots) {
            gop->flags = GNTCOPY_source_gref;

            gop->source.u.ref = txp->gref;
            gop->source.domid = vif->domid;
            gop->source.offset = txp->offset;

            gop->dest.domid = DOMID_SELF;

            gop->dest.offset = dst_offset;
            gop->dest.u.gmfn = virt_to_mfn(page_address(page));

            if (dst_offset + txp->size > PAGE_SIZE) {
                /* This page can only merge a portion
                 * of tx request. Do not increment any
                 * pointer / counter here. The txp
                 * will be dealt with in future
                 * rounds, eventually hitting the
                 * `else` branch.
                 */
                gop->len = PAGE_SIZE - dst_offset;
                txp->offset += gop->len;
                txp->size -= gop->len;
                dst_offset += gop->len; /* quit loop */
            } else {
                /* This tx request can be merged in the page */
                gop->len = txp->size;
                dst_offset += gop->len;

                index = pending_index(netbk->pending_cons++);

                pending_idx = netbk->pending_ring[index];

                memcpy(&pending_tx_info[pending_idx].req, txp,
                       sizeof(*txp));

                pending_tx_info[pending_idx].vif = vif;

                /* Poison these fields, corresponding
                 * fields for head tx req will be set
                 * to correct values after the loop.
                 */
                netbk->mmap_pages[pending_idx] = (void *)(~0UL);
                pending_tx_info[pending_idx].head =
                    INVALID_PENDING_RING_IDX;

                if (!first) {
                    first = &pending_tx_info[pending_idx];
                    start_idx = index;
                    head_idx = pending_idx;
                }

                txp++;
                slot++;
            }

            gop++;
        }

        first->req.offset = 0;
        first->req.size = dst_offset;
        first->head = start_idx;
        set_page_ext(page, netbk, head_idx);
        netbk->mmap_pages[head_idx] = page;
        frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
    }

    BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);

    return gop;
err:
    /* Unwind, freeing all pages and sending error responses. */
    while (shinfo->nr_frags-- > start) {
        xen_netbk_idx_release(netbk,
                frag_get_pending_idx(&frags[shinfo->nr_frags]),
                XEN_NETIF_RSP_ERROR);
    }
    /* The head too, if necessary. */
    if (start)
        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

    return NULL;
}
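/*
 * Coalescing example (illustrative, not from the original source):
 * continuation slots of 2000, 2000 and 1500 bytes packed into one
 * 4096-byte page: the first two fit entirely (dst_offset 0..2000 and
 * 2000..4000, one gnttab_copy each), while the third straddles the
 * page boundary -- 96 bytes finish this page and the remaining 1404
 * bytes start the next one, so a single tx request can consume two
 * copy ops. This is the worst case behind tx_copy_ops[2*MAX_PENDING_REQS].
 */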
static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
                                  struct sk_buff *skb,
                                  struct gnttab_copy **gopp)
{
    struct gnttab_copy *gop = *gopp;
    u16 pending_idx = *((u16 *)skb->data);
    struct skb_shared_info *shinfo = skb_shinfo(skb);
    struct pending_tx_info *tx_info;
    int nr_frags = shinfo->nr_frags;
    int i, err, start;
    u16 peek; /* peek into next tx request */

    /* Check status of header. */
    err = gop->status;
    if (unlikely(err))
        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

    /* Skip first skb fragment if it is on same page as header fragment. */
    start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);

    for (i = start; i < nr_frags; i++) {
        int j, newerr;
        pending_ring_idx_t head;

        pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
        tx_info = &netbk->pending_tx_info[pending_idx];
        head = tx_info->head;

        /* Check error status: if okay then remember grant handle. */
        do {
            newerr = (++gop)->status;
            if (newerr)
                break;
            peek = netbk->pending_ring[pending_index(++head)];
        } while (!pending_tx_is_head(netbk, peek));

        if (likely(!newerr)) {
            /* Had a previous error? Invalidate this fragment. */
            if (unlikely(err))
                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
            continue;
        }

        /* Error on this fragment: respond to client with an error. */
        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);

        /* Not the first error? Preceding frags already invalidated. */
        if (err)
            continue;

        /* First error: invalidate header and preceding fragments. */
        pending_idx = *((u16 *)skb->data);
        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
        for (j = start; j < i; j++) {
            pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
            xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
        }

        /* Remember the error: invalidate all subsequent fragments. */
        err = newerr;
    }

    *gopp = gop + 1;
    return err;
}
static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
{
    struct skb_shared_info *shinfo = skb_shinfo(skb);
    int nr_frags = shinfo->nr_frags;
    int i;

    for (i = 0; i < nr_frags; i++) {
        skb_frag_t *frag = shinfo->frags + i;
        struct xen_netif_tx_request *txp;
        struct page *page;
        u16 pending_idx;

        pending_idx = frag_get_pending_idx(frag);

        txp = &netbk->pending_tx_info[pending_idx].req;
        page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
        __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
        skb->len += txp->size;
        skb->data_len += txp->size;
        skb->truesize += txp->size;

        /* Take an extra reference to offset xen_netbk_idx_release */
        get_page(netbk->mmap_pages[pending_idx]);
        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
    }
}
static int xen_netbk_get_extras(struct xenvif *vif,
                                struct xen_netif_extra_info *extras,
                                int work_to_do)
{
    struct xen_netif_extra_info extra;
    RING_IDX cons = vif->tx.req_cons;

    do {
        if (unlikely(work_to_do-- <= 0)) {
            netdev_err(vif->dev, "Missing extra info\n");
            netbk_fatal_tx_err(vif);
            return -EBADR;
        }

        memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
               sizeof(extra));
        if (unlikely(!extra.type ||
                     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
            vif->tx.req_cons = ++cons;
            netdev_err(vif->dev,
                       "Invalid extra type: %d\n", extra.type);
            netbk_fatal_tx_err(vif);
            return -EINVAL;
        }

        memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
        vif->tx.req_cons = ++cons;
    } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);

    return work_to_do;
}
static int netbk_set_skb_gso(struct xenvif *vif,
                             struct sk_buff *skb,
                             struct xen_netif_extra_info *gso)
{
    if (!gso->u.gso.size) {
        netdev_err(vif->dev, "GSO size must not be zero.\n");
        netbk_fatal_tx_err(vif);
        return -EINVAL;
    }

    /* Currently only TCPv4 S.O. is supported. */
    if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
        netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
        netbk_fatal_tx_err(vif);
        return -EINVAL;
    }

    skb_shinfo(skb)->gso_size = gso->u.gso.size;
    skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

    /* Header must be checked, and gso_segs computed. */
    skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
    skb_shinfo(skb)->gso_segs = 0;

    return 0;
}
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
    struct iphdr *iph;
    int err = -EPROTO;
    int recalculate_partial_csum = 0;

    /*
     * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
     * peers can fail to set NETRXF_csum_blank when sending a GSO
     * frame. In this case force the SKB to CHECKSUM_PARTIAL and
     * recalculate the partial checksum.
     */
    if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
        vif->rx_gso_checksum_fixup++;
        skb->ip_summed = CHECKSUM_PARTIAL;
        recalculate_partial_csum = 1;
    }

    /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
    if (skb->ip_summed != CHECKSUM_PARTIAL)
        return 0;

    if (skb->protocol != htons(ETH_P_IP))
        goto out;

    iph = (void *)skb->data;
    switch (iph->protocol) {
    case IPPROTO_TCP:
        if (!skb_partial_csum_set(skb, 4 * iph->ihl,
                                  offsetof(struct tcphdr, check)))
            goto out;

        if (recalculate_partial_csum) {
            struct tcphdr *tcph = tcp_hdr(skb);
            tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                             skb->len - iph->ihl*4,
                                             IPPROTO_TCP, 0);
        }
        break;
    case IPPROTO_UDP:
        if (!skb_partial_csum_set(skb, 4 * iph->ihl,
                                  offsetof(struct udphdr, check)))
            goto out;

        if (recalculate_partial_csum) {
            struct udphdr *udph = udp_hdr(skb);
            udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                             skb->len - iph->ihl*4,
                                             IPPROTO_UDP, 0);
        }
        break;
    default:
        if (net_ratelimit())
            netdev_err(vif->dev,
                       "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
                       iph->protocol);
        goto out;
    }

    err = 0;

out:
    return err;
}
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
    unsigned long now = jiffies;
    unsigned long next_credit =
        vif->credit_timeout.expires +
        msecs_to_jiffies(vif->credit_usec / 1000);

    /* Timer could already be pending in rare cases. */
    if (timer_pending(&vif->credit_timeout))
        return true;

    /* Passed the point where we can replenish credit? */
    if (time_after_eq(now, next_credit)) {
        vif->credit_timeout.expires = now;
        tx_add_credit(vif);
    }

    /* Still too big to send right now? Set a callback. */
    if (size > vif->remaining_credit) {
        vif->credit_timeout.data     =
            (unsigned long)vif;
        vif->credit_timeout.function =
            tx_credit_callback;
        mod_timer(&vif->credit_timeout,
                  next_credit);

        return true;
    }

    return false;
}
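/*
 * Usage sketch (illustrative, not from the original source): to cap a
 * vif at roughly 1 MB/s one would configure credit_bytes = 1000000
 * and credit_usec = 1000000. A request larger than remaining_credit
 * arms credit_timeout so that tx_credit_callback() tops the credit
 * back up one replenish period after the previous refill, and
 * transmission resumes from there.
 */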
static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
{
    struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
    struct sk_buff *skb;
    int ret;

    while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
            < MAX_PENDING_REQS) &&
           !list_empty(&netbk->net_schedule_list)) {
        struct xenvif *vif;
        struct xen_netif_tx_request txreq;
        struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
        struct page *page;
        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
        u16 pending_idx;
        RING_IDX idx;
        int work_to_do;
        unsigned int data_len;
        pending_ring_idx_t index;

        /* Get a netif from the list with work to do. */
        vif = poll_net_schedule_list(netbk);
        /* This can sometimes happen because the test of
         * list_empty(net_schedule_list) at the top of the
         * loop is unlocked. Just go back and have another
         * look.
         */
        if (!vif)
            continue;

        if (vif->tx.sring->req_prod - vif->tx.req_cons >
            XEN_NETIF_TX_RING_SIZE) {
            netdev_err(vif->dev,
                       "Impossible number of requests. "
                       "req_prod %d, req_cons %d, size %ld\n",
                       vif->tx.sring->req_prod, vif->tx.req_cons,
                       XEN_NETIF_TX_RING_SIZE);
            netbk_fatal_tx_err(vif);
            continue;
        }

        RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
        if (!work_to_do) {
            xenvif_put(vif);
            continue;
        }

        idx = vif->tx.req_cons;
        rmb(); /* Ensure that we see the request before we copy it. */
        memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));

        /* Credit-based scheduling. */
        if (txreq.size > vif->remaining_credit &&
            tx_credit_exceeded(vif, txreq.size)) {
            xenvif_put(vif);
            continue;
        }

        vif->remaining_credit -= txreq.size;

        work_to_do--;
        vif->tx.req_cons = ++idx;

        memset(extras, 0, sizeof(extras));
        if (txreq.flags & XEN_NETTXF_extra_info) {
            work_to_do = xen_netbk_get_extras(vif, extras,
                                              work_to_do);
            idx = vif->tx.req_cons;
            if (unlikely(work_to_do < 0))
                continue;
        }

        ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
        if (unlikely(ret < 0))
            continue;

        idx += ret;

        if (unlikely(txreq.size < ETH_HLEN)) {
            netdev_dbg(vif->dev,
                       "Bad packet size: %d\n", txreq.size);
            netbk_tx_err(vif, &txreq, idx);
            continue;
        }

        /* No crossing a page as the payload mustn't fragment. */
        if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
            netdev_err(vif->dev,
                       "txreq.offset: %x, size: %u, end: %lu\n",
                       txreq.offset, txreq.size,
                       (txreq.offset&~PAGE_MASK) + txreq.size);
            netbk_fatal_tx_err(vif);
            continue;
        }

        index = pending_index(netbk->pending_cons);
        pending_idx = netbk->pending_ring[index];

        data_len = (txreq.size > PKT_PROT_LEN &&
                    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
            PKT_PROT_LEN : txreq.size;

        skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
                        GFP_ATOMIC | __GFP_NOWARN);
        if (unlikely(skb == NULL)) {
            netdev_dbg(vif->dev,
                       "Can't allocate a skb in start_xmit.\n");
            netbk_tx_err(vif, &txreq, idx);
            break;
        }

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);

        if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
            struct xen_netif_extra_info *gso;
            gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];

            if (netbk_set_skb_gso(vif, skb, gso)) {
                /* Failure in netbk_set_skb_gso is fatal. */
                kfree_skb(skb);
                continue;
            }
        }

        /* XXX could copy straight to head */
        page = xen_netbk_alloc_page(netbk, pending_idx);
        if (!page) {
            kfree_skb(skb);
            netbk_tx_err(vif, &txreq, idx);
            continue;
        }

        gop->source.u.ref = txreq.gref;
        gop->source.domid = vif->domid;
        gop->source.offset = txreq.offset;

        gop->dest.u.gmfn = virt_to_mfn(page_address(page));
        gop->dest.domid = DOMID_SELF;
        gop->dest.offset = txreq.offset;

        gop->len = txreq.size;
        gop->flags = GNTCOPY_source_gref;

        gop++;

        memcpy(&netbk->pending_tx_info[pending_idx].req,
               &txreq, sizeof(txreq));
        netbk->pending_tx_info[pending_idx].vif = vif;
        netbk->pending_tx_info[pending_idx].head = index;
        *((u16 *)skb->data) = pending_idx;

        __skb_put(skb, data_len);

        skb_shinfo(skb)->nr_frags = ret;
        if (data_len < txreq.size) {
            skb_shinfo(skb)->nr_frags++;
            frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                 pending_idx);
        } else {
            frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
                                 INVALID_PENDING_IDX);
        }

        netbk->pending_cons++;

        request_gop = xen_netbk_get_requests(netbk, vif,
                                             skb, txfrags, gop);
        if (request_gop == NULL) {
            kfree_skb(skb);
            netbk_tx_err(vif, &txreq, idx);
            continue;
        }
        gop = request_gop;

        __skb_queue_tail(&netbk->tx_queue, skb);

        vif->tx.req_cons = idx;
        xen_netbk_check_rx_xenvif(vif);

        if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
            break;
    }

    return gop - netbk->tx_copy_ops;
}
static void xen_netbk_tx_submit(struct xen_netbk *netbk)
{
    struct gnttab_copy *gop = netbk->tx_copy_ops;
    struct sk_buff *skb;

    while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
        struct xen_netif_tx_request *txp;
        struct xenvif *vif;
        u16 pending_idx;
        unsigned data_len;

        pending_idx = *((u16 *)skb->data);
        vif = netbk->pending_tx_info[pending_idx].vif;
        txp = &netbk->pending_tx_info[pending_idx].req;

        /* Check the remap error code. */
        if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
            netdev_dbg(vif->dev, "netback grant failed.\n");
            skb_shinfo(skb)->nr_frags = 0;
            kfree_skb(skb);
            continue;
        }

        data_len = skb->len;
        memcpy(skb->data,
               (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
               data_len);
        if (data_len < txp->size) {
            /* Append the packet payload as a fragment. */
            txp->offset += data_len;
            txp->size -= data_len;
        } else {
            /* Schedule a response immediately. */
            xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
        }

        if (txp->flags & XEN_NETTXF_csum_blank)
            skb->ip_summed = CHECKSUM_PARTIAL;
        else if (txp->flags & XEN_NETTXF_data_validated)
            skb->ip_summed = CHECKSUM_UNNECESSARY;

        xen_netbk_fill_frags(netbk, skb);

        /*
         * If the initial fragment was < PKT_PROT_LEN then
         * pull through some bytes from the other fragments to
         * increase the linear region to PKT_PROT_LEN bytes.
         */
        if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
            int target = min_t(int, skb->len, PKT_PROT_LEN);
            __pskb_pull_tail(skb, target - skb_headlen(skb));
        }

        skb->dev      = vif->dev;
        skb->protocol = eth_type_trans(skb, skb->dev);
        skb_reset_network_header(skb);

        if (checksum_setup(vif, skb)) {
            netdev_dbg(vif->dev,
                       "Can't setup checksum in net_tx_action\n");
            kfree_skb(skb);
            continue;
        }

        skb_probe_transport_header(skb, 0);

        vif->dev->stats.rx_bytes += skb->len;
        vif->dev->stats.rx_packets++;

        xenvif_receive_skb(vif, skb);
    }
}
/* Called after netfront has transmitted */
static void xen_netbk_tx_action(struct xen_netbk *netbk)
{
    unsigned nr_gops;

    nr_gops = xen_netbk_tx_build_gops(netbk);

    if (nr_gops == 0)
        return;

    gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);

    xen_netbk_tx_submit(netbk);
}
static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
                                  u8 status)
{
    struct xenvif *vif;
    struct pending_tx_info *pending_tx_info;
    pending_ring_idx_t head;
    u16 peek; /* peek into next tx request */

    BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));

    /* Already complete? */
    if (netbk->mmap_pages[pending_idx] == NULL)
        return;

    pending_tx_info = &netbk->pending_tx_info[pending_idx];

    vif = pending_tx_info->vif;
    head = pending_tx_info->head;

    BUG_ON(!pending_tx_is_head(netbk, head));
    BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);

    do {
        pending_ring_idx_t index;
        pending_ring_idx_t idx = pending_index(head);
        u16 info_idx = netbk->pending_ring[idx];

        pending_tx_info = &netbk->pending_tx_info[info_idx];
        make_tx_response(vif, &pending_tx_info->req, status);

        /* Setting any number other than
         * INVALID_PENDING_RING_IDX indicates this slot is
         * starting a new packet / ending a previous packet.
         */
        pending_tx_info->head = 0;

        index = pending_index(netbk->pending_prod++);
        netbk->pending_ring[index] = netbk->pending_ring[info_idx];

        xenvif_put(vif);

        peek = netbk->pending_ring[pending_index(++head)];

    } while (!pending_tx_is_head(netbk, peek));

    netbk->mmap_pages[pending_idx]->mapping = 0;
    put_page(netbk->mmap_pages[pending_idx]);
    netbk->mmap_pages[pending_idx] = NULL;
}
static void make_tx_response(struct xenvif *vif,
                             struct xen_netif_tx_request *txp,
                             s8 st)
{
    RING_IDX i = vif->tx.rsp_prod_pvt;
    struct xen_netif_tx_response *resp;
    int notify;

    resp = RING_GET_RESPONSE(&vif->tx, i);
    resp->id     = txp->id;
    resp->status = st;

    if (txp->flags & XEN_NETTXF_extra_info)
        RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;

    vif->tx.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
    if (notify)
        notify_remote_via_irq(vif->tx_irq);
}
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
                                                      u16 id,
                                                      s8 st,
                                                      u16 offset,
                                                      u16 size,
                                                      u16 flags)
{
    RING_IDX i = vif->rx.rsp_prod_pvt;
    struct xen_netif_rx_response *resp;

    resp = RING_GET_RESPONSE(&vif->rx, i);
    resp->offset = offset;
    resp->flags  = flags;
    resp->id     = id;
    resp->status = (s16)size;
    if (st < 0)
        resp->status = (s16)st;

    vif->rx.rsp_prod_pvt = ++i;

    return resp;
}
static inline int rx_work_todo(struct xen_netbk *netbk)
{
    return !skb_queue_empty(&netbk->rx_queue);
}
static inline int tx_work_todo(struct xen_netbk *netbk)
{
    if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
         < MAX_PENDING_REQS) &&
        !list_empty(&netbk->net_schedule_list))
        return 1;

    return 0;
}
static int xen_netbk_kthread(void *data)
{
    struct xen_netbk *netbk = data;
    while (!kthread_should_stop()) {
        wait_event_interruptible(netbk->wq,
                                 rx_work_todo(netbk) ||
                                 tx_work_todo(netbk) ||
                                 kthread_should_stop());
        cond_resched();

        if (kthread_should_stop())
            break;

        if (rx_work_todo(netbk))
            xen_netbk_rx_action(netbk);

        if (tx_work_todo(netbk))
            xen_netbk_tx_action(netbk);
    }

    return 0;
}
void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
{
    if (vif->tx.sring)
        xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                vif->tx.sring);
    if (vif->rx.sring)
        xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
                                vif->rx.sring);
}
int xen_netbk_map_frontend_rings(struct xenvif *vif,
                                 grant_ref_t tx_ring_ref,
                                 grant_ref_t rx_ring_ref)
{
    void *addr;
    struct xen_netif_tx_sring *txs;
    struct xen_netif_rx_sring *rxs;

    int err = -ENOMEM;

    err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                 tx_ring_ref, &addr);
    if (err)
        goto err;

    txs = (struct xen_netif_tx_sring *)addr;
    BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);

    err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                 rx_ring_ref, &addr);
    if (err)
        goto err;

    rxs = (struct xen_netif_rx_sring *)addr;
    BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);

    vif->rx_req_cons_peek = 0;

    return 0;

err:
    xen_netbk_unmap_frontend_rings(vif);
    return err;
}
static int __init netback_init(void)
{
    int i;
    int rc = 0;
    int group;

    if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
        pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
                fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
        fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
    }

    xen_netbk_group_nr = num_online_cpus();
    xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
    if (!xen_netbk)
        return -ENOMEM;

    for (group = 0; group < xen_netbk_group_nr; group++) {
        struct xen_netbk *netbk = &xen_netbk[group];
        skb_queue_head_init(&netbk->rx_queue);
        skb_queue_head_init(&netbk->tx_queue);

        init_timer(&netbk->net_timer);
        netbk->net_timer.data = (unsigned long)netbk;
        netbk->net_timer.function = xen_netbk_alarm;

        netbk->pending_cons = 0;
        netbk->pending_prod = MAX_PENDING_REQS;
        for (i = 0; i < MAX_PENDING_REQS; i++)
            netbk->pending_ring[i] = i;

        init_waitqueue_head(&netbk->wq);
        netbk->task = kthread_create(xen_netbk_kthread,
                                     (void *)netbk,
                                     "netback/%u", group);

        if (IS_ERR(netbk->task)) {
            pr_alert("kthread_create() fails at netback\n");
            del_timer(&netbk->net_timer);
            rc = PTR_ERR(netbk->task);
            goto failed_init;
        }

        kthread_bind(netbk->task, group);

        INIT_LIST_HEAD(&netbk->net_schedule_list);

        spin_lock_init(&netbk->net_schedule_list_lock);

        atomic_set(&netbk->netfront_count, 0);

        wake_up_process(netbk->task);
    }

    rc = xenvif_xenbus_init();
    if (rc)
        goto failed_init;

    return 0;

failed_init:
    while (--group >= 0) {
        struct xen_netbk *netbk = &xen_netbk[group];
        del_timer(&netbk->net_timer);
        kthread_stop(netbk->task);
    }
    vfree(xen_netbk);
    return rc;
}

module_init(netback_init);
static void __exit netback_fini(void)
{
    int i, j;

    xenvif_xenbus_fini();

    for (i = 0; i < xen_netbk_group_nr; i++) {
        struct xen_netbk *netbk = &xen_netbk[i];
        del_timer_sync(&netbk->net_timer);
        kthread_stop(netbk->task);
        for (j = 0; j < MAX_PENDING_REQS; j++) {
            if (netbk->mmap_pages[j])
                __free_page(netbk->mmap_pages[j]);
        }
    }

    vfree(xen_netbk);
}
module_exit(netback_fini);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vif");