1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for more
15 /* This driver lives in a spar partition, and registers to ethernet io
16 * channels from the visorbus driver. It creates netdev devices and
17 * forwards transmit to the IO channel and accepts rcvs from the IO
18 * Partition via the IO channel.
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
29 #include "iochannel.h"
31 #define VISORNIC_INFINITE_RSP_WAIT 0
33 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
36 #define MAX_BUF 163840
37 #define NAPI_WEIGHT 64
39 /* GUIDS for director channel type supported by this driver. */
40 static struct visor_channeltype_descriptor visornic_channel_types
[] = {
41 /* Note that the only channel type we expect to be reported by the
42 * bus driver is the SPAR_VNIC channel.
44 { SPAR_VNIC_CHANNEL_PROTOCOL_UUID
, "ultravnic" },
45 { NULL_UUID_LE
, NULL
}
47 MODULE_DEVICE_TABLE(visorbus
, visornic_channel_types
);
49 * FIXME XXX: This next line of code must be fixed and removed before
50 * acceptance into the 'normal' part of the kernel. It is only here as a place
51 * holder to get module autoloading functionality working for visorbus. Code
52 * must be added to scripts/mod/file2alias.c, etc., to get this working
55 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR
);
/* Message counters for one visornic IO channel (embedded in
 * struct visornic_devdata as 'chstat').
 *
 * NOTE(review): fields without a comment are not updated by any code
 * visible alongside this definition; their meaning is only suggested by
 * their names -- confirm against the response-handling path.
 */
struct chanstat {
	unsigned long got_rcv;
	unsigned long got_enbdisack;
	/* subtracted from sent_xmit to get the number of outstanding xmits;
	 * cleared together with sent_xmit by visornic_serverdown_complete()
	 */
	unsigned long got_xmit_done;
	unsigned long xmit_fail;
	/* bumped by send_enbdis() when the channel insert returns 0 */
	unsigned long sent_enbdis;
	unsigned long sent_promisc;
	/* bumped by post_skb() after a successful channel insert */
	unsigned long sent_post;
	/* bumped by post_skb(); presumably the failed-insert case -- confirm */
	unsigned long sent_post_failed;
	/* bumped by visornic_xmit() once the skb is queued on xmitbufhead */
	unsigned long sent_xmit;
	/* xmits refused because max_outstanding_net_xmits was reached */
	unsigned long reject_count;
	unsigned long extra_rcvbufs_sent;
};
71 struct visornic_devdata
{
72 /* 0 disabled 1 enabled to receive */
73 unsigned short enabled
;
74 /* NET_RCV_ENABLE/DISABLE acked by IOPART */
75 unsigned short enab_dis_acked
;
77 struct visor_device
*dev
;
78 struct net_device
*netdev
;
79 struct net_device_stats net_stats
;
80 atomic_t interrupt_rcvd
;
81 wait_queue_head_t rsp_queue
;
82 struct sk_buff
**rcvbuf
;
83 /* incarnation_id lets IOPART know about re-birth */
85 /* flags as they were prior to set_multicast_list */
86 unsigned short old_flags
;
87 atomic_t usage
; /* count of users */
89 /* number of rcv buffers the vnic will post */
91 int num_rcv_bufs_could_not_alloc
;
92 atomic_t num_rcvbuf_in_iovm
;
93 unsigned long alloc_failed_in_if_needed_cnt
;
94 unsigned long alloc_failed_in_repost_rtn_cnt
;
96 /* absolute max number of outstanding xmits - should never hit this */
97 unsigned long max_outstanding_net_xmits
;
98 /* high water mark for calling netif_stop_queue() */
99 unsigned long upper_threshold_net_xmits
;
100 /* high water mark for calling netif_wake_queue() */
101 unsigned long lower_threshold_net_xmits
;
102 /* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
103 struct sk_buff_head xmitbufhead
;
105 visorbus_state_complete_func server_down_complete_func
;
106 struct work_struct timeout_reset
;
107 /* cmdrsp_rcv is used for posting/unposting rcv buffers */
108 struct uiscmdrsp
*cmdrsp_rcv
;
109 /* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
110 struct uiscmdrsp
*xmit_cmdrsp
;
112 bool server_down
; /* IOPART is down */
113 bool server_change_state
; /* Processing SERVER_CHANGESTATE msg */
114 bool going_away
; /* device is being torn down */
115 struct dentry
*eth_debugfs_dir
;
117 u64 interrupts_notme
;
118 u64 interrupts_disabled
;
120 spinlock_t priv_lock
; /* spinlock to access devdata structures */
122 /* flow control counter */
123 u64 flow_control_upper_hits
;
124 u64 flow_control_lower_hits
;
127 unsigned long n_rcv0
; /* # rcvs of 0 buffers */
128 unsigned long n_rcv1
; /* # rcvs of 1 buffers */
129 unsigned long n_rcv2
; /* # rcvs of 2 buffers */
130 unsigned long n_rcvx
; /* # rcvs of >2 buffers */
131 unsigned long found_repost_rcvbuf_cnt
; /* # repost_rcvbuf_cnt */
132 unsigned long repost_found_skb_cnt
; /* # of found the skb */
133 unsigned long n_repost_deficit
; /* # of lost rcv buffers */
134 unsigned long bad_rcv_buf
; /* # of unknown rcv skb not freed */
135 unsigned long n_rcv_packets_not_accepted
;/* # bogs rcv packets */
137 int queuefullmsg_logged
;
138 struct chanstat chstat
;
139 struct timer_list irq_poll_timer
;
140 struct napi_struct napi
;
141 struct uiscmdrsp cmdrsp
[SIZEOF_CMDRSP
];
145 * visor_copy_fragsinfo_from_skb(
146 * @skb_in: skbuff that we are pulling the frags from
147 * @firstfraglen: length of first fragment in skb
148 * @frags_max: max len of frags array
149 * @frags: frags array filled in on output
151 * Copy the fragment list in the SKB to a phys_info
152 * array that the IOPART understands.
153 * Return value indicates number of entries filled in frags
154 * Negative values indicate an error.
157 visor_copy_fragsinfo_from_skb(struct sk_buff
*skb
, unsigned int firstfraglen
,
158 unsigned int frags_max
,
159 struct phys_info frags
[])
161 unsigned int count
= 0, frag
, size
, offset
= 0, numfrags
;
162 unsigned int total_count
;
164 numfrags
= skb_shinfo(skb
)->nr_frags
;
166 /* Compute the number of fragments this skb has, and if its more than
167 * frag array can hold, linearize the skb
169 total_count
= numfrags
+ (firstfraglen
/ PI_PAGE_SIZE
);
170 if (firstfraglen
% PI_PAGE_SIZE
)
173 if (total_count
> frags_max
) {
174 if (skb_linearize(skb
))
176 numfrags
= skb_shinfo(skb
)->nr_frags
;
180 while (firstfraglen
) {
181 if (count
== frags_max
)
184 frags
[count
].pi_pfn
=
185 page_to_pfn(virt_to_page(skb
->data
+ offset
));
186 frags
[count
].pi_off
=
187 (unsigned long)(skb
->data
+ offset
) & PI_PAGE_MASK
;
188 size
= min_t(unsigned int, firstfraglen
,
189 PI_PAGE_SIZE
- frags
[count
].pi_off
);
191 /* can take smallest of firstfraglen (what's left) OR
192 * bytes left in the page
194 frags
[count
].pi_len
= size
;
195 firstfraglen
-= size
;
200 if ((count
+ numfrags
) > frags_max
)
203 for (frag
= 0; frag
< numfrags
; frag
++) {
204 count
= add_physinfo_entries(page_to_pfn(
205 skb_frag_page(&skb_shinfo(skb
)->frags
[frag
])),
206 skb_shinfo(skb
)->frags
[frag
].
208 skb_shinfo(skb
)->frags
[frag
].
209 size
, count
, frags_max
, frags
);
210 /* add_physinfo_entries only returns
211 * zero if the frags array is out of room
212 * That should never happen because we
213 * fail above, if count+numfrags > frags_max.
219 if (skb_shinfo(skb
)->frag_list
) {
220 struct sk_buff
*skbinlist
;
223 for (skbinlist
= skb_shinfo(skb
)->frag_list
; skbinlist
;
224 skbinlist
= skbinlist
->next
) {
225 c
= visor_copy_fragsinfo_from_skb(skbinlist
,
238 static ssize_t
enable_ints_write(struct file
*file
,
239 const char __user
*buffer
,
240 size_t count
, loff_t
*ppos
)
242 /* Don't want to break ABI here by having a debugfs
243 * file that no longer exists or is writable, so
244 * let's just make this a vestigial function
249 static const struct file_operations debugfs_enable_ints_fops
= {
250 .write
= enable_ints_write
,
254 * visornic_serverdown_complete - IOPART went down, pause device
255 * @devdata: visornic device whose IO partition went down
257 * The IO partition has gone down and we need to do some cleanup
258 * for when it comes back. Treat the IO partition as the link
263 visornic_serverdown_complete(struct visornic_devdata
*devdata
)
265 struct net_device
*netdev
;
267 netdev
= devdata
->netdev
;
269 /* Stop polling for interrupts */
270 del_timer_sync(&devdata
->irq_poll_timer
);
276 atomic_set(&devdata
->num_rcvbuf_in_iovm
, 0);
277 devdata
->chstat
.sent_xmit
= 0;
278 devdata
->chstat
.got_xmit_done
= 0;
280 if (devdata
->server_down_complete_func
)
281 (*devdata
->server_down_complete_func
)(devdata
->dev
, 0);
283 devdata
->server_down
= true;
284 devdata
->server_change_state
= false;
285 devdata
->server_down_complete_func
= NULL
;
289 * visornic_serverdown - Command has notified us that IOPART is down
290 * @devdata: device that is being managed by IOPART
292 * Schedule the work needed to handle the server down request. Make
293 * sure we haven't already handled the server change state event.
294 * Returns 0 if we scheduled the work, -EINVAL on error.
297 visornic_serverdown(struct visornic_devdata
*devdata
,
298 visorbus_state_complete_func complete_func
)
303 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
304 if (devdata
->server_change_state
) {
305 dev_dbg(&devdata
->dev
->device
, "%s changing state\n",
310 if (devdata
->server_down
) {
311 dev_dbg(&devdata
->dev
->device
, "%s already down\n",
316 if (devdata
->going_away
) {
317 dev_dbg(&devdata
->dev
->device
,
318 "%s aborting because device removal pending\n",
323 devdata
->server_change_state
= true;
324 devdata
->server_down_complete_func
= complete_func
;
325 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
327 visornic_serverdown_complete(devdata
);
331 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
336 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
337 * @netdev: network adapter the rcv bufs are attached to.
339 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
340 * so that it can write rcv data into our memory space.
341 * Return pointer to sk_buff
343 static struct sk_buff
*
344 alloc_rcv_buf(struct net_device
*netdev
)
348 /* NOTE: the first fragment in each rcv buffer is pointed to by
349 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
350 * in length, so the first frag is large enough to hold 1514.
352 skb
= alloc_skb(RCVPOST_BUF_SIZE
, GFP_ATOMIC
);
356 /* current value of mtu doesn't come into play here; large
357 * packets will just end up using multiple rcv buffers all of
360 skb
->len
= RCVPOST_BUF_SIZE
;
361 /* alloc_skb already zeroes it out for clarification. */
367 * post_skb - post a skb to the IO Partition.
368 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
369 * @devdata: visornic_devdata to post the skb to
370 * @skb: skb to give to the IO partition
372 * Send the skb to the IO Partition.
376 post_skb(struct uiscmdrsp
*cmdrsp
,
377 struct visornic_devdata
*devdata
, struct sk_buff
*skb
)
379 cmdrsp
->net
.buf
= skb
;
380 cmdrsp
->net
.rcvpost
.frag
.pi_pfn
= page_to_pfn(virt_to_page(skb
->data
));
381 cmdrsp
->net
.rcvpost
.frag
.pi_off
=
382 (unsigned long)skb
->data
& PI_PAGE_MASK
;
383 cmdrsp
->net
.rcvpost
.frag
.pi_len
= skb
->len
;
384 cmdrsp
->net
.rcvpost
.unique_num
= devdata
->incarnation_id
;
386 if ((cmdrsp
->net
.rcvpost
.frag
.pi_off
+ skb
->len
) <= PI_PAGE_SIZE
) {
387 cmdrsp
->net
.type
= NET_RCV_POST
;
388 cmdrsp
->cmdtype
= CMD_NET_TYPE
;
389 if (!visorchannel_signalinsert(devdata
->dev
->visorchannel
,
392 atomic_inc(&devdata
->num_rcvbuf_in_iovm
);
393 devdata
->chstat
.sent_post
++;
395 devdata
->chstat
.sent_post_failed
++;
401 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
402 * @netdev: netdevice we are enable/disable, used as context
404 * @state: enable = 1/disable = 0
405 * @devdata: visornic device we are enabling/disabling
407 * Send the enable/disable message to the IO Partition.
411 send_enbdis(struct net_device
*netdev
, int state
,
412 struct visornic_devdata
*devdata
)
414 devdata
->cmdrsp_rcv
->net
.enbdis
.enable
= state
;
415 devdata
->cmdrsp_rcv
->net
.enbdis
.context
= netdev
;
416 devdata
->cmdrsp_rcv
->net
.type
= NET_RCV_ENBDIS
;
417 devdata
->cmdrsp_rcv
->cmdtype
= CMD_NET_TYPE
;
418 if (!visorchannel_signalinsert(devdata
->dev
->visorchannel
,
420 devdata
->cmdrsp_rcv
))
421 devdata
->chstat
.sent_enbdis
++;
425 * visornic_disable_with_timeout - Disable network adapter
426 * @netdev: netdevice to disable
427 * @timeout: timeout to wait for disable
429 * Disable the network adapter and inform the IO Partition that we
430 * are disabled, reclaim memory from rcv bufs.
431 * Returns 0 on success, negative for failure of IO Partition
436 visornic_disable_with_timeout(struct net_device
*netdev
, const int timeout
)
438 struct visornic_devdata
*devdata
= netdev_priv(netdev
);
443 /* send a msg telling the other end we are stopping incoming pkts */
444 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
445 devdata
->enabled
= 0;
446 devdata
->enab_dis_acked
= 0; /* must wait for ack */
447 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
449 /* send disable and wait for ack -- don't hold lock when sending
450 * disable because if the queue is full, insert might sleep.
452 send_enbdis(netdev
, 0, devdata
);
454 /* wait for ack to arrive before we try to free rcv buffers
455 * NOTE: the other end automatically unposts the rcv buffers
456 * when it gets a disable.
458 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
459 while ((timeout
== VISORNIC_INFINITE_RSP_WAIT
) ||
461 if (devdata
->enab_dis_acked
)
463 if (devdata
->server_down
|| devdata
->server_change_state
) {
464 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
465 dev_dbg(&netdev
->dev
, "%s server went away\n",
469 set_current_state(TASK_INTERRUPTIBLE
);
470 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
471 wait
+= schedule_timeout(msecs_to_jiffies(10));
472 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
475 /* Wait for usage to go to 1 (no other users) before freeing
478 if (atomic_read(&devdata
->usage
) > 1) {
480 set_current_state(TASK_INTERRUPTIBLE
);
481 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
482 schedule_timeout(msecs_to_jiffies(10));
483 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
484 if (atomic_read(&devdata
->usage
))
488 /* we've set enabled to 0, so we can give up the lock. */
489 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
491 /* stop the transmit queue so nothing more can be transmitted */
492 netif_stop_queue(netdev
);
494 napi_disable(&devdata
->napi
);
496 skb_queue_purge(&devdata
->xmitbufhead
);
498 /* Free rcv buffers - other end has automatically unposted them on
501 for (i
= 0; i
< devdata
->num_rcv_bufs
; i
++) {
502 if (devdata
->rcvbuf
[i
]) {
503 kfree_skb(devdata
->rcvbuf
[i
]);
504 devdata
->rcvbuf
[i
] = NULL
;
512 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
513 * @netdev: struct netdevice
514 * @devdata: visornic_devdata
516 * Allocate rcv buffers and post them to the IO Partition.
517 * Return 0 for success, and negative for failure.
520 init_rcv_bufs(struct net_device
*netdev
, struct visornic_devdata
*devdata
)
524 /* allocate fixed number of receive buffers to post to uisnic
525 * post receive buffers after we've allocated a required amount
527 for (i
= 0; i
< devdata
->num_rcv_bufs
; i
++) {
528 devdata
->rcvbuf
[i
] = alloc_rcv_buf(netdev
);
529 if (!devdata
->rcvbuf
[i
])
530 break; /* if we failed to allocate one let us stop */
532 if (i
== 0) /* couldn't even allocate one -- bail out */
536 /* Ensure we can alloc 2/3 of the requested number of buffers.
537 * 2/3 is an arbitrary choice; used also in ndis init.c
539 if (count
< ((2 * devdata
->num_rcv_bufs
) / 3)) {
540 /* free receive buffers we did alloc and then bail out */
541 for (i
= 0; i
< count
; i
++) {
542 kfree_skb(devdata
->rcvbuf
[i
]);
543 devdata
->rcvbuf
[i
] = NULL
;
548 /* post receive buffers to receive incoming input - without holding
549 * lock - we've not enabled nor started the queue so there shouldn't
550 * be any rcv or xmit activity
552 for (i
= 0; i
< count
; i
++)
553 post_skb(devdata
->cmdrsp_rcv
, devdata
, devdata
->rcvbuf
[i
]);
559 * visornic_enable_with_timeout - send enable to IO Part
560 * @netdev: struct net_device
561 * @timeout: Time to wait for the ACK from the enable
563 * Sends enable to IOVM, inits, and posts receive buffers to IOVM
564 * timeout is defined in msecs (timeout of 0 specifies infinite wait)
565 * Return 0 for success, negative for failure.
568 visornic_enable_with_timeout(struct net_device
*netdev
, const int timeout
)
571 struct visornic_devdata
*devdata
= netdev_priv(netdev
);
575 /* NOTE: the other end automatically unposts the rcv buffers when it
578 i
= init_rcv_bufs(netdev
, devdata
);
580 dev_err(&netdev
->dev
,
581 "%s failed to init rcv bufs (%d)\n", __func__
, i
);
585 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
586 devdata
->enabled
= 1;
587 devdata
->enab_dis_acked
= 0;
589 /* now we're ready, let's send an ENB to uisnic but until we get
590 * an ACK back from uisnic, we'll drop the packets
592 devdata
->n_rcv_packets_not_accepted
= 0;
593 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
595 /* send enable and wait for ack -- don't hold lock when sending enable
596 * because if the queue is full, insert might sleep.
598 napi_enable(&devdata
->napi
);
599 send_enbdis(netdev
, 1, devdata
);
601 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
602 while ((timeout
== VISORNIC_INFINITE_RSP_WAIT
) ||
604 if (devdata
->enab_dis_acked
)
606 if (devdata
->server_down
|| devdata
->server_change_state
) {
607 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
608 dev_dbg(&netdev
->dev
, "%s server went away\n",
612 set_current_state(TASK_INTERRUPTIBLE
);
613 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
614 wait
+= schedule_timeout(msecs_to_jiffies(10));
615 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
618 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
620 if (!devdata
->enab_dis_acked
) {
621 dev_err(&netdev
->dev
, "%s missing ACK\n", __func__
);
625 netif_start_queue(netdev
);
631 * visornic_timeout_reset - handle xmit timeout resets
632 * @work: work item that scheduled the work
634 * Transmit Timeouts are typically handled by resetting the
635 * device for our virtual NIC we will send a Disable and Enable
636 * to the IOVM. If it doesn't respond we will trigger a serverdown.
639 visornic_timeout_reset(struct work_struct
*work
)
641 struct visornic_devdata
*devdata
;
642 struct net_device
*netdev
;
645 devdata
= container_of(work
, struct visornic_devdata
, timeout_reset
);
646 netdev
= devdata
->netdev
;
649 if (!netif_running(netdev
)) {
654 response
= visornic_disable_with_timeout(netdev
,
655 VISORNIC_INFINITE_RSP_WAIT
);
657 goto call_serverdown
;
659 response
= visornic_enable_with_timeout(netdev
,
660 VISORNIC_INFINITE_RSP_WAIT
);
662 goto call_serverdown
;
669 visornic_serverdown(devdata
, NULL
);
674 * visornic_open - Enable the visornic device and mark the queue started
675 * @netdev: netdevice to start
677 * Enable the device and start the transmit queue.
678 * Return 0 for success
681 visornic_open(struct net_device
*netdev
)
683 visornic_enable_with_timeout(netdev
, VISORNIC_INFINITE_RSP_WAIT
);
689 * visornic_close - Disables the visornic device and stops the queues
690 * @netdev: netdevice to stop
692 * Disable the device and stop the transmit queue.
693 * Return 0 for success
696 visornic_close(struct net_device
*netdev
)
698 visornic_disable_with_timeout(netdev
, VISORNIC_INFINITE_RSP_WAIT
);
704 * devdata_xmits_outstanding - compute outstanding xmits
705 * @devdata: visornic_devdata for device
707 * Return value is the number of outstanding xmits.
709 static unsigned long devdata_xmits_outstanding(struct visornic_devdata
*devdata
)
711 if (devdata
->chstat
.sent_xmit
>= devdata
->chstat
.got_xmit_done
)
712 return devdata
->chstat
.sent_xmit
-
713 devdata
->chstat
.got_xmit_done
;
714 return (ULONG_MAX
- devdata
->chstat
.got_xmit_done
715 + devdata
->chstat
.sent_xmit
+ 1);
719 * vnic_hit_high_watermark
720 * @devdata: indicates visornic device we are checking
721 * @high_watermark: max num of unacked xmits we will tolerate,
722 * before we will start throttling
724 * Returns true iff the number of unacked xmits sent to
725 * the IO partition is >= high_watermark.
727 static inline bool vnic_hit_high_watermark(struct visornic_devdata
*devdata
,
728 ulong high_watermark
)
730 return (devdata_xmits_outstanding(devdata
) >= high_watermark
);
734 * vnic_hit_low_watermark
735 * @devdata: indicates visornic device we are checking
736 * @low_watermark: we will wait until the num of unacked xmits
737 * drops to this value or lower before we start
740 * Returns true iff the number of unacked xmits sent to
741 * the IO partition is <= low_watermark.
743 static inline bool vnic_hit_low_watermark(struct visornic_devdata
*devdata
,
746 return (devdata_xmits_outstanding(devdata
) <= low_watermark
);
750 * visornic_xmit - send a packet to the IO Partition
751 * @skb: Packet to be sent
752 * @netdev: net device the packet is being sent from
754 * Convert the skb to a cmdrsp so the IO Partition can understand it.
755 * Send the XMIT command to the IO Partition for processing. This
756 * function is protected from concurrent calls by a spinlock xmit_lock
757 * in the net_device struct, but as soon as the function returns it
758 * can be called again.
759 * Returns NETDEV_TX_OK.
762 visornic_xmit(struct sk_buff
*skb
, struct net_device
*netdev
)
764 struct visornic_devdata
*devdata
;
765 int len
, firstfraglen
, padlen
;
766 struct uiscmdrsp
*cmdrsp
= NULL
;
769 devdata
= netdev_priv(netdev
);
770 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
772 if (netif_queue_stopped(netdev
) || devdata
->server_down
||
773 devdata
->server_change_state
) {
774 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
776 dev_dbg(&netdev
->dev
,
777 "%s busy - queue stopped\n", __func__
);
782 /* sk_buff struct is used to host network data throughout all the
783 * linux network subsystems
787 /* skb->len is the FULL length of data (including fragmentary portion)
788 * skb->data_len is the length of the fragment portion in frags
789 * skb->len - skb->data_len is size of the 1st fragment in skb->data
790 * calculate the length of the first fragment that skb->data is
793 firstfraglen
= skb
->len
- skb
->data_len
;
794 if (firstfraglen
< ETH_HLEN
) {
795 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
797 dev_err(&netdev
->dev
,
798 "%s busy - first frag too small (%d)\n",
799 __func__
, firstfraglen
);
804 if ((len
< ETH_MIN_PACKET_SIZE
) &&
805 ((skb_end_pointer(skb
) - skb
->data
) >= ETH_MIN_PACKET_SIZE
)) {
806 /* pad the packet out to minimum size */
807 padlen
= ETH_MIN_PACKET_SIZE
- len
;
808 memset(&skb
->data
[len
], 0, padlen
);
812 firstfraglen
+= padlen
;
815 cmdrsp
= devdata
->xmit_cmdrsp
;
817 memset(cmdrsp
, 0, SIZEOF_CMDRSP
);
818 cmdrsp
->net
.type
= NET_XMIT
;
819 cmdrsp
->cmdtype
= CMD_NET_TYPE
;
821 /* save the pointer to skb -- we'll need it for completion */
822 cmdrsp
->net
.buf
= skb
;
824 if (vnic_hit_high_watermark(devdata
,
825 devdata
->max_outstanding_net_xmits
)) {
826 /* extra NET_XMITs queued over to IOVM - need to wait */
827 devdata
->chstat
.reject_count
++;
828 if (!devdata
->queuefullmsg_logged
&&
829 ((devdata
->chstat
.reject_count
& 0x3ff) == 1))
830 devdata
->queuefullmsg_logged
= 1;
831 netif_stop_queue(netdev
);
832 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
834 dev_dbg(&netdev
->dev
,
835 "%s busy - waiting for iovm to catch up\n",
840 if (devdata
->queuefullmsg_logged
)
841 devdata
->queuefullmsg_logged
= 0;
843 if (skb
->ip_summed
== CHECKSUM_UNNECESSARY
) {
844 cmdrsp
->net
.xmt
.lincsum
.valid
= 1;
845 cmdrsp
->net
.xmt
.lincsum
.protocol
= skb
->protocol
;
846 if (skb_transport_header(skb
) > skb
->data
) {
847 cmdrsp
->net
.xmt
.lincsum
.hrawoff
=
848 skb_transport_header(skb
) - skb
->data
;
849 cmdrsp
->net
.xmt
.lincsum
.hrawoff
= 1;
851 if (skb_network_header(skb
) > skb
->data
) {
852 cmdrsp
->net
.xmt
.lincsum
.nhrawoff
=
853 skb_network_header(skb
) - skb
->data
;
854 cmdrsp
->net
.xmt
.lincsum
.nhrawoffv
= 1;
856 cmdrsp
->net
.xmt
.lincsum
.csum
= skb
->csum
;
858 cmdrsp
->net
.xmt
.lincsum
.valid
= 0;
861 /* save off the length of the entire data packet */
862 cmdrsp
->net
.xmt
.len
= len
;
864 /* copy ethernet header from first frag into ocmdrsp
865 * - everything else will be pass in frags & DMA'ed
867 memcpy(cmdrsp
->net
.xmt
.ethhdr
, skb
->data
, ETH_HLEN
);
868 /* copy frags info - from skb->data we need to only provide access
871 cmdrsp
->net
.xmt
.num_frags
=
872 visor_copy_fragsinfo_from_skb(skb
, firstfraglen
,
874 cmdrsp
->net
.xmt
.frags
);
875 if (cmdrsp
->net
.xmt
.num_frags
< 0) {
876 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
878 dev_err(&netdev
->dev
,
879 "%s busy - copy frags failed\n", __func__
);
884 if (visorchannel_signalinsert(devdata
->dev
->visorchannel
,
885 IOCHAN_TO_IOPART
, cmdrsp
)) {
886 netif_stop_queue(netdev
);
887 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
889 dev_dbg(&netdev
->dev
,
890 "%s busy - signalinsert failed\n", __func__
);
895 /* Track the skbs that have been sent to the IOVM for XMIT */
896 skb_queue_head(&devdata
->xmitbufhead
, skb
);
898 /* update xmt stats */
899 devdata
->net_stats
.tx_packets
++;
900 devdata
->net_stats
.tx_bytes
+= skb
->len
;
901 devdata
->chstat
.sent_xmit
++;
903 /* check if we have hit the high watermark for netif_stop_queue() */
904 if (vnic_hit_high_watermark(devdata
,
905 devdata
->upper_threshold_net_xmits
)) {
906 /* extra NET_XMITs queued over to IOVM - need to wait */
907 /* stop queue - call netif_wake_queue() after lower threshold */
908 netif_stop_queue(netdev
);
909 dev_dbg(&netdev
->dev
,
910 "%s busy - invoking iovm flow control\n",
912 devdata
->flow_control_upper_hits
++;
914 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
916 /* skb will be freed when we get back NET_XMIT_DONE */
921 * visornic_get_stats - returns net_stats of the visornic device
924 * Returns the net_device_stats for the device
926 static struct net_device_stats
*
927 visornic_get_stats(struct net_device
*netdev
)
929 struct visornic_devdata
*devdata
= netdev_priv(netdev
);
931 return &devdata
->net_stats
;
935 * visornic_change_mtu - changes mtu of device.
937 * @new_mtu: value of new mtu
939 * MTU cannot be changed by system, must be changed via
940 * CONTROLVM message. All vnics and pnics in a switch have
941 * to have the same MTU for everything to work.
942 * Currently not supported.
946 visornic_change_mtu(struct net_device
*netdev
, int new_mtu
)
952 * visornic_set_multi - handle changes to the device's promiscuous flag.
955 * Only flag we support currently is IFF_PROMISC
959 visornic_set_multi(struct net_device
*netdev
)
961 struct uiscmdrsp
*cmdrsp
;
962 struct visornic_devdata
*devdata
= netdev_priv(netdev
);
964 if (devdata
->old_flags
== netdev
->flags
)
967 if ((netdev
->flags
& IFF_PROMISC
) ==
968 (devdata
->old_flags
& IFF_PROMISC
))
971 cmdrsp
= kmalloc(SIZEOF_CMDRSP
, GFP_ATOMIC
);
974 cmdrsp
->cmdtype
= CMD_NET_TYPE
;
975 cmdrsp
->net
.type
= NET_RCV_PROMISC
;
976 cmdrsp
->net
.enbdis
.context
= netdev
;
977 cmdrsp
->net
.enbdis
.enable
=
978 netdev
->flags
& IFF_PROMISC
;
979 visorchannel_signalinsert(devdata
->dev
->visorchannel
,
985 devdata
->old_flags
= netdev
->flags
;
989 * visornic_xmit_timeout - request to timeout the xmit
992 * Queue the work and return. Make sure we have not already
993 * been informed the IO Partition is gone, if it is gone
994 * we will already timeout the xmits.
997 visornic_xmit_timeout(struct net_device
*netdev
)
999 struct visornic_devdata
*devdata
= netdev_priv(netdev
);
1000 unsigned long flags
;
1002 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1003 if (devdata
->going_away
) {
1004 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1005 dev_dbg(&devdata
->dev
->device
,
1006 "%s aborting because device removal pending\n",
1011 /* Ensure that a ServerDown message hasn't been received */
1012 if (!devdata
->enabled
||
1013 (devdata
->server_down
&& !devdata
->server_change_state
)) {
1014 dev_dbg(&netdev
->dev
, "%s no processing\n",
1016 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1019 schedule_work(&devdata
->timeout_reset
);
1020 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1024 * repost_return - repost rcv bufs that have come back
1025 * @cmdrsp: io channel command struct to post
1026 * @devdata: visornic devdata for the device
1028 * @netdev: netdevice
1030 * Repost rcv buffers that have been returned to us when
1031 * we are finished with them.
1032 * Returns 0 for success, -1 for error.
1035 repost_return(struct uiscmdrsp
*cmdrsp
, struct visornic_devdata
*devdata
,
1036 struct sk_buff
*skb
, struct net_device
*netdev
)
1038 struct net_pkt_rcv copy
;
1039 int i
= 0, cc
, numreposted
;
1043 copy
= cmdrsp
->net
.rcv
;
1044 switch (copy
.numrcvbufs
) {
1058 for (cc
= 0, numreposted
= 0; cc
< copy
.numrcvbufs
; cc
++) {
1059 for (i
= 0; i
< devdata
->num_rcv_bufs
; i
++) {
1060 if (devdata
->rcvbuf
[i
] != copy
.rcvbuf
[cc
])
1063 if ((skb
) && devdata
->rcvbuf
[i
] == skb
) {
1064 devdata
->found_repost_rcvbuf_cnt
++;
1066 devdata
->repost_found_skb_cnt
++;
1068 devdata
->rcvbuf
[i
] = alloc_rcv_buf(netdev
);
1069 if (!devdata
->rcvbuf
[i
]) {
1070 devdata
->num_rcv_bufs_could_not_alloc
++;
1071 devdata
->alloc_failed_in_repost_rtn_cnt
++;
1075 post_skb(cmdrsp
, devdata
, devdata
->rcvbuf
[i
]);
1080 if (numreposted
!= copy
.numrcvbufs
) {
1081 devdata
->n_repost_deficit
++;
1089 devdata
->bad_rcv_buf
++;
1096 * visornic_rx - Handle receive packets coming back from IO Part
1097 * @cmdrsp: Receive packet returned from IO Part
1099 * Got a receive packet back from the IO Part, handle it and send
1101 * Returns 1 iff an skb was received, otherwise 0
1104 visornic_rx(struct uiscmdrsp
*cmdrsp
)
1106 struct visornic_devdata
*devdata
;
1107 struct sk_buff
*skb
, *prev
, *curr
;
1108 struct net_device
*netdev
;
1109 int cc
, currsize
, off
;
1111 unsigned long flags
;
1113 /* post new rcv buf to the other end using the cmdrsp we have at hand
1114 * post it without holding lock - but we'll use the signal lock to
1115 * synchronize the queue insert the cmdrsp that contains the net.rcv
1116 * is the one we are using to repost, so copy the info we need from it.
1118 skb
= cmdrsp
->net
.buf
;
1121 devdata
= netdev_priv(netdev
);
1123 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1124 atomic_dec(&devdata
->num_rcvbuf_in_iovm
);
1126 /* set length to how much was ACTUALLY received -
1127 * NOTE: rcv_done_len includes actual length of data rcvd
1130 skb
->len
= cmdrsp
->net
.rcv
.rcv_done_len
;
1132 /* update rcv stats - call it with priv_lock held */
1133 devdata
->net_stats
.rx_packets
++;
1134 devdata
->net_stats
.rx_bytes
+= skb
->len
;
1136 /* test enabled while holding lock */
1137 if (!(devdata
->enabled
&& devdata
->enab_dis_acked
)) {
1138 /* don't process it unless we're in enable mode and until
1139 * we've gotten an ACK saying the other end got our RCV enable
1141 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1142 repost_return(cmdrsp
, devdata
, skb
, netdev
);
1146 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1148 /* when skb was allocated, skb->dev, skb->data, skb->len and
1149 * skb->data_len were setup. AND, data has already put into the
1150 * skb (both first frag and in frags pages)
1151 * NOTE: firstfragslen is the amount of data in skb->data and that
1152 * which is not in nr_frags or frag_list. This is now simply
1153 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1154 * firstfrag & set data_len to show rest see if we have to chain
1157 if (skb
->len
> RCVPOST_BUF_SIZE
) { /* do PRECAUTIONARY check */
1158 if (cmdrsp
->net
.rcv
.numrcvbufs
< 2) {
1159 if (repost_return(cmdrsp
, devdata
, skb
, netdev
) < 0)
1160 dev_err(&devdata
->netdev
->dev
,
1161 "repost_return failed");
1164 /* length rcvd is greater than firstfrag in this skb rcv buf */
1165 skb
->tail
+= RCVPOST_BUF_SIZE
; /* amount in skb->data */
1166 skb
->data_len
= skb
->len
- RCVPOST_BUF_SIZE
; /* amount that
1171 /* data fits in this skb - no chaining - do
1172 * PRECAUTIONARY check
1174 if (cmdrsp
->net
.rcv
.numrcvbufs
!= 1) { /* should be 1 */
1175 if (repost_return(cmdrsp
, devdata
, skb
, netdev
) < 0)
1176 dev_err(&devdata
->netdev
->dev
,
1177 "repost_return failed");
1180 skb
->tail
+= skb
->len
;
1181 skb
->data_len
= 0; /* nothing rcvd in frag_list */
1183 off
= skb_tail_pointer(skb
) - skb
->data
;
1185 /* amount we bumped tail by in the head skb
1186 * it is used to calculate the size of each chained skb below
1187 * it is also used to index into bufline to continue the copy
1188 * (for chansocktwopc)
1189 * if necessary chain the rcv skbs together.
1190 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1191 * chain the rest to that one.
1192 * - do PRECAUTIONARY check
1194 if (cmdrsp
->net
.rcv
.rcvbuf
[0] != skb
) {
1195 if (repost_return(cmdrsp
, devdata
, skb
, netdev
) < 0)
1196 dev_err(&devdata
->netdev
->dev
, "repost_return failed");
1200 if (cmdrsp
->net
.rcv
.numrcvbufs
> 1) {
1201 /* chain the various rcv buffers into the skb's frag_list. */
1202 /* Note: off was initialized above */
1203 for (cc
= 1, prev
= NULL
;
1204 cc
< cmdrsp
->net
.rcv
.numrcvbufs
; cc
++) {
1205 curr
= (struct sk_buff
*)cmdrsp
->net
.rcv
.rcvbuf
[cc
];
1207 if (!prev
) /* start of list- set head */
1208 skb_shinfo(skb
)->frag_list
= curr
;
1213 /* should we set skb->len and skb->data_len for each
1214 * buffer being chained??? can't hurt!
1216 currsize
= min(skb
->len
- off
,
1217 (unsigned int)RCVPOST_BUF_SIZE
);
1218 curr
->len
= currsize
;
1219 curr
->tail
+= currsize
;
1223 /* assert skb->len == off */
1224 if (skb
->len
!= off
) {
1225 netdev_err(devdata
->netdev
,
1226 "something wrong; skb->len:%d != off:%d\n",
1231 /* set up packet's protocl type using ethernet header - this
1232 * sets up skb->pkt_type & it also PULLS out the eth header
1234 skb
->protocol
= eth_type_trans(skb
, netdev
);
1239 skb
->ip_summed
= CHECKSUM_NONE
;
1242 if (netdev
->flags
& IFF_PROMISC
)
1243 break; /* accept all packets */
1244 if (skb
->pkt_type
== PACKET_BROADCAST
) {
1245 if (netdev
->flags
& IFF_BROADCAST
)
1246 break; /* accept all broadcast packets */
1247 } else if (skb
->pkt_type
== PACKET_MULTICAST
) {
1248 if ((netdev
->flags
& IFF_MULTICAST
) &&
1249 (netdev_mc_count(netdev
))) {
1250 struct netdev_hw_addr
*ha
;
1253 /* only accept multicast packets that we can
1254 * find in our multicast address list
1256 netdev_for_each_mc_addr(ha
, netdev
) {
1257 if (ether_addr_equal(eth
->h_dest
,
1263 /* accept pkt, dest matches a multicast addr */
1267 /* accept packet, h_dest must match vnic mac address */
1268 } else if (skb
->pkt_type
== PACKET_HOST
) {
1270 } else if (skb
->pkt_type
== PACKET_OTHERHOST
) {
1271 /* something is not right */
1272 dev_err(&devdata
->netdev
->dev
,
1273 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1274 netdev
->name
, eth
->h_dest
, netdev
->dev_addr
);
1276 /* drop packet - don't forward it up to OS */
1277 devdata
->n_rcv_packets_not_accepted
++;
1278 repost_return(cmdrsp
, devdata
, skb
, netdev
);
1282 netif_receive_skb(skb
);
1283 /* netif_rx returns various values, but "in practice most drivers
1284 * ignore the return value
1289 * whether the packet got dropped or handled, the skb is freed by
1290 * kernel code, so we shouldn't free it. but we should repost a
1293 repost_return(cmdrsp
, devdata
, skb
, netdev
);
1298 * devdata_initialize - Initialize devdata structure
1299 * @devdata: visornic_devdata structure to initialize
1300 * #dev: visorbus_deviced it belongs to
1302 * Setup initial values for the visornic based on channel and default
1304 * Returns a pointer to the devdata structure
1306 static struct visornic_devdata
*
1307 devdata_initialize(struct visornic_devdata
*devdata
, struct visor_device
*dev
)
1310 devdata
->incarnation_id
= get_jiffies_64();
1315 * devdata_release - Frees up references in devdata
1316 * @devdata: struct to clean up
1318 * Frees up references in devdata.
1321 static void devdata_release(struct visornic_devdata
*devdata
)
1323 kfree(devdata
->rcvbuf
);
1324 kfree(devdata
->cmdrsp_rcv
);
1325 kfree(devdata
->xmit_cmdrsp
);
1328 static const struct net_device_ops visornic_dev_ops
= {
1329 .ndo_open
= visornic_open
,
1330 .ndo_stop
= visornic_close
,
1331 .ndo_start_xmit
= visornic_xmit
,
1332 .ndo_get_stats
= visornic_get_stats
,
1333 .ndo_change_mtu
= visornic_change_mtu
,
1334 .ndo_tx_timeout
= visornic_xmit_timeout
,
1335 .ndo_set_rx_mode
= visornic_set_multi
,
1339 static ssize_t
info_debugfs_read(struct file
*file
, char __user
*buf
,
1340 size_t len
, loff_t
*offset
)
1342 ssize_t bytes_read
= 0;
1344 struct visornic_devdata
*devdata
;
1345 struct net_device
*dev
;
1350 vbuf
= kzalloc(len
, GFP_KERNEL
);
1354 /* for each vnic channel dump out channel specific data */
1356 for_each_netdev_rcu(current
->nsproxy
->net_ns
, dev
) {
1357 /* Only consider netdevs that are visornic, and are open */
1358 if ((dev
->netdev_ops
!= &visornic_dev_ops
) ||
1359 (!netif_queue_stopped(dev
)))
1362 devdata
= netdev_priv(dev
);
1363 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1364 "netdev = %s (0x%p), MAC Addr %pM\n",
1368 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1369 "VisorNic Dev Info = 0x%p\n", devdata
);
1370 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1371 " num_rcv_bufs = %d\n",
1372 devdata
->num_rcv_bufs
);
1373 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1374 " max_oustanding_next_xmits = %lu\n",
1375 devdata
->max_outstanding_net_xmits
);
1376 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1377 " upper_threshold_net_xmits = %lu\n",
1378 devdata
->upper_threshold_net_xmits
);
1379 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1380 " lower_threshold_net_xmits = %lu\n",
1381 devdata
->lower_threshold_net_xmits
);
1382 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1383 " queuefullmsg_logged = %d\n",
1384 devdata
->queuefullmsg_logged
);
1385 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1386 " chstat.got_rcv = %lu\n",
1387 devdata
->chstat
.got_rcv
);
1388 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1389 " chstat.got_enbdisack = %lu\n",
1390 devdata
->chstat
.got_enbdisack
);
1391 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1392 " chstat.got_xmit_done = %lu\n",
1393 devdata
->chstat
.got_xmit_done
);
1394 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1395 " chstat.xmit_fail = %lu\n",
1396 devdata
->chstat
.xmit_fail
);
1397 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1398 " chstat.sent_enbdis = %lu\n",
1399 devdata
->chstat
.sent_enbdis
);
1400 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1401 " chstat.sent_promisc = %lu\n",
1402 devdata
->chstat
.sent_promisc
);
1403 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1404 " chstat.sent_post = %lu\n",
1405 devdata
->chstat
.sent_post
);
1406 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1407 " chstat.sent_post_failed = %lu\n",
1408 devdata
->chstat
.sent_post_failed
);
1409 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1410 " chstat.sent_xmit = %lu\n",
1411 devdata
->chstat
.sent_xmit
);
1412 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1413 " chstat.reject_count = %lu\n",
1414 devdata
->chstat
.reject_count
);
1415 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1416 " chstat.extra_rcvbufs_sent = %lu\n",
1417 devdata
->chstat
.extra_rcvbufs_sent
);
1418 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1419 " n_rcv0 = %lu\n", devdata
->n_rcv0
);
1420 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1421 " n_rcv1 = %lu\n", devdata
->n_rcv1
);
1422 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1423 " n_rcv2 = %lu\n", devdata
->n_rcv2
);
1424 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1425 " n_rcvx = %lu\n", devdata
->n_rcvx
);
1426 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1427 " num_rcvbuf_in_iovm = %d\n",
1428 atomic_read(&devdata
->num_rcvbuf_in_iovm
));
1429 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1430 " alloc_failed_in_if_needed_cnt = %lu\n",
1431 devdata
->alloc_failed_in_if_needed_cnt
);
1432 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1433 " alloc_failed_in_repost_rtn_cnt = %lu\n",
1434 devdata
->alloc_failed_in_repost_rtn_cnt
);
1435 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1436 * " inner_loop_limit_reached_cnt = %lu\n",
1437 * devdata->inner_loop_limit_reached_cnt);
1439 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1440 " found_repost_rcvbuf_cnt = %lu\n",
1441 devdata
->found_repost_rcvbuf_cnt
);
1442 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1443 " repost_found_skb_cnt = %lu\n",
1444 devdata
->repost_found_skb_cnt
);
1445 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1446 " n_repost_deficit = %lu\n",
1447 devdata
->n_repost_deficit
);
1448 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1449 " bad_rcv_buf = %lu\n",
1450 devdata
->bad_rcv_buf
);
1451 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1452 " n_rcv_packets_not_accepted = %lu\n",
1453 devdata
->n_rcv_packets_not_accepted
);
1454 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1455 " interrupts_rcvd = %llu\n",
1456 devdata
->interrupts_rcvd
);
1457 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1458 " interrupts_notme = %llu\n",
1459 devdata
->interrupts_notme
);
1460 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1461 " interrupts_disabled = %llu\n",
1462 devdata
->interrupts_disabled
);
1463 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1464 " busy_cnt = %llu\n",
1466 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1467 " flow_control_upper_hits = %llu\n",
1468 devdata
->flow_control_upper_hits
);
1469 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1470 " flow_control_lower_hits = %llu\n",
1471 devdata
->flow_control_lower_hits
);
1472 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1473 " netif_queue = %s\n",
1474 netif_queue_stopped(devdata
->netdev
) ?
1475 "stopped" : "running");
1476 str_pos
+= scnprintf(vbuf
+ str_pos
, len
- str_pos
,
1477 " xmits_outstanding = %lu\n",
1478 devdata_xmits_outstanding(devdata
));
1481 bytes_read
= simple_read_from_buffer(buf
, len
, offset
, vbuf
, str_pos
);
1486 static struct dentry
*visornic_debugfs_dir
;
1487 static const struct file_operations debugfs_info_fops
= {
1488 .read
= info_debugfs_read
,
1492 * send_rcv_posts_if_needed
1493 * @devdata: visornic device
1495 * Send receive buffers to the IO Partition.
1499 send_rcv_posts_if_needed(struct visornic_devdata
*devdata
)
1502 struct net_device
*netdev
;
1503 struct uiscmdrsp
*cmdrsp
= devdata
->cmdrsp_rcv
;
1504 int cur_num_rcv_bufs_to_alloc
, rcv_bufs_allocated
;
1506 /* don't do this until vnic is marked ready */
1507 if (!(devdata
->enabled
&& devdata
->enab_dis_acked
))
1510 netdev
= devdata
->netdev
;
1511 rcv_bufs_allocated
= 0;
1512 /* this code is trying to prevent getting stuck here forever,
1513 * but still retry it if you cant allocate them all this time.
1515 cur_num_rcv_bufs_to_alloc
= devdata
->num_rcv_bufs_could_not_alloc
;
1516 while (cur_num_rcv_bufs_to_alloc
> 0) {
1517 cur_num_rcv_bufs_to_alloc
--;
1518 for (i
= 0; i
< devdata
->num_rcv_bufs
; i
++) {
1519 if (devdata
->rcvbuf
[i
])
1521 devdata
->rcvbuf
[i
] = alloc_rcv_buf(netdev
);
1522 if (!devdata
->rcvbuf
[i
]) {
1523 devdata
->alloc_failed_in_if_needed_cnt
++;
1526 rcv_bufs_allocated
++;
1527 post_skb(cmdrsp
, devdata
, devdata
->rcvbuf
[i
]);
1528 devdata
->chstat
.extra_rcvbufs_sent
++;
1531 devdata
->num_rcv_bufs_could_not_alloc
-= rcv_bufs_allocated
;
1535 * drain_resp_queue - drains and ignores all messages from the resp queue
1536 * @cmdrsp: io channel command response message
1537 * @devdata: visornic device to drain
1540 drain_resp_queue(struct uiscmdrsp
*cmdrsp
, struct visornic_devdata
*devdata
)
1542 while (!visorchannel_signalremove(devdata
->dev
->visorchannel
,
1549 * service_resp_queue - drains the response queue
1550 * @cmdrsp: io channel command response message
1551 * @devdata: visornic device to drain
1553 * Drain the respones queue of any responses from the IO partition.
1554 * Process the responses as we get them.
1555 * Returns when response queue is empty or when the thread stops.
1558 service_resp_queue(struct uiscmdrsp
*cmdrsp
, struct visornic_devdata
*devdata
,
1559 int *rx_work_done
, int budget
)
1561 unsigned long flags
;
1562 struct net_device
*netdev
;
1564 while (*rx_work_done
< budget
) {
1565 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1568 if (visorchannel_signalremove(devdata
->dev
->visorchannel
,
1571 break; /* queue empty */
1573 switch (cmdrsp
->net
.type
) {
1575 devdata
->chstat
.got_rcv
++;
1576 /* process incoming packet */
1577 *rx_work_done
+= visornic_rx(cmdrsp
);
1580 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1581 devdata
->chstat
.got_xmit_done
++;
1582 if (cmdrsp
->net
.xmtdone
.xmt_done_result
)
1583 devdata
->chstat
.xmit_fail
++;
1584 /* only call queue wake if we stopped it */
1585 netdev
= ((struct sk_buff
*)cmdrsp
->net
.buf
)->dev
;
1586 /* ASSERT netdev == vnicinfo->netdev; */
1587 if ((netdev
== devdata
->netdev
) &&
1588 netif_queue_stopped(netdev
)) {
1589 /* check if we have crossed the lower watermark
1590 * for netif_wake_queue()
1592 if (vnic_hit_low_watermark
1594 devdata
->lower_threshold_net_xmits
)) {
1595 /* enough NET_XMITs completed
1596 * so can restart netif queue
1598 netif_wake_queue(netdev
);
1599 devdata
->flow_control_lower_hits
++;
1602 skb_unlink(cmdrsp
->net
.buf
, &devdata
->xmitbufhead
);
1603 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1604 kfree_skb(cmdrsp
->net
.buf
);
1606 case NET_RCV_ENBDIS_ACK
:
1607 devdata
->chstat
.got_enbdisack
++;
1608 netdev
= (struct net_device
*)
1609 cmdrsp
->net
.enbdis
.context
;
1610 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1611 devdata
->enab_dis_acked
= 1;
1612 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1614 if (devdata
->server_down
&&
1615 devdata
->server_change_state
) {
1616 /* Inform Linux that the link is up */
1617 devdata
->server_down
= false;
1618 devdata
->server_change_state
= false;
1619 netif_wake_queue(netdev
);
1620 netif_carrier_on(netdev
);
1623 case NET_CONNECT_STATUS
:
1624 netdev
= devdata
->netdev
;
1625 if (cmdrsp
->net
.enbdis
.enable
== 1) {
1626 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1627 devdata
->enabled
= cmdrsp
->net
.enbdis
.enable
;
1628 spin_unlock_irqrestore(&devdata
->priv_lock
,
1630 netif_wake_queue(netdev
);
1631 netif_carrier_on(netdev
);
1633 netif_stop_queue(netdev
);
1634 netif_carrier_off(netdev
);
1635 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1636 devdata
->enabled
= cmdrsp
->net
.enbdis
.enable
;
1637 spin_unlock_irqrestore(&devdata
->priv_lock
,
1644 /* cmdrsp is now available for reuse */
1648 static int visornic_poll(struct napi_struct
*napi
, int budget
)
1650 struct visornic_devdata
*devdata
= container_of(napi
,
1651 struct visornic_devdata
,
1655 send_rcv_posts_if_needed(devdata
);
1656 service_resp_queue(devdata
->cmdrsp
, devdata
, &rx_count
, budget
);
1658 /* If there aren't any more packets to receive stop the poll */
1659 if (rx_count
< budget
)
1660 napi_complete(napi
);
1666 * poll_for_irq - Checks the status of the response queue.
1667 * @v: void pointer to the visronic devdata
1669 * Main function of the vnic_incoming thread. Peridocially check the
1670 * response queue and drain it if needed.
1671 * Returns when thread has stopped.
1674 poll_for_irq(unsigned long v
)
1676 struct visornic_devdata
*devdata
= (struct visornic_devdata
*)v
;
1678 if (!visorchannel_signalempty(
1679 devdata
->dev
->visorchannel
,
1680 IOCHAN_FROM_IOPART
))
1681 napi_schedule(&devdata
->napi
);
1683 atomic_set(&devdata
->interrupt_rcvd
, 0);
1685 mod_timer(&devdata
->irq_poll_timer
, msecs_to_jiffies(2));
1689 * visornic_probe - probe function for visornic devices
1690 * @dev: The visor device discovered
1692 * Called when visorbus discovers a visornic device on its
1693 * bus. It creates a new visornic ethernet adapter.
1694 * Returns 0 or negative for error.
1696 static int visornic_probe(struct visor_device
*dev
)
1698 struct visornic_devdata
*devdata
= NULL
;
1699 struct net_device
*netdev
= NULL
;
1701 int channel_offset
= 0;
1704 netdev
= alloc_etherdev(sizeof(struct visornic_devdata
));
1706 dev_err(&dev
->device
,
1707 "%s alloc_etherdev failed\n", __func__
);
1711 netdev
->netdev_ops
= &visornic_dev_ops
;
1712 netdev
->watchdog_timeo
= 5 * HZ
;
1713 SET_NETDEV_DEV(netdev
, &dev
->device
);
1715 /* Get MAC adddress from channel and read it into the device. */
1716 netdev
->addr_len
= ETH_ALEN
;
1717 channel_offset
= offsetof(struct spar_io_channel_protocol
,
1719 err
= visorbus_read_channel(dev
, channel_offset
, netdev
->dev_addr
,
1722 dev_err(&dev
->device
,
1723 "%s failed to get mac addr from chan (%d)\n",
1725 goto cleanup_netdev
;
1728 devdata
= devdata_initialize(netdev_priv(netdev
), dev
);
1730 dev_err(&dev
->device
,
1731 "%s devdata_initialize failed\n", __func__
);
1733 goto cleanup_netdev
;
1735 /* don't trust messages laying around in the channel */
1736 drain_resp_queue(devdata
->cmdrsp
, devdata
);
1738 devdata
->netdev
= netdev
;
1739 dev_set_drvdata(&dev
->device
, devdata
);
1740 init_waitqueue_head(&devdata
->rsp_queue
);
1741 spin_lock_init(&devdata
->priv_lock
);
1742 devdata
->enabled
= 0; /* not yet */
1743 atomic_set(&devdata
->usage
, 1);
1745 /* Setup rcv bufs */
1746 channel_offset
= offsetof(struct spar_io_channel_protocol
,
1748 err
= visorbus_read_channel(dev
, channel_offset
,
1749 &devdata
->num_rcv_bufs
, 4);
1751 dev_err(&dev
->device
,
1752 "%s failed to get #rcv bufs from chan (%d)\n",
1754 goto cleanup_netdev
;
1757 devdata
->rcvbuf
= kcalloc(devdata
->num_rcv_bufs
,
1758 sizeof(struct sk_buff
*), GFP_KERNEL
);
1759 if (!devdata
->rcvbuf
) {
1761 goto cleanup_netdev
;
1764 /* set the net_xmit outstanding threshold */
1765 /* always leave two slots open but you should have 3 at a minimum */
1766 /* note that max_outstanding_net_xmits must be > 0 */
1767 devdata
->max_outstanding_net_xmits
=
1768 max_t(unsigned long, 3, ((devdata
->num_rcv_bufs
/ 3) - 2));
1769 devdata
->upper_threshold_net_xmits
=
1770 max_t(unsigned long,
1771 2, (devdata
->max_outstanding_net_xmits
- 1));
1772 devdata
->lower_threshold_net_xmits
=
1773 max_t(unsigned long,
1774 1, (devdata
->max_outstanding_net_xmits
/ 2));
1776 skb_queue_head_init(&devdata
->xmitbufhead
);
1778 /* create a cmdrsp we can use to post and unpost rcv buffers */
1779 devdata
->cmdrsp_rcv
= kmalloc(SIZEOF_CMDRSP
, GFP_ATOMIC
);
1780 if (!devdata
->cmdrsp_rcv
) {
1782 goto cleanup_rcvbuf
;
1784 devdata
->xmit_cmdrsp
= kmalloc(SIZEOF_CMDRSP
, GFP_ATOMIC
);
1785 if (!devdata
->xmit_cmdrsp
) {
1787 goto cleanup_cmdrsp_rcv
;
1789 INIT_WORK(&devdata
->timeout_reset
, visornic_timeout_reset
);
1790 devdata
->server_down
= false;
1791 devdata
->server_change_state
= false;
1793 /*set the default mtu */
1794 channel_offset
= offsetof(struct spar_io_channel_protocol
,
1796 err
= visorbus_read_channel(dev
, channel_offset
, &netdev
->mtu
, 4);
1798 dev_err(&dev
->device
,
1799 "%s failed to get mtu from chan (%d)\n",
1801 goto cleanup_xmit_cmdrsp
;
1804 /* TODO: Setup Interrupt information */
1805 /* Let's start our threads to get responses */
1806 netif_napi_add(netdev
, &devdata
->napi
, visornic_poll
, 64);
1808 setup_timer(&devdata
->irq_poll_timer
, poll_for_irq
,
1809 (unsigned long)devdata
);
1810 /* Note: This time has to start running before the while
1811 * loop below because the napi routine is responsible for
1812 * setting enab_dis_acked
1814 mod_timer(&devdata
->irq_poll_timer
, msecs_to_jiffies(2));
1816 channel_offset
= offsetof(struct spar_io_channel_protocol
,
1817 channel_header
.features
);
1818 err
= visorbus_read_channel(dev
, channel_offset
, &features
, 8);
1820 dev_err(&dev
->device
,
1821 "%s failed to get features from chan (%d)\n",
1823 goto cleanup_napi_add
;
1826 features
|= ULTRA_IO_CHANNEL_IS_POLLING
;
1827 features
|= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING
;
1828 err
= visorbus_write_channel(dev
, channel_offset
, &features
, 8);
1830 dev_err(&dev
->device
,
1831 "%s failed to set features in chan (%d)\n",
1833 goto cleanup_napi_add
;
1836 /* Let's start our threads to get responses */
1837 netif_napi_add(netdev
, &devdata
->napi
, visornic_poll
, NAPI_WEIGHT
);
1839 /* Note: Interupts have to be enable before the while
1840 * loop below because the napi routine is responsible for
1841 * setting enab_dis_acked
1843 visorbus_enable_channel_interrupts(dev
);
1845 err
= register_netdev(netdev
);
1847 dev_err(&dev
->device
,
1848 "%s register_netdev failed (%d)\n", __func__
, err
);
1849 goto cleanup_napi_add
;
1852 /* create debgug/sysfs directories */
1853 devdata
->eth_debugfs_dir
= debugfs_create_dir(netdev
->name
,
1854 visornic_debugfs_dir
);
1855 if (!devdata
->eth_debugfs_dir
) {
1856 dev_err(&dev
->device
,
1857 "%s debugfs_create_dir %s failed\n",
1858 __func__
, netdev
->name
);
1860 goto cleanup_register_netdev
;
1863 dev_info(&dev
->device
, "%s success netdev=%s\n",
1864 __func__
, netdev
->name
);
1867 cleanup_register_netdev
:
1868 unregister_netdev(netdev
);
1871 del_timer_sync(&devdata
->irq_poll_timer
);
1872 netif_napi_del(&devdata
->napi
);
1874 cleanup_xmit_cmdrsp
:
1875 kfree(devdata
->xmit_cmdrsp
);
1878 kfree(devdata
->cmdrsp_rcv
);
1881 kfree(devdata
->rcvbuf
);
1884 free_netdev(netdev
);
1889 * host_side_disappeared - IO part is gone.
1890 * @devdata: device object
1892 * IO partition servicing this device is gone, do cleanup
1895 static void host_side_disappeared(struct visornic_devdata
*devdata
)
1897 unsigned long flags
;
1899 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1900 devdata
->dev
= NULL
; /* indicate device destroyed */
1901 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1905 * visornic_remove - Called when visornic dev goes away
1906 * @dev: visornic device that is being removed
1908 * Called when DEVICE_DESTROY gets called to remove device.
1911 static void visornic_remove(struct visor_device
*dev
)
1913 struct visornic_devdata
*devdata
= dev_get_drvdata(&dev
->device
);
1914 struct net_device
*netdev
;
1915 unsigned long flags
;
1918 dev_err(&dev
->device
, "%s no devdata\n", __func__
);
1921 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1922 if (devdata
->going_away
) {
1923 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1924 dev_err(&dev
->device
, "%s already being removed\n", __func__
);
1927 devdata
->going_away
= true;
1928 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
1929 netdev
= devdata
->netdev
;
1931 dev_err(&dev
->device
, "%s not net device\n", __func__
);
1935 /* going_away prevents new items being added to the workqueues */
1936 cancel_work_sync(&devdata
->timeout_reset
);
1938 debugfs_remove_recursive(devdata
->eth_debugfs_dir
);
1940 unregister_netdev(netdev
); /* this will call visornic_close() */
1942 del_timer_sync(&devdata
->irq_poll_timer
);
1943 netif_napi_del(&devdata
->napi
);
1945 dev_set_drvdata(&dev
->device
, NULL
);
1946 host_side_disappeared(devdata
);
1947 devdata_release(devdata
);
1948 free_netdev(netdev
);
1952 * visornic_pause - Called when IO Part disappears
1953 * @dev: visornic device that is being serviced
1954 * @complete_func: call when finished.
1956 * Called when the IO Partition has gone down. Need to free
1957 * up resources and wait for IO partition to come back. Mark
1958 * link as down and don't attempt any DMA. When we have freed
1959 * memory call the complete_func so that Command knows we are
1960 * done. If we don't call complete_func, IO part will never
1962 * Returns 0 for success.
1964 static int visornic_pause(struct visor_device
*dev
,
1965 visorbus_state_complete_func complete_func
)
1967 struct visornic_devdata
*devdata
= dev_get_drvdata(&dev
->device
);
1969 visornic_serverdown(devdata
, complete_func
);
1974 * visornic_resume - Called when IO part has recovered
1975 * @dev: visornic device that is being serviced
1976 * @compelte_func: call when finished
1978 * Called when the IO partition has recovered. Reestablish
1979 * connection to the IO part and set the link up. Okay to do
1981 * Returns 0 for success.
1983 static int visornic_resume(struct visor_device
*dev
,
1984 visorbus_state_complete_func complete_func
)
1986 struct visornic_devdata
*devdata
;
1987 struct net_device
*netdev
;
1988 unsigned long flags
;
1990 devdata
= dev_get_drvdata(&dev
->device
);
1992 dev_err(&dev
->device
, "%s no devdata\n", __func__
);
1996 netdev
= devdata
->netdev
;
1998 spin_lock_irqsave(&devdata
->priv_lock
, flags
);
1999 if (devdata
->server_change_state
) {
2000 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
2001 dev_err(&dev
->device
, "%s server already changing state\n",
2005 if (!devdata
->server_down
) {
2006 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
2007 dev_err(&dev
->device
, "%s server not down\n", __func__
);
2008 complete_func(dev
, 0);
2011 devdata
->server_change_state
= true;
2012 spin_unlock_irqrestore(&devdata
->priv_lock
, flags
);
2014 /* Must transition channel to ATTACHED state BEFORE
2015 * we can start using the device again.
2016 * TODO: State transitions
2018 mod_timer(&devdata
->irq_poll_timer
, msecs_to_jiffies(2));
2020 init_rcv_bufs(netdev
, devdata
);
2026 complete_func(dev
, 0);
2030 /* This is used to tell the visor bus driver which types of visor devices
2031 * we support, and what functions to call when a visor device that we support
2032 * is attached or removed.
2034 static struct visor_driver visornic_driver
= {
2036 .owner
= THIS_MODULE
,
2037 .channel_types
= visornic_channel_types
,
2038 .probe
= visornic_probe
,
2039 .remove
= visornic_remove
,
2040 .pause
= visornic_pause
,
2041 .resume
= visornic_resume
,
2042 .channel_interrupt
= NULL
,
2046 * visornic_init - Init function
2048 * Init function for the visornic driver. Do initial driver setup
2049 * and wait for devices.
2050 * Returns 0 for success, negative for error.
2052 static int visornic_init(void)
2057 visornic_debugfs_dir
= debugfs_create_dir("visornic", NULL
);
2058 if (!visornic_debugfs_dir
)
2061 ret
= debugfs_create_file("info", S_IRUSR
, visornic_debugfs_dir
, NULL
,
2062 &debugfs_info_fops
);
2064 goto cleanup_debugfs
;
2065 ret
= debugfs_create_file("enable_ints", S_IWUSR
, visornic_debugfs_dir
,
2066 NULL
, &debugfs_enable_ints_fops
);
2068 goto cleanup_debugfs
;
2070 err
= visorbus_register_visor_driver(&visornic_driver
);
2072 goto cleanup_debugfs
;
2077 debugfs_remove_recursive(visornic_debugfs_dir
);
2083 * visornic_cleanup - driver exit routine
2085 * Unregister driver from the bus and free up memory.
2087 static void visornic_cleanup(void)
2089 visorbus_unregister_visor_driver(&visornic_driver
);
2091 debugfs_remove_recursive(visornic_debugfs_dir
);
2094 module_init(visornic_init
);
2095 module_exit(visornic_cleanup
);
2097 MODULE_AUTHOR("Unisys");
2098 MODULE_LICENSE("GPL");
2099 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");