packet: deliver VLAN TCI to userspace
net/packet/af_packet.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnel); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
                 ll header. PPP does this, which is wrong, because it
                 introduces asymmetry between the rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.

On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
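/*
 * How the header rules above look from the other side of the API: an
 * illustrative userspace sketch (not kernel code) of opening and binding a
 * packet socket. With SOCK_RAW the ll header is part of the received data;
 * with SOCK_DGRAM it is stripped and only described by sockaddr_ll. Error
 * handling is elided and the interface name is an example.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <net/if.h>
 *	#include <arpa/inet.h>
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */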
/* Private packet socket structures. */

struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif
static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	char			**pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
#endif
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
#ifdef CONFIG_PACKET_MMAP

static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / po->frames_per_block;
	frame_offset = position % po->frames_per_block;

	h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
	switch (po->tp_version) {
	case TPACKET_V1:
		/* Normalize tp_status to USER/KERNEL before comparing; the
		 * parentheses are required, ?: binds looser than !=. */
		if (status != (h.h1->tp_status ? TP_STATUS_USER :
					       TP_STATUS_KERNEL))
			return NULL;
		break;
	case TPACKET_V2:
		if (status != (h.h2->tp_status ? TP_STATUS_USER :
					       TP_STATUS_KERNEL))
			return NULL;
		break;
	}
	return h.raw;
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		break;
	}
}
#endif
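/*
 * The tp_status word is the kernel/user handoff for each ring frame:
 * packet_lookup_frame() only returns a frame whose status matches what the
 * caller asked for, and __packet_set_status() publishes the frame to the
 * other side. A userspace consumer loop looks roughly like this
 * (illustrative sketch only; ring setup elided, TPACKET_V1 layout per
 * linux/if_packet.h):
 *
 *	struct tpacket_hdr *hdr = ring + i * frame_size;
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);		// wait for the kernel
 *	// ... consume the frame at (char *)hdr + hdr->tp_mac ...
 *	hdr->tp_status = TP_STATUS_KERNEL;	// hand the slot back
 */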
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}
static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is a no-op.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (dev_net(dev) != sock_net(sk))
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Try to align data part correctly */
	if (dev->header_ops) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb_reset_network_header(skb);
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}
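/*
 * run_filter() executes whatever classic BPF program userspace attached
 * with SO_ATTACH_FILTER; the returned value is the snap length (0 drops
 * the packet). An illustrative userspace sketch of attaching a trivial
 * filter that accepts everything, truncated to 96 bytes (opcodes per
 * linux/filter.h):
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 96 },	// return 96: the snaplen
 *	};
 *	struct sock_fprog prog = { .len = 1, .filter = code };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */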
/*
   This function performs lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev_net(dev) != sock_net(sk))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev_net(dev) != sock_net(sk))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = skb->vlan_tci;
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();

	{
		struct page *p_start, *p_end;
		u8 *h_end = h.raw + macoff + snaplen - 1;

		p_start = virt_to_page(h.raw);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
#endif
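/*
 * TPACKET_V2 is what carries the VLAN TCI to userspace through the ring:
 * tpacket_rcv() above copies skb->vlan_tci into tp_vlan_tci of every
 * tpacket2_hdr. An illustrative userspace read of a ready V2 frame (frame
 * and field layout per linux/if_packet.h):
 *
 *	struct tpacket2_hdr *h = frame;
 *	if (h->tp_status & TP_STATUS_USER) {
 *		uint16_t vid = h->tp_vlan_tci & 0x0fff;	// VID bits of the TCI
 *		uint8_t *l2 = (uint8_t *)frame + h->tp_mac;
 *		// h->tp_snaplen bytes captured, h->tp_len were on the wire
 *	}
 */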
static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		struct packet_sock *po = pkt_sk(sk);

		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	err = -EMSGSIZE;
	if (len > dev->mtu + reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
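/*
 * packet_sendmsg() resolves the egress device either from the bound socket
 * or from a sockaddr_ll supplied per call. Illustrative userspace sketch of
 * the latter on a SOCK_DGRAM socket, where the kernel builds the ll header
 * from sll_addr; the interface name and destination MAC are examples:
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_IP),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *		.sll_halen    = ETH_ALEN,
 *		.sll_addr     = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 },
 *	};
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&sll, sizeof(sll));
 */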
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	write_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&net->packet.sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);

	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
/*
 *	Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;

	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&net->packet.sklist_lock);
	sk_add_node(sk, &net->packet.sklist);
	write_unlock_bh(&net->packet.sklist_lock);
	return 0;
out:
	return err;
}
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = skb->vlan_tci;

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
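/*
 * With PACKET_AUXDATA enabled, the tpacket_auxdata filled in above
 * (including the new tp_vlan_tci field) arrives as a control message.
 * Illustrative userspace sketch of enabling it and fishing it out of
 * recvmsg(); buffer names are examples:
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *
 *	struct iovec iov = { .iov_base = pktbuf, .iov_len = sizeof(pktbuf) };
 *	union {
 *		struct cmsghdr cmsg;
 *		char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	} u;
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = &u, .msg_controllen = sizeof(u),
 *	};
 *	struct cmsghdr *c;
 *
 *	recvmsg(fd, &msg, 0);
 *	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
 *		if (c->cmsg_level == SOL_PACKET &&
 *		    c->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(c);
 *			// aux->tp_vlan_tci holds the 802.1Q TCI, if any
 *		}
 *	}
 */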
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev) {
		strlcpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			dev_mc_add(dev, i->addr, i->alen, 0);
		else
			dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
#ifdef CONFIG_PACKET_MMAP
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
#endif
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
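/*
 * PACKET_VERSION must be set before the ring exists (note the -EBUSY check
 * in packet_setsockopt() above), and PACKET_HDRLEN is unusual in that the
 * getsockopt takes the version as input. Illustrative userspace sketch:
 *
 *	int ver = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	int hdrlen = TPACKET_V2;	// in: version, out: header length
 *	socklen_t len = sizeof(hdrlen);
 *	getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &len);
 */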
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	read_lock(&net->packet.sklist_lock);
	sk_for_each(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&net->packet.sklist_lock);
	return NOTIFY_DONE;
}
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = atomic_read(&sk->sk_wmem_alloc);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (sock_net(sk) != &init_net)
			return -ENOIOCTLCMD;
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		unsigned last = po->head ? po->head - 1 : po->frame_max;

		if (packet_lookup_frame(po, last, TP_STATUS_USER))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}
/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
					 order);
}

static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	__be16 num;
	int err = 0;

	if (req->tp_block_nr) {
		int i;

		/* Sanity tests and some calculations */

		if (unlikely(po->pg_vec))
			return -EBUSY;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		if (unlikely((int)req->tp_block_size <= 0))
			return -EINVAL;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			return -EINVAL;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen))
			return -EINVAL;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(po->frames_per_block <= 0))
			return -EINVAL;
		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
			     req->tp_frame_nr))
			return -EINVAL;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;

		for (i = 0; i < req->tp_block_nr; i++) {
			void *ptr = pg_vec[i];
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				__packet_set_status(po, ptr, TP_STATUS_KERNEL);
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (unlikely(req->tp_frame_nr))
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = (req->tp_frame_nr - 1);
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
#endif
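/*
 * Putting packet_set_ring() and packet_mmap() together from userspace:
 * request a ring, then map it with one contiguous mmap(). Sizes below are
 * examples; tp_block_size must be a multiple of PAGE_SIZE and tp_frame_nr
 * must equal frames_per_block * tp_block_nr, as enforced above.
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 2 * 64,	// 2 frames per 4K block
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */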
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &net->packet.sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_lock(&net->packet.sklist_lock);
	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&net->packet.sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(seq_file_net(seq)->packet.sklist_lock)
{
	struct net *net = seq_file_net(seq);
	read_unlock(&net->packet.sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif
static int packet_net_init(struct net *net)
{
	rwlock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};

static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);