/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Version:	$Id: af_packet.c,v 1.33 2000/03/13 22:11:50 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/if_bridge.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#ifdef CONFIG_DLCI
extern int dlci_ioctl(unsigned int, void*);
#endif

/*
   Old SOCK_PACKET. Do programs that use it still exist?
   (not counting tcpdump) - lots of them, yes - AC.
 */

#define CONFIG_SOCK_PACKET	1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   Differences:
   - Changing IFF_ALLMULTI from user level is disabled.
     It could only confuse multicast routing daemons, nothing more.
   - IFF_PROMISC is faked by keeping a reference count and a
     global flag, so that the real IFF_PROMISC == (gflag|(count != 0)).
     I'd remove it too, but it would require recompiling tcpdump
     and other applications that use promiscuous mode.
   - SIOC{ADD/DEL}MULTI are moved to deprecated state,
     they work, but complain. I do know who uses them.

 *************FIXME***************
   Alexey : This doesn't cook, I'm afraid. We need the low level
   SIOCADD/DELMULTI and also IFF_ALLMULTI for DECnet, Appletalk and
   other stuff, as well as for BSD compatibility issues.
 */

#define CONFIG_PACKET_MULTICAST	1
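
/*
 *	Example (userspace sketch, guarded out of the build): the
 *	reference-counted replacement for IFF_PROMISC described above is
 *	driven from user level through setsockopt(SOL_PACKET,
 *	PACKET_ADD_MEMBERSHIP) with PACKET_MR_PROMISC. The ifindex value
 *	is an illustrative assumption.
 */
#if 0
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <string.h>

static int enable_promisc(int fd, int ifindex)
{
	struct packet_mreq mr;

	memset(&mr, 0, sizeof(mr));
	mr.mr_ifindex = ifindex;
	mr.mr_type = PACKET_MR_PROMISC;
	/* The kernel bumps the per-device promiscuity count; closing the
	   socket (packet_flush_mclist) drops it again. */
	return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
			  &mr, sizeof(mr));
}
#endif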

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes
     the ll header inside itself. In this case the ll header is invisible
     outside of the device, but higher levels still should reserve
     dev->hard_header_len. Some devices are clever enough to reallocate
     the skb when the header does not fit into the reserved space
     (tunnel); others are silly (PPP).
   - packet sockets receive packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the
	      ll header. PPP does this, which is wrong, because it
	      introduces asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
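
/*
 *	Example (userspace sketch, guarded out of the build): the layout
 *	rules above are what a user observes. On a SOCK_RAW packet socket,
 *	read() returns the frame including the ll header that packet_rcv()
 *	pushes back; on SOCK_DGRAM an equivalent frame arrives with the
 *	ll header stripped. Buffer sizes are illustrative assumptions.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <unistd.h>
#include <stdio.h>

static void show_header_difference(void)
{
	char buf[2048];
	int raw   = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
	int dgram = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
	int n;

	/* On Ethernet the first 14 bytes here are the ll header ... */
	n = read(raw, buf, sizeof(buf));
	printf("SOCK_RAW:   %d bytes (ll header included)\n", n);

	/* ... while here the data starts at the network header. */
	n = read(dgram, buf, sizeof(buf));
	printf("SOCK_DGRAM: %d bytes (ll header stripped)\n", n);
}
#endif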

/* List of all packet sockets. */
static struct sock * packet_sklist = NULL;
static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;

atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_opt
{
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	char			running;	/* prot_hook is attached */
	int			ifindex;	/* bound device */
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	unsigned long		*pg_vec;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	struct tpacket_hdr	**iovec;
	unsigned int		frame_size;
	unsigned int		iovmax;
	unsigned int		head;
#endif
};

void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);

	if (!sk->dead) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	if (sk->protinfo.destruct_hook)
		kfree(sk->protinfo.destruct_hook);
	atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
	MOD_DEC_USE_COUNT;
}

extern struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
extern struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = (struct sock *) pt->data;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have the ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb->mac.raw,
	 *	so that this procedure is a noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	spkt = (struct sockaddr_pkt*)skb->cb;

	skb_push(skb, skb->data-skb->mac.raw);

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	if (skb->rx_dev) {
		dev_put(skb->rx_dev);
		skb->rx_dev = NULL;
	}

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets from using up all the memory.
	 */

	if (sock_queue_rcv_skb(sk,skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
			       struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto=0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if(len>dev->mtu+dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb,(dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	/* Try to align data part correctly */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;
	if (err)
		goto out_free;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
#endif
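
/*
 *	Example (userspace sketch, guarded out of the build): a sendto() on
 *	a legacy SOCK_PACKET socket lands in packet_sendmsg_spkt() above.
 *	The address is a struct sockaddr_pkt naming the device, and the
 *	frame must be complete, as the comment above says. The device name
 *	and protocol are illustrative assumptions.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <string.h>

static int spkt_send(int fd, const void *frame, int len)
{
	struct sockaddr_pkt spkt;

	memset(&spkt, 0, sizeof(spkt));
	spkt.spkt_family = AF_PACKET;
	strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
	spkt.spkt_protocol = htons(ETH_P_802_2);

	/* fd was created with socket(PF_PACKET, SOCK_PACKET, ...) */
	return sendto(fd, frame, len, 0,
		      (struct sockaddr *)&spkt, sizeof(spkt));
}
#endif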

/*
   This function does lazy skb cloning, in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state
   on exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_opt *po;
	u8 * skb_head = skb->data;
#ifdef CONFIG_FILTER
	unsigned snaplen;
#endif

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides the details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

#ifdef CONFIG_FILTER
	snaplen = skb->len;

	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif /* CONFIG_FILTER */

	if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb->tail - skb->data;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	sll = (struct sockaddr_ll*)skb->cb;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	if (skb->rx_dev) {
		dev_put(skb->rx_dev);
		skb->rx_dev = NULL;
	}

#ifdef CONFIG_FILTER
	if (skb->len > snaplen)
		__skb_trim(skb, snaplen);
#endif

	skb_set_owner_r(skb, sk);
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->receive_queue, skb);
	spin_unlock(&sk->receive_queue.lock);
	sk->data_ready(sk,skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

#ifdef CONFIG_FILTER
drop_n_restore:
#endif
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb->tail - skb->data;
	}
drop:
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
	struct sock *sk;
	struct packet_opt *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;
	unsigned snaplen;
	unsigned long losing;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = (struct sock *) pt->data;
	po = sk->protinfo.af_packet;

	if (dev->hard_header) {
		if (sk->type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	snaplen = skb->len;

#ifdef CONFIG_FILTER
	if (sk->filter) {
		unsigned res = snaplen;
		struct sk_filter *filter;

		bh_lock_sock(sk);
		if ((filter = sk->filter) != NULL)
			res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
		bh_unlock_sock(sk);

		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}
#endif

	spin_lock(&sk->receive_queue.lock);
	h = po->iovec[po->head];

	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->iovmax ? po->head+1 : 0;
	po->stats.tp_packets++;
	losing = TP_STATUS_LOSING;
	if (!po->stats.tp_drops)
		losing = 0;
	spin_unlock(&sk->receive_queue.lock);

	if (sk->type == SOCK_DGRAM) {
		h->tp_mac = h->tp_net = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		h->tp_net = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		h->tp_mac = h->tp_net - maclen;
	}

	if (h->tp_mac + snaplen > po->frame_size) {
		snaplen = po->frame_size - h->tp_mac;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	memcpy((u8*)h + h->tp_mac, skb->data, snaplen);

	h->tp_sec = skb->stamp.tv_sec;
	h->tp_usec = skb->stamp.tv_usec;
	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	h->tp_status = losing|TP_STATUS_USER;
	mb();

	sk->data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb->tail - skb->data;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->receive_queue.lock);

	sk->data_ready(sk, 0);
	goto drop_n_restore;
}
#endif
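
/*
 *	Example (userspace sketch, guarded out of the build): the consumer
 *	side of the tp_status handoff implemented above. The kernel hands a
 *	frame to the user by setting TP_STATUS_USER (plus TP_STATUS_LOSING
 *	when drops occurred); the user hands the slot back by writing
 *	TP_STATUS_KERNEL. `ring`, `frame_nr` and `frame_size` are assumed
 *	to come from a prior PACKET_RX_RING + mmap() setup (see
 *	packet_set_ring and packet_mmap below); handle_frame() is a
 *	hypothetical callback.
 */
#if 0
#include <linux/if_packet.h>
#include <sys/poll.h>

extern void handle_frame(const char *data, unsigned len);	/* hypothetical */

static void consume_ring(int fd, char *ring,
			 unsigned frame_nr, unsigned frame_size)
{
	unsigned i = 0;

	for (;;) {
		struct tpacket_hdr *h =
			(struct tpacket_hdr *)(ring + i*frame_size);

		if (!(h->tp_status & TP_STATUS_USER)) {
			struct pollfd pfd = { fd, POLLIN, 0 };
			poll(&pfd, 1, -1);	/* packet_poll() wakes us */
			continue;
		}

		/* Frame data starts tp_mac bytes into the slot. */
		handle_frame((char *)h + h->tp_mac, h->tp_snaplen);

		h->tp_status = TP_STATUS_KERNEL;	/* give the slot back */
		i = (i == frame_nr - 1) ? 0 : i + 1;
	}
}
#endif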

static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			  struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= sk->protinfo.af_packet->ifindex;
		proto	= sk->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0,
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out_unlock;

	skb_reserve(skb, (dev->hard_header_len+15)&~15);
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->priority;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
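
/*
 *	Example (userspace sketch, guarded out of the build): a sendto() on
 *	a SOCK_DGRAM packet socket reaches packet_sendmsg() above with a
 *	sockaddr_ll; dev->hard_header() then builds the ll header from
 *	sll_addr and sll_protocol. The destination address and protocol are
 *	illustrative assumptions.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <string.h>

static int dgram_send(int fd, int ifindex,
		      const unsigned char dst[ETH_ALEN],
		      const void *payload, int len)
{
	struct sockaddr_ll sll;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family = AF_PACKET;
	sll.sll_ifindex = ifindex;
	sll.sll_protocol = htons(ETH_P_IP);
	sll.sll_halen = ETH_ALEN;
	memcpy(sll.sll_addr, dst, ETH_ALEN);

	return sendto(fd, payload, len, 0,
		      (struct sockaddr *)&sll, sizeof(sll));
}
#endif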

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct sock **skp;

	if (!sk)
		return 0;

	write_lock_bh(&packet_sklist_lock);
	for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
		if (*skp == sk) {
			*skp = sk->next;
			__sock_put(sk);
			break;
		}
	}
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (sk->protinfo.af_packet->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		sk->protinfo.af_packet->running = 0;
		__sock_put(sk);
	}

#ifdef CONFIG_PACKET_MULTICAST
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	if (sk->protinfo.af_packet->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->receive_queue);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&sk->protinfo.af_packet->bind_lock);
	if (sk->protinfo.af_packet->running) {
		dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
		__sock_put(sk);
		sk->protinfo.af_packet->running = 0;
	}

	sk->num = protocol;
	sk->protinfo.af_packet->prot_hook.type = protocol;
	sk->protinfo.af_packet->prot_hook.dev = dev;

	sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&sk->protinfo.af_packet->prot_hook);
			sock_hold(sk);
			sk->protinfo.af_packet->running = 1;
		} else {
			sk->err = ENETDOWN;
			if (!sk->dead)
				sk->error_report(sk);
		}
	} else {
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);
		sk->protinfo.af_packet->running = 1;
	}

out_unlock:
	spin_unlock(&sk->protinfo.af_packet->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk=sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if(addr_len!=sizeof(struct sockaddr))
		return -EINVAL;
	strncpy(name,uaddr->sa_data,14);
	name[14]=0;

	dev = dev_get_by_name(name);
	if (dev) {
		err = packet_do_bind(sk, dev, sk->num);
		dev_put(dev);
	}
	return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
	struct sock *sk=sock->sk;
	struct net_device *dev = NULL;
	int err;

	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}
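
/*
 *	Example (userspace sketch, guarded out of the build): binding
 *	through packet_bind() above takes an ifindex, usually obtained with
 *	SIOCGIFINDEX. A zero sll_protocol keeps the protocol the socket was
 *	created with (the `? :` above). The device name is an assumption.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <string.h>

static int bind_to_device(int fd, const char *name)
{
	struct ifreq ifr;
	struct sockaddr_ll sll;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
	if (ioctl(fd, SIOCGIFINDEX, &ifr) < 0)	/* routed via packet_ioctl */
		return -1;

	memset(&sll, 0, sizeof(sll));
	sll.sll_family = AF_PACKET;
	sll.sll_ifindex = ifr.ifr_ifindex;
	sll.sll_protocol = 0;	/* keep sk->num */
	return bind(fd, (struct sockaddr *)&sll, sizeof(sll));
}
#endif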

/*
 *	Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;
	MOD_INC_USE_COUNT;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock,sk);

	sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
	if (sk->protinfo.af_packet == NULL)
		goto out_free;
	memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
	sk->family = PF_PACKET;
	sk->num = protocol;

	sk->destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&sk->protinfo.af_packet->bind_lock);
	sk->protinfo.af_packet->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
#endif
	sk->protinfo.af_packet->prot_hook.data = (void *)sk;

	if (protocol) {
		sk->protinfo.af_packet->prot_hook.type = protocol;
		dev_add_pack(&sk->protinfo.af_packet->prot_hook);
		sock_hold(sk);
		sk->protinfo.af_packet->running = 1;
	}

	write_lock_bh(&packet_sklist_lock);
	sk->next = packet_sklist;
	packet_sklist = sk;
	sock_hold(sk);
	write_unlock_bh(&packet_sklist_lock);
	return(0);

out_free:
	sk_free(sk);
out:
	MOD_DEC_USE_COUNT;
	return err;
}
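
/*
 *	Example (userspace sketch, guarded out of the build): the three
 *	socket types packet_create() accepts. CAP_NET_RAW (usually root) is
 *	required, as checked above. A zero protocol would leave the protocol
 *	hook unattached until bind time.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>

static void create_examples(void)
{
	/* all frames, ll header visible (packet_rcv, SOCK_RAW) */
	int raw   = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	/* IP only, ll header stripped (packet_rcv, SOCK_DGRAM) */
	int dgram = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
	/* legacy interface, handled by packet_ops_spkt */
	int spkt  = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
}
#endif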

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
			  int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (sk->protinfo.af_packet->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sizeof(struct sockaddr_ll);

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking, we don't need to see or worry about
	 *	blocking retries.
	 */

	if(skb==NULL)
		goto out;

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied=len;
		msg->msg_flags|=MSG_TRUNC;
	}

	/* We can't use skb_copy_datagram here */
	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
	if (err)
		goto out_free;
	sk->stamp=skb->stamp;

	if (msg->msg_name)
		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
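
/*
 *	Example (userspace sketch, guarded out of the build): recvfrom()
 *	reaches packet_recvmsg() above; the sockaddr_ll that packet_rcv()
 *	stashed in skb->cb comes back as the source address, including the
 *	originating ifindex and the parsed hardware address.
 */
#if 0
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <stdio.h>

static void recv_one(int fd)
{
	char buf[2048];
	struct sockaddr_ll from;
	socklen_t fromlen = sizeof(from);
	int n;

	n = recvfrom(fd, buf, sizeof(buf), 0,
		     (struct sockaddr *)&from, &fromlen);
	if (n < 0)
		return;
	printf("%d bytes from ifindex %d, hatype %d, pkttype %d\n",
	       n, from.sll_ifindex, from.sll_hatype, from.sll_pkttype);
}
#endif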

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
	if (dev) {
		strncpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
	sll->sll_protocol = sk->num;
	dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = sizeof(*sll);

	return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			dev_mc_add(dev, i->addr, i->alen, 0);
		else
			dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		dev_set_allmulti(dev, what);
		break;
	default:
		break;
	}
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i=i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = sk->protinfo.af_packet->mclist;
	sk->protinfo.af_packet->mclist = i;
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree_s(ml, sizeof(*ml));
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_mclist *ml;

	if (sk->protinfo.af_packet->mclist == NULL)
		return;

	rtnl_lock();
	while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
		struct net_device *dev;
		sk->protinfo.af_packet->mclist = ml->next;
		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree_s(ml, sizeof(*ml));
	}
	rtnl_unlock();
}
#endif
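
/*
 *	Example (userspace sketch, guarded out of the build): joining a
 *	link-layer multicast group goes through packet_mc_add() above,
 *	which reference-counts identical memberships and programs the
 *	device with dev_mc_add(). The group address and ifindex are
 *	illustrative assumptions.
 */
#if 0
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <string.h>

static int join_ll_multicast(int fd, int ifindex,
			     const unsigned char group[ETH_ALEN])
{
	struct packet_mreq mr;

	memset(&mr, 0, sizeof(mr));
	mr.mr_ifindex = ifindex;
	mr.mr_type = PACKET_MR_MULTICAST;
	mr.mr_alen = ETH_ALEN;
	memcpy(mr.mr_address, group, ETH_ALEN);
	return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
			  &mr, sizeof(mr));
}
#endif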

static int
packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
{
	struct sock *sk = sock->sk;
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname)	{
#ifdef CONFIG_PACKET_MULTICAST
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq mreq;
		if (optlen<sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq,optval,sizeof(mreq)))
			return -EFAULT;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
#endif
#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen<sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req,optval,sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
#endif
	default:
		return -ENOPROTOOPT;
	}
}

int packet_getsockopt(struct socket *sock, int level, int optname,
		      char *optval, int *optlen)
{
	int len;
	struct sock *sk = sock->sk;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len,optlen))
		return -EFAULT;

	switch(optname)	{
	case PACKET_STATISTICS:
	{
		struct tpacket_stats st;

		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->receive_queue.lock);
		st = sk->protinfo.af_packet->stats;
		memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->receive_queue.lock);
		st.tp_packets += st.tp_drops;

		if (copy_to_user(optval, &st, len))
			return -EFAULT;
		break;
	}
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
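
/*
 *	Example (userspace sketch, guarded out of the build): reading
 *	PACKET_STATISTICS. Two quirks are visible in the code above: the
 *	counters are reset to zero by the read, and tp_packets includes
 *	tp_drops.
 */
#if 0
#include <sys/socket.h>
#include <linux/if_packet.h>
#include <stdio.h>

static void print_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("%u packets, %u dropped\n",
		       st.tp_packets, st.tp_drops);
}
#endif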

static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct packet_opt *po;
	struct net_device *dev = (struct net_device*)data;

	read_lock(&packet_sklist_lock);
	for (sk = packet_sklist; sk; sk = sk->next) {
		po = sk->protinfo.af_packet;

		switch (msg) {
		case NETDEV_DOWN:
		case NETDEV_UNREGISTER:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->err = ENETDOWN;
					if (!sk->dead)
						sk->error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
#endif
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, +1);
#endif
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}

static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	int err;
	int pid;

	switch(cmd)
	{
		case FIOSETOWN:
		case SIOCSPGRP:
			err = get_user(pid, (int *) arg);
			if (err)
				return err;
			if (current->pid != pid && current->pgrp != -pid &&
			    !capable(CAP_NET_ADMIN))
				return -EPERM;
			sk->proc = pid;
			return(0);
		case FIOGETOWN:
		case SIOCGPGRP:
			return put_user(sk->proc, (int *)arg);
		case SIOCGSTAMP:
			if(sk->stamp.tv_sec==0)
				return -ENOENT;
			err = -EFAULT;
			if (!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval)))
				err = 0;
			return err;
		case SIOCGIFFLAGS:
#ifndef CONFIG_INET
		case SIOCSIFFLAGS:
#endif
		case SIOCGIFCONF:
		case SIOCGIFMETRIC:
		case SIOCSIFMETRIC:
		case SIOCGIFMEM:
		case SIOCSIFMEM:
		case SIOCGIFMTU:
		case SIOCSIFMTU:
		case SIOCSIFLINK:
		case SIOCGIFHWADDR:
		case SIOCSIFHWADDR:
		case SIOCSIFMAP:
		case SIOCGIFMAP:
		case SIOCSIFSLAVE:
		case SIOCGIFSLAVE:
		case SIOCGIFINDEX:
		case SIOCGIFNAME:
		case SIOCGIFCOUNT:
		case SIOCSIFHWBROADCAST:
			return(dev_ioctl(cmd,(void *) arg));

		case SIOCGIFBR:
		case SIOCSIFBR:
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
#ifdef CONFIG_INET
#ifdef CONFIG_KMOD
			if (br_ioctl_hook == NULL)
				request_module("bridge");
#endif
			if (br_ioctl_hook != NULL)
				return br_ioctl_hook(arg);
#endif
#endif
			return -ENOPKG;

#ifdef CONFIG_INET
		case SIOCADDRT:
		case SIOCDELRT:
		case SIOCDARP:
		case SIOCGARP:
		case SIOCSARP:
		case SIOCGIFADDR:
		case SIOCSIFADDR:
		case SIOCGIFBRDADDR:
		case SIOCSIFBRDADDR:
		case SIOCGIFNETMASK:
		case SIOCSIFNETMASK:
		case SIOCGIFDSTADDR:
		case SIOCSIFDSTADDR:
		case SIOCSIFFLAGS:
		case SIOCADDDLCI:
		case SIOCDELDLCI:
			return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

		default:
			if ((cmd >= SIOCDEVPRIVATE) &&
			    (cmd <= (SIOCDEVPRIVATE + 15)))
				return(dev_ioctl(cmd,(void *) arg));

#ifdef CONFIG_NET_RADIO
			if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
				return(dev_ioctl(cmd,(void *) arg));
#endif
			return -EOPNOTSUPP;
	}
	/*NOTREACHED*/
	return(0);
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_opt *po = sk->protinfo.af_packet;
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->receive_queue.lock);
	if (po->iovec) {
		unsigned last = po->head ? po->head-1 : po->iovmax;

		if (po->iovec[last]->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->receive_queue.lock);
	return mask;
}

/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket * sock = &inode->u.socket_i;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&sk->protinfo.af_packet->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket * sock = &inode->u.socket_i;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&sk->protinfo.af_packet->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
	open:	packet_mm_open,
	close:	packet_mm_close,
};

static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
{
	int i;

	for (i=0; i<len; i++) {
		if (pg_vec[i]) {
			unsigned long map, mapend;

			mapend = MAP_NR(pg_vec[i] + (PAGE_SIZE << order) - 1);
			for (map = MAP_NR(pg_vec[i]); map <= mapend; map++)
				clear_bit(PG_reserved, &mem_map[map].flags);
			free_pages(pg_vec[i], order);
		}
	}
	kfree(pg_vec);
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	unsigned long *pg_vec = NULL;
	struct tpacket_hdr **io_vec = NULL;
	struct packet_opt *po = sk->protinfo.af_packet;
	int order = 0;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;
		int frames_per_block;

		/* Sanity tests and some calculations */
		if ((int)req->tp_block_size <= 0)
			return -EINVAL;
		if (req->tp_block_size&(PAGE_SIZE-1))
			return -EINVAL;
		if (req->tp_frame_size < TPACKET_HDRLEN)
			return -EINVAL;
		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
			return -EINVAL;
		frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (frames_per_block <= 0)
			return -EINVAL;
		if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
			return -EINVAL;
		/* OK! */

		/* Allocate page vector */
		while ((PAGE_SIZE<<order) < req->tp_block_size)
			order++;

		err = -ENOMEM;

		pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
		if (pg_vec == NULL)
			goto out;
		memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));

		for (i=0; i<req->tp_block_nr; i++) {
			unsigned long map, mapend;
			pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
			if (!pg_vec[i])
				goto out_free_pgvec;

			mapend = MAP_NR(pg_vec[i] + (PAGE_SIZE << order) - 1);
			for (map = MAP_NR(pg_vec[i]); map <= mapend; map++)
				set_bit(PG_reserved, &mem_map[map].flags);
		}
		/* Page vector is allocated */

		/* Draw frames */
		io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
		if (io_vec == NULL)
			goto out_free_pgvec;
		memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));

		l = 0;
		for (i=0; i<req->tp_block_nr; i++) {
			unsigned long ptr = pg_vec[i];
			int k;

			for (k=0; k<frames_per_block; k++, l++) {
				io_vec[l] = (struct tpacket_hdr*)ptr;
				io_vec[l]->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (req->tp_frame_nr)
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	if (po->running)
		dev_remove_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		io_vec = XC(po->iovec, io_vec);
		po->iovmax = req->tp_frame_nr-1;
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (po->running)
		dev_add_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (io_vec)
		kfree(io_vec);

out_free_pgvec:
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_opt *po = sk->protinfo.af_packet;
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	atomic_inc(&po->mapped);
	start = vma->vm_start;
	err = -EAGAIN;
	for (i=0; i<po->pg_vec_len; i++) {
		if (remap_page_range(start, __pa(po->pg_vec[i]),
				     po->pg_vec_pages*PAGE_SIZE,
				     vma->vm_page_prot))
			goto out;
		start += po->pg_vec_pages*PAGE_SIZE;
	}
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
#endif
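
/*
 *	Example (userspace sketch, guarded out of the build): setting up the
 *	ring that packet_set_ring() and packet_mmap() above implement. The
 *	geometry must satisfy the sanity tests in packet_set_ring():
 *	page-multiple blocks, TPACKET_ALIGNMENT-aligned frames, and
 *	frames_per_block*block_nr == frame_nr. The sizes here are
 *	illustrative assumptions.
 */
#if 0
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_packet.h>

static char *setup_rx_ring(int fd, struct tpacket_req *req)
{
	void *p;

	req->tp_block_size = 4096;	/* one page per block */
	req->tp_block_nr = 32;
	req->tp_frame_size = 2048;	/* two frames per block */
	req->tp_frame_nr = 64;

	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING,
		       req, sizeof(*req)) < 0)
		return NULL;

	/* One contiguous mapping of all blocks; the size must match
	   exactly (see the size check in packet_mmap above). */
	p = mmap(0, req->tp_block_nr * req->tp_block_size,
		 PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	return p == MAP_FAILED ? NULL : (char *)p;
}
#endif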

#ifdef CONFIG_SOCK_PACKET
struct proto_ops packet_ops_spkt = {
	PF_PACKET,

	packet_release,
	packet_bind_spkt,
	sock_no_connect,
	sock_no_socketpair,
	sock_no_accept,
	packet_getname_spkt,
	datagram_poll,
	packet_ioctl,
	sock_no_listen,
	sock_no_shutdown,
	sock_no_setsockopt,
	sock_no_getsockopt,
	sock_no_fcntl,
	packet_sendmsg_spkt,
	packet_recvmsg,
	sock_no_mmap
};
#endif

struct proto_ops packet_ops = {
	PF_PACKET,

	packet_release,
	packet_bind,
	sock_no_connect,
	sock_no_socketpair,
	sock_no_accept,
	packet_getname,
	packet_poll,
	packet_ioctl,
	sock_no_listen,
	sock_no_shutdown,
	packet_setsockopt,
	packet_getsockopt,
	sock_no_fcntl,
	packet_sendmsg,
	packet_recvmsg,
	packet_mmap,
};

static struct net_proto_family packet_family_ops = {
	PF_PACKET,
	packet_create
};

struct notifier_block packet_netdev_notifier={
	packet_notifier,
	NULL,
	0
};

#ifdef CONFIG_PROC_FS
static int packet_read_proc(char *buffer, char **start, off_t offset,
			    int length, int *eof, void *data)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	struct sock *s;

	len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");

	read_lock(&packet_sklist_lock);

	for (s = packet_sklist; s; s = s->next) {
		len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
			     s,
			     atomic_read(&s->refcnt),
			     s->type,
			     ntohs(s->num),
			     s->protinfo.af_packet->ifindex,
			     s->protinfo.af_packet->running,
			     atomic_read(&s->rmem_alloc),
			     s->socket->inode->i_uid,
			     s->socket->inode->i_ino
			     );

		buffer[len++]='\n';

		pos=begin+len;
		if(pos<offset) {
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			goto done;
	}
	*eof = 1;

done:
	read_unlock(&packet_sklist_lock);
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if(len<0)
		len=0;
	return len;
}
#endif

#ifdef MODULE
void cleanup_module(void)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("net/packet", 0);
#endif
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	return;
}


int init_module(void)
#else
void __init packet_proto_init(struct net_proto *pro)
#endif
{
	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
#ifdef CONFIG_PROC_FS
	create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
#endif
#ifdef MODULE
	return 0;
#endif
}