2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PACKET - implements raw packet sockets.
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
35 * Ulises Alonso : Frame number limit removal and
36 * packet_set_ring memory leak.
37 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
40 * byte arrays at the end of sockaddr_ll
42 * Johann Baudy : Added TX RING.
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
51 #include <linux/types.h>
53 #include <linux/capability.h>
54 #include <linux/fcntl.h>
55 #include <linux/socket.h>
57 #include <linux/inet.h>
58 #include <linux/netdevice.h>
59 #include <linux/if_packet.h>
60 #include <linux/wireless.h>
61 #include <linux/kernel.h>
62 #include <linux/kmod.h>
63 #include <linux/slab.h>
64 #include <linux/vmalloc.h>
65 #include <net/net_namespace.h>
67 #include <net/protocol.h>
68 #include <linux/skbuff.h>
70 #include <linux/errno.h>
71 #include <linux/timer.h>
72 #include <asm/system.h>
73 #include <asm/uaccess.h>
74 #include <asm/ioctls.h>
76 #include <asm/cacheflush.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 #include <linux/poll.h>
81 #include <linux/module.h>
82 #include <linux/init.h>
83 #include <linux/mutex.h>
84 #include <linux/if_vlan.h>
85 #include <linux/virtio_net.h>
86 #include <linux/errqueue.h>
87 #include <linux/net_tstamp.h>
90 #include <net/inet_common.h>
95 - if device has no dev->hard_header routine, it adds and removes ll header
96 inside itself. In this case ll header is invisible outside of device,
97 but higher levels still should reserve dev->hard_header_len.
98 Some devices are clever enough to reallocate the skb when the header
99 will not fit into the reserved space (tunnels); other ones are silly
101 - packet socket receives packets with pulled ll header,
102 so that SOCK_RAW should push it back.
107 Incoming, dev->hard_header!=NULL
108 mac_header -> ll header
111 Outgoing, dev->hard_header!=NULL
112 mac_header -> ll header
115 Incoming, dev->hard_header==NULL
116 mac_header -> UNKNOWN position. It is very likely, that it points to ll
117 header. PPP does this, which is wrong, because it introduces
118 asymmetry between rx and tx paths.
121 Outgoing, dev->hard_header==NULL
122 mac_header -> data. ll header is still not built!
126 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
132 dev->hard_header != NULL
133 mac_header -> ll header
136 dev->hard_header == NULL (ll header is added by device, we cannot control it)
140 We should set nh.raw on output to the correct position,
141 packet classifier depends on it.
144 /* Private packet socket structures. */
146 struct packet_mclist
{
147 struct packet_mclist
*next
;
152 unsigned char addr
[MAX_ADDR_LEN
];
154 /* identical to struct packet_mreq except it has
155 * a longer address field.
157 struct packet_mreq_max
{
159 unsigned short mr_type
;
160 unsigned short mr_alen
;
161 unsigned char mr_address
[MAX_ADDR_LEN
];
164 static int packet_set_ring(struct sock
*sk
, struct tpacket_req
*req
,
165 int closing
, int tx_ring
);
171 struct packet_ring_buffer
{
174 unsigned int frames_per_block
;
175 unsigned int frame_size
;
176 unsigned int frame_max
;
178 unsigned int pg_vec_order
;
179 unsigned int pg_vec_pages
;
180 unsigned int pg_vec_len
;
186 static int tpacket_snd(struct packet_sock
*po
, struct msghdr
*msg
);
188 static void packet_flush_mclist(struct sock
*sk
);
191 /* struct sock has to be the first member of packet_sock */
193 struct tpacket_stats stats
;
194 struct packet_ring_buffer rx_ring
;
195 struct packet_ring_buffer tx_ring
;
197 spinlock_t bind_lock
;
198 struct mutex pg_vec_lock
;
199 unsigned int running
:1, /* prot_hook is attached*/
203 int ifindex
; /* bound device */
205 struct packet_mclist
*mclist
;
207 enum tpacket_versions tp_version
;
208 unsigned int tp_hdrlen
;
209 unsigned int tp_reserve
;
210 unsigned int tp_loss
:1;
211 unsigned int tp_tstamp
;
212 struct packet_type prot_hook ____cacheline_aligned_in_smp
;
215 struct packet_skb_cb
{
216 unsigned int origlen
;
218 struct sockaddr_pkt pkt
;
219 struct sockaddr_ll ll
;
223 #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
225 static inline __pure
struct page
*pgv_to_page(void *addr
)
227 if (is_vmalloc_addr(addr
))
228 return vmalloc_to_page(addr
);
229 return virt_to_page(addr
);
232 static void __packet_set_status(struct packet_sock
*po
, void *frame
, int status
)
235 struct tpacket_hdr
*h1
;
236 struct tpacket2_hdr
*h2
;
241 switch (po
->tp_version
) {
243 h
.h1
->tp_status
= status
;
244 flush_dcache_page(pgv_to_page(&h
.h1
->tp_status
));
247 h
.h2
->tp_status
= status
;
248 flush_dcache_page(pgv_to_page(&h
.h2
->tp_status
));
251 pr_err("TPACKET version not supported\n");
258 static int __packet_get_status(struct packet_sock
*po
, void *frame
)
261 struct tpacket_hdr
*h1
;
262 struct tpacket2_hdr
*h2
;
269 switch (po
->tp_version
) {
271 flush_dcache_page(pgv_to_page(&h
.h1
->tp_status
));
272 return h
.h1
->tp_status
;
274 flush_dcache_page(pgv_to_page(&h
.h2
->tp_status
));
275 return h
.h2
->tp_status
;
277 pr_err("TPACKET version not supported\n");
283 static void *packet_lookup_frame(struct packet_sock
*po
,
284 struct packet_ring_buffer
*rb
,
285 unsigned int position
,
288 unsigned int pg_vec_pos
, frame_offset
;
290 struct tpacket_hdr
*h1
;
291 struct tpacket2_hdr
*h2
;
295 pg_vec_pos
= position
/ rb
->frames_per_block
;
296 frame_offset
= position
% rb
->frames_per_block
;
298 h
.raw
= rb
->pg_vec
[pg_vec_pos
].buffer
+
299 (frame_offset
* rb
->frame_size
);
301 if (status
!= __packet_get_status(po
, h
.raw
))
307 static inline void *packet_current_frame(struct packet_sock
*po
,
308 struct packet_ring_buffer
*rb
,
311 return packet_lookup_frame(po
, rb
, rb
->head
, status
);
314 static inline void *packet_previous_frame(struct packet_sock
*po
,
315 struct packet_ring_buffer
*rb
,
318 unsigned int previous
= rb
->head
? rb
->head
- 1 : rb
->frame_max
;
319 return packet_lookup_frame(po
, rb
, previous
, status
);
322 static inline void packet_increment_head(struct packet_ring_buffer
*buff
)
324 buff
->head
= buff
->head
!= buff
->frame_max
? buff
->head
+1 : 0;
/*
 * Downcast a struct sock to its containing packet_sock; valid because
 * struct sock is the first member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
332 static void packet_sock_destruct(struct sock
*sk
)
334 skb_queue_purge(&sk
->sk_error_queue
);
336 WARN_ON(atomic_read(&sk
->sk_rmem_alloc
));
337 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
339 if (!sock_flag(sk
, SOCK_DEAD
)) {
340 pr_err("Attempt to release alive packet socket: %p\n", sk
);
344 sk_refcnt_debug_dec(sk
);
348 static const struct proto_ops packet_ops
;
350 static const struct proto_ops packet_ops_spkt
;
352 static int packet_rcv_spkt(struct sk_buff
*skb
, struct net_device
*dev
,
353 struct packet_type
*pt
, struct net_device
*orig_dev
)
356 struct sockaddr_pkt
*spkt
;
359 * When we registered the protocol we saved the socket in the data
360 * field for just this event.
363 sk
= pt
->af_packet_priv
;
366 * Yank back the headers [hope the device set this
367 * right or kerboom...]
369 * Incoming packets have ll header pulled,
372 * For outgoing ones skb->data == skb_mac_header(skb)
373 * so that this procedure is noop.
376 if (skb
->pkt_type
== PACKET_LOOPBACK
)
379 if (!net_eq(dev_net(dev
), sock_net(sk
)))
382 skb
= skb_share_check(skb
, GFP_ATOMIC
);
386 /* drop any routing info */
389 /* drop conntrack reference */
392 spkt
= &PACKET_SKB_CB(skb
)->sa
.pkt
;
394 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
397 * The SOCK_PACKET socket receives _all_ frames.
400 spkt
->spkt_family
= dev
->type
;
401 strlcpy(spkt
->spkt_device
, dev
->name
, sizeof(spkt
->spkt_device
));
402 spkt
->spkt_protocol
= skb
->protocol
;
405 * Charge the memory to the socket. This is done specifically
406 * to prevent sockets using all the memory up.
409 if (sock_queue_rcv_skb(sk
, skb
) == 0)
420 * Output a raw packet to a device layer. This bypasses all the other
421 * protocol layers and you must therefore supply it with a complete frame
424 static int packet_sendmsg_spkt(struct kiocb
*iocb
, struct socket
*sock
,
425 struct msghdr
*msg
, size_t len
)
427 struct sock
*sk
= sock
->sk
;
428 struct sockaddr_pkt
*saddr
= (struct sockaddr_pkt
*)msg
->msg_name
;
429 struct sk_buff
*skb
= NULL
;
430 struct net_device
*dev
;
435 * Get and verify the address.
439 if (msg
->msg_namelen
< sizeof(struct sockaddr
))
441 if (msg
->msg_namelen
== sizeof(struct sockaddr_pkt
))
442 proto
= saddr
->spkt_protocol
;
444 return -ENOTCONN
; /* SOCK_PACKET must be sent giving an address */
447 * Find the device first to size check it
450 saddr
->spkt_device
[13] = 0;
453 dev
= dev_get_by_name_rcu(sock_net(sk
), saddr
->spkt_device
);
459 if (!(dev
->flags
& IFF_UP
))
463 * You may not queue a frame bigger than the mtu. This is the lowest level
464 * raw protocol and you must do your own fragmentation at this level.
468 if (len
> dev
->mtu
+ dev
->hard_header_len
+ VLAN_HLEN
)
472 size_t reserved
= LL_RESERVED_SPACE(dev
);
473 unsigned int hhlen
= dev
->header_ops
? dev
->hard_header_len
: 0;
476 skb
= sock_wmalloc(sk
, len
+ reserved
, 0, GFP_KERNEL
);
479 /* FIXME: Save some space for broken drivers that write a hard
480 * header at transmission time by themselves. PPP is the notable
481 * one here. This should really be fixed at the driver level.
483 skb_reserve(skb
, reserved
);
484 skb_reset_network_header(skb
);
486 /* Try to align data part correctly */
491 skb_reset_network_header(skb
);
493 err
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
499 if (len
> (dev
->mtu
+ dev
->hard_header_len
)) {
500 /* Earlier code assumed this would be a VLAN pkt,
501 * double-check this now that we have the actual
505 skb_reset_mac_header(skb
);
507 if (ehdr
->h_proto
!= htons(ETH_P_8021Q
)) {
513 skb
->protocol
= proto
;
515 skb
->priority
= sk
->sk_priority
;
516 skb
->mark
= sk
->sk_mark
;
517 err
= sock_tx_timestamp(sk
, &skb_shinfo(skb
)->tx_flags
);
532 static inline unsigned int run_filter(const struct sk_buff
*skb
,
533 const struct sock
*sk
,
536 struct sk_filter
*filter
;
539 filter
= rcu_dereference(sk
->sk_filter
);
541 res
= sk_run_filter(skb
, filter
->insns
);
548 * This function makes lazy skb cloning in hope that most of packets
549 * are discarded by BPF.
551 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
552 * and skb->cb are mangled. It works because (and until) packets
553 * falling here are owned by current CPU. Output packets are cloned
554 * by dev_queue_xmit_nit(), input packets are processed by net_bh
555 sequentially, so that if we return skb to original state on exit,
556 * we will not harm anyone.
559 static int packet_rcv(struct sk_buff
*skb
, struct net_device
*dev
,
560 struct packet_type
*pt
, struct net_device
*orig_dev
)
563 struct sockaddr_ll
*sll
;
564 struct packet_sock
*po
;
565 u8
*skb_head
= skb
->data
;
566 int skb_len
= skb
->len
;
567 unsigned int snaplen
, res
;
569 if (skb
->pkt_type
== PACKET_LOOPBACK
)
572 sk
= pt
->af_packet_priv
;
575 if (!net_eq(dev_net(dev
), sock_net(sk
)))
580 if (dev
->header_ops
) {
581 /* The device has an explicit notion of ll header,
582 * exported to higher levels.
584 * Otherwise, the device hides details of its frame
585 * structure, so that corresponding packet head is
586 * never delivered to user.
588 if (sk
->sk_type
!= SOCK_DGRAM
)
589 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
590 else if (skb
->pkt_type
== PACKET_OUTGOING
) {
591 /* Special case: outgoing packets have ll header at head */
592 skb_pull(skb
, skb_network_offset(skb
));
598 res
= run_filter(skb
, sk
, snaplen
);
604 if (atomic_read(&sk
->sk_rmem_alloc
) + skb
->truesize
>=
605 (unsigned)sk
->sk_rcvbuf
)
608 if (skb_shared(skb
)) {
609 struct sk_buff
*nskb
= skb_clone(skb
, GFP_ATOMIC
);
613 if (skb_head
!= skb
->data
) {
614 skb
->data
= skb_head
;
621 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb
)) + MAX_ADDR_LEN
- 8 >
624 sll
= &PACKET_SKB_CB(skb
)->sa
.ll
;
625 sll
->sll_family
= AF_PACKET
;
626 sll
->sll_hatype
= dev
->type
;
627 sll
->sll_protocol
= skb
->protocol
;
628 sll
->sll_pkttype
= skb
->pkt_type
;
629 if (unlikely(po
->origdev
))
630 sll
->sll_ifindex
= orig_dev
->ifindex
;
632 sll
->sll_ifindex
= dev
->ifindex
;
634 sll
->sll_halen
= dev_parse_header(skb
, sll
->sll_addr
);
636 PACKET_SKB_CB(skb
)->origlen
= skb
->len
;
638 if (pskb_trim(skb
, snaplen
))
641 skb_set_owner_r(skb
, sk
);
645 /* drop conntrack reference */
648 spin_lock(&sk
->sk_receive_queue
.lock
);
649 po
->stats
.tp_packets
++;
650 skb
->dropcount
= atomic_read(&sk
->sk_drops
);
651 __skb_queue_tail(&sk
->sk_receive_queue
, skb
);
652 spin_unlock(&sk
->sk_receive_queue
.lock
);
653 sk
->sk_data_ready(sk
, skb
->len
);
657 po
->stats
.tp_drops
= atomic_inc_return(&sk
->sk_drops
);
660 if (skb_head
!= skb
->data
&& skb_shared(skb
)) {
661 skb
->data
= skb_head
;
669 static int tpacket_rcv(struct sk_buff
*skb
, struct net_device
*dev
,
670 struct packet_type
*pt
, struct net_device
*orig_dev
)
673 struct packet_sock
*po
;
674 struct sockaddr_ll
*sll
;
676 struct tpacket_hdr
*h1
;
677 struct tpacket2_hdr
*h2
;
680 u8
*skb_head
= skb
->data
;
681 int skb_len
= skb
->len
;
682 unsigned int snaplen
, res
;
683 unsigned long status
= TP_STATUS_LOSING
|TP_STATUS_USER
;
684 unsigned short macoff
, netoff
, hdrlen
;
685 struct sk_buff
*copy_skb
= NULL
;
688 struct skb_shared_hwtstamps
*shhwtstamps
= skb_hwtstamps(skb
);
690 if (skb
->pkt_type
== PACKET_LOOPBACK
)
693 sk
= pt
->af_packet_priv
;
696 if (!net_eq(dev_net(dev
), sock_net(sk
)))
699 if (dev
->header_ops
) {
700 if (sk
->sk_type
!= SOCK_DGRAM
)
701 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
702 else if (skb
->pkt_type
== PACKET_OUTGOING
) {
703 /* Special case: outgoing packets have ll header at head */
704 skb_pull(skb
, skb_network_offset(skb
));
708 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
709 status
|= TP_STATUS_CSUMNOTREADY
;
713 res
= run_filter(skb
, sk
, snaplen
);
719 if (sk
->sk_type
== SOCK_DGRAM
) {
720 macoff
= netoff
= TPACKET_ALIGN(po
->tp_hdrlen
) + 16 +
723 unsigned maclen
= skb_network_offset(skb
);
724 netoff
= TPACKET_ALIGN(po
->tp_hdrlen
+
725 (maclen
< 16 ? 16 : maclen
)) +
727 macoff
= netoff
- maclen
;
730 if (macoff
+ snaplen
> po
->rx_ring
.frame_size
) {
731 if (po
->copy_thresh
&&
732 atomic_read(&sk
->sk_rmem_alloc
) + skb
->truesize
<
733 (unsigned)sk
->sk_rcvbuf
) {
734 if (skb_shared(skb
)) {
735 copy_skb
= skb_clone(skb
, GFP_ATOMIC
);
737 copy_skb
= skb_get(skb
);
738 skb_head
= skb
->data
;
741 skb_set_owner_r(copy_skb
, sk
);
743 snaplen
= po
->rx_ring
.frame_size
- macoff
;
744 if ((int)snaplen
< 0)
748 spin_lock(&sk
->sk_receive_queue
.lock
);
749 h
.raw
= packet_current_frame(po
, &po
->rx_ring
, TP_STATUS_KERNEL
);
752 packet_increment_head(&po
->rx_ring
);
753 po
->stats
.tp_packets
++;
755 status
|= TP_STATUS_COPY
;
756 __skb_queue_tail(&sk
->sk_receive_queue
, copy_skb
);
758 if (!po
->stats
.tp_drops
)
759 status
&= ~TP_STATUS_LOSING
;
760 spin_unlock(&sk
->sk_receive_queue
.lock
);
762 skb_copy_bits(skb
, 0, h
.raw
+ macoff
, snaplen
);
764 switch (po
->tp_version
) {
766 h
.h1
->tp_len
= skb
->len
;
767 h
.h1
->tp_snaplen
= snaplen
;
768 h
.h1
->tp_mac
= macoff
;
769 h
.h1
->tp_net
= netoff
;
770 if ((po
->tp_tstamp
& SOF_TIMESTAMPING_SYS_HARDWARE
)
771 && shhwtstamps
->syststamp
.tv64
)
772 tv
= ktime_to_timeval(shhwtstamps
->syststamp
);
773 else if ((po
->tp_tstamp
& SOF_TIMESTAMPING_RAW_HARDWARE
)
774 && shhwtstamps
->hwtstamp
.tv64
)
775 tv
= ktime_to_timeval(shhwtstamps
->hwtstamp
);
776 else if (skb
->tstamp
.tv64
)
777 tv
= ktime_to_timeval(skb
->tstamp
);
779 do_gettimeofday(&tv
);
780 h
.h1
->tp_sec
= tv
.tv_sec
;
781 h
.h1
->tp_usec
= tv
.tv_usec
;
782 hdrlen
= sizeof(*h
.h1
);
785 h
.h2
->tp_len
= skb
->len
;
786 h
.h2
->tp_snaplen
= snaplen
;
787 h
.h2
->tp_mac
= macoff
;
788 h
.h2
->tp_net
= netoff
;
789 if ((po
->tp_tstamp
& SOF_TIMESTAMPING_SYS_HARDWARE
)
790 && shhwtstamps
->syststamp
.tv64
)
791 ts
= ktime_to_timespec(shhwtstamps
->syststamp
);
792 else if ((po
->tp_tstamp
& SOF_TIMESTAMPING_RAW_HARDWARE
)
793 && shhwtstamps
->hwtstamp
.tv64
)
794 ts
= ktime_to_timespec(shhwtstamps
->hwtstamp
);
795 else if (skb
->tstamp
.tv64
)
796 ts
= ktime_to_timespec(skb
->tstamp
);
799 h
.h2
->tp_sec
= ts
.tv_sec
;
800 h
.h2
->tp_nsec
= ts
.tv_nsec
;
801 h
.h2
->tp_vlan_tci
= vlan_tx_tag_get(skb
);
802 h
.h2
->tp_padding
= 0;
803 hdrlen
= sizeof(*h
.h2
);
809 sll
= h
.raw
+ TPACKET_ALIGN(hdrlen
);
810 sll
->sll_halen
= dev_parse_header(skb
, sll
->sll_addr
);
811 sll
->sll_family
= AF_PACKET
;
812 sll
->sll_hatype
= dev
->type
;
813 sll
->sll_protocol
= skb
->protocol
;
814 sll
->sll_pkttype
= skb
->pkt_type
;
815 if (unlikely(po
->origdev
))
816 sll
->sll_ifindex
= orig_dev
->ifindex
;
818 sll
->sll_ifindex
= dev
->ifindex
;
820 __packet_set_status(po
, h
.raw
, status
);
822 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
826 end
= (u8
*)PAGE_ALIGN((unsigned long)h
.raw
+ macoff
+ snaplen
);
827 for (start
= h
.raw
; start
< end
; start
+= PAGE_SIZE
)
828 flush_dcache_page(pgv_to_page(start
));
832 sk
->sk_data_ready(sk
, 0);
835 if (skb_head
!= skb
->data
&& skb_shared(skb
)) {
836 skb
->data
= skb_head
;
844 po
->stats
.tp_drops
++;
845 spin_unlock(&sk
->sk_receive_queue
.lock
);
847 sk
->sk_data_ready(sk
, 0);
852 static void tpacket_destruct_skb(struct sk_buff
*skb
)
854 struct packet_sock
*po
= pkt_sk(skb
->sk
);
859 if (likely(po
->tx_ring
.pg_vec
)) {
860 ph
= skb_shinfo(skb
)->destructor_arg
;
861 BUG_ON(__packet_get_status(po
, ph
) != TP_STATUS_SENDING
);
862 BUG_ON(atomic_read(&po
->tx_ring
.pending
) == 0);
863 atomic_dec(&po
->tx_ring
.pending
);
864 __packet_set_status(po
, ph
, TP_STATUS_AVAILABLE
);
870 static int tpacket_fill_skb(struct packet_sock
*po
, struct sk_buff
*skb
,
871 void *frame
, struct net_device
*dev
, int size_max
,
872 __be16 proto
, unsigned char *addr
)
875 struct tpacket_hdr
*h1
;
876 struct tpacket2_hdr
*h2
;
879 int to_write
, offset
, len
, tp_len
, nr_frags
, len_max
;
880 struct socket
*sock
= po
->sk
.sk_socket
;
887 skb
->protocol
= proto
;
889 skb
->priority
= po
->sk
.sk_priority
;
890 skb
->mark
= po
->sk
.sk_mark
;
891 skb_shinfo(skb
)->destructor_arg
= ph
.raw
;
893 switch (po
->tp_version
) {
895 tp_len
= ph
.h2
->tp_len
;
898 tp_len
= ph
.h1
->tp_len
;
901 if (unlikely(tp_len
> size_max
)) {
902 pr_err("packet size is too long (%d > %d)\n", tp_len
, size_max
);
906 skb_reserve(skb
, LL_RESERVED_SPACE(dev
));
907 skb_reset_network_header(skb
);
909 data
= ph
.raw
+ po
->tp_hdrlen
- sizeof(struct sockaddr_ll
);
912 if (sock
->type
== SOCK_DGRAM
) {
913 err
= dev_hard_header(skb
, dev
, ntohs(proto
), addr
,
915 if (unlikely(err
< 0))
917 } else if (dev
->hard_header_len
) {
918 /* net device doesn't like empty head */
919 if (unlikely(tp_len
<= dev
->hard_header_len
)) {
920 pr_err("packet size is too short (%d < %d)\n",
921 tp_len
, dev
->hard_header_len
);
925 skb_push(skb
, dev
->hard_header_len
);
926 err
= skb_store_bits(skb
, 0, data
,
927 dev
->hard_header_len
);
931 data
+= dev
->hard_header_len
;
932 to_write
-= dev
->hard_header_len
;
936 offset
= offset_in_page(data
);
937 len_max
= PAGE_SIZE
- offset
;
938 len
= ((to_write
> len_max
) ? len_max
: to_write
);
940 skb
->data_len
= to_write
;
941 skb
->len
+= to_write
;
942 skb
->truesize
+= to_write
;
943 atomic_add(to_write
, &po
->sk
.sk_wmem_alloc
);
945 while (likely(to_write
)) {
946 nr_frags
= skb_shinfo(skb
)->nr_frags
;
948 if (unlikely(nr_frags
>= MAX_SKB_FRAGS
)) {
949 pr_err("Packet exceed the number of skb frags(%lu)\n",
954 page
= pgv_to_page(data
);
956 flush_dcache_page(page
);
958 skb_fill_page_desc(skb
, nr_frags
, page
, offset
, len
);
962 len
= ((to_write
> len_max
) ? len_max
: to_write
);
968 static int tpacket_snd(struct packet_sock
*po
, struct msghdr
*msg
)
971 struct net_device
*dev
;
973 int ifindex
, err
, reserve
= 0;
975 struct sockaddr_ll
*saddr
= (struct sockaddr_ll
*)msg
->msg_name
;
976 int tp_len
, size_max
;
981 mutex_lock(&po
->pg_vec_lock
);
985 ifindex
= po
->ifindex
;
990 if (msg
->msg_namelen
< sizeof(struct sockaddr_ll
))
992 if (msg
->msg_namelen
< (saddr
->sll_halen
993 + offsetof(struct sockaddr_ll
,
996 ifindex
= saddr
->sll_ifindex
;
997 proto
= saddr
->sll_protocol
;
998 addr
= saddr
->sll_addr
;
1001 dev
= dev_get_by_index(sock_net(&po
->sk
), ifindex
);
1003 if (unlikely(dev
== NULL
))
1006 reserve
= dev
->hard_header_len
;
1009 if (unlikely(!(dev
->flags
& IFF_UP
)))
1012 size_max
= po
->tx_ring
.frame_size
1013 - (po
->tp_hdrlen
- sizeof(struct sockaddr_ll
));
1015 if (size_max
> dev
->mtu
+ reserve
)
1016 size_max
= dev
->mtu
+ reserve
;
1019 ph
= packet_current_frame(po
, &po
->tx_ring
,
1020 TP_STATUS_SEND_REQUEST
);
1022 if (unlikely(ph
== NULL
)) {
1027 status
= TP_STATUS_SEND_REQUEST
;
1028 skb
= sock_alloc_send_skb(&po
->sk
,
1029 LL_ALLOCATED_SPACE(dev
)
1030 + sizeof(struct sockaddr_ll
),
1033 if (unlikely(skb
== NULL
))
1036 tp_len
= tpacket_fill_skb(po
, skb
, ph
, dev
, size_max
, proto
,
1039 if (unlikely(tp_len
< 0)) {
1041 __packet_set_status(po
, ph
,
1042 TP_STATUS_AVAILABLE
);
1043 packet_increment_head(&po
->tx_ring
);
1047 status
= TP_STATUS_WRONG_FORMAT
;
1053 skb
->destructor
= tpacket_destruct_skb
;
1054 __packet_set_status(po
, ph
, TP_STATUS_SENDING
);
1055 atomic_inc(&po
->tx_ring
.pending
);
1057 status
= TP_STATUS_SEND_REQUEST
;
1058 err
= dev_queue_xmit(skb
);
1059 if (unlikely(err
> 0)) {
1060 err
= net_xmit_errno(err
);
1061 if (err
&& __packet_get_status(po
, ph
) ==
1062 TP_STATUS_AVAILABLE
) {
1063 /* skb was destructed already */
1068 * skb was dropped but not destructed yet;
1069 * let's treat it like congestion or err < 0
1073 packet_increment_head(&po
->tx_ring
);
1075 } while (likely((ph
!= NULL
) ||
1076 ((!(msg
->msg_flags
& MSG_DONTWAIT
)) &&
1077 (atomic_read(&po
->tx_ring
.pending
))))
1084 __packet_set_status(po
, ph
, status
);
1089 mutex_unlock(&po
->pg_vec_lock
);
1093 static inline struct sk_buff
*packet_alloc_skb(struct sock
*sk
, size_t prepad
,
1094 size_t reserve
, size_t len
,
1095 size_t linear
, int noblock
,
1098 struct sk_buff
*skb
;
1100 /* Under a page? Don't bother with paged skb. */
1101 if (prepad
+ len
< PAGE_SIZE
|| !linear
)
1104 skb
= sock_alloc_send_pskb(sk
, prepad
+ linear
, len
- linear
, noblock
,
1109 skb_reserve(skb
, reserve
);
1110 skb_put(skb
, linear
);
1111 skb
->data_len
= len
- linear
;
1112 skb
->len
+= len
- linear
;
1117 static int packet_snd(struct socket
*sock
,
1118 struct msghdr
*msg
, size_t len
)
1120 struct sock
*sk
= sock
->sk
;
1121 struct sockaddr_ll
*saddr
= (struct sockaddr_ll
*)msg
->msg_name
;
1122 struct sk_buff
*skb
;
1123 struct net_device
*dev
;
1125 unsigned char *addr
;
1126 int ifindex
, err
, reserve
= 0;
1127 struct virtio_net_hdr vnet_hdr
= { 0 };
1130 struct packet_sock
*po
= pkt_sk(sk
);
1131 unsigned short gso_type
= 0;
1134 * Get and verify the address.
1137 if (saddr
== NULL
) {
1138 ifindex
= po
->ifindex
;
1143 if (msg
->msg_namelen
< sizeof(struct sockaddr_ll
))
1145 if (msg
->msg_namelen
< (saddr
->sll_halen
+ offsetof(struct sockaddr_ll
, sll_addr
)))
1147 ifindex
= saddr
->sll_ifindex
;
1148 proto
= saddr
->sll_protocol
;
1149 addr
= saddr
->sll_addr
;
1153 dev
= dev_get_by_index(sock_net(sk
), ifindex
);
1157 if (sock
->type
== SOCK_RAW
)
1158 reserve
= dev
->hard_header_len
;
1161 if (!(dev
->flags
& IFF_UP
))
1164 if (po
->has_vnet_hdr
) {
1165 vnet_hdr_len
= sizeof(vnet_hdr
);
1168 if (len
< vnet_hdr_len
)
1171 len
-= vnet_hdr_len
;
1173 err
= memcpy_fromiovec((void *)&vnet_hdr
, msg
->msg_iov
,
1178 if ((vnet_hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) &&
1179 (vnet_hdr
.csum_start
+ vnet_hdr
.csum_offset
+ 2 >
1181 vnet_hdr
.hdr_len
= vnet_hdr
.csum_start
+
1182 vnet_hdr
.csum_offset
+ 2;
1185 if (vnet_hdr
.hdr_len
> len
)
1188 if (vnet_hdr
.gso_type
!= VIRTIO_NET_HDR_GSO_NONE
) {
1189 switch (vnet_hdr
.gso_type
& ~VIRTIO_NET_HDR_GSO_ECN
) {
1190 case VIRTIO_NET_HDR_GSO_TCPV4
:
1191 gso_type
= SKB_GSO_TCPV4
;
1193 case VIRTIO_NET_HDR_GSO_TCPV6
:
1194 gso_type
= SKB_GSO_TCPV6
;
1196 case VIRTIO_NET_HDR_GSO_UDP
:
1197 gso_type
= SKB_GSO_UDP
;
1203 if (vnet_hdr
.gso_type
& VIRTIO_NET_HDR_GSO_ECN
)
1204 gso_type
|= SKB_GSO_TCP_ECN
;
1206 if (vnet_hdr
.gso_size
== 0)
1213 if (!gso_type
&& (len
> dev
->mtu
+ reserve
+ VLAN_HLEN
))
1217 skb
= packet_alloc_skb(sk
, LL_ALLOCATED_SPACE(dev
),
1218 LL_RESERVED_SPACE(dev
), len
, vnet_hdr
.hdr_len
,
1219 msg
->msg_flags
& MSG_DONTWAIT
, &err
);
1223 skb_set_network_header(skb
, reserve
);
1226 if (sock
->type
== SOCK_DGRAM
&&
1227 (offset
= dev_hard_header(skb
, dev
, ntohs(proto
), addr
, NULL
, len
)) < 0)
1230 /* Returns -EFAULT on error */
1231 err
= skb_copy_datagram_from_iovec(skb
, offset
, msg
->msg_iov
, 0, len
);
1234 err
= sock_tx_timestamp(sk
, &skb_shinfo(skb
)->tx_flags
);
1238 if (!gso_type
&& (len
> dev
->mtu
+ reserve
)) {
1239 /* Earlier code assumed this would be a VLAN pkt,
1240 * double-check this now that we have the actual
1243 struct ethhdr
*ehdr
;
1244 skb_reset_mac_header(skb
);
1245 ehdr
= eth_hdr(skb
);
1246 if (ehdr
->h_proto
!= htons(ETH_P_8021Q
)) {
1252 skb
->protocol
= proto
;
1254 skb
->priority
= sk
->sk_priority
;
1255 skb
->mark
= sk
->sk_mark
;
1257 if (po
->has_vnet_hdr
) {
1258 if (vnet_hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) {
1259 if (!skb_partial_csum_set(skb
, vnet_hdr
.csum_start
,
1260 vnet_hdr
.csum_offset
)) {
1266 skb_shinfo(skb
)->gso_size
= vnet_hdr
.gso_size
;
1267 skb_shinfo(skb
)->gso_type
= gso_type
;
1269 /* Header must be checked, and gso_segs computed. */
1270 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
1271 skb_shinfo(skb
)->gso_segs
= 0;
1273 len
+= vnet_hdr_len
;
1280 err
= dev_queue_xmit(skb
);
1281 if (err
> 0 && (err
= net_xmit_errno(err
)) != 0)
1297 static int packet_sendmsg(struct kiocb
*iocb
, struct socket
*sock
,
1298 struct msghdr
*msg
, size_t len
)
1300 struct sock
*sk
= sock
->sk
;
1301 struct packet_sock
*po
= pkt_sk(sk
);
1302 if (po
->tx_ring
.pg_vec
)
1303 return tpacket_snd(po
, msg
);
1305 return packet_snd(sock
, msg
, len
);
1309 * Close a PACKET socket. This is fairly simple. We immediately go
1310 * to 'closed' state and remove our protocol entry in the device list.
1313 static int packet_release(struct socket
*sock
)
1315 struct sock
*sk
= sock
->sk
;
1316 struct packet_sock
*po
;
1318 struct tpacket_req req
;
1326 spin_lock_bh(&net
->packet
.sklist_lock
);
1327 sk_del_node_init_rcu(sk
);
1328 sock_prot_inuse_add(net
, sk
->sk_prot
, -1);
1329 spin_unlock_bh(&net
->packet
.sklist_lock
);
1331 spin_lock(&po
->bind_lock
);
1334 * Remove from protocol table
1338 __dev_remove_pack(&po
->prot_hook
);
1341 spin_unlock(&po
->bind_lock
);
1343 packet_flush_mclist(sk
);
1345 memset(&req
, 0, sizeof(req
));
1347 if (po
->rx_ring
.pg_vec
)
1348 packet_set_ring(sk
, &req
, 1, 0);
1350 if (po
->tx_ring
.pg_vec
)
1351 packet_set_ring(sk
, &req
, 1, 1);
1355 * Now the socket is dead. No more input will appear.
1362 skb_queue_purge(&sk
->sk_receive_queue
);
1363 sk_refcnt_debug_release(sk
);
1370 * Attach a packet hook.
1373 static int packet_do_bind(struct sock
*sk
, struct net_device
*dev
, __be16 protocol
)
1375 struct packet_sock
*po
= pkt_sk(sk
);
1377 * Detach an existing hook if present.
1382 spin_lock(&po
->bind_lock
);
1387 spin_unlock(&po
->bind_lock
);
1388 dev_remove_pack(&po
->prot_hook
);
1389 spin_lock(&po
->bind_lock
);
1393 po
->prot_hook
.type
= protocol
;
1394 po
->prot_hook
.dev
= dev
;
1396 po
->ifindex
= dev
? dev
->ifindex
: 0;
1401 if (!dev
|| (dev
->flags
& IFF_UP
)) {
1402 dev_add_pack(&po
->prot_hook
);
1406 sk
->sk_err
= ENETDOWN
;
1407 if (!sock_flag(sk
, SOCK_DEAD
))
1408 sk
->sk_error_report(sk
);
1412 spin_unlock(&po
->bind_lock
);
1418 * Bind a packet socket to a device
1421 static int packet_bind_spkt(struct socket
*sock
, struct sockaddr
*uaddr
,
1424 struct sock
*sk
= sock
->sk
;
1426 struct net_device
*dev
;
1433 if (addr_len
!= sizeof(struct sockaddr
))
1435 strlcpy(name
, uaddr
->sa_data
, sizeof(name
));
1437 dev
= dev_get_by_name(sock_net(sk
), name
);
1439 err
= packet_do_bind(sk
, dev
, pkt_sk(sk
)->num
);
1445 static int packet_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
1447 struct sockaddr_ll
*sll
= (struct sockaddr_ll
*)uaddr
;
1448 struct sock
*sk
= sock
->sk
;
1449 struct net_device
*dev
= NULL
;
1457 if (addr_len
< sizeof(struct sockaddr_ll
))
1459 if (sll
->sll_family
!= AF_PACKET
)
1462 if (sll
->sll_ifindex
) {
1464 dev
= dev_get_by_index(sock_net(sk
), sll
->sll_ifindex
);
1468 err
= packet_do_bind(sk
, dev
, sll
->sll_protocol
? : pkt_sk(sk
)->num
);
1476 static struct proto packet_proto
= {
1478 .owner
= THIS_MODULE
,
1479 .obj_size
= sizeof(struct packet_sock
),
1483 * Create a packet of type SOCK_PACKET.
1486 static int packet_create(struct net
*net
, struct socket
*sock
, int protocol
,
1490 struct packet_sock
*po
;
1491 __be16 proto
= (__force __be16
)protocol
; /* weird, but documented */
1494 if (!capable(CAP_NET_RAW
))
1496 if (sock
->type
!= SOCK_DGRAM
&& sock
->type
!= SOCK_RAW
&&
1497 sock
->type
!= SOCK_PACKET
)
1498 return -ESOCKTNOSUPPORT
;
1500 sock
->state
= SS_UNCONNECTED
;
1503 sk
= sk_alloc(net
, PF_PACKET
, GFP_KERNEL
, &packet_proto
);
1507 sock
->ops
= &packet_ops
;
1508 if (sock
->type
== SOCK_PACKET
)
1509 sock
->ops
= &packet_ops_spkt
;
1511 sock_init_data(sock
, sk
);
1514 sk
->sk_family
= PF_PACKET
;
1517 sk
->sk_destruct
= packet_sock_destruct
;
1518 sk_refcnt_debug_inc(sk
);
1521 * Attach a protocol block
1524 spin_lock_init(&po
->bind_lock
);
1525 mutex_init(&po
->pg_vec_lock
);
1526 po
->prot_hook
.func
= packet_rcv
;
1528 if (sock
->type
== SOCK_PACKET
)
1529 po
->prot_hook
.func
= packet_rcv_spkt
;
1531 po
->prot_hook
.af_packet_priv
= sk
;
1534 po
->prot_hook
.type
= proto
;
1535 dev_add_pack(&po
->prot_hook
);
1540 spin_lock_bh(&net
->packet
.sklist_lock
);
1541 sk_add_node_rcu(sk
, &net
->packet
.sklist
);
1542 sock_prot_inuse_add(net
, &packet_proto
, 1);
1543 spin_unlock_bh(&net
->packet
.sklist_lock
);
/*
 * Dequeue one skb from sk->sk_error_queue and hand its TX timestamp to the
 * user as a PACKET_TX_TIMESTAMP cmsg, then re-arm sk_err from the next
 * queued error skb, if any.
 * NOTE(review): opening brace, copy-length clamping, error paths, the
 * skb free and the return were dropped by this extract.
 */
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;

	skb = skb_dequeue(&sk->sk_error_queue);

	/* Caller's buffer was smaller than the queued packet. */
	msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	/* NOTE(review): the 'else' arm between these unlocks was elided. */
	spin_unlock_bh(&sk->sk_error_queue.lock);
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

/*
 * recvmsg() for both packet_ops and packet_ops_spkt.  Optionally prepends
 * a virtio_net_hdr (PACKET_VNET_HDR sockets), fills msg_name with the
 * link-layer address, and emits a PACKET_AUXDATA cmsg when enabled.
 * NOTE(review): this extract dropped the opening brace, 'copied'/'err'
 * declarations, several error/goto paths and returns; captured statements
 * kept verbatim.
 */
static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	/* Reject unsupported flag bits up front. */
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))

	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)

	/* Error-queue reads (TX timestamps) bypass the data path. */
	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 */

	/* Translate skb GSO/checksum state into a virtio_net_hdr prefix. */
	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
			/* NOTE(review): FCoE arm's body was elided here. */
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
			/* Non-GSO packets (else arm elided above): */
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,

	/*
	 *	If the address length field is there to be filled in, we fill
	 */
	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	/* (else arm — sockaddr_ll, sized by actual hardware address:) */
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	sock_recv_ts_and_drops(msg, sk, skb);

	/* NOTE(review): the 'if (msg->msg_name)' guard was elided here. */
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

	skb_free_datagram(sk, skb);
/*
 * getname() for SOCK_PACKET sockets: reports AF_PACKET plus the bound
 * device's name in sa_data.
 * NOTE(review): the strncpy and memset below are the if/else arms of a
 * 'dev found?' test whose branch lines were elided by this extract, as
 * were the opening brace, the 'peer not supported' check, rcu locking
 * and the return.
 */
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
	struct net_device *dev;
	struct sock *sk = sock->sk;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	/* Device found: expose its name (14 bytes of sa_data). */
		strncpy(uaddr->sa_data, dev->name, 14);
	/* No device bound: zero the name. */
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);
/*
 * getname() for SOCK_RAW/SOCK_DGRAM packet sockets: fills a sockaddr_ll
 * with the bound ifindex/protocol and the device's hardware address.
 * NOTE(review): opening brace, the 'peer' check, rcu_read_lock/unlock and
 * the if/else around the device lookup were elided by this extract.
 */
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	/* Device present: report its ARP hardware type and address. */
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	/* Device gone / unbound: */
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */

	/* Length depends on the actual hardware address length. */
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
/*
 * Apply (what=1) or revert (what=-1) one membership entry to a device:
 * multicast/unicast address add/del, promiscuity, or allmulti.
 * NOTE(review): the 'int what' parameter, braces, the add/del branch
 * lines and the -EINVAL returns after the alen checks were elided.
 */
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
		/* what > 0: join; otherwise leave. */
		return dev_mc_add(dev, i->addr);
		return dev_mc_del(dev, i->addr);
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
		return dev_uc_add(dev, i->addr);
		return dev_uc_del(dev, i->addr);
/*
 * Walk a socket's membership list and (un)apply every entry that refers
 * to this device — used from the netdev notifier on unregister.
 * NOTE(review): opening/closing braces were elided by this extract.
 */
static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
/*
 * PACKET_ADD_MEMBERSHIP: add a membership entry for this socket, bumping
 * the refcount if an identical entry already exists, otherwise linking a
 * new element and applying it to the device.
 * NOTE(review): rtnl locking, error returns, the refcount bump in the
 * duplicate branch, and the error-unwind after packet_dev_mc() failure
 * were elided by this extract.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;

	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);

	/* Address must fit the device's hardware address length. */
	if (mreq->mr_alen > dev->addr_len)

	i = kmalloc(sizeof(*i), GFP_KERNEL);

	/* Look for an existing identical membership. */
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			/* Free the new element ... */

	/* No duplicate: fill in and link the new entry. */
	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->next = po->mclist;
	err = packet_dev_mc(dev, i, 1);
	/* On failure, unlink what we just inserted. */
	po->mclist = i->next;
/*
 * PACKET_DROP_MEMBERSHIP: find the matching entry, drop one reference,
 * and when the count hits zero unlink it and revert it on the device.
 * NOTE(review): rtnl locking, the unlink/kfree of the entry and the
 * success return inside the match branch were elided by this extract.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
	struct packet_mclist *ml, **mlp;

	/* mlp tracks the link pointer so the match can be unlinked in O(1). */
	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;

				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				/* Device may already be gone; revert if not. */
					packet_dev_mc(dev, ml, -1);

	/* No matching membership was found. */
	return -EADDRNOTAVAIL;
/*
 * Drop every membership this socket holds — called on socket release.
 * NOTE(review): rtnl locking, the NULL-dev guard and the kfree(ml) at the
 * loop tail were elided by this extract.
 */
static void packet_flush_mclist(struct sock *sk)
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
			packet_dev_mc(dev, ml, -1);
/*
 * setsockopt() for SOL_PACKET.  Handles memberships, RX/TX rings, copy
 * threshold, tpacket version/reserve/loss, auxdata, origdev, vnet header
 * and timestamping options.
 * NOTE(review): the extract dropped the 'static int' return-type line's
 * join, the switch header, 'int val/ret/len' declarations, error returns
 * after the guards, 'break'/'return' statements, the PACKET_LOSS case
 * label and the closing braces.  Captured statements kept verbatim.
 */
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
		struct packet_mreq_max mreq;

		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
		if (len > sizeof(mreq))
		if (copy_from_user(&mreq, optval, len))
		/* mr_alen must lie within what the user actually supplied. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
			ret = packet_mc_drop(sk, &mreq);

	case PACKET_RX_RING:
	case PACKET_TX_RING:
		struct tpacket_req req;

		if (optlen < sizeof(req))
		/* Rings and PACKET_VNET_HDR are mutually exclusive. */
		if (pkt_sk(sk)->has_vnet_hdr)
		if (copy_from_user(&req, optval, sizeof(req)))
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);

	case PACKET_COPY_THRESH:
		if (optlen != sizeof(val))
		if (copy_from_user(&val, optval, sizeof(val)))
		pkt_sk(sk)->copy_thresh = val;

	case PACKET_VERSION:
		if (optlen != sizeof(val))
		/* Cannot change the header layout under a live ring. */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
		if (copy_from_user(&val, optval, sizeof(val)))
		po->tp_version = val;

	case PACKET_RESERVE:
		if (optlen != sizeof(val))
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
		if (copy_from_user(&val, optval, sizeof(val)))
		po->tp_reserve = val;

	/* NOTE(review): PACKET_LOSS case label elided here. */
		if (optlen != sizeof(val))
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
		if (copy_from_user(&val, optval, sizeof(val)))
		po->tp_loss = !!val;

	case PACKET_AUXDATA:
		if (optlen < sizeof(val))
		if (copy_from_user(&val, optval, sizeof(val)))
		po->auxdata = !!val;

	case PACKET_ORIGDEV:
		if (optlen < sizeof(val))
		if (copy_from_user(&val, optval, sizeof(val)))
		po->origdev = !!val;

	case PACKET_VNET_HDR:
		if (sock->type != SOCK_RAW)
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
		if (optlen < sizeof(val))
		if (copy_from_user(&val, optval, sizeof(val)))
		po->has_vnet_hdr = !!val;

	case PACKET_TIMESTAMP:
		if (optlen != sizeof(val))
		if (copy_from_user(&val, optval, sizeof(val)))
		po->tp_tstamp = val;

	/* default: unknown option */
	return -ENOPROTOOPT;
/*
 * getsockopt() for SOL_PACKET.  Copies out statistics (resetting them —
 * read-and-clear semantics) or the current value of the boolean/integer
 * options; 'len' is clamped to the option's natural size.
 * NOTE(review): 'int len', 'void *data', the switch header, the st
 * snapshot before the memset, data/len assignments, the PACKET_HDRLEN
 * case label, 'break's, braces and error returns were elided.
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))

	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* Snapshot and clear under the receive-queue lock. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* Historical quirk: tp_packets includes the drops. */
		st.tp_packets += st.tp_drops;

	case PACKET_AUXDATA:
		if (len > sizeof(int))

	case PACKET_ORIGDEV:
		if (len > sizeof(int))

	case PACKET_VNET_HDR:
		if (len > sizeof(int))
		val = po->has_vnet_hdr;

	case PACKET_VERSION:
		if (len > sizeof(int))
		val = po->tp_version;

	/* NOTE(review): PACKET_HDRLEN case label elided here; 'val' is the
	 * frame header length for the version the user passes in. */
		if (len > sizeof(int))
		if (copy_from_user(&val, optval, len))
			val = sizeof(struct tpacket_hdr);
			val = sizeof(struct tpacket2_hdr);

	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;

	/* NOTE(review): PACKET_LOSS case label elided here. */
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);

	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
		val = po->tp_tstamp;

	return -ENOPROTOOPT;

	/* Common copy-out tail. */
	if (put_user(len, optlen))
	if (copy_to_user(optval, data, len))
/*
 * Netdevice notifier: on NETDEV_UNREGISTER drop memberships and unhook
 * the protocol handler from the vanishing device; on NETDEV_UP rehook a
 * socket that was bound and got detached while the device was down.
 * NOTE(review): rcu locking, the inner switch header, NETDEV_DOWN
 * fallthrough, po->running/ifindex updates and the NOTIFY_DONE return
 * were elided by this extract.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	/* Walk every packet socket in this netns (RCU-protected list). */
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		case NETDEV_UNREGISTER:
			/* Revert any memberships held on this device. */
				packet_dev_mclist(dev, po->mclist, -1);

			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
					__dev_remove_pack(&po->prot_hook);
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				if (msg == NETDEV_UNREGISTER) {
					po->prot_hook.dev = NULL;
				spin_unlock(&po->bind_lock);

		/* NETDEV_UP (case label elided): */
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
				spin_unlock(&po->bind_lock);
/*
 * ioctl() handler: queue occupancy (SIOCOUTQ/SIOCINQ), socket timestamps,
 * and delegation of interface-address ioctls to the inet layer (when
 * CONFIG_INET); everything else is -ENOIOCTLCMD.
 * NOTE(review): the 'unsigned long arg' parameter join, the switch
 * header, SIOCOUTQ/SIOCINQ/SIOCGSTAMP(NS) case labels, the 'amount'
 * computation from skb->len and several case labels were elided.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
	struct sock *sk = sock->sk;

	/* SIOCOUTQ (label elided): bytes queued for transmit. */
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);

	/* SIOCINQ (label elided): size of the next queued packet. */
		struct sk_buff *skb;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);

	/* SIOCGSTAMP / SIOCGSTAMPNS (labels elided): */
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
		/* These operate on IP config; let the inet layer handle them. */
		return inet_dgram_ops.ioctl(sock, cmd, arg);

	return -ENOIOCTLCMD;
/*
 * poll() handler: on top of the generic datagram mask, report readability
 * when the RX ring has a frame for the user and writability when the TX
 * ring has a free slot.
 * NOTE(review): the 'poll_table *wait' parameter join, closing braces and
 * the 'return mask;' were elided by this extract.
 */
static unsigned int packet_poll(struct file *file, struct socket *sock,
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		/* Previous frame no longer kernel-owned => data is ready. */
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	spin_unlock_bh(&sk->sk_write_queue.lock);
/* Dirty? Well, I still did not learn better way to account
 */

/*
 * vm_operations open hook for mmap'ed ring pages: count one more active
 * mapping so packet_set_ring() can refuse to tear the ring down while
 * userspace still has it mapped.
 * NOTE(review): opening brace and the 'if (sk)' guard (upstream guards
 * the increment) were elided by this extract.
 */
static void packet_mm_open(struct vm_area_struct *vma)
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

		atomic_inc(&pkt_sk(sk)->mapped);
/*
 * vm_operations close hook: drop the mapping count taken in
 * packet_mm_open().
 * NOTE(review): opening brace and the 'if (sk)' guard were elided.
 */
static void packet_mm_close(struct vm_area_struct *vma)
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

		atomic_dec(&pkt_sk(sk)->mapped);
/* VMA callbacks for ring mappings — only open/close accounting is needed;
 * pages are inserted eagerly by packet_mmap(), so no fault handler.
 * NOTE(review): closing brace of the initializer was elided. */
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
/*
 * Release a page vector allocated by alloc_pg_vec(): each buffer came
 * either from vmalloc or from __get_free_pages, so free accordingly.
 * NOTE(review): the 'unsigned int len' parameter join, 'int i;', the
 * else branch line before free_pages and closing braces were elided.
 */
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			/* else: page allocator memory */
				free_pages((unsigned long)pg_vec[i].buffer,
			pg_vec[i].buffer = NULL;
/*
 * Allocate one ring block of 2^order pages, trying progressively harder:
 * cheap non-retrying page allocation, then vmalloc, then page allocation
 * with retries (may push into swap).  Returns NULL on total failure.
 * NOTE(review): opening brace and the early-return checks between the
 * attempts were elided by this extract.
 */
static inline char *alloc_one_pg_vec_page(unsigned long order)
	char *buffer = NULL;
	/* First attempt: don't let a large ring request thrash the system. */
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	/*
	 * vmalloc failed, lets dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);

	/*
	 * complete and utter failure
	 */
/*
 * Allocate the per-block page vector for a ring: one alloc_one_pg_vec_page
 * buffer per tp_block_nr, unwinding everything on partial failure.
 * NOTE(review): 'struct pgv *pg_vec;', 'int i;', the out/out_free_pgvec
 * labels and the returns were elided by this extract.
 */
static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
	unsigned int block_nr = req->tp_block_nr;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;

	/* out_free_pgvec (label elided): free what was allocated so far. */
	free_pg_vec(pg_vec, order, block_nr);
/*
 * Install (tp_block_nr != 0) or tear down (req all zero) the RX or TX
 * mmap ring.  Validates geometry, allocates the page vector, detaches
 * the protocol hook while swapping the ring in under pg_vec_lock, then
 * re-attaches.  The ring cannot be replaced while userspace has it
 * mapped (po->mapped) or TX frames are in flight (rb->pending).
 * NOTE(review): error returns after most guards, switch case labels for
 * tp_version, the ifindex/num reload between the two bind_lock sections,
 * 'release:'/'out:' labels and the final return were elided.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	/* Busy checks: mapped by userspace, or TX completions outstanding. */
	if (atomic_read(&po->mapped))
	if (atomic_read(&rb->pending))

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		if (unlikely(rb->pg_vec))

		/* Header length depends on the negotiated tpacket version. */
		switch (po->tp_version) {
			po->tp_hdrlen = TPACKET_HDRLEN;
			po->tp_hdrlen = TPACKET2_HDRLEN;

		if (unlikely((int)req->tp_block_size <= 0))
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
		/* Frame count must exactly tile the blocks. */
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=

		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))

	/* Teardown path: a zero-sized request must be fully zero. */
	if (unlikely(req->tp_frame_nr))

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
		__dev_remove_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		spin_lock_bh(&rb_queue->lock);
		/* swap() leaves the old vector in pg_vec for freeing below. */
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	mutex_unlock(&po->pg_vec_lock);

	/* Re-attach the protocol hook if it was running before. */
	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		dev_add_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	/* Free the displaced (or unused) vector. */
		free_pg_vec(pg_vec, order, req->tp_block_nr);
/*
 * mmap() both rings (RX then TX, contiguously) into the caller's VMA by
 * inserting every ring page with vm_insert_page(); the VMA size must
 * exactly match the combined ring size.
 * NOTE(review): 'int err', loop-local declarations, the expected_size
 * accumulation tail, per-page 'start += PAGE_SIZE' advances, error
 * unwinding and the return were elided by this extract.
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;

	mutex_lock(&po->pg_vec_lock);

	/* Sum the byte size of both rings (rx_ring and tx_ring are adjacent). */
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		expected_size += rb->pg_vec_len

	if (expected_size == 0)

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)

		for (i = 0; i < rb->pg_vec_len; i++) {
			void *kaddr = rb->pg_vec[i].buffer;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);

	/* Success: account the mapping and install the open/close hooks. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;

	mutex_unlock(&po->pg_vec_lock);
/* proto_ops for legacy SOCK_PACKET sockets (sockaddr_pkt addressing):
 * no setsockopt/getsockopt, no mmap, generic datagram poll.
 * NOTE(review): closing brace of the initializer was elided. */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
/* proto_ops for SOCK_RAW/SOCK_DGRAM packet sockets: full option, ring
 * (mmap) and ring-aware poll support.
 * NOTE(review): closing brace of the initializer was elided. */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
/* Registered with sock_register() so socket(PF_PACKET, ...) reaches
 * packet_create().
 * NOTE(review): closing brace of the initializer was elided. */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
/* Notifier block routing netdevice events to packet_notifier().
 * NOTE(review): closing brace of the initializer was elided. */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
2679 #ifdef CONFIG_PROC_FS
/*
 * seq_file start: position the RCU iterator over this netns' packet
 * socket list (SEQ_START_TOKEN for the header row).
 * NOTE(review): opening brace and the __acquires(RCU)/rcu_read_lock()
 * lines were elided by this extract.
 */
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	struct net *net = seq_file_net(seq);

	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
/*
 * seq_file next: advance the RCU hlist iterator.
 * NOTE(review): opening/closing braces were elided by this extract.
 */
static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
/*
 * seq_file stop hook — releases the RCU read side taken in start.
 * NOTE(review): the body (rcu_read_unlock()) was elided by this extract.
 */
static void packet_seq_stop(struct seq_file *seq, void *v)
/*
 * seq_file show: one header row, then one line per packet socket with
 * refcount, type, protocol, ifindex, running flag, rmem, uid and inode.
 * NOTE(review): the seq_printf() call head and several of its argument
 * lines, plus the 'return 0;', were elided by this extract.
 */
static int packet_seq_show(struct seq_file *seq, void *v)
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
	/* (else arm elided:) */
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		/* seq_printf format and remaining arguments: */
			   "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
			   atomic_read(&s->sk_refcnt),
			   atomic_read(&s->sk_rmem_alloc),
/* Iterator callbacks for /proc/net/packet.
 * NOTE(review): closing brace of the initializer was elided. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
/*
 * open() for /proc/net/packet: netns-aware seq_file setup.
 * NOTE(review): opening/closing braces were elided by this extract.
 */
static int packet_seq_open(struct inode *inode, struct file *file)
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
/* File operations for /proc/net/packet.
 * NOTE(review): the '.read = seq_read,' line and the closing brace of
 * the initializer were elided by this extract. */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
/*
 * Per-netns init: set up the socket list and its lock, and create
 * /proc/net/packet.
 * NOTE(review): braces and the return statements (-ENOMEM / 0) were
 * elided by this extract.
 */
static int __net_init packet_net_init(struct net *net)
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
/*
 * Per-netns teardown: remove /proc/net/packet.
 * NOTE(review): braces were elided by this extract.
 */
static void __net_exit packet_net_exit(struct net *net)
	proc_net_remove(net, "packet");
/* Pernet registration so every network namespace gets its own socket
 * list and proc entry.
 * NOTE(review): closing brace of the initializer was elided. */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
/*
 * Module exit: unwind packet_init() registrations in reverse order.
 * NOTE(review): braces were elided by this extract.
 */
static void __exit packet_exit(void)
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
/*
 * Module init: register the proto, the PF_PACKET family, the pernet ops
 * and the netdevice notifier.
 * NOTE(review): the rc check/early-out and the 'return rc;' tail were
 * elided by this extract.
 */
static int __init packet_init(void)
	int rc = proto_register(&packet_proto, 0);

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
/* Module entry/exit points and metadata; the alias lets the kernel
 * autoload this module when a PF_PACKET socket is first requested. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);