2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PACKET - implements raw packet sockets.
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
35 * Ulises Alonso : Frame number limit removal and
36 * packet_set_ring memory leak.
37 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
40 * byte arrays at the end of sockaddr_ll
42 * Johann Baudy : Added TX RING.
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
51 #include <linux/types.h>
53 #include <linux/capability.h>
54 #include <linux/fcntl.h>
55 #include <linux/socket.h>
57 #include <linux/inet.h>
58 #include <linux/netdevice.h>
59 #include <linux/if_packet.h>
60 #include <linux/wireless.h>
61 #include <linux/kernel.h>
62 #include <linux/kmod.h>
63 #include <linux/slab.h>
64 #include <linux/vmalloc.h>
65 #include <net/net_namespace.h>
67 #include <net/protocol.h>
68 #include <linux/skbuff.h>
70 #include <linux/errno.h>
71 #include <linux/timer.h>
72 #include <asm/system.h>
73 #include <asm/uaccess.h>
74 #include <asm/ioctls.h>
76 #include <asm/cacheflush.h>
78 #include <linux/proc_fs.h>
79 #include <linux/seq_file.h>
80 #include <linux/poll.h>
81 #include <linux/module.h>
82 #include <linux/init.h>
83 #include <linux/mutex.h>
84 #include <linux/if_vlan.h>
85 #include <linux/virtio_net.h>
86 #include <linux/errqueue.h>
87 #include <linux/net_tstamp.h>
90 #include <net/inet_common.h>
95 - if device has no dev->hard_header routine, it adds and removes ll header
96 inside itself. In this case ll header is invisible outside of device,
97 but higher levels still should reserve dev->hard_header_len.
98 Some devices are clever enough to reallocate the skb when the header
99 will not fit in the reserved space (tunnel); other ones are silly
101 - packet socket receives packets with pulled ll header,
102 so that SOCK_RAW should push it back.
107 Incoming, dev->hard_header!=NULL
108 mac_header -> ll header
111 Outgoing, dev->hard_header!=NULL
112 mac_header -> ll header
115 Incoming, dev->hard_header==NULL
116 mac_header -> UNKNOWN position. It is very likely that it points to the ll
117 header. PPP does this, which is wrong because it introduces
118 asymmetry between rx and tx paths.
121 Outgoing, dev->hard_header==NULL
122 mac_header -> data. ll header is still not built!
126 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
132 dev->hard_header != NULL
133 mac_header -> ll header
136 dev->hard_header == NULL (ll header is added by device, we cannot control it)
140 We should set nh.raw on output to the correct position,
141 packet classifier depends on it.
144 /* Private packet socket structures. */
146 struct packet_mclist
{
147 struct packet_mclist
*next
;
152 unsigned char addr
[MAX_ADDR_LEN
];
154 /* identical to struct packet_mreq except it has
155 * a longer address field.
157 struct packet_mreq_max
{
159 unsigned short mr_type
;
160 unsigned short mr_alen
;
161 unsigned char mr_address
[MAX_ADDR_LEN
];
164 static int packet_set_ring(struct sock
*sk
, struct tpacket_req
*req
,
165 int closing
, int tx_ring
);
171 struct packet_ring_buffer
{
174 unsigned int frames_per_block
;
175 unsigned int frame_size
;
176 unsigned int frame_max
;
178 unsigned int pg_vec_order
;
179 unsigned int pg_vec_pages
;
180 unsigned int pg_vec_len
;
186 static int tpacket_snd(struct packet_sock
*po
, struct msghdr
*msg
);
188 static void packet_flush_mclist(struct sock
*sk
);
191 /* struct sock has to be the first member of packet_sock */
193 struct tpacket_stats stats
;
194 struct packet_ring_buffer rx_ring
;
195 struct packet_ring_buffer tx_ring
;
197 spinlock_t bind_lock
;
198 struct mutex pg_vec_lock
;
199 unsigned int running
:1, /* prot_hook is attached*/
203 int ifindex
; /* bound device */
205 struct packet_mclist
*mclist
;
207 enum tpacket_versions tp_version
;
208 unsigned int tp_hdrlen
;
209 unsigned int tp_reserve
;
210 unsigned int tp_loss
:1;
211 unsigned int tp_tstamp
;
212 struct packet_type prot_hook ____cacheline_aligned_in_smp
;
215 struct packet_skb_cb
{
216 unsigned int origlen
;
218 struct sockaddr_pkt pkt
;
219 struct sockaddr_ll ll
;
/* View an skb's control buffer (the cb member) as a struct packet_skb_cb. */
#define PACKET_SKB_CB(skb_ptr) ((struct packet_skb_cb *)((skb_ptr)->cb))
/*
 * Reinterpret a generic socket pointer as a packet socket pointer.
 * This is a plain cast with no checking of sk; it relies on struct sock
 * being the first member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
230 /* register_prot_hook must be invoked with the po->bind_lock held,
231 * or from a context in which asynchronous accesses to the packet
232 * socket are not possible (packet_create()).
234 static void register_prot_hook(struct sock
*sk
)
236 struct packet_sock
*po
= pkt_sk(sk
);
238 dev_add_pack(&po
->prot_hook
);
244 /* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
245 * held. If the sync parameter is true, we will temporarily drop
246 * the po->bind_lock and do a synchronize_net to make sure no
247 * asynchronous packet processing paths still refer to the elements
248 * of po->prot_hook. If the sync parameter is false, it is the
249 * caller's responsibility to take care of this.
251 static void __unregister_prot_hook(struct sock
*sk
, bool sync
)
253 struct packet_sock
*po
= pkt_sk(sk
);
256 __dev_remove_pack(&po
->prot_hook
);
260 spin_unlock(&po
->bind_lock
);
262 spin_lock(&po
->bind_lock
);
266 static void unregister_prot_hook(struct sock
*sk
, bool sync
)
268 struct packet_sock
*po
= pkt_sk(sk
);
271 __unregister_prot_hook(sk
, sync
);
274 static inline __pure
struct page
*pgv_to_page(void *addr
)
276 if (is_vmalloc_addr(addr
))
277 return vmalloc_to_page(addr
);
278 return virt_to_page(addr
);
281 static void __packet_set_status(struct packet_sock
*po
, void *frame
, int status
)
284 struct tpacket_hdr
*h1
;
285 struct tpacket2_hdr
*h2
;
290 switch (po
->tp_version
) {
292 h
.h1
->tp_status
= status
;
293 flush_dcache_page(pgv_to_page(&h
.h1
->tp_status
));
296 h
.h2
->tp_status
= status
;
297 flush_dcache_page(pgv_to_page(&h
.h2
->tp_status
));
300 pr_err("TPACKET version not supported\n");
307 static int __packet_get_status(struct packet_sock
*po
, void *frame
)
310 struct tpacket_hdr
*h1
;
311 struct tpacket2_hdr
*h2
;
318 switch (po
->tp_version
) {
320 flush_dcache_page(pgv_to_page(&h
.h1
->tp_status
));
321 return h
.h1
->tp_status
;
323 flush_dcache_page(pgv_to_page(&h
.h2
->tp_status
));
324 return h
.h2
->tp_status
;
326 pr_err("TPACKET version not supported\n");
332 static void *packet_lookup_frame(struct packet_sock
*po
,
333 struct packet_ring_buffer
*rb
,
334 unsigned int position
,
337 unsigned int pg_vec_pos
, frame_offset
;
339 struct tpacket_hdr
*h1
;
340 struct tpacket2_hdr
*h2
;
344 pg_vec_pos
= position
/ rb
->frames_per_block
;
345 frame_offset
= position
% rb
->frames_per_block
;
347 h
.raw
= rb
->pg_vec
[pg_vec_pos
].buffer
+
348 (frame_offset
* rb
->frame_size
);
350 if (status
!= __packet_get_status(po
, h
.raw
))
356 static inline void *packet_current_frame(struct packet_sock
*po
,
357 struct packet_ring_buffer
*rb
,
360 return packet_lookup_frame(po
, rb
, rb
->head
, status
);
363 static inline void *packet_previous_frame(struct packet_sock
*po
,
364 struct packet_ring_buffer
*rb
,
367 unsigned int previous
= rb
->head
? rb
->head
- 1 : rb
->frame_max
;
368 return packet_lookup_frame(po
, rb
, previous
, status
);
371 static inline void packet_increment_head(struct packet_ring_buffer
*buff
)
373 buff
->head
= buff
->head
!= buff
->frame_max
? buff
->head
+1 : 0;
376 static void packet_sock_destruct(struct sock
*sk
)
378 skb_queue_purge(&sk
->sk_error_queue
);
380 WARN_ON(atomic_read(&sk
->sk_rmem_alloc
));
381 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
383 if (!sock_flag(sk
, SOCK_DEAD
)) {
384 pr_err("Attempt to release alive packet socket: %p\n", sk
);
388 sk_refcnt_debug_dec(sk
);
392 static const struct proto_ops packet_ops
;
394 static const struct proto_ops packet_ops_spkt
;
396 static int packet_rcv_spkt(struct sk_buff
*skb
, struct net_device
*dev
,
397 struct packet_type
*pt
, struct net_device
*orig_dev
)
400 struct sockaddr_pkt
*spkt
;
403 * When we registered the protocol we saved the socket in the data
404 * field for just this event.
407 sk
= pt
->af_packet_priv
;
410 * Yank back the headers [hope the device set this
411 * right or kerboom...]
413 * Incoming packets have ll header pulled,
416 * For outgoing ones skb->data == skb_mac_header(skb)
417 * so that this procedure is noop.
420 if (skb
->pkt_type
== PACKET_LOOPBACK
)
423 if (!net_eq(dev_net(dev
), sock_net(sk
)))
426 skb
= skb_share_check(skb
, GFP_ATOMIC
);
430 /* drop any routing info */
433 /* drop conntrack reference */
436 spkt
= &PACKET_SKB_CB(skb
)->sa
.pkt
;
438 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
441 * The SOCK_PACKET socket receives _all_ frames.
444 spkt
->spkt_family
= dev
->type
;
445 strlcpy(spkt
->spkt_device
, dev
->name
, sizeof(spkt
->spkt_device
));
446 spkt
->spkt_protocol
= skb
->protocol
;
449 * Charge the memory to the socket. This is done specifically
450 * to prevent sockets using all the memory up.
453 if (sock_queue_rcv_skb(sk
, skb
) == 0)
464 * Output a raw packet to a device layer. This bypasses all the other
465 * protocol layers and you must therefore supply it with a complete frame
468 static int packet_sendmsg_spkt(struct kiocb
*iocb
, struct socket
*sock
,
469 struct msghdr
*msg
, size_t len
)
471 struct sock
*sk
= sock
->sk
;
472 struct sockaddr_pkt
*saddr
= (struct sockaddr_pkt
*)msg
->msg_name
;
473 struct sk_buff
*skb
= NULL
;
474 struct net_device
*dev
;
479 * Get and verify the address.
483 if (msg
->msg_namelen
< sizeof(struct sockaddr
))
485 if (msg
->msg_namelen
== sizeof(struct sockaddr_pkt
))
486 proto
= saddr
->spkt_protocol
;
488 return -ENOTCONN
; /* SOCK_PACKET must be sent giving an address */
491 * Find the device first to size check it
494 saddr
->spkt_device
[13] = 0;
497 dev
= dev_get_by_name_rcu(sock_net(sk
), saddr
->spkt_device
);
503 if (!(dev
->flags
& IFF_UP
))
507 * You may not queue a frame bigger than the mtu. This is the lowest level
508 * raw protocol and you must do your own fragmentation at this level.
512 if (len
> dev
->mtu
+ dev
->hard_header_len
+ VLAN_HLEN
)
516 size_t reserved
= LL_RESERVED_SPACE(dev
);
517 unsigned int hhlen
= dev
->header_ops
? dev
->hard_header_len
: 0;
520 skb
= sock_wmalloc(sk
, len
+ reserved
, 0, GFP_KERNEL
);
523 /* FIXME: Save some space for broken drivers that write a hard
524 * header at transmission time by themselves. PPP is the notable
525 * one here. This should really be fixed at the driver level.
527 skb_reserve(skb
, reserved
);
528 skb_reset_network_header(skb
);
530 /* Try to align data part correctly */
535 skb_reset_network_header(skb
);
537 err
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
543 if (len
> (dev
->mtu
+ dev
->hard_header_len
)) {
544 /* Earlier code assumed this would be a VLAN pkt,
545 * double-check this now that we have the actual
549 skb_reset_mac_header(skb
);
551 if (ehdr
->h_proto
!= htons(ETH_P_8021Q
)) {
557 skb
->protocol
= proto
;
559 skb
->priority
= sk
->sk_priority
;
560 skb
->mark
= sk
->sk_mark
;
561 err
= sock_tx_timestamp(sk
, &skb_shinfo(skb
)->tx_flags
);
576 static inline unsigned int run_filter(const struct sk_buff
*skb
,
577 const struct sock
*sk
,
580 struct sk_filter
*filter
;
583 filter
= rcu_dereference(sk
->sk_filter
);
585 res
= SK_RUN_FILTER(filter
, skb
);
592 * This function does lazy skb cloning in the hope that most packets
593 * are discarded by BPF.
595 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
596 * and skb->cb are mangled. It works because (and until) packets
597 * falling here are owned by current CPU. Output packets are cloned
598 * by dev_queue_xmit_nit(), input packets are processed by net_bh
599 sequentially, so that if we return skb to original state on exit,
600 * we will not harm anyone.
603 static int packet_rcv(struct sk_buff
*skb
, struct net_device
*dev
,
604 struct packet_type
*pt
, struct net_device
*orig_dev
)
607 struct sockaddr_ll
*sll
;
608 struct packet_sock
*po
;
609 u8
*skb_head
= skb
->data
;
610 int skb_len
= skb
->len
;
611 unsigned int snaplen
, res
;
613 if (skb
->pkt_type
== PACKET_LOOPBACK
)
616 sk
= pt
->af_packet_priv
;
619 if (!net_eq(dev_net(dev
), sock_net(sk
)))
624 if (dev
->header_ops
) {
625 /* The device has an explicit notion of ll header,
626 * exported to higher levels.
628 * Otherwise, the device hides details of its frame
629 * structure, so that corresponding packet head is
630 * never delivered to user.
632 if (sk
->sk_type
!= SOCK_DGRAM
)
633 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
634 else if (skb
->pkt_type
== PACKET_OUTGOING
) {
635 /* Special case: outgoing packets have ll header at head */
636 skb_pull(skb
, skb_network_offset(skb
));
642 res
= run_filter(skb
, sk
, snaplen
);
648 if (atomic_read(&sk
->sk_rmem_alloc
) + skb
->truesize
>=
649 (unsigned)sk
->sk_rcvbuf
)
652 if (skb_shared(skb
)) {
653 struct sk_buff
*nskb
= skb_clone(skb
, GFP_ATOMIC
);
657 if (skb_head
!= skb
->data
) {
658 skb
->data
= skb_head
;
665 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb
)) + MAX_ADDR_LEN
- 8 >
668 sll
= &PACKET_SKB_CB(skb
)->sa
.ll
;
669 sll
->sll_family
= AF_PACKET
;
670 sll
->sll_hatype
= dev
->type
;
671 sll
->sll_protocol
= skb
->protocol
;
672 sll
->sll_pkttype
= skb
->pkt_type
;
673 if (unlikely(po
->origdev
))
674 sll
->sll_ifindex
= orig_dev
->ifindex
;
676 sll
->sll_ifindex
= dev
->ifindex
;
678 sll
->sll_halen
= dev_parse_header(skb
, sll
->sll_addr
);
680 PACKET_SKB_CB(skb
)->origlen
= skb
->len
;
682 if (pskb_trim(skb
, snaplen
))
685 skb_set_owner_r(skb
, sk
);
689 /* drop conntrack reference */
692 spin_lock(&sk
->sk_receive_queue
.lock
);
693 po
->stats
.tp_packets
++;
694 skb
->dropcount
= atomic_read(&sk
->sk_drops
);
695 __skb_queue_tail(&sk
->sk_receive_queue
, skb
);
696 spin_unlock(&sk
->sk_receive_queue
.lock
);
697 sk
->sk_data_ready(sk
, skb
->len
);
701 po
->stats
.tp_drops
= atomic_inc_return(&sk
->sk_drops
);
704 if (skb_head
!= skb
->data
&& skb_shared(skb
)) {
705 skb
->data
= skb_head
;
713 static int tpacket_rcv(struct sk_buff
*skb
, struct net_device
*dev
,
714 struct packet_type
*pt
, struct net_device
*orig_dev
)
717 struct packet_sock
*po
;
718 struct sockaddr_ll
*sll
;
720 struct tpacket_hdr
*h1
;
721 struct tpacket2_hdr
*h2
;
724 u8
*skb_head
= skb
->data
;
725 int skb_len
= skb
->len
;
726 unsigned int snaplen
, res
;
727 unsigned long status
= TP_STATUS_LOSING
|TP_STATUS_USER
;
728 unsigned short macoff
, netoff
, hdrlen
;
729 struct sk_buff
*copy_skb
= NULL
;
732 struct skb_shared_hwtstamps
*shhwtstamps
= skb_hwtstamps(skb
);
734 if (skb
->pkt_type
== PACKET_LOOPBACK
)
737 sk
= pt
->af_packet_priv
;
740 if (!net_eq(dev_net(dev
), sock_net(sk
)))
743 if (dev
->header_ops
) {
744 if (sk
->sk_type
!= SOCK_DGRAM
)
745 skb_push(skb
, skb
->data
- skb_mac_header(skb
));
746 else if (skb
->pkt_type
== PACKET_OUTGOING
) {
747 /* Special case: outgoing packets have ll header at head */
748 skb_pull(skb
, skb_network_offset(skb
));
752 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
753 status
|= TP_STATUS_CSUMNOTREADY
;
757 res
= run_filter(skb
, sk
, snaplen
);
763 if (sk
->sk_type
== SOCK_DGRAM
) {
764 macoff
= netoff
= TPACKET_ALIGN(po
->tp_hdrlen
) + 16 +
767 unsigned maclen
= skb_network_offset(skb
);
768 netoff
= TPACKET_ALIGN(po
->tp_hdrlen
+
769 (maclen
< 16 ? 16 : maclen
)) +
771 macoff
= netoff
- maclen
;
774 if (macoff
+ snaplen
> po
->rx_ring
.frame_size
) {
775 if (po
->copy_thresh
&&
776 atomic_read(&sk
->sk_rmem_alloc
) + skb
->truesize
<
777 (unsigned)sk
->sk_rcvbuf
) {
778 if (skb_shared(skb
)) {
779 copy_skb
= skb_clone(skb
, GFP_ATOMIC
);
781 copy_skb
= skb_get(skb
);
782 skb_head
= skb
->data
;
785 skb_set_owner_r(copy_skb
, sk
);
787 snaplen
= po
->rx_ring
.frame_size
- macoff
;
788 if ((int)snaplen
< 0)
792 spin_lock(&sk
->sk_receive_queue
.lock
);
793 h
.raw
= packet_current_frame(po
, &po
->rx_ring
, TP_STATUS_KERNEL
);
796 packet_increment_head(&po
->rx_ring
);
797 po
->stats
.tp_packets
++;
799 status
|= TP_STATUS_COPY
;
800 __skb_queue_tail(&sk
->sk_receive_queue
, copy_skb
);
802 if (!po
->stats
.tp_drops
)
803 status
&= ~TP_STATUS_LOSING
;
804 spin_unlock(&sk
->sk_receive_queue
.lock
);
806 skb_copy_bits(skb
, 0, h
.raw
+ macoff
, snaplen
);
808 switch (po
->tp_version
) {
810 h
.h1
->tp_len
= skb
->len
;
811 h
.h1
->tp_snaplen
= snaplen
;
812 h
.h1
->tp_mac
= macoff
;
813 h
.h1
->tp_net
= netoff
;
814 if ((po
->tp_tstamp
& SOF_TIMESTAMPING_SYS_HARDWARE
)
815 && shhwtstamps
->syststamp
.tv64
)
816 tv
= ktime_to_timeval(shhwtstamps
->syststamp
);
817 else if ((po
->tp_tstamp
& SOF_TIMESTAMPING_RAW_HARDWARE
)
818 && shhwtstamps
->hwtstamp
.tv64
)
819 tv
= ktime_to_timeval(shhwtstamps
->hwtstamp
);
820 else if (skb
->tstamp
.tv64
)
821 tv
= ktime_to_timeval(skb
->tstamp
);
823 do_gettimeofday(&tv
);
824 h
.h1
->tp_sec
= tv
.tv_sec
;
825 h
.h1
->tp_usec
= tv
.tv_usec
;
826 hdrlen
= sizeof(*h
.h1
);
829 h
.h2
->tp_len
= skb
->len
;
830 h
.h2
->tp_snaplen
= snaplen
;
831 h
.h2
->tp_mac
= macoff
;
832 h
.h2
->tp_net
= netoff
;
833 if ((po
->tp_tstamp
& SOF_TIMESTAMPING_SYS_HARDWARE
)
834 && shhwtstamps
->syststamp
.tv64
)
835 ts
= ktime_to_timespec(shhwtstamps
->syststamp
);
836 else if ((po
->tp_tstamp
& SOF_TIMESTAMPING_RAW_HARDWARE
)
837 && shhwtstamps
->hwtstamp
.tv64
)
838 ts
= ktime_to_timespec(shhwtstamps
->hwtstamp
);
839 else if (skb
->tstamp
.tv64
)
840 ts
= ktime_to_timespec(skb
->tstamp
);
843 h
.h2
->tp_sec
= ts
.tv_sec
;
844 h
.h2
->tp_nsec
= ts
.tv_nsec
;
845 if (vlan_tx_tag_present(skb
)) {
846 h
.h2
->tp_vlan_tci
= vlan_tx_tag_get(skb
);
847 status
|= TP_STATUS_VLAN_VALID
;
849 h
.h2
->tp_vlan_tci
= 0;
851 h
.h2
->tp_padding
= 0;
852 hdrlen
= sizeof(*h
.h2
);
858 sll
= h
.raw
+ TPACKET_ALIGN(hdrlen
);
859 sll
->sll_halen
= dev_parse_header(skb
, sll
->sll_addr
);
860 sll
->sll_family
= AF_PACKET
;
861 sll
->sll_hatype
= dev
->type
;
862 sll
->sll_protocol
= skb
->protocol
;
863 sll
->sll_pkttype
= skb
->pkt_type
;
864 if (unlikely(po
->origdev
))
865 sll
->sll_ifindex
= orig_dev
->ifindex
;
867 sll
->sll_ifindex
= dev
->ifindex
;
869 __packet_set_status(po
, h
.raw
, status
);
871 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
875 end
= (u8
*)PAGE_ALIGN((unsigned long)h
.raw
+ macoff
+ snaplen
);
876 for (start
= h
.raw
; start
< end
; start
+= PAGE_SIZE
)
877 flush_dcache_page(pgv_to_page(start
));
881 sk
->sk_data_ready(sk
, 0);
884 if (skb_head
!= skb
->data
&& skb_shared(skb
)) {
885 skb
->data
= skb_head
;
893 po
->stats
.tp_drops
++;
894 spin_unlock(&sk
->sk_receive_queue
.lock
);
896 sk
->sk_data_ready(sk
, 0);
901 static void tpacket_destruct_skb(struct sk_buff
*skb
)
903 struct packet_sock
*po
= pkt_sk(skb
->sk
);
908 if (likely(po
->tx_ring
.pg_vec
)) {
909 ph
= skb_shinfo(skb
)->destructor_arg
;
910 BUG_ON(__packet_get_status(po
, ph
) != TP_STATUS_SENDING
);
911 BUG_ON(atomic_read(&po
->tx_ring
.pending
) == 0);
912 atomic_dec(&po
->tx_ring
.pending
);
913 __packet_set_status(po
, ph
, TP_STATUS_AVAILABLE
);
919 static int tpacket_fill_skb(struct packet_sock
*po
, struct sk_buff
*skb
,
920 void *frame
, struct net_device
*dev
, int size_max
,
921 __be16 proto
, unsigned char *addr
)
924 struct tpacket_hdr
*h1
;
925 struct tpacket2_hdr
*h2
;
928 int to_write
, offset
, len
, tp_len
, nr_frags
, len_max
;
929 struct socket
*sock
= po
->sk
.sk_socket
;
936 skb
->protocol
= proto
;
938 skb
->priority
= po
->sk
.sk_priority
;
939 skb
->mark
= po
->sk
.sk_mark
;
940 skb_shinfo(skb
)->destructor_arg
= ph
.raw
;
942 switch (po
->tp_version
) {
944 tp_len
= ph
.h2
->tp_len
;
947 tp_len
= ph
.h1
->tp_len
;
950 if (unlikely(tp_len
> size_max
)) {
951 pr_err("packet size is too long (%d > %d)\n", tp_len
, size_max
);
955 skb_reserve(skb
, LL_RESERVED_SPACE(dev
));
956 skb_reset_network_header(skb
);
958 data
= ph
.raw
+ po
->tp_hdrlen
- sizeof(struct sockaddr_ll
);
961 if (sock
->type
== SOCK_DGRAM
) {
962 err
= dev_hard_header(skb
, dev
, ntohs(proto
), addr
,
964 if (unlikely(err
< 0))
966 } else if (dev
->hard_header_len
) {
967 /* net device doesn't like empty head */
968 if (unlikely(tp_len
<= dev
->hard_header_len
)) {
969 pr_err("packet size is too short (%d < %d)\n",
970 tp_len
, dev
->hard_header_len
);
974 skb_push(skb
, dev
->hard_header_len
);
975 err
= skb_store_bits(skb
, 0, data
,
976 dev
->hard_header_len
);
980 data
+= dev
->hard_header_len
;
981 to_write
-= dev
->hard_header_len
;
985 offset
= offset_in_page(data
);
986 len_max
= PAGE_SIZE
- offset
;
987 len
= ((to_write
> len_max
) ? len_max
: to_write
);
989 skb
->data_len
= to_write
;
990 skb
->len
+= to_write
;
991 skb
->truesize
+= to_write
;
992 atomic_add(to_write
, &po
->sk
.sk_wmem_alloc
);
994 while (likely(to_write
)) {
995 nr_frags
= skb_shinfo(skb
)->nr_frags
;
997 if (unlikely(nr_frags
>= MAX_SKB_FRAGS
)) {
998 pr_err("Packet exceed the number of skb frags(%lu)\n",
1003 page
= pgv_to_page(data
);
1005 flush_dcache_page(page
);
1007 skb_fill_page_desc(skb
, nr_frags
, page
, offset
, len
);
1010 len_max
= PAGE_SIZE
;
1011 len
= ((to_write
> len_max
) ? len_max
: to_write
);
1017 static int tpacket_snd(struct packet_sock
*po
, struct msghdr
*msg
)
1019 struct sk_buff
*skb
;
1020 struct net_device
*dev
;
1022 bool need_rls_dev
= false;
1023 int err
, reserve
= 0;
1025 struct sockaddr_ll
*saddr
= (struct sockaddr_ll
*)msg
->msg_name
;
1026 int tp_len
, size_max
;
1027 unsigned char *addr
;
1031 mutex_lock(&po
->pg_vec_lock
);
1034 if (saddr
== NULL
) {
1035 dev
= po
->prot_hook
.dev
;
1040 if (msg
->msg_namelen
< sizeof(struct sockaddr_ll
))
1042 if (msg
->msg_namelen
< (saddr
->sll_halen
1043 + offsetof(struct sockaddr_ll
,
1046 proto
= saddr
->sll_protocol
;
1047 addr
= saddr
->sll_addr
;
1048 dev
= dev_get_by_index(sock_net(&po
->sk
), saddr
->sll_ifindex
);
1049 need_rls_dev
= true;
1053 if (unlikely(dev
== NULL
))
1056 reserve
= dev
->hard_header_len
;
1059 if (unlikely(!(dev
->flags
& IFF_UP
)))
1062 size_max
= po
->tx_ring
.frame_size
1063 - (po
->tp_hdrlen
- sizeof(struct sockaddr_ll
));
1065 if (size_max
> dev
->mtu
+ reserve
)
1066 size_max
= dev
->mtu
+ reserve
;
1069 ph
= packet_current_frame(po
, &po
->tx_ring
,
1070 TP_STATUS_SEND_REQUEST
);
1072 if (unlikely(ph
== NULL
)) {
1077 status
= TP_STATUS_SEND_REQUEST
;
1078 skb
= sock_alloc_send_skb(&po
->sk
,
1079 LL_ALLOCATED_SPACE(dev
)
1080 + sizeof(struct sockaddr_ll
),
1083 if (unlikely(skb
== NULL
))
1086 tp_len
= tpacket_fill_skb(po
, skb
, ph
, dev
, size_max
, proto
,
1089 if (unlikely(tp_len
< 0)) {
1091 __packet_set_status(po
, ph
,
1092 TP_STATUS_AVAILABLE
);
1093 packet_increment_head(&po
->tx_ring
);
1097 status
= TP_STATUS_WRONG_FORMAT
;
1103 skb
->destructor
= tpacket_destruct_skb
;
1104 __packet_set_status(po
, ph
, TP_STATUS_SENDING
);
1105 atomic_inc(&po
->tx_ring
.pending
);
1107 status
= TP_STATUS_SEND_REQUEST
;
1108 err
= dev_queue_xmit(skb
);
1109 if (unlikely(err
> 0)) {
1110 err
= net_xmit_errno(err
);
1111 if (err
&& __packet_get_status(po
, ph
) ==
1112 TP_STATUS_AVAILABLE
) {
1113 /* skb was destructed already */
1118 * skb was dropped but not destructed yet;
1119 * let's treat it like congestion or err < 0
1123 packet_increment_head(&po
->tx_ring
);
1125 } while (likely((ph
!= NULL
) ||
1126 ((!(msg
->msg_flags
& MSG_DONTWAIT
)) &&
1127 (atomic_read(&po
->tx_ring
.pending
))))
1134 __packet_set_status(po
, ph
, status
);
1140 mutex_unlock(&po
->pg_vec_lock
);
1144 static inline struct sk_buff
*packet_alloc_skb(struct sock
*sk
, size_t prepad
,
1145 size_t reserve
, size_t len
,
1146 size_t linear
, int noblock
,
1149 struct sk_buff
*skb
;
1151 /* Under a page? Don't bother with paged skb. */
1152 if (prepad
+ len
< PAGE_SIZE
|| !linear
)
1155 skb
= sock_alloc_send_pskb(sk
, prepad
+ linear
, len
- linear
, noblock
,
1160 skb_reserve(skb
, reserve
);
1161 skb_put(skb
, linear
);
1162 skb
->data_len
= len
- linear
;
1163 skb
->len
+= len
- linear
;
1168 static int packet_snd(struct socket
*sock
,
1169 struct msghdr
*msg
, size_t len
)
1171 struct sock
*sk
= sock
->sk
;
1172 struct sockaddr_ll
*saddr
= (struct sockaddr_ll
*)msg
->msg_name
;
1173 struct sk_buff
*skb
;
1174 struct net_device
*dev
;
1176 bool need_rls_dev
= false;
1177 unsigned char *addr
;
1178 int err
, reserve
= 0;
1179 struct virtio_net_hdr vnet_hdr
= { 0 };
1182 struct packet_sock
*po
= pkt_sk(sk
);
1183 unsigned short gso_type
= 0;
1186 * Get and verify the address.
1189 if (saddr
== NULL
) {
1190 dev
= po
->prot_hook
.dev
;
1195 if (msg
->msg_namelen
< sizeof(struct sockaddr_ll
))
1197 if (msg
->msg_namelen
< (saddr
->sll_halen
+ offsetof(struct sockaddr_ll
, sll_addr
)))
1199 proto
= saddr
->sll_protocol
;
1200 addr
= saddr
->sll_addr
;
1201 dev
= dev_get_by_index(sock_net(sk
), saddr
->sll_ifindex
);
1202 need_rls_dev
= true;
1208 if (sock
->type
== SOCK_RAW
)
1209 reserve
= dev
->hard_header_len
;
1212 if (!(dev
->flags
& IFF_UP
))
1215 if (po
->has_vnet_hdr
) {
1216 vnet_hdr_len
= sizeof(vnet_hdr
);
1219 if (len
< vnet_hdr_len
)
1222 len
-= vnet_hdr_len
;
1224 err
= memcpy_fromiovec((void *)&vnet_hdr
, msg
->msg_iov
,
1229 if ((vnet_hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) &&
1230 (vnet_hdr
.csum_start
+ vnet_hdr
.csum_offset
+ 2 >
1232 vnet_hdr
.hdr_len
= vnet_hdr
.csum_start
+
1233 vnet_hdr
.csum_offset
+ 2;
1236 if (vnet_hdr
.hdr_len
> len
)
1239 if (vnet_hdr
.gso_type
!= VIRTIO_NET_HDR_GSO_NONE
) {
1240 switch (vnet_hdr
.gso_type
& ~VIRTIO_NET_HDR_GSO_ECN
) {
1241 case VIRTIO_NET_HDR_GSO_TCPV4
:
1242 gso_type
= SKB_GSO_TCPV4
;
1244 case VIRTIO_NET_HDR_GSO_TCPV6
:
1245 gso_type
= SKB_GSO_TCPV6
;
1247 case VIRTIO_NET_HDR_GSO_UDP
:
1248 gso_type
= SKB_GSO_UDP
;
1254 if (vnet_hdr
.gso_type
& VIRTIO_NET_HDR_GSO_ECN
)
1255 gso_type
|= SKB_GSO_TCP_ECN
;
1257 if (vnet_hdr
.gso_size
== 0)
1264 if (!gso_type
&& (len
> dev
->mtu
+ reserve
+ VLAN_HLEN
))
1268 skb
= packet_alloc_skb(sk
, LL_ALLOCATED_SPACE(dev
),
1269 LL_RESERVED_SPACE(dev
), len
, vnet_hdr
.hdr_len
,
1270 msg
->msg_flags
& MSG_DONTWAIT
, &err
);
1274 skb_set_network_header(skb
, reserve
);
1277 if (sock
->type
== SOCK_DGRAM
&&
1278 (offset
= dev_hard_header(skb
, dev
, ntohs(proto
), addr
, NULL
, len
)) < 0)
1281 /* Returns -EFAULT on error */
1282 err
= skb_copy_datagram_from_iovec(skb
, offset
, msg
->msg_iov
, 0, len
);
1285 err
= sock_tx_timestamp(sk
, &skb_shinfo(skb
)->tx_flags
);
1289 if (!gso_type
&& (len
> dev
->mtu
+ reserve
)) {
1290 /* Earlier code assumed this would be a VLAN pkt,
1291 * double-check this now that we have the actual
1294 struct ethhdr
*ehdr
;
1295 skb_reset_mac_header(skb
);
1296 ehdr
= eth_hdr(skb
);
1297 if (ehdr
->h_proto
!= htons(ETH_P_8021Q
)) {
1303 skb
->protocol
= proto
;
1305 skb
->priority
= sk
->sk_priority
;
1306 skb
->mark
= sk
->sk_mark
;
1308 if (po
->has_vnet_hdr
) {
1309 if (vnet_hdr
.flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) {
1310 if (!skb_partial_csum_set(skb
, vnet_hdr
.csum_start
,
1311 vnet_hdr
.csum_offset
)) {
1317 skb_shinfo(skb
)->gso_size
= vnet_hdr
.gso_size
;
1318 skb_shinfo(skb
)->gso_type
= gso_type
;
1320 /* Header must be checked, and gso_segs computed. */
1321 skb_shinfo(skb
)->gso_type
|= SKB_GSO_DODGY
;
1322 skb_shinfo(skb
)->gso_segs
= 0;
1324 len
+= vnet_hdr_len
;
1331 err
= dev_queue_xmit(skb
);
1332 if (err
> 0 && (err
= net_xmit_errno(err
)) != 0)
1343 if (dev
&& need_rls_dev
)
1349 static int packet_sendmsg(struct kiocb
*iocb
, struct socket
*sock
,
1350 struct msghdr
*msg
, size_t len
)
1352 struct sock
*sk
= sock
->sk
;
1353 struct packet_sock
*po
= pkt_sk(sk
);
1354 if (po
->tx_ring
.pg_vec
)
1355 return tpacket_snd(po
, msg
);
1357 return packet_snd(sock
, msg
, len
);
1361 * Close a PACKET socket. This is fairly simple. We immediately go
1362 * to 'closed' state and remove our protocol entry in the device list.
1365 static int packet_release(struct socket
*sock
)
1367 struct sock
*sk
= sock
->sk
;
1368 struct packet_sock
*po
;
1370 struct tpacket_req req
;
1378 spin_lock_bh(&net
->packet
.sklist_lock
);
1379 sk_del_node_init_rcu(sk
);
1380 sock_prot_inuse_add(net
, sk
->sk_prot
, -1);
1381 spin_unlock_bh(&net
->packet
.sklist_lock
);
1383 spin_lock(&po
->bind_lock
);
1384 unregister_prot_hook(sk
, false);
1385 if (po
->prot_hook
.dev
) {
1386 dev_put(po
->prot_hook
.dev
);
1387 po
->prot_hook
.dev
= NULL
;
1389 spin_unlock(&po
->bind_lock
);
1391 packet_flush_mclist(sk
);
1393 memset(&req
, 0, sizeof(req
));
1395 if (po
->rx_ring
.pg_vec
)
1396 packet_set_ring(sk
, &req
, 1, 0);
1398 if (po
->tx_ring
.pg_vec
)
1399 packet_set_ring(sk
, &req
, 1, 1);
1403 * Now the socket is dead. No more input will appear.
1410 skb_queue_purge(&sk
->sk_receive_queue
);
1411 sk_refcnt_debug_release(sk
);
1418 * Attach a packet hook.
1421 static int packet_do_bind(struct sock
*sk
, struct net_device
*dev
, __be16 protocol
)
1423 struct packet_sock
*po
= pkt_sk(sk
);
1425 * Detach an existing hook if present.
1430 spin_lock(&po
->bind_lock
);
1431 unregister_prot_hook(sk
, true);
1433 po
->prot_hook
.type
= protocol
;
1434 if (po
->prot_hook
.dev
)
1435 dev_put(po
->prot_hook
.dev
);
1436 po
->prot_hook
.dev
= dev
;
1438 po
->ifindex
= dev
? dev
->ifindex
: 0;
1443 if (!dev
|| (dev
->flags
& IFF_UP
)) {
1444 register_prot_hook(sk
);
1446 sk
->sk_err
= ENETDOWN
;
1447 if (!sock_flag(sk
, SOCK_DEAD
))
1448 sk
->sk_error_report(sk
);
1452 spin_unlock(&po
->bind_lock
);
1458 * Bind a packet socket to a device
1461 static int packet_bind_spkt(struct socket
*sock
, struct sockaddr
*uaddr
,
1464 struct sock
*sk
= sock
->sk
;
1466 struct net_device
*dev
;
1473 if (addr_len
!= sizeof(struct sockaddr
))
1475 strlcpy(name
, uaddr
->sa_data
, sizeof(name
));
1477 dev
= dev_get_by_name(sock_net(sk
), name
);
1479 err
= packet_do_bind(sk
, dev
, pkt_sk(sk
)->num
);
1483 static int packet_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
1485 struct sockaddr_ll
*sll
= (struct sockaddr_ll
*)uaddr
;
1486 struct sock
*sk
= sock
->sk
;
1487 struct net_device
*dev
= NULL
;
1495 if (addr_len
< sizeof(struct sockaddr_ll
))
1497 if (sll
->sll_family
!= AF_PACKET
)
1500 if (sll
->sll_ifindex
) {
1502 dev
= dev_get_by_index(sock_net(sk
), sll
->sll_ifindex
);
1506 err
= packet_do_bind(sk
, dev
, sll
->sll_protocol
? : pkt_sk(sk
)->num
);
1512 static struct proto packet_proto
= {
1514 .owner
= THIS_MODULE
,
1515 .obj_size
= sizeof(struct packet_sock
),
1519 * Create a packet of type SOCK_PACKET.
1522 static int packet_create(struct net
*net
, struct socket
*sock
, int protocol
,
1526 struct packet_sock
*po
;
1527 __be16 proto
= (__force __be16
)protocol
; /* weird, but documented */
1530 if (!capable(CAP_NET_RAW
))
1532 if (sock
->type
!= SOCK_DGRAM
&& sock
->type
!= SOCK_RAW
&&
1533 sock
->type
!= SOCK_PACKET
)
1534 return -ESOCKTNOSUPPORT
;
1536 sock
->state
= SS_UNCONNECTED
;
1539 sk
= sk_alloc(net
, PF_PACKET
, GFP_KERNEL
, &packet_proto
);
1543 sock
->ops
= &packet_ops
;
1544 if (sock
->type
== SOCK_PACKET
)
1545 sock
->ops
= &packet_ops_spkt
;
1547 sock_init_data(sock
, sk
);
1550 sk
->sk_family
= PF_PACKET
;
1553 sk
->sk_destruct
= packet_sock_destruct
;
1554 sk_refcnt_debug_inc(sk
);
1557 * Attach a protocol block
1560 spin_lock_init(&po
->bind_lock
);
1561 mutex_init(&po
->pg_vec_lock
);
1562 po
->prot_hook
.func
= packet_rcv
;
1564 if (sock
->type
== SOCK_PACKET
)
1565 po
->prot_hook
.func
= packet_rcv_spkt
;
1567 po
->prot_hook
.af_packet_priv
= sk
;
1570 po
->prot_hook
.type
= proto
;
1571 register_prot_hook(sk
);
1574 spin_lock_bh(&net
->packet
.sklist_lock
);
1575 sk_add_node_rcu(sk
, &net
->packet
.sklist
);
1576 sock_prot_inuse_add(net
, &packet_proto
, 1);
1577 spin_unlock_bh(&net
->packet
.sklist_lock
);
1584 static int packet_recv_error(struct sock
*sk
, struct msghdr
*msg
, int len
)
1586 struct sock_exterr_skb
*serr
;
1587 struct sk_buff
*skb
, *skb2
;
1591 skb
= skb_dequeue(&sk
->sk_error_queue
);
1597 msg
->msg_flags
|= MSG_TRUNC
;
1600 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
1604 sock_recv_timestamp(msg
, sk
, skb
);
1606 serr
= SKB_EXT_ERR(skb
);
1607 put_cmsg(msg
, SOL_PACKET
, PACKET_TX_TIMESTAMP
,
1608 sizeof(serr
->ee
), &serr
->ee
);
1610 msg
->msg_flags
|= MSG_ERRQUEUE
;
1613 /* Reset and regenerate socket error */
1614 spin_lock_bh(&sk
->sk_error_queue
.lock
);
1616 if ((skb2
= skb_peek(&sk
->sk_error_queue
)) != NULL
) {
1617 sk
->sk_err
= SKB_EXT_ERR(skb2
)->ee
.ee_errno
;
1618 spin_unlock_bh(&sk
->sk_error_queue
.lock
);
1619 sk
->sk_error_report(sk
);
1621 spin_unlock_bh(&sk
->sk_error_queue
.lock
);
1630 * Pull a packet from our receive queue and hand it to the user.
1631 * If necessary we block.
1634 static int packet_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1635 struct msghdr
*msg
, size_t len
, int flags
)
1637 struct sock
*sk
= sock
->sk
;
1638 struct sk_buff
*skb
;
1640 struct sockaddr_ll
*sll
;
1641 int vnet_hdr_len
= 0;
1644 if (flags
& ~(MSG_PEEK
|MSG_DONTWAIT
|MSG_TRUNC
|MSG_CMSG_COMPAT
|MSG_ERRQUEUE
))
1648 /* What error should we return now? EUNATTACH? */
1649 if (pkt_sk(sk
)->ifindex
< 0)
1653 if (flags
& MSG_ERRQUEUE
) {
1654 err
= packet_recv_error(sk
, msg
, len
);
1659 * Call the generic datagram receiver. This handles all sorts
1660 * of horrible races and re-entrancy so we can forget about it
1661 * in the protocol layers.
1663 * Now it will return ENETDOWN, if device have just gone down,
1664 * but then it will block.
1667 skb
= skb_recv_datagram(sk
, flags
, flags
& MSG_DONTWAIT
, &err
);
1670 * An error occurred so return it. Because skb_recv_datagram()
1671 * handles the blocking we don't see and worry about blocking
1678 if (pkt_sk(sk
)->has_vnet_hdr
) {
1679 struct virtio_net_hdr vnet_hdr
= { 0 };
1682 vnet_hdr_len
= sizeof(vnet_hdr
);
1683 if (len
< vnet_hdr_len
)
1686 len
-= vnet_hdr_len
;
1688 if (skb_is_gso(skb
)) {
1689 struct skb_shared_info
*sinfo
= skb_shinfo(skb
);
1691 /* This is a hint as to how much should be linear. */
1692 vnet_hdr
.hdr_len
= skb_headlen(skb
);
1693 vnet_hdr
.gso_size
= sinfo
->gso_size
;
1694 if (sinfo
->gso_type
& SKB_GSO_TCPV4
)
1695 vnet_hdr
.gso_type
= VIRTIO_NET_HDR_GSO_TCPV4
;
1696 else if (sinfo
->gso_type
& SKB_GSO_TCPV6
)
1697 vnet_hdr
.gso_type
= VIRTIO_NET_HDR_GSO_TCPV6
;
1698 else if (sinfo
->gso_type
& SKB_GSO_UDP
)
1699 vnet_hdr
.gso_type
= VIRTIO_NET_HDR_GSO_UDP
;
1700 else if (sinfo
->gso_type
& SKB_GSO_FCOE
)
1704 if (sinfo
->gso_type
& SKB_GSO_TCP_ECN
)
1705 vnet_hdr
.gso_type
|= VIRTIO_NET_HDR_GSO_ECN
;
1707 vnet_hdr
.gso_type
= VIRTIO_NET_HDR_GSO_NONE
;
1709 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1710 vnet_hdr
.flags
= VIRTIO_NET_HDR_F_NEEDS_CSUM
;
1711 vnet_hdr
.csum_start
= skb_checksum_start_offset(skb
);
1712 vnet_hdr
.csum_offset
= skb
->csum_offset
;
1713 } else if (skb
->ip_summed
== CHECKSUM_UNNECESSARY
) {
1714 vnet_hdr
.flags
= VIRTIO_NET_HDR_F_DATA_VALID
;
1715 } /* else everything is zero */
1717 err
= memcpy_toiovec(msg
->msg_iov
, (void *)&vnet_hdr
,
1724 * If the address length field is there to be filled in, we fill
1728 sll
= &PACKET_SKB_CB(skb
)->sa
.ll
;
1729 if (sock
->type
== SOCK_PACKET
)
1730 msg
->msg_namelen
= sizeof(struct sockaddr_pkt
);
1732 msg
->msg_namelen
= sll
->sll_halen
+ offsetof(struct sockaddr_ll
, sll_addr
);
1735 * You lose any data beyond the buffer you gave. If it worries a
1736 * user program they can ask the device for its MTU anyway.
1742 msg
->msg_flags
|= MSG_TRUNC
;
1745 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
1749 sock_recv_ts_and_drops(msg
, sk
, skb
);
1752 memcpy(msg
->msg_name
, &PACKET_SKB_CB(skb
)->sa
,
1755 if (pkt_sk(sk
)->auxdata
) {
1756 struct tpacket_auxdata aux
;
1758 aux
.tp_status
= TP_STATUS_USER
;
1759 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
1760 aux
.tp_status
|= TP_STATUS_CSUMNOTREADY
;
1761 aux
.tp_len
= PACKET_SKB_CB(skb
)->origlen
;
1762 aux
.tp_snaplen
= skb
->len
;
1764 aux
.tp_net
= skb_network_offset(skb
);
1765 if (vlan_tx_tag_present(skb
)) {
1766 aux
.tp_vlan_tci
= vlan_tx_tag_get(skb
);
1767 aux
.tp_status
|= TP_STATUS_VLAN_VALID
;
1769 aux
.tp_vlan_tci
= 0;
1772 put_cmsg(msg
, SOL_PACKET
, PACKET_AUXDATA
, sizeof(aux
), &aux
);
1776 * Free or return the buffer as appropriate. Again this
1777 * hides all the races and re-entrancy issues from us.
1779 err
= vnet_hdr_len
+ ((flags
&MSG_TRUNC
) ? skb
->len
: copied
);
1782 skb_free_datagram(sk
, skb
);
1787 static int packet_getname_spkt(struct socket
*sock
, struct sockaddr
*uaddr
,
1788 int *uaddr_len
, int peer
)
1790 struct net_device
*dev
;
1791 struct sock
*sk
= sock
->sk
;
1796 uaddr
->sa_family
= AF_PACKET
;
1798 dev
= dev_get_by_index_rcu(sock_net(sk
), pkt_sk(sk
)->ifindex
);
1800 strncpy(uaddr
->sa_data
, dev
->name
, 14);
1802 memset(uaddr
->sa_data
, 0, 14);
1804 *uaddr_len
= sizeof(*uaddr
);
1809 static int packet_getname(struct socket
*sock
, struct sockaddr
*uaddr
,
1810 int *uaddr_len
, int peer
)
1812 struct net_device
*dev
;
1813 struct sock
*sk
= sock
->sk
;
1814 struct packet_sock
*po
= pkt_sk(sk
);
1815 DECLARE_SOCKADDR(struct sockaddr_ll
*, sll
, uaddr
);
1820 sll
->sll_family
= AF_PACKET
;
1821 sll
->sll_ifindex
= po
->ifindex
;
1822 sll
->sll_protocol
= po
->num
;
1823 sll
->sll_pkttype
= 0;
1825 dev
= dev_get_by_index_rcu(sock_net(sk
), po
->ifindex
);
1827 sll
->sll_hatype
= dev
->type
;
1828 sll
->sll_halen
= dev
->addr_len
;
1829 memcpy(sll
->sll_addr
, dev
->dev_addr
, dev
->addr_len
);
1831 sll
->sll_hatype
= 0; /* Bad: we have no ARPHRD_UNSPEC */
1835 *uaddr_len
= offsetof(struct sockaddr_ll
, sll_addr
) + sll
->sll_halen
;
1840 static int packet_dev_mc(struct net_device
*dev
, struct packet_mclist
*i
,
1844 case PACKET_MR_MULTICAST
:
1845 if (i
->alen
!= dev
->addr_len
)
1848 return dev_mc_add(dev
, i
->addr
);
1850 return dev_mc_del(dev
, i
->addr
);
1852 case PACKET_MR_PROMISC
:
1853 return dev_set_promiscuity(dev
, what
);
1855 case PACKET_MR_ALLMULTI
:
1856 return dev_set_allmulti(dev
, what
);
1858 case PACKET_MR_UNICAST
:
1859 if (i
->alen
!= dev
->addr_len
)
1862 return dev_uc_add(dev
, i
->addr
);
1864 return dev_uc_del(dev
, i
->addr
);
1872 static void packet_dev_mclist(struct net_device
*dev
, struct packet_mclist
*i
, int what
)
1874 for ( ; i
; i
= i
->next
) {
1875 if (i
->ifindex
== dev
->ifindex
)
1876 packet_dev_mc(dev
, i
, what
);
1880 static int packet_mc_add(struct sock
*sk
, struct packet_mreq_max
*mreq
)
1882 struct packet_sock
*po
= pkt_sk(sk
);
1883 struct packet_mclist
*ml
, *i
;
1884 struct net_device
*dev
;
1890 dev
= __dev_get_by_index(sock_net(sk
), mreq
->mr_ifindex
);
1895 if (mreq
->mr_alen
> dev
->addr_len
)
1899 i
= kmalloc(sizeof(*i
), GFP_KERNEL
);
1904 for (ml
= po
->mclist
; ml
; ml
= ml
->next
) {
1905 if (ml
->ifindex
== mreq
->mr_ifindex
&&
1906 ml
->type
== mreq
->mr_type
&&
1907 ml
->alen
== mreq
->mr_alen
&&
1908 memcmp(ml
->addr
, mreq
->mr_address
, ml
->alen
) == 0) {
1910 /* Free the new element ... */
1916 i
->type
= mreq
->mr_type
;
1917 i
->ifindex
= mreq
->mr_ifindex
;
1918 i
->alen
= mreq
->mr_alen
;
1919 memcpy(i
->addr
, mreq
->mr_address
, i
->alen
);
1921 i
->next
= po
->mclist
;
1923 err
= packet_dev_mc(dev
, i
, 1);
1925 po
->mclist
= i
->next
;
1934 static int packet_mc_drop(struct sock
*sk
, struct packet_mreq_max
*mreq
)
1936 struct packet_mclist
*ml
, **mlp
;
1940 for (mlp
= &pkt_sk(sk
)->mclist
; (ml
= *mlp
) != NULL
; mlp
= &ml
->next
) {
1941 if (ml
->ifindex
== mreq
->mr_ifindex
&&
1942 ml
->type
== mreq
->mr_type
&&
1943 ml
->alen
== mreq
->mr_alen
&&
1944 memcmp(ml
->addr
, mreq
->mr_address
, ml
->alen
) == 0) {
1945 if (--ml
->count
== 0) {
1946 struct net_device
*dev
;
1948 dev
= __dev_get_by_index(sock_net(sk
), ml
->ifindex
);
1950 packet_dev_mc(dev
, ml
, -1);
1958 return -EADDRNOTAVAIL
;
1961 static void packet_flush_mclist(struct sock
*sk
)
1963 struct packet_sock
*po
= pkt_sk(sk
);
1964 struct packet_mclist
*ml
;
1970 while ((ml
= po
->mclist
) != NULL
) {
1971 struct net_device
*dev
;
1973 po
->mclist
= ml
->next
;
1974 dev
= __dev_get_by_index(sock_net(sk
), ml
->ifindex
);
1976 packet_dev_mc(dev
, ml
, -1);
1983 packet_setsockopt(struct socket
*sock
, int level
, int optname
, char __user
*optval
, unsigned int optlen
)
1985 struct sock
*sk
= sock
->sk
;
1986 struct packet_sock
*po
= pkt_sk(sk
);
1989 if (level
!= SOL_PACKET
)
1990 return -ENOPROTOOPT
;
1993 case PACKET_ADD_MEMBERSHIP
:
1994 case PACKET_DROP_MEMBERSHIP
:
1996 struct packet_mreq_max mreq
;
1998 memset(&mreq
, 0, sizeof(mreq
));
1999 if (len
< sizeof(struct packet_mreq
))
2001 if (len
> sizeof(mreq
))
2003 if (copy_from_user(&mreq
, optval
, len
))
2005 if (len
< (mreq
.mr_alen
+ offsetof(struct packet_mreq
, mr_address
)))
2007 if (optname
== PACKET_ADD_MEMBERSHIP
)
2008 ret
= packet_mc_add(sk
, &mreq
);
2010 ret
= packet_mc_drop(sk
, &mreq
);
2014 case PACKET_RX_RING
:
2015 case PACKET_TX_RING
:
2017 struct tpacket_req req
;
2019 if (optlen
< sizeof(req
))
2021 if (pkt_sk(sk
)->has_vnet_hdr
)
2023 if (copy_from_user(&req
, optval
, sizeof(req
)))
2025 return packet_set_ring(sk
, &req
, 0, optname
== PACKET_TX_RING
);
2027 case PACKET_COPY_THRESH
:
2031 if (optlen
!= sizeof(val
))
2033 if (copy_from_user(&val
, optval
, sizeof(val
)))
2036 pkt_sk(sk
)->copy_thresh
= val
;
2039 case PACKET_VERSION
:
2043 if (optlen
!= sizeof(val
))
2045 if (po
->rx_ring
.pg_vec
|| po
->tx_ring
.pg_vec
)
2047 if (copy_from_user(&val
, optval
, sizeof(val
)))
2052 po
->tp_version
= val
;
2058 case PACKET_RESERVE
:
2062 if (optlen
!= sizeof(val
))
2064 if (po
->rx_ring
.pg_vec
|| po
->tx_ring
.pg_vec
)
2066 if (copy_from_user(&val
, optval
, sizeof(val
)))
2068 po
->tp_reserve
= val
;
2075 if (optlen
!= sizeof(val
))
2077 if (po
->rx_ring
.pg_vec
|| po
->tx_ring
.pg_vec
)
2079 if (copy_from_user(&val
, optval
, sizeof(val
)))
2081 po
->tp_loss
= !!val
;
2084 case PACKET_AUXDATA
:
2088 if (optlen
< sizeof(val
))
2090 if (copy_from_user(&val
, optval
, sizeof(val
)))
2093 po
->auxdata
= !!val
;
2096 case PACKET_ORIGDEV
:
2100 if (optlen
< sizeof(val
))
2102 if (copy_from_user(&val
, optval
, sizeof(val
)))
2105 po
->origdev
= !!val
;
2108 case PACKET_VNET_HDR
:
2112 if (sock
->type
!= SOCK_RAW
)
2114 if (po
->rx_ring
.pg_vec
|| po
->tx_ring
.pg_vec
)
2116 if (optlen
< sizeof(val
))
2118 if (copy_from_user(&val
, optval
, sizeof(val
)))
2121 po
->has_vnet_hdr
= !!val
;
2124 case PACKET_TIMESTAMP
:
2128 if (optlen
!= sizeof(val
))
2130 if (copy_from_user(&val
, optval
, sizeof(val
)))
2133 po
->tp_tstamp
= val
;
2137 return -ENOPROTOOPT
;
2141 static int packet_getsockopt(struct socket
*sock
, int level
, int optname
,
2142 char __user
*optval
, int __user
*optlen
)
2146 struct sock
*sk
= sock
->sk
;
2147 struct packet_sock
*po
= pkt_sk(sk
);
2149 struct tpacket_stats st
;
2151 if (level
!= SOL_PACKET
)
2152 return -ENOPROTOOPT
;
2154 if (get_user(len
, optlen
))
2161 case PACKET_STATISTICS
:
2162 if (len
> sizeof(struct tpacket_stats
))
2163 len
= sizeof(struct tpacket_stats
);
2164 spin_lock_bh(&sk
->sk_receive_queue
.lock
);
2166 memset(&po
->stats
, 0, sizeof(st
));
2167 spin_unlock_bh(&sk
->sk_receive_queue
.lock
);
2168 st
.tp_packets
+= st
.tp_drops
;
2172 case PACKET_AUXDATA
:
2173 if (len
> sizeof(int))
2179 case PACKET_ORIGDEV
:
2180 if (len
> sizeof(int))
2186 case PACKET_VNET_HDR
:
2187 if (len
> sizeof(int))
2189 val
= po
->has_vnet_hdr
;
2193 case PACKET_VERSION
:
2194 if (len
> sizeof(int))
2196 val
= po
->tp_version
;
2200 if (len
> sizeof(int))
2202 if (copy_from_user(&val
, optval
, len
))
2206 val
= sizeof(struct tpacket_hdr
);
2209 val
= sizeof(struct tpacket2_hdr
);
2216 case PACKET_RESERVE
:
2217 if (len
> sizeof(unsigned int))
2218 len
= sizeof(unsigned int);
2219 val
= po
->tp_reserve
;
2223 if (len
> sizeof(unsigned int))
2224 len
= sizeof(unsigned int);
2228 case PACKET_TIMESTAMP
:
2229 if (len
> sizeof(int))
2231 val
= po
->tp_tstamp
;
2235 return -ENOPROTOOPT
;
2238 if (put_user(len
, optlen
))
2240 if (copy_to_user(optval
, data
, len
))
2246 static int packet_notifier(struct notifier_block
*this, unsigned long msg
, void *data
)
2249 struct hlist_node
*node
;
2250 struct net_device
*dev
= data
;
2251 struct net
*net
= dev_net(dev
);
2254 sk_for_each_rcu(sk
, node
, &net
->packet
.sklist
) {
2255 struct packet_sock
*po
= pkt_sk(sk
);
2258 case NETDEV_UNREGISTER
:
2260 packet_dev_mclist(dev
, po
->mclist
, -1);
2264 if (dev
->ifindex
== po
->ifindex
) {
2265 spin_lock(&po
->bind_lock
);
2267 __unregister_prot_hook(sk
, false);
2268 sk
->sk_err
= ENETDOWN
;
2269 if (!sock_flag(sk
, SOCK_DEAD
))
2270 sk
->sk_error_report(sk
);
2272 if (msg
== NETDEV_UNREGISTER
) {
2274 if (po
->prot_hook
.dev
)
2275 dev_put(po
->prot_hook
.dev
);
2276 po
->prot_hook
.dev
= NULL
;
2278 spin_unlock(&po
->bind_lock
);
2282 if (dev
->ifindex
== po
->ifindex
) {
2283 spin_lock(&po
->bind_lock
);
2285 register_prot_hook(sk
);
2286 spin_unlock(&po
->bind_lock
);
2296 static int packet_ioctl(struct socket
*sock
, unsigned int cmd
,
2299 struct sock
*sk
= sock
->sk
;
2304 int amount
= sk_wmem_alloc_get(sk
);
2306 return put_user(amount
, (int __user
*)arg
);
2310 struct sk_buff
*skb
;
2313 spin_lock_bh(&sk
->sk_receive_queue
.lock
);
2314 skb
= skb_peek(&sk
->sk_receive_queue
);
2317 spin_unlock_bh(&sk
->sk_receive_queue
.lock
);
2318 return put_user(amount
, (int __user
*)arg
);
2321 return sock_get_timestamp(sk
, (struct timeval __user
*)arg
);
2323 return sock_get_timestampns(sk
, (struct timespec __user
*)arg
);
2333 case SIOCGIFBRDADDR
:
2334 case SIOCSIFBRDADDR
:
2335 case SIOCGIFNETMASK
:
2336 case SIOCSIFNETMASK
:
2337 case SIOCGIFDSTADDR
:
2338 case SIOCSIFDSTADDR
:
2340 return inet_dgram_ops
.ioctl(sock
, cmd
, arg
);
2344 return -ENOIOCTLCMD
;
2349 static unsigned int packet_poll(struct file
*file
, struct socket
*sock
,
2352 struct sock
*sk
= sock
->sk
;
2353 struct packet_sock
*po
= pkt_sk(sk
);
2354 unsigned int mask
= datagram_poll(file
, sock
, wait
);
2356 spin_lock_bh(&sk
->sk_receive_queue
.lock
);
2357 if (po
->rx_ring
.pg_vec
) {
2358 if (!packet_previous_frame(po
, &po
->rx_ring
, TP_STATUS_KERNEL
))
2359 mask
|= POLLIN
| POLLRDNORM
;
2361 spin_unlock_bh(&sk
->sk_receive_queue
.lock
);
2362 spin_lock_bh(&sk
->sk_write_queue
.lock
);
2363 if (po
->tx_ring
.pg_vec
) {
2364 if (packet_current_frame(po
, &po
->tx_ring
, TP_STATUS_AVAILABLE
))
2365 mask
|= POLLOUT
| POLLWRNORM
;
2367 spin_unlock_bh(&sk
->sk_write_queue
.lock
);
2372 /* Dirty? Well, I still did not learn better way to account
2376 static void packet_mm_open(struct vm_area_struct
*vma
)
2378 struct file
*file
= vma
->vm_file
;
2379 struct socket
*sock
= file
->private_data
;
2380 struct sock
*sk
= sock
->sk
;
2383 atomic_inc(&pkt_sk(sk
)->mapped
);
2386 static void packet_mm_close(struct vm_area_struct
*vma
)
2388 struct file
*file
= vma
->vm_file
;
2389 struct socket
*sock
= file
->private_data
;
2390 struct sock
*sk
= sock
->sk
;
2393 atomic_dec(&pkt_sk(sk
)->mapped
);
2396 static const struct vm_operations_struct packet_mmap_ops
= {
2397 .open
= packet_mm_open
,
2398 .close
= packet_mm_close
,
2401 static void free_pg_vec(struct pgv
*pg_vec
, unsigned int order
,
2406 for (i
= 0; i
< len
; i
++) {
2407 if (likely(pg_vec
[i
].buffer
)) {
2408 if (is_vmalloc_addr(pg_vec
[i
].buffer
))
2409 vfree(pg_vec
[i
].buffer
);
2411 free_pages((unsigned long)pg_vec
[i
].buffer
,
2413 pg_vec
[i
].buffer
= NULL
;
2419 static inline char *alloc_one_pg_vec_page(unsigned long order
)
2421 char *buffer
= NULL
;
2422 gfp_t gfp_flags
= GFP_KERNEL
| __GFP_COMP
|
2423 __GFP_ZERO
| __GFP_NOWARN
| __GFP_NORETRY
;
2425 buffer
= (char *) __get_free_pages(gfp_flags
, order
);
2431 * __get_free_pages failed, fall back to vmalloc
2433 buffer
= vzalloc((1 << order
) * PAGE_SIZE
);
2439 * vmalloc failed, lets dig into swap here
2441 gfp_flags
&= ~__GFP_NORETRY
;
2442 buffer
= (char *)__get_free_pages(gfp_flags
, order
);
2447 * complete and utter failure
2452 static struct pgv
*alloc_pg_vec(struct tpacket_req
*req
, int order
)
2454 unsigned int block_nr
= req
->tp_block_nr
;
2458 pg_vec
= kcalloc(block_nr
, sizeof(struct pgv
), GFP_KERNEL
);
2459 if (unlikely(!pg_vec
))
2462 for (i
= 0; i
< block_nr
; i
++) {
2463 pg_vec
[i
].buffer
= alloc_one_pg_vec_page(order
);
2464 if (unlikely(!pg_vec
[i
].buffer
))
2465 goto out_free_pgvec
;
2472 free_pg_vec(pg_vec
, order
, block_nr
);
2477 static int packet_set_ring(struct sock
*sk
, struct tpacket_req
*req
,
2478 int closing
, int tx_ring
)
2480 struct pgv
*pg_vec
= NULL
;
2481 struct packet_sock
*po
= pkt_sk(sk
);
2482 int was_running
, order
= 0;
2483 struct packet_ring_buffer
*rb
;
2484 struct sk_buff_head
*rb_queue
;
2488 rb
= tx_ring
? &po
->tx_ring
: &po
->rx_ring
;
2489 rb_queue
= tx_ring
? &sk
->sk_write_queue
: &sk
->sk_receive_queue
;
2493 if (atomic_read(&po
->mapped
))
2495 if (atomic_read(&rb
->pending
))
2499 if (req
->tp_block_nr
) {
2500 /* Sanity tests and some calculations */
2502 if (unlikely(rb
->pg_vec
))
2505 switch (po
->tp_version
) {
2507 po
->tp_hdrlen
= TPACKET_HDRLEN
;
2510 po
->tp_hdrlen
= TPACKET2_HDRLEN
;
2515 if (unlikely((int)req
->tp_block_size
<= 0))
2517 if (unlikely(req
->tp_block_size
& (PAGE_SIZE
- 1)))
2519 if (unlikely(req
->tp_frame_size
< po
->tp_hdrlen
+
2522 if (unlikely(req
->tp_frame_size
& (TPACKET_ALIGNMENT
- 1)))
2525 rb
->frames_per_block
= req
->tp_block_size
/req
->tp_frame_size
;
2526 if (unlikely(rb
->frames_per_block
<= 0))
2528 if (unlikely((rb
->frames_per_block
* req
->tp_block_nr
) !=
2533 order
= get_order(req
->tp_block_size
);
2534 pg_vec
= alloc_pg_vec(req
, order
);
2535 if (unlikely(!pg_vec
))
2541 if (unlikely(req
->tp_frame_nr
))
2547 /* Detach socket from network */
2548 spin_lock(&po
->bind_lock
);
2549 was_running
= po
->running
;
2553 __unregister_prot_hook(sk
, false);
2555 spin_unlock(&po
->bind_lock
);
2560 mutex_lock(&po
->pg_vec_lock
);
2561 if (closing
|| atomic_read(&po
->mapped
) == 0) {
2563 spin_lock_bh(&rb_queue
->lock
);
2564 swap(rb
->pg_vec
, pg_vec
);
2565 rb
->frame_max
= (req
->tp_frame_nr
- 1);
2567 rb
->frame_size
= req
->tp_frame_size
;
2568 spin_unlock_bh(&rb_queue
->lock
);
2570 swap(rb
->pg_vec_order
, order
);
2571 swap(rb
->pg_vec_len
, req
->tp_block_nr
);
2573 rb
->pg_vec_pages
= req
->tp_block_size
/PAGE_SIZE
;
2574 po
->prot_hook
.func
= (po
->rx_ring
.pg_vec
) ?
2575 tpacket_rcv
: packet_rcv
;
2576 skb_queue_purge(rb_queue
);
2577 if (atomic_read(&po
->mapped
))
2578 pr_err("packet_mmap: vma is busy: %d\n",
2579 atomic_read(&po
->mapped
));
2581 mutex_unlock(&po
->pg_vec_lock
);
2583 spin_lock(&po
->bind_lock
);
2586 register_prot_hook(sk
);
2588 spin_unlock(&po
->bind_lock
);
2593 free_pg_vec(pg_vec
, order
, req
->tp_block_nr
);
2598 static int packet_mmap(struct file
*file
, struct socket
*sock
,
2599 struct vm_area_struct
*vma
)
2601 struct sock
*sk
= sock
->sk
;
2602 struct packet_sock
*po
= pkt_sk(sk
);
2603 unsigned long size
, expected_size
;
2604 struct packet_ring_buffer
*rb
;
2605 unsigned long start
;
2612 mutex_lock(&po
->pg_vec_lock
);
2615 for (rb
= &po
->rx_ring
; rb
<= &po
->tx_ring
; rb
++) {
2617 expected_size
+= rb
->pg_vec_len
2623 if (expected_size
== 0)
2626 size
= vma
->vm_end
- vma
->vm_start
;
2627 if (size
!= expected_size
)
2630 start
= vma
->vm_start
;
2631 for (rb
= &po
->rx_ring
; rb
<= &po
->tx_ring
; rb
++) {
2632 if (rb
->pg_vec
== NULL
)
2635 for (i
= 0; i
< rb
->pg_vec_len
; i
++) {
2637 void *kaddr
= rb
->pg_vec
[i
].buffer
;
2640 for (pg_num
= 0; pg_num
< rb
->pg_vec_pages
; pg_num
++) {
2641 page
= pgv_to_page(kaddr
);
2642 err
= vm_insert_page(vma
, start
, page
);
2651 atomic_inc(&po
->mapped
);
2652 vma
->vm_ops
= &packet_mmap_ops
;
2656 mutex_unlock(&po
->pg_vec_lock
);
2660 static const struct proto_ops packet_ops_spkt
= {
2661 .family
= PF_PACKET
,
2662 .owner
= THIS_MODULE
,
2663 .release
= packet_release
,
2664 .bind
= packet_bind_spkt
,
2665 .connect
= sock_no_connect
,
2666 .socketpair
= sock_no_socketpair
,
2667 .accept
= sock_no_accept
,
2668 .getname
= packet_getname_spkt
,
2669 .poll
= datagram_poll
,
2670 .ioctl
= packet_ioctl
,
2671 .listen
= sock_no_listen
,
2672 .shutdown
= sock_no_shutdown
,
2673 .setsockopt
= sock_no_setsockopt
,
2674 .getsockopt
= sock_no_getsockopt
,
2675 .sendmsg
= packet_sendmsg_spkt
,
2676 .recvmsg
= packet_recvmsg
,
2677 .mmap
= sock_no_mmap
,
2678 .sendpage
= sock_no_sendpage
,
2681 static const struct proto_ops packet_ops
= {
2682 .family
= PF_PACKET
,
2683 .owner
= THIS_MODULE
,
2684 .release
= packet_release
,
2685 .bind
= packet_bind
,
2686 .connect
= sock_no_connect
,
2687 .socketpair
= sock_no_socketpair
,
2688 .accept
= sock_no_accept
,
2689 .getname
= packet_getname
,
2690 .poll
= packet_poll
,
2691 .ioctl
= packet_ioctl
,
2692 .listen
= sock_no_listen
,
2693 .shutdown
= sock_no_shutdown
,
2694 .setsockopt
= packet_setsockopt
,
2695 .getsockopt
= packet_getsockopt
,
2696 .sendmsg
= packet_sendmsg
,
2697 .recvmsg
= packet_recvmsg
,
2698 .mmap
= packet_mmap
,
2699 .sendpage
= sock_no_sendpage
,
2702 static const struct net_proto_family packet_family_ops
= {
2703 .family
= PF_PACKET
,
2704 .create
= packet_create
,
2705 .owner
= THIS_MODULE
,
2708 static struct notifier_block packet_netdev_notifier
= {
2709 .notifier_call
= packet_notifier
,
2712 #ifdef CONFIG_PROC_FS
2714 static void *packet_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2717 struct net
*net
= seq_file_net(seq
);
2720 return seq_hlist_start_head_rcu(&net
->packet
.sklist
, *pos
);
2723 static void *packet_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2725 struct net
*net
= seq_file_net(seq
);
2726 return seq_hlist_next_rcu(v
, &net
->packet
.sklist
, pos
);
2729 static void packet_seq_stop(struct seq_file
*seq
, void *v
)
2735 static int packet_seq_show(struct seq_file
*seq
, void *v
)
2737 if (v
== SEQ_START_TOKEN
)
2738 seq_puts(seq
, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2740 struct sock
*s
= sk_entry(v
);
2741 const struct packet_sock
*po
= pkt_sk(s
);
2744 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2746 atomic_read(&s
->sk_refcnt
),
2751 atomic_read(&s
->sk_rmem_alloc
),
2759 static const struct seq_operations packet_seq_ops
= {
2760 .start
= packet_seq_start
,
2761 .next
= packet_seq_next
,
2762 .stop
= packet_seq_stop
,
2763 .show
= packet_seq_show
,
2766 static int packet_seq_open(struct inode
*inode
, struct file
*file
)
2768 return seq_open_net(inode
, file
, &packet_seq_ops
,
2769 sizeof(struct seq_net_private
));
2772 static const struct file_operations packet_seq_fops
= {
2773 .owner
= THIS_MODULE
,
2774 .open
= packet_seq_open
,
2776 .llseek
= seq_lseek
,
2777 .release
= seq_release_net
,
2782 static int __net_init
packet_net_init(struct net
*net
)
2784 spin_lock_init(&net
->packet
.sklist_lock
);
2785 INIT_HLIST_HEAD(&net
->packet
.sklist
);
2787 if (!proc_net_fops_create(net
, "packet", 0, &packet_seq_fops
))
2793 static void __net_exit
packet_net_exit(struct net
*net
)
2795 proc_net_remove(net
, "packet");
2798 static struct pernet_operations packet_net_ops
= {
2799 .init
= packet_net_init
,
2800 .exit
= packet_net_exit
,
2804 static void __exit
packet_exit(void)
2806 unregister_netdevice_notifier(&packet_netdev_notifier
);
2807 unregister_pernet_subsys(&packet_net_ops
);
2808 sock_unregister(PF_PACKET
);
2809 proto_unregister(&packet_proto
);
2812 static int __init
packet_init(void)
2814 int rc
= proto_register(&packet_proto
, 0);
2819 sock_register(&packet_family_ops
);
2820 register_pernet_subsys(&packet_net_ops
);
2821 register_netdevice_notifier(&packet_netdev_notifier
);
2826 module_init(packet_init
);
2827 module_exit(packet_exit
);
2828 MODULE_LICENSE("GPL");
2829 MODULE_ALIAS_NETPROTO(PF_PACKET
);