/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		RAW - implementation of IP "raw" sockets.
 *
 * Version:	$Id: raw.c,v 1.50 2000/05/03 06:37:06 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() fixed up
 *		Alan Cox	:	ICMP error handling
 *		Alan Cox	:	EMSGSIZE if you send too big a packet
 *		Alan Cox	: 	Now uses generic datagrams and shared skbuff
 *					library. No more peek crashes, no more backlogs
 *		Alan Cox	:	Checks sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram/skb_copy_datagram
 *		Alan Cox	:	Raw passes ip options too
 *		Alan Cox	:	Setsocketopt added
 *		Alan Cox	:	Fixed error return for broadcasts
 *		Alan Cox	:	Removed wake_up calls
 *		Alan Cox	:	Use ttl/tos
 *		Alan Cox	:	Cleaned up old debugging
 *		Alan Cox	:	Use new kernel side addresses
 *	Arnt Gulbrandsen	:	Fixed MSG_DONTROUTE in raw sockets.
 *		Alan Cox	:	BSD style RAW socket demultiplexing.
 *		Alan Cox	:	Beginnings of mrouted support.
 *		Alan Cox	:	Added IP_HDRINCL option.
 *		Alan Cox	:	Skip broadcast check if BSDism set.
 *		David S. Miller	:	New socket lookup architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/mroute.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/inet_common.h>
#include <net/checksum.h>
66 struct sock
*raw_v4_htable
[RAWV4_HTABLE_SIZE
];
67 rwlock_t raw_v4_lock
= RW_LOCK_UNLOCKED
;
69 static void raw_v4_hash(struct sock
*sk
)
71 struct sock
**skp
= &raw_v4_htable
[sk
->num
& (RAWV4_HTABLE_SIZE
- 1)];
73 write_lock_bh(&raw_v4_lock
);
74 if ((sk
->next
= *skp
) != NULL
)
75 (*skp
)->pprev
= &sk
->next
;
78 sock_prot_inc_use(sk
->prot
);
80 write_unlock_bh(&raw_v4_lock
);
83 static void raw_v4_unhash(struct sock
*sk
)
85 write_lock_bh(&raw_v4_lock
);
88 sk
->next
->pprev
= sk
->pprev
;
89 *sk
->pprev
= sk
->next
;
91 sock_prot_dec_use(sk
->prot
);
94 write_unlock_bh(&raw_v4_lock
);
97 struct sock
*__raw_v4_lookup(struct sock
*sk
, unsigned short num
,
98 unsigned long raddr
, unsigned long laddr
,
103 for(s
= sk
; s
; s
= s
->next
) {
104 if((s
->num
== num
) &&
105 !(s
->daddr
&& s
->daddr
!= raddr
) &&
106 !(s
->rcv_saddr
&& s
->rcv_saddr
!= laddr
) &&
107 !(s
->bound_dev_if
&& s
->bound_dev_if
!= dif
))
117 static __inline__
int icmp_filter(struct sock
*sk
, struct sk_buff
*skb
)
121 type
= skb
->h
.icmph
->type
;
123 return test_bit(type
, &sk
->tp_pinfo
.tp_raw4
.filter
);
125 /* Do not block unknown ICMP types */
129 /* IP input processing comes here for RAW socket delivery.
130 * This is fun as to avoid copies we want to make no surplus
133 * RFC 1122: SHOULD pass TOS value up to the transport layer.
134 * -> It does. And not only TOS, but all IP header.
136 struct sock
*raw_v4_input(struct sk_buff
*skb
, struct iphdr
*iph
, int hash
)
140 read_lock(&raw_v4_lock
);
141 if ((sk
= raw_v4_htable
[hash
]) == NULL
)
143 sk
= __raw_v4_lookup(sk
, iph
->protocol
,
144 iph
->saddr
, iph
->daddr
,
148 struct sock
*sknext
= __raw_v4_lookup(sk
->next
, iph
->protocol
,
149 iph
->saddr
, iph
->daddr
,
151 if (iph
->protocol
!= IPPROTO_ICMP
||
152 ! icmp_filter(sk
, skb
)) {
153 struct sk_buff
*clone
;
157 clone
= skb_clone(skb
, GFP_ATOMIC
);
158 /* Not releasing hash table! */
167 read_unlock(&raw_v4_lock
);
172 void raw_err (struct sock
*sk
, struct sk_buff
*skb
)
174 int type
= skb
->h
.icmph
->type
;
175 int code
= skb
->h
.icmph
->code
;
180 /* Report error on raw socket, if:
181 1. User requested ip_recverr.
182 2. Socket is connected (otherwise the error indication
183 is useless without ip_recverr and error is hard.
185 if (!sk
->protinfo
.af_inet
.recverr
&& sk
->state
!= TCP_ESTABLISHED
)
190 case ICMP_TIME_EXCEEDED
:
193 case ICMP_SOURCE_QUENCH
:
195 case ICMP_PARAMETERPROB
:
197 info
= ntohl(skb
->h
.icmph
->un
.gateway
)>>24;
200 case ICMP_DEST_UNREACH
:
202 if (code
> NR_ICMP_UNREACH
)
204 err
= icmp_err_convert
[code
].errno
;
205 harderr
= icmp_err_convert
[code
].fatal
;
206 if (code
== ICMP_FRAG_NEEDED
) {
207 harderr
= (sk
->protinfo
.af_inet
.pmtudisc
!= IP_PMTUDISC_DONT
);
209 info
= ntohs(skb
->h
.icmph
->un
.frag
.mtu
);
213 if (sk
->protinfo
.af_inet
.recverr
)
214 ip_icmp_error(sk
, skb
, err
, 0, info
, (u8
*)(skb
->h
.icmph
+ 1));
216 if (sk
->protinfo
.af_inet
.recverr
|| harderr
) {
218 sk
->error_report(sk
);
222 static int raw_rcv_skb(struct sock
* sk
, struct sk_buff
* skb
)
224 /* Charge it to the socket. */
226 if (sock_queue_rcv_skb(sk
,skb
)<0)
228 IP_INC_STATS(IpInDiscards
);
233 IP_INC_STATS(IpInDelivers
);
238 * This should be the easiest of all, all we do is
239 * copy it into a buffer. All demultiplexing is done
243 int raw_rcv(struct sock
*sk
, struct sk_buff
*skb
)
245 /* Now we need to copy this into memory. */
246 skb_trim(skb
, ntohs(skb
->nh
.iph
->tot_len
));
248 skb
->h
.raw
= skb
->nh
.raw
;
250 raw_rcv_skb(sk
, skb
);
258 struct dst_entry
*dst
;
262 * Send a RAW IP packet.
266 * Callback support is trivial for SOCK_RAW
269 static int raw_getfrag(const void *p
, char *to
, unsigned int offset
, unsigned int fraglen
)
271 struct rawfakehdr
*rfh
= (struct rawfakehdr
*) p
;
272 return memcpy_fromiovecend(to
, rfh
->iov
, offset
, fraglen
);
276 * IPPROTO_RAW needs extra work.
279 static int raw_getrawfrag(const void *p
, char *to
, unsigned int offset
, unsigned int fraglen
)
281 struct rawfakehdr
*rfh
= (struct rawfakehdr
*) p
;
283 if (memcpy_fromiovecend(to
, rfh
->iov
, offset
, fraglen
))
287 struct iphdr
*iph
= (struct iphdr
*)to
;
289 iph
->saddr
= rfh
->saddr
;
291 iph
->tot_len
=htons(fraglen
); /* This is right as you can't frag
294 * Deliberate breach of modularity to keep
295 * ip_build_xmit clean (well less messy).
298 ip_select_ident(iph
, rfh
->dst
);
299 iph
->check
=ip_fast_csum((unsigned char *)iph
, iph
->ihl
);
304 static int raw_sendmsg(struct sock
*sk
, struct msghdr
*msg
, int len
)
306 struct ipcm_cookie ipc
;
307 struct rawfakehdr rfh
;
308 struct rtable
*rt
= NULL
;
314 /* This check is ONLY to check for arithmetic overflow
315 on integer(!) len. Not more! Real check will be made
316 in ip_build_xmit --ANK
318 BTW socket.c -> af_*.c -> ... make multiple
319 invalid conversions size_t -> int. We MUST repair it f.e.
320 by replacing all of them with size_t and revise all
321 the places sort of len += sizeof(struct iphdr)
322 If len was ULONG_MAX-10 it would be cathastrophe --ANK
325 if (len
< 0 || len
> 0xFFFF)
332 if (msg
->msg_flags
& MSG_OOB
) /* Mirror BSD error message compatibility */
336 * Get and verify the address.
339 if (msg
->msg_namelen
) {
340 struct sockaddr_in
*usin
= (struct sockaddr_in
*)msg
->msg_name
;
341 if (msg
->msg_namelen
< sizeof(*usin
))
343 if (usin
->sin_family
!= AF_INET
) {
344 static int complained
;
346 printk(KERN_INFO
"%s forgot to set AF_INET in raw sendmsg. Fix it!\n", current
->comm
);
347 if (usin
->sin_family
)
350 daddr
= usin
->sin_addr
.s_addr
;
351 /* ANK: I did not forget to get protocol from port field.
352 * I just do not know, who uses this weirdness.
353 * IP_HDRINCL is much more convenient.
356 if (sk
->state
!= TCP_ESTABLISHED
)
361 ipc
.addr
= sk
->saddr
;
363 ipc
.oif
= sk
->bound_dev_if
;
365 if (msg
->msg_controllen
) {
366 int tmp
= ip_cmsg_send(msg
, &ipc
);
373 rfh
.saddr
= ipc
.addr
;
377 ipc
.opt
= sk
->protinfo
.af_inet
.opt
;
381 /* Linux does not mangle headers on raw sockets,
382 * so that IP options + IP_HDRINCL is non-sense.
384 if (sk
->protinfo
.af_inet
.hdrincl
)
389 daddr
= ipc
.opt
->faddr
;
392 tos
= RT_TOS(sk
->protinfo
.af_inet
.tos
) | sk
->localroute
;
393 if (msg
->msg_flags
&MSG_DONTROUTE
)
396 if (MULTICAST(daddr
)) {
398 ipc
.oif
= sk
->protinfo
.af_inet
.mc_index
;
400 rfh
.saddr
= sk
->protinfo
.af_inet
.mc_addr
;
403 err
= ip_route_output(&rt
, daddr
, rfh
.saddr
, tos
, ipc
.oif
);
409 if (rt
->rt_flags
&RTCF_BROADCAST
&& !sk
->broadcast
)
412 if (msg
->msg_flags
&MSG_CONFIRM
)
416 rfh
.iov
= msg
->msg_iov
;
417 rfh
.saddr
= rt
->rt_src
;
418 rfh
.dst
= &rt
->u
.dst
;
420 ipc
.addr
= rt
->rt_dst
;
421 err
=ip_build_xmit(sk
, sk
->protinfo
.af_inet
.hdrincl
? raw_getrawfrag
: raw_getfrag
,
422 &rfh
, len
, &ipc
, rt
, msg
->msg_flags
);
429 return err
<0 ? err
: len
;
432 dst_confirm(&rt
->u
.dst
);
433 if (!(msg
->msg_flags
&MSG_PROBE
) || len
)
434 goto back_from_confirm
;
439 static void raw_close(struct sock
*sk
, long timeout
)
442 * Raw sockets may have direct kernel refereneces. Kill them.
444 ip_ra_control(sk
, 0, NULL
);
446 inet_sock_release(sk
);
449 /* This gets rid of all the nasties in af_inet. -DaveM */
450 static int raw_bind(struct sock
*sk
, struct sockaddr
*uaddr
, int addr_len
)
452 struct sockaddr_in
*addr
= (struct sockaddr_in
*) uaddr
;
455 if((sk
->state
!= TCP_CLOSE
) || (addr_len
< sizeof(struct sockaddr_in
)))
457 chk_addr_ret
= inet_addr_type(addr
->sin_addr
.s_addr
);
458 if(addr
->sin_addr
.s_addr
!= 0 && chk_addr_ret
!= RTN_LOCAL
&&
459 chk_addr_ret
!= RTN_MULTICAST
&& chk_addr_ret
!= RTN_BROADCAST
)
460 return -EADDRNOTAVAIL
;
461 sk
->rcv_saddr
= sk
->saddr
= addr
->sin_addr
.s_addr
;
462 if(chk_addr_ret
== RTN_MULTICAST
|| chk_addr_ret
== RTN_BROADCAST
)
463 sk
->saddr
= 0; /* Use device */
469 * This should be easy, if there is something there
470 * we return it, otherwise we block.
473 int raw_recvmsg(struct sock
*sk
, struct msghdr
*msg
, int len
,
474 int noblock
, int flags
,int *addr_len
)
479 struct sockaddr_in
*sin
=(struct sockaddr_in
*)msg
->msg_name
;
485 *addr_len
=sizeof(*sin
);
487 if (flags
& MSG_ERRQUEUE
)
488 return ip_recv_error(sk
, msg
, len
);
490 skb
=skb_recv_datagram(sk
,flags
,noblock
,&err
);
497 msg
->msg_flags
|= MSG_TRUNC
;
501 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
505 sk
->stamp
=skb
->stamp
;
507 /* Copy the address. */
509 sin
->sin_family
= AF_INET
;
510 sin
->sin_addr
.s_addr
= skb
->nh
.iph
->saddr
;
512 if (sk
->protinfo
.af_inet
.cmsg_flags
)
513 ip_cmsg_recv(msg
, skb
);
515 skb_free_datagram(sk
, skb
);
516 return (err
? : copied
);
519 static int raw_init(struct sock
*sk
)
521 struct raw_opt
*tp
= &(sk
->tp_pinfo
.tp_raw4
);
522 if (sk
->num
== IPPROTO_ICMP
)
523 memset(&tp
->filter
, 0, sizeof(tp
->filter
));
527 static int raw_seticmpfilter(struct sock
*sk
, char *optval
, int optlen
)
529 if (optlen
> sizeof(struct icmp_filter
))
530 optlen
= sizeof(struct icmp_filter
);
531 if (copy_from_user(&sk
->tp_pinfo
.tp_raw4
.filter
, optval
, optlen
))
536 static int raw_geticmpfilter(struct sock
*sk
, char *optval
, int *optlen
)
540 if (get_user(len
,optlen
))
542 if (len
> sizeof(struct icmp_filter
))
543 len
= sizeof(struct icmp_filter
);
544 if (put_user(len
, optlen
))
546 if (copy_to_user(optval
, &sk
->tp_pinfo
.tp_raw4
.filter
, len
))
551 static int raw_setsockopt(struct sock
*sk
, int level
, int optname
,
552 char *optval
, int optlen
)
554 if (level
!= SOL_RAW
)
555 return ip_setsockopt(sk
, level
, optname
, optval
, optlen
);
559 if (sk
->num
!= IPPROTO_ICMP
)
561 return raw_seticmpfilter(sk
, optval
, optlen
);
567 static int raw_getsockopt(struct sock
*sk
, int level
, int optname
,
568 char *optval
, int *optlen
)
570 if (level
!= SOL_RAW
)
571 return ip_getsockopt(sk
, level
, optname
, optval
, optlen
);
575 if (sk
->num
!= IPPROTO_ICMP
)
577 return raw_geticmpfilter(sk
, optval
, optlen
);
583 static void get_raw_sock(struct sock
*sp
, char *tmpbuf
, int i
)
585 unsigned int dest
, src
;
588 unsigned long timer_expires
;
594 timer_active
= (timer_pending(&sp
->timer
)) ? 2 : 0;
595 timer_expires
= (timer_active
== 2 ? sp
->timer
.expires
: jiffies
);
596 sprintf(tmpbuf
, "%4d: %08X:%04X %08X:%04X"
597 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
598 i
, src
, srcp
, dest
, destp
, sp
->state
,
599 atomic_read(&sp
->wmem_alloc
), atomic_read(&sp
->rmem_alloc
),
600 timer_active
, timer_expires
-jiffies
, 0,
601 sp
->socket
->inode
->i_uid
, 0,
602 sp
->socket
? sp
->socket
->inode
->i_ino
: 0,
603 atomic_read(&sp
->refcnt
), sp
);
606 int raw_get_info(char *buffer
, char **start
, off_t offset
, int length
)
608 int len
= 0, num
= 0, i
;
614 len
+= sprintf(buffer
, "%-127s\n",
615 " sl local_address rem_address st tx_queue "
616 "rx_queue tr tm->when retrnsmt uid timeout inode");
618 read_lock(&raw_v4_lock
);
619 for (i
= 0; i
< RAWV4_HTABLE_SIZE
; i
++) {
622 for (sk
= raw_v4_htable
[i
]; sk
; sk
= sk
->next
, num
++) {
623 if (sk
->family
!= PF_INET
)
628 get_raw_sock(sk
, tmpbuf
, i
);
629 len
+= sprintf(buffer
+len
, "%-127s\n", tmpbuf
);
635 read_unlock(&raw_v4_lock
);
636 begin
= len
- (pos
- offset
);
637 *start
= buffer
+ begin
;
646 struct proto raw_prot
= {
649 connect
: udp_connect
,
650 disconnect
: udp_disconnect
,
651 #ifdef CONFIG_IP_MROUTE
655 setsockopt
: raw_setsockopt
,
656 getsockopt
: raw_getsockopt
,
657 sendmsg
: raw_sendmsg
,
658 recvmsg
: raw_recvmsg
,
660 backlog_rcv
: raw_rcv_skb
,
662 unhash
: raw_v4_unhash
,