3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Adapted from linux/net/ipv4/raw.c
10 * $Id: raw.c,v 1.42 2000/11/28 13:38:38 davem Exp $
13 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version.
21 #include <linux/errno.h>
22 #include <linux/types.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/sched.h>
26 #include <linux/net.h>
27 #include <linux/in6.h>
28 #include <linux/netdevice.h>
29 #include <linux/if_arp.h>
30 #include <linux/icmpv6.h>
31 #include <asm/uaccess.h>
37 #include <net/ndisc.h>
38 #include <net/protocol.h>
39 #include <net/ip6_route.h>
40 #include <net/addrconf.h>
41 #include <net/transp_v6.h>
43 #include <net/inet_common.h>
45 #include <net/rawv6.h>
/*
 * Table of raw IPv6 sockets and the rwlock used around accesses to it in
 * this file.  A socket's bucket is chosen with
 * (sk->num & (RAWV6_HTABLE_SIZE - 1)) when it is hashed; the hash/unhash
 * paths take the lock with write_lock_bh() and the delivery and listing
 * paths take it with read_lock().
 */
47 struct sock
*raw_v6_htable
[RAWV6_HTABLE_SIZE
];
48 rwlock_t raw_v6_lock
= RW_LOCK_UNLOCKED
;
/*
 * raw_v6_hash - link a raw socket into raw_v6_htable.
 * @sk: socket to insert; its bucket is sk->num & (RAWV6_HTABLE_SIZE - 1).
 *
 * Runs between write_lock_bh() and write_unlock_bh() on raw_v6_lock.  The
 * visible statements point sk->next at the current bucket head, redirect
 * the old head's pprev to &sk->next, and bump the protocol use count with
 * sock_prot_inc_use().
 * NOTE(review): the statements that store sk into the bucket and set
 * sk->pprev are not visible in this listing (it appears to drop lines) -
 * confirm against the complete source.
 */
50 static void raw_v6_hash(struct sock
*sk
)
52 struct sock
**skp
= &raw_v6_htable
[sk
->num
& (RAWV6_HTABLE_SIZE
- 1)];
54 write_lock_bh(&raw_v6_lock
);
55 if ((sk
->next
= *skp
) != NULL
)
56 (*skp
)->pprev
= &sk
->next
;
59 sock_prot_inc_use(sk
->prot
);
61 write_unlock_bh(&raw_v6_lock
);
/*
 * raw_v6_unhash - unlink a raw socket from its hash chain.
 * @sk: socket to remove.
 *
 * Between write_lock_bh() and write_unlock_bh() on raw_v6_lock: the
 * successor's pprev is set to sk->pprev, *sk->pprev is set to sk->next,
 * and the protocol use count is dropped with sock_prot_dec_use().
 * NOTE(review): no NULL checks on sk->pprev or sk->next are visible before
 * these dereferences; the guarding conditions are presumably on lines
 * missing from this listing - confirm against the complete source.
 */
64 static void raw_v6_unhash(struct sock
*sk
)
66 write_lock_bh(&raw_v6_lock
);
69 sk
->next
->pprev
= sk
->pprev
;
70 *sk
->pprev
= sk
->next
;
72 sock_prot_dec_use(sk
->prot
);
75 write_unlock_bh(&raw_v6_lock
);
79 /* Grumble... icmp and ip_input want to get at this... */
/*
 * __raw_v6_lookup - scan a socket chain for a raw socket matching a packet.
 * @sk:       first socket to examine; the chain is walked through ->next.
 * @num:      protocol number being looked up.
 * @loc_addr: local address to match (ipv6_raw_deliver() passes the
 *            packet's destination address here).
 * @rmt_addr: remote address to match (ipv6_raw_deliver() passes the
 *            packet's source address here).
 *
 * Visible tests per socket: a non-wildcard np->daddr is compared with
 * @rmt_addr; a non-wildcard np->rcv_saddr is compared with @loc_addr, and
 * when @loc_addr is a multicast address inet6_mc_check() is consulted.
 * NOTE(review): the comparison against @num, the action taken after each
 * test and the return statement are not visible in this listing - confirm
 * against the complete source.
 */
80 struct sock
*__raw_v6_lookup(struct sock
*sk
, unsigned short num
,
81 struct in6_addr
*loc_addr
, struct in6_addr
*rmt_addr
)
84 int addr_type
= ipv6_addr_type(loc_addr
);
86 for(s
= sk
; s
; s
= s
->next
) {
88 struct ipv6_pinfo
*np
= &s
->net_pinfo
.af_inet6
;
90 if (!ipv6_addr_any(&np
->daddr
) &&
91 ipv6_addr_cmp(&np
->daddr
, rmt_addr
))
94 if (!ipv6_addr_any(&np
->rcv_saddr
)) {
95 if (ipv6_addr_cmp(&np
->rcv_saddr
, loc_addr
) == 0)
97 if ((addr_type
& IPV6_ADDR_MULTICAST
) &&
98 inet6_mc_check(s
, loc_addr
))
/*
 * icmpv6_filter - test a packet's ICMPv6 type against a socket's filter.
 * @sk:  raw socket whose tp_pinfo.tp_raw.filter bitmap is consulted.
 * @skb: packet; the ICMPv6 header is taken to sit immediately after the
 *       fixed IPv6 header (skb->nh.ipv6h + 1).
 *
 * Returns the result of test_bit(icmph->icmp6_type, &opt->filter).
 * NOTE(review): presumably a set bit means "do not deliver this type" -
 * the callers' reaction is on lines not visible here, confirm.
 */
112 static __inline__
int icmpv6_filter(struct sock
*sk
, struct sk_buff
*skb
)
114 struct icmp6hdr
*icmph
;
115 struct raw6_opt
*opt
;
117 opt
= &sk
->tp_pinfo
.tp_raw
;
118 icmph
= (struct icmp6hdr
*) (skb
->nh
.ipv6h
+ 1);
119 return test_bit(icmph
->icmp6_type
, &opt
->filter
);
123 * demultiplex raw sockets.
124 * (should consider queueing the skb in the sock receive_queue
125 * without calling rawv6.c)
/*
 * ipv6_raw_deliver - hand an incoming packet to matching raw sockets.
 * @skb:     received packet; the source address is read from its IPv6
 *           header.
 * @nexthdr: protocol number; the bucket is raw_v6_htable[nexthdr &
 *           (MAX_INET_PROTOS - 1)].
 * @len:     length passed through to rawv6_rcv().
 *
 * Between read_lock() and read_unlock() on raw_v6_lock: the first match
 * from __raw_v6_lookup() is kept in sk, and each further match (sk2) is
 * given its own skb_clone() of the packet through rawv6_rcv().  For
 * IPPROTO_ICMPV6 every candidate is also checked with icmpv6_filter().
 * NOTE(review): the initialisation of daddr and sk2, the handling of a
 * failed skb_clone() (GFP_ATOMIC can fail), and the returned value are not
 * visible in this listing - confirm against the complete source.
 */
127 struct sock
* ipv6_raw_deliver(struct sk_buff
*skb
,
128 int nexthdr
, unsigned long len
)
130 struct in6_addr
*saddr
;
131 struct in6_addr
*daddr
;
132 struct sock
*sk
, *sk2
;
135 saddr
= &skb
->nh
.ipv6h
->saddr
;
138 hash
= nexthdr
& (MAX_INET_PROTOS
- 1);
140 read_lock(&raw_v6_lock
);
141 sk
= raw_v6_htable
[hash
];
144 * The first socket found will be delivered after
145 * delivery to transport protocols.
151 sk
= __raw_v6_lookup(sk
, nexthdr
, daddr
, saddr
);
156 while ((sk2
= __raw_v6_lookup(sk2
->next
, nexthdr
, daddr
, saddr
))) {
157 struct sk_buff
*buff
;
159 if (nexthdr
== IPPROTO_ICMPV6
&&
160 icmpv6_filter(sk2
, skb
))
163 buff
= skb_clone(skb
, GFP_ATOMIC
);
165 rawv6_rcv(sk2
, buff
, len
);
169 if (sk
&& nexthdr
== IPPROTO_ICMPV6
&& icmpv6_filter(sk
, skb
))
175 read_unlock(&raw_v6_lock
);
181 /* This cleans up af_inet6 a bit. -DaveM */
/*
 * rawv6_bind - bind a raw IPv6 socket to a local address.
 * @sk:       socket being bound.
 * @uaddr:    caller-supplied address, interpreted as struct sockaddr_in6.
 * @addr_len: length of @uaddr; compared with SIN6_LEN_RFC2133.
 *
 * Visible checks, in order: a v4-mapped address returns -EADDRNOTAVAIL;
 * sk->state is compared with TCP_CLOSE; for a link-local address a
 * non-zero sin6_scope_id (only read when @addr_len covers the whole
 * sockaddr_in6) replaces sk->bound_dev_if, which is then tested for zero;
 * a non-ANY, non-multicast address is verified with ipv6_chk_addr().
 * Afterwards sk->rcv_saddr is set to v4addr and the address is copied into
 * af_inet6.rcv_saddr and, unless it is multicast, af_inet6.saddr.
 * NOTE(review): the error codes of most failing checks, any socket
 * locking, and the final return are on lines not visible in this listing -
 * confirm against the complete source.
 */
182 static int rawv6_bind(struct sock
*sk
, struct sockaddr
*uaddr
, int addr_len
)
184 struct sockaddr_in6
*addr
= (struct sockaddr_in6
*) uaddr
;
189 if (addr_len
< SIN6_LEN_RFC2133
)
191 addr_type
= ipv6_addr_type(&addr
->sin6_addr
);
193 /* Raw sockets are IPv6 only */
194 if (addr_type
== IPV6_ADDR_MAPPED
)
195 return(-EADDRNOTAVAIL
);
200 if (sk
->state
!= TCP_CLOSE
)
203 if (addr_type
& IPV6_ADDR_LINKLOCAL
) {
204 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
205 addr
->sin6_scope_id
) {
206 /* Override any existing binding, if another one
207 * is supplied by user.
209 sk
->bound_dev_if
= addr
->sin6_scope_id
;
212 /* Binding to link-local address requires an interface */
213 if (sk
->bound_dev_if
== 0)
217 /* Check if the address belongs to the host. */
218 if (addr_type
!= IPV6_ADDR_ANY
) {
219 /* ipv4 addr of the socket is invalid. Only the
220 * unpecified and mapped address have a v4 equivalent.
222 v4addr
= LOOPBACK4_IPV6
;
223 if (!(addr_type
& IPV6_ADDR_MULTICAST
)) {
224 err
= -EADDRNOTAVAIL
;
225 if (!ipv6_chk_addr(&addr
->sin6_addr
, NULL
))
230 sk
->rcv_saddr
= v4addr
;
232 ipv6_addr_copy(&sk
->net_pinfo
.af_inet6
.rcv_saddr
, &addr
->sin6_addr
);
233 if (!(addr_type
& IPV6_ADDR_MULTICAST
))
234 ipv6_addr_copy(&sk
->net_pinfo
.af_inet6
.saddr
, &addr
->sin6_addr
);
/*
 * rawv6_err - process an ICMPv6 error reported for a raw socket.
 * @sk:    socket the error concerns.
 * @skb:   buffer carrying the ICMPv6 error.
 * @hdr:   not referenced in the visible lines.
 * @opt:   not referenced in the visible lines.
 * @type:  ICMPv6 type, translated by icmpv6_err_convert().
 * @code:  ICMPv6 code, translated by icmpv6_err_convert().
 * @buff:  pointer into the packet; first compared with skb->tail.
 * @info:  passed through ntohl() to ipv6_icmp_error().
 *
 * Visible flow: the error is only considered when the socket has recverr
 * set or is in TCP_ESTABLISHED; harderr comes from icmpv6_err_convert(),
 * except that for ICMPV6_PKT_TOOBIG it is (pmtudisc == IPV6_PMTUDISC_DO);
 * with recverr set the error is queued through ipv6_icmp_error(); when
 * recverr or harderr holds, sk->error_report() is called.
 * NOTE(review): the early-return statements and the store of err into the
 * socket are not visible in this listing - confirm.
 */
241 void rawv6_err(struct sock
*sk
, struct sk_buff
*skb
, struct ipv6hdr
*hdr
,
242 struct inet6_skb_parm
*opt
,
243 int type
, int code
, unsigned char *buff
, u32 info
)
248 if (buff
> skb
->tail
)
251 /* Report error on raw socket, if:
252 1. User requested recverr.
253 2. Socket is connected (otherwise the error indication
254 is useless without recverr and error is hard.
256 if (!sk
->net_pinfo
.af_inet6
.recverr
&& sk
->state
!= TCP_ESTABLISHED
)
259 harderr
= icmpv6_err_convert(type
, code
, &err
);
260 if (type
== ICMPV6_PKT_TOOBIG
)
261 harderr
= (sk
->net_pinfo
.af_inet6
.pmtudisc
== IPV6_PMTUDISC_DO
);
263 if (sk
->net_pinfo
.af_inet6
.recverr
)
264 ipv6_icmp_error(sk
, skb
, err
, 0, ntohl(info
), buff
);
266 if (sk
->net_pinfo
.af_inet6
.recverr
|| harderr
) {
268 sk
->error_report(sk
);
/*
 * rawv6_rcv_skb - queue a packet on a raw socket's receive queue.
 * @sk:  destination socket.
 * @skb: packet to queue.
 *
 * Calls sock_queue_rcv_skb(); a negative result bumps Ip6InDiscards, and
 * Ip6InDelivers is bumped on the code path after that branch.
 * NOTE(review): what happens to skb on failure and the returned values are
 * on lines not visible in this listing - confirm.
 */
272 static inline int rawv6_rcv_skb(struct sock
* sk
, struct sk_buff
* skb
)
274 /* Charge it to the socket. */
275 if (sock_queue_rcv_skb(sk
,skb
)<0) {
276 IP6_INC_STATS_BH(Ip6InDiscards
);
281 IP6_INC_STATS_BH(Ip6InDelivers
);
286 * This is next to useless...
287 * if we demultiplex in network layer we don't need the extra call
288 * just to queue the skb...
289 * maybe we could have the network decide upon a hint if it
290 * should call raw_rcv for demultiplexing
/*
 * rawv6_rcv - receive entry point for one raw socket.
 * @sk:  socket the packet is for.
 * @skb: packet (ipv6_raw_deliver() passes a clone here).
 * @len: not referenced in the visible lines.
 *
 * When sk->protinfo.af_inet.hdrincl is set, skb->h.raw is moved back to
 * skb->nh.raw before the packet is queued with rawv6_rcv_skb().
 * NOTE(review): the return value is not visible in this listing.
 */
292 int rawv6_rcv(struct sock
*sk
, struct sk_buff
*skb
, unsigned long len
)
294 if (sk
->protinfo
.af_inet
.hdrincl
)
295 skb
->h
.raw
= skb
->nh
.raw
;
297 rawv6_rcv_skb(sk
, skb
);
303 * This should be easy, if there is something there
304 * we return it, otherwise we block.
/*
 * rawv6_recvmsg - receive one datagram from a raw IPv6 socket.
 * @sk:       socket to read from.
 * @msg:      destination; msg_name is treated as struct sockaddr_in6 and
 *            msg_iov receives the payload.
 * @len:      passed to ipv6_recv_error() on the error-queue path.
 * @noblock:  forwarded to skb_recv_datagram().
 * @flags:    MSG_ERRQUEUE diverts the call to ipv6_recv_error().
 * @addr_len: set to sizeof(struct sockaddr_in6).
 *
 * Visible flow: fetch an skb with skb_recv_datagram(); the byte count is
 * skb->tail - skb->h.raw and is copied with skb_copy_datagram_iovec();
 * msg_flags may get MSG_TRUNC; the source address is written to sin6 with
 * flowinfo 0 and scope id 0, except that a link-local source takes its
 * scope id from the iif stored in skb->cb; sock_recv_timestamp() is
 * called, and datagram_recv_ctl() too when any rxopt is enabled; finally
 * the skb is released with skb_free_datagram().
 * NOTE(review): the NULL checks on addr_len, sin6 and skb, the condition
 * that sets MSG_TRUNC, and the return value are on lines not visible in
 * this listing - confirm.
 */
307 int rawv6_recvmsg(struct sock
*sk
, struct msghdr
*msg
, int len
,
308 int noblock
, int flags
, int *addr_len
)
310 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)msg
->msg_name
;
318 *addr_len
=sizeof(*sin6
);
320 if (flags
& MSG_ERRQUEUE
)
321 return ipv6_recv_error(sk
, msg
, len
);
323 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
327 copied
= skb
->tail
- skb
->h
.raw
;
330 msg
->msg_flags
|= MSG_TRUNC
;
333 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
337 /* Copy the address. */
339 sin6
->sin6_family
= AF_INET6
;
340 memcpy(&sin6
->sin6_addr
, &skb
->nh
.ipv6h
->saddr
,
341 sizeof(struct in6_addr
));
342 sin6
->sin6_flowinfo
= 0;
343 sin6
->sin6_scope_id
= 0;
344 if (ipv6_addr_type(&sin6
->sin6_addr
) & IPV6_ADDR_LINKLOCAL
) {
345 struct inet6_skb_parm
*opt
= (struct inet6_skb_parm
*) skb
->cb
;
346 sin6
->sin6_scope_id
= opt
->iif
;
350 sock_recv_timestamp(msg
, sk
, skb
);
352 if (sk
->net_pinfo
.af_inet6
.rxopt
.all
)
353 datagram_recv_ctl(sk
, msg
, skb
);
357 skb_free_datagram(sk
, skb
);
/*
 * Per-send state handed to rawv6_frag_cksum() as its opaque data pointer.
 * Only the daddr member is visible in this listing; rawv6_frag_cksum()
 * also reads members named iov, len, proto and cksum, so the full
 * definition must be checked against the complete source.
 */
366 struct rawv6_fakehdr
{
372 struct in6_addr
*daddr
;
/*
 * rawv6_getfrag - fragment-fill callback used when no checksum is needed.
 * @data:   opaque pointer, cast to the struct iovec array to read from.
 * @saddr:  not referenced in the visible lines.
 * @buff:   destination buffer for this fragment.
 * @offset: byte offset into the iovec data.
 * @len:    number of bytes to copy.
 *
 * Returns whatever memcpy_fromiovecend() returns.
 */
375 static int rawv6_getfrag(const void *data
, struct in6_addr
*saddr
,
376 char *buff
, unsigned int offset
, unsigned int len
)
378 struct iovec
*iov
= (struct iovec
*) data
;
380 return memcpy_fromiovecend(buff
, iov
, offset
, len
);
/*
 * rawv6_frag_cksum - fragment-fill callback that also checksums the data.
 * @data:   opaque pointer, cast to struct rawv6_fakehdr.
 * @addr:   address used as the first argument of csum_ipv6_magic().
 * @buff:   destination buffer for this fragment.
 * @offset: byte offset into hdr->iov.
 * (the rest of the parameter list is not visible in this listing)
 *
 * Visible flow: data is copied from hdr->iov with
 * csum_partial_copy_fromiovecend(); hdr->cksum is then folded with
 * csum_ipv6_magic() over addr, daddr, hdr->len and hdr->proto; when the
 * socket's tp_raw offset is below len, a __u16 pointer into buff at that
 * offset is formed, otherwise a KERN_DEBUG message is printed.
 * NOTE(review): the condition selecting the finalisation step, the source
 * of sk and daddr, the store through the csum pointer and the return value
 * are on lines not visible here - confirm against the complete source.
 */
383 static int rawv6_frag_cksum(const void *data
, struct in6_addr
*addr
,
384 char *buff
, unsigned int offset
,
387 struct rawv6_fakehdr
*hdr
= (struct rawv6_fakehdr
*) data
;
389 if (csum_partial_copy_fromiovecend(buff
, hdr
->iov
, offset
,
395 struct raw6_opt
*opt
;
396 struct in6_addr
*daddr
;
399 opt
= &sk
->tp_pinfo
.tp_raw
;
406 hdr
->cksum
= csum_ipv6_magic(addr
, daddr
, hdr
->len
,
407 hdr
->proto
, hdr
->cksum
);
409 if (opt
->offset
< len
) {
412 csum
= (__u16
*) (buff
+ opt
->offset
);
416 printk(KERN_DEBUG
"icmp: cksum offset too big\n");
/*
 * rawv6_sendmsg - transmit one datagram on a raw IPv6 socket.
 * @sk:  sending socket.
 * @msg: message; msg_name (optional struct sockaddr_in6) gives the
 *       destination, msg_iov the payload, msg_control ancillary options.
 * @len: payload length; returned on success.
 *
 * Visible flow:
 *  - msg_flags is tested for MSG_OOB.
 *  - With an address: its length is compared with SIN6_LEN_RFC2133 and its
 *    family with AF_INET6; proto is ntohs(sin6_port); the destination is
 *    sin6_addr, or the flow label's dst when fl6_sock_lookup() finds a
 *    label; on a TCP_ESTABLISHED socket a destination equal to the
 *    connected one is replaced by the socket's own copy; a non-zero
 *    sin6_scope_id sets fl.oif for a link-local destination.
 *  - Without an address: the socket state is compared with
 *    TCP_ESTABLISHED, and the connected daddr and np->flow_label are used.
 *  - ipv6_addr_any(daddr) is tested; fl.oif may be taken from
 *    sk->bound_dev_if.
 *  - With control data: options are zeroed and parsed by
 *    datagram_send_ctl(), then merged via fl6_merge_options().
 *  - fl.fl6_src defaults to np->saddr when that is not the any-address.
 *  - ip6_build_xmit() is called with rawv6_frag_cksum and a local
 *    rawv6_fakehdr when raw_opt->checksum is set, otherwise with
 *    rawv6_getfrag and msg_iov directly.
 *  - The flow label is released and err (if negative) or len is returned.
 * NOTE(review): the error codes, the overflow check on len, the remaining
 * fakehdr/flowi initialisation and the exact branch structure are on lines
 * not visible in this listing - confirm against the complete source.
 */
424 static int rawv6_sendmsg(struct sock
*sk
, struct msghdr
*msg
, int len
)
426 struct ipv6_txoptions opt_space
;
427 struct sockaddr_in6
* sin6
= (struct sockaddr_in6
*) msg
->msg_name
;
428 struct ipv6_pinfo
*np
= &sk
->net_pinfo
.af_inet6
;
429 struct ipv6_txoptions
*opt
= NULL
;
430 struct ip6_flowlabel
*flowlabel
= NULL
;
432 int addr_len
= msg
->msg_namelen
;
433 struct in6_addr
*daddr
;
434 struct raw6_opt
*raw_opt
;
439 /* Rough check on arithmetic overflow,
440 better check is made in ip6_build_xmit
445 /* Mirror BSD error message compatibility */
446 if (msg
->msg_flags
& MSG_OOB
)
450 * Get and verify the address.
453 fl
.fl6_flowlabel
= 0;
457 if (addr_len
< SIN6_LEN_RFC2133
)
460 if (sin6
->sin6_family
&& sin6
->sin6_family
!= AF_INET6
)
463 /* port is the proto value [0..255] carried in nexthdr */
464 proto
= ntohs(sin6
->sin6_port
);
472 daddr
= &sin6
->sin6_addr
;
474 fl
.fl6_flowlabel
= sin6
->sin6_flowinfo
&IPV6_FLOWINFO_MASK
;
475 if (fl
.fl6_flowlabel
&IPV6_FLOWLABEL_MASK
) {
476 flowlabel
= fl6_sock_lookup(sk
, fl
.fl6_flowlabel
);
477 if (flowlabel
== NULL
)
479 daddr
= &flowlabel
->dst
;
483 /* Otherwise it will be difficult to maintain sk->dst_cache. */
484 if (sk
->state
== TCP_ESTABLISHED
&&
485 !ipv6_addr_cmp(daddr
, &sk
->net_pinfo
.af_inet6
.daddr
))
486 daddr
= &sk
->net_pinfo
.af_inet6
.daddr
;
488 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
489 sin6
->sin6_scope_id
&&
490 ipv6_addr_type(daddr
)&IPV6_ADDR_LINKLOCAL
)
491 fl
.oif
= sin6
->sin6_scope_id
;
493 if (sk
->state
!= TCP_ESTABLISHED
)
497 daddr
= &(sk
->net_pinfo
.af_inet6
.daddr
);
498 fl
.fl6_flowlabel
= np
->flow_label
;
501 if (ipv6_addr_any(daddr
)) {
503 * unspecfied destination address
504 * treated as error... is this correct ?
510 fl
.oif
= sk
->bound_dev_if
;
513 if (msg
->msg_controllen
) {
515 memset(opt
, 0, sizeof(struct ipv6_txoptions
));
517 err
= datagram_send_ctl(msg
, &fl
, opt
, &hlimit
);
519 fl6_sock_release(flowlabel
);
522 if ((fl
.fl6_flowlabel
&IPV6_FLOWLABEL_MASK
) && !flowlabel
) {
523 flowlabel
= fl6_sock_lookup(sk
, fl
.fl6_flowlabel
);
524 if (flowlabel
== NULL
)
527 if (!(opt
->opt_nflen
|opt
->opt_flen
))
533 opt
= fl6_merge_options(&opt_space
, flowlabel
, opt
);
535 raw_opt
= &sk
->tp_pinfo
.tp_raw
;
539 if (fl
.fl6_src
== NULL
&& !ipv6_addr_any(&np
->saddr
))
540 fl
.fl6_src
= &np
->saddr
;
541 fl
.uli_u
.icmpt
.type
= 0;
542 fl
.uli_u
.icmpt
.code
= 0;
544 if (raw_opt
->checksum
) {
545 struct rawv6_fakehdr hdr
;
547 hdr
.iov
= msg
->msg_iov
;
553 if (opt
&& opt
->srcrt
)
558 err
= ip6_build_xmit(sk
, rawv6_frag_cksum
, &hdr
, &fl
, len
,
559 opt
, hlimit
, msg
->msg_flags
);
561 err
= ip6_build_xmit(sk
, rawv6_getfrag
, msg
->msg_iov
, &fl
, len
,
562 opt
, hlimit
, msg
->msg_flags
);
565 fl6_sock_release(flowlabel
);
567 return err
<0?err
:len
;
/*
 * rawv6_seticmpfilter - install an ICMPv6 type filter on a raw socket.
 * @sk:      socket whose tp_pinfo.tp_raw.filter is overwritten.
 * @level:   not referenced in the visible lines.
 * @optname: not referenced in the visible lines.
 * @optval:  user pointer to the new filter contents.
 * @optlen:  bytes supplied; clamped to sizeof(struct icmp6_filter).
 *
 * Copies at most one struct icmp6_filter from user space with
 * copy_from_user().
 * NOTE(review): the optname dispatch and the return values are on lines
 * not visible in this listing; no lower bound on optlen is visible either -
 * confirm.
 */
570 static int rawv6_seticmpfilter(struct sock
*sk
, int level
, int optname
,
571 char *optval
, int optlen
)
575 if (optlen
> sizeof(struct icmp6_filter
))
576 optlen
= sizeof(struct icmp6_filter
);
577 if (copy_from_user(&sk
->tp_pinfo
.tp_raw
.filter
, optval
, optlen
))
/*
 * rawv6_geticmpfilter - read back a raw socket's ICMPv6 type filter.
 * @sk:      socket whose tp_pinfo.tp_raw.filter is reported.
 * @level:   not referenced in the visible lines.
 * @optname: not referenced in the visible lines.
 * @optval:  user buffer receiving the filter.
 * @optlen:  user pointer holding the buffer size on entry and receiving
 *           the number of bytes written.
 *
 * The requested length is fetched with get_user(), clamped to
 * sizeof(struct icmp6_filter), written back with put_user(), and that many
 * bytes of the filter are copied out with copy_to_user().
 * NOTE(review): the optname dispatch, any check for a negative length and
 * the return values are on lines not visible in this listing - confirm.
 */
587 static int rawv6_geticmpfilter(struct sock
*sk
, int level
, int optname
,
588 char *optval
, int *optlen
)
594 if (get_user(len
, optlen
))
596 if (len
> sizeof(struct icmp6_filter
))
597 len
= sizeof(struct icmp6_filter
);
598 if (put_user(len
, optlen
))
600 if (copy_to_user(optval
, &sk
->tp_pinfo
.tp_raw
.filter
, len
))
/*
 * rawv6_setsockopt - setsockopt handler for raw IPv6 sockets.
 * @sk:      target socket.
 * @level:   option level (the dispatch on it is not visible here).
 * @optname: option name; IPV6_CHECKSUM is tested explicitly.
 * @optval:  user pointer to the option value, read as an int.
 * @optlen:  length of @optval.
 *
 * Visible routing: one path compares sk->num with IPPROTO_ICMPV6 and then
 * hands off to rawv6_seticmpfilter(); another tests for IPV6_CHECKSUM and
 * otherwise delegates to ipv6_setsockopt(); the value is fetched with
 * get_user(); -ENOPROTOOPT is returned on one path.
 * NOTE(review): the level switch and the code that updates the raw6_opt
 * checksum settings are on lines not visible in this listing - confirm.
 */
611 static int rawv6_setsockopt(struct sock
*sk
, int level
, int optname
,
612 char *optval
, int optlen
)
614 struct raw6_opt
*opt
= &sk
->tp_pinfo
.tp_raw
;
622 if (sk
->num
!= IPPROTO_ICMPV6
)
624 return rawv6_seticmpfilter(sk
, level
, optname
, optval
,
627 if (optname
== IPV6_CHECKSUM
)
630 return ipv6_setsockopt(sk
, level
, optname
, optval
,
634 if (get_user(val
, (int *)optval
))
650 return(-ENOPROTOOPT
);
/*
 * rawv6_getsockopt - getsockopt handler for raw IPv6 sockets.
 * @sk:      target socket.
 * @level:   option level (the dispatch on it is not visible here).
 * @optname: option name; IPV6_CHECKSUM is tested explicitly.
 * @optval:  user buffer receiving the value.
 * @optlen:  user pointer holding the buffer size on entry and receiving
 *           the number of bytes written.
 *
 * Visible routing: one path compares sk->num with IPPROTO_ICMPV6 and then
 * hands off to rawv6_geticmpfilter(); another tests for IPV6_CHECKSUM and
 * otherwise delegates to ipv6_getsockopt().  For the locally handled
 * option the length is read with get_user(), opt->checksum is compared
 * with 0 to choose the value, the length is limited to sizeof(int), and
 * the length and value are written back with put_user()/copy_to_user().
 * NOTE(review): the values chosen for val and the return codes are on
 * lines not visible in this listing - confirm.
 */
654 static int rawv6_getsockopt(struct sock
*sk
, int level
, int optname
,
655 char *optval
, int *optlen
)
657 struct raw6_opt
*opt
= &sk
->tp_pinfo
.tp_raw
;
665 if (sk
->num
!= IPPROTO_ICMPV6
)
667 return rawv6_geticmpfilter(sk
, level
, optname
, optval
,
670 if (optname
== IPV6_CHECKSUM
)
673 return ipv6_getsockopt(sk
, level
, optname
, optval
,
677 if (get_user(len
,optlen
))
682 if (opt
->checksum
== 0)
691 len
=min(sizeof(int),len
);
693 if (put_user(len
, optlen
))
695 if (copy_to_user(optval
,&val
,len
))
/*
 * rawv6_ioctl - ioctl handler for raw IPv6 sockets.
 * @sk:  target socket.
 * @cmd: request code (the dispatch on it is not visible in this listing).
 * @arg: user pointer, written as an int with put_user().
 *
 * Two queries are visible: one reports atomic_read(&sk->wmem_alloc); the
 * other peeks at the head of sk->receive_queue under the queue's spinlock
 * (spin_lock_irq) and reports skb->tail - skb->h.raw for that packet.
 * NOTE(review): the command constants, the handling of an empty receive
 * queue and the default case are on lines not visible here - confirm.
 */
700 static int rawv6_ioctl(struct sock
*sk
, int cmd
, unsigned long arg
)
705 int amount
= atomic_read(&sk
->wmem_alloc
);
706 return put_user(amount
, (int *)arg
);
713 spin_lock_irq(&sk
->receive_queue
.lock
);
714 skb
= skb_peek(&sk
->receive_queue
);
716 amount
= skb
->tail
- skb
->h
.raw
;
717 spin_unlock_irq(&sk
->receive_queue
.lock
);
718 return put_user(amount
, (int *)arg
);
/*
 * rawv6_close - close handler for raw IPv6 sockets.
 * @sk:      socket being closed.
 * @timeout: not referenced in the visible lines.
 *
 * A socket whose protocol number is IPPROTO_RAW first calls
 * ip6_ra_control(sk, -1, NULL); every socket is then handed to
 * inet_sock_release().
 */
726 static void rawv6_close(struct sock
*sk
, long timeout
)
728 if (sk
->num
== IPPROTO_RAW
)
729 ip6_ra_control(sk
, -1, NULL
);
731 inet_sock_release(sk
);
/*
 * rawv6_init_sk - only the signature survives in this listing; the body is
 * not visible.
 * NOTE(review): presumably the per-socket initialisation callback for raw
 * IPv6 sockets - confirm against the complete source.
 */
734 static int rawv6_init_sk(struct sock
*sk
)
/* Output format for one listing line: left-justified, padded to 190 columns, newline-terminated. */
740 #define LINE_FMT "%-190s\n"
/*
 * get_raw6_sock - format one raw socket as a text line.
 * @sp:     socket to describe.
 * @tmpbuf: output buffer for the formatted text.
 * @i:      value supplied by the caller (raw6_get_info() passes the bucket
 *          index).
 *
 * The visible arguments print the local address (af_inet6.rcv_saddr) and
 * remote address (af_inet6.daddr) as four 32-bit hex words each, followed
 * by srcp/destp, the wmem_alloc and rmem_alloc counters, the reference
 * count and the socket pointer.
 * NOTE(review): the formatting call itself, the origin of srcp/destp and
 * several arguments are on lines not visible in this listing - confirm.
 */
742 static void get_raw6_sock(struct sock
*sp
, char *tmpbuf
, int i
)
744 struct in6_addr
*dest
, *src
;
747 dest
= &sp
->net_pinfo
.af_inet6
.daddr
;
748 src
= &sp
->net_pinfo
.af_inet6
.rcv_saddr
;
752 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
753 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p",
755 src
->s6_addr32
[0], src
->s6_addr32
[1],
756 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
757 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
758 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
760 atomic_read(&sp
->wmem_alloc
), atomic_read(&sp
->rmem_alloc
),
764 atomic_read(&sp
->refcnt
), sp
);
/*
 * raw6_get_info - produce the text listing of raw IPv6 sockets.
 * @buffer: output buffer.
 * @start:  set to buffer + begin, where begin = len - (pos - offset).
 * @offset: read offset requested by the caller.
 * @length: not referenced in the visible lines.
 *
 * A header line is emitted when offset < LINE_LEN + 1.  Then, between
 * read_lock() and read_unlock() on raw_v6_lock, every bucket of
 * raw_v6_htable is walked; sk->family is compared with PF_INET6, and
 * sockets are formatted by get_raw6_sock() and appended to buffer using
 * LINE_FMT.
 * NOTE(review): the pos bookkeeping, the window checks against
 * offset/length that bound the sprintf() output, and the return value are
 * on lines not visible in this listing - confirm.
 */
767 int raw6_get_info(char *buffer
, char **start
, off_t offset
, int length
)
769 int len
= 0, num
= 0, i
;
772 char tmpbuf
[LINE_LEN
+2];
774 if (offset
< LINE_LEN
+1)
775 len
+= sprintf(buffer
, LINE_FMT
,
777 "local_address " /* 38 */
778 "remote_address " /* 38 */
779 "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
780 " uid timeout inode"); /* 21 */
784 read_lock(&raw_v6_lock
);
785 for (i
= 0; i
< RAWV6_HTABLE_SIZE
; i
++) {
788 for (sk
= raw_v6_htable
[i
]; sk
; sk
= sk
->next
, num
++) {
789 if (sk
->family
!= PF_INET6
)
794 get_raw6_sock(sk
, tmpbuf
, i
);
795 len
+= sprintf(buffer
+len
, LINE_FMT
, tmpbuf
);
801 read_unlock(&raw_v6_lock
);
802 begin
= len
- (pos
- offset
);
803 *start
= buffer
+ begin
;
812 struct proto rawv6_prot
= {
815 connect
: udpv6_connect
,
816 disconnect
: udp_disconnect
,
819 destroy
: inet6_destroy_sock
,
820 setsockopt
: rawv6_setsockopt
,
821 getsockopt
: rawv6_getsockopt
,
822 sendmsg
: rawv6_sendmsg
,
823 recvmsg
: rawv6_recvmsg
,
825 backlog_rcv
: rawv6_rcv_skb
,
827 unhash
: raw_v6_unhash
,