3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 #include <linux/capability.h>
15 #include <linux/errno.h>
16 #include <linux/types.h>
17 #include <linux/kernel.h>
18 #include <linux/interrupt.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/in6.h>
22 #include <linux/ipv6.h>
23 #include <linux/route.h>
26 #include <net/ndisc.h>
27 #include <net/addrconf.h>
28 #include <net/transp_v6.h>
29 #include <net/ip6_route.h>
30 #include <net/tcp_states.h>
32 #include <linux/errqueue.h>
33 #include <asm/uaccess.h>
35 int ip6_datagram_connect(struct sock
*sk
, struct sockaddr
*uaddr
, int addr_len
)
37 struct sockaddr_in6
*usin
= (struct sockaddr_in6
*) uaddr
;
38 struct inet_sock
*inet
= inet_sk(sk
);
39 struct ipv6_pinfo
*np
= inet6_sk(sk
);
40 struct in6_addr
*daddr
, *final_p
= NULL
, final
;
41 struct dst_entry
*dst
;
43 struct ip6_flowlabel
*flowlabel
= NULL
;
47 if (usin
->sin6_family
== AF_INET
) {
48 if (__ipv6_only_sock(sk
))
50 err
= ip4_datagram_connect(sk
, uaddr
, addr_len
);
54 if (addr_len
< SIN6_LEN_RFC2133
)
57 if (usin
->sin6_family
!= AF_INET6
)
60 memset(&fl
, 0, sizeof(fl
));
62 fl
.fl6_flowlabel
= usin
->sin6_flowinfo
&IPV6_FLOWINFO_MASK
;
63 if (fl
.fl6_flowlabel
&IPV6_FLOWLABEL_MASK
) {
64 flowlabel
= fl6_sock_lookup(sk
, fl
.fl6_flowlabel
);
65 if (flowlabel
== NULL
)
67 ipv6_addr_copy(&usin
->sin6_addr
, &flowlabel
->dst
);
71 addr_type
= ipv6_addr_type(&usin
->sin6_addr
);
73 if (addr_type
== IPV6_ADDR_ANY
) {
77 usin
->sin6_addr
.s6_addr
[15] = 0x01;
80 daddr
= &usin
->sin6_addr
;
82 if (addr_type
== IPV6_ADDR_MAPPED
) {
83 struct sockaddr_in sin
;
85 if (__ipv6_only_sock(sk
)) {
89 sin
.sin_family
= AF_INET
;
90 sin
.sin_addr
.s_addr
= daddr
->s6_addr32
[3];
91 sin
.sin_port
= usin
->sin6_port
;
93 err
= ip4_datagram_connect(sk
,
94 (struct sockaddr
*) &sin
,
101 ipv6_addr_set_v4mapped(inet
->inet_daddr
, &np
->daddr
);
103 if (ipv6_addr_any(&np
->saddr
))
104 ipv6_addr_set_v4mapped(inet
->inet_saddr
, &np
->saddr
);
106 if (ipv6_addr_any(&np
->rcv_saddr
))
107 ipv6_addr_set_v4mapped(inet
->inet_rcv_saddr
,
113 if (addr_type
&IPV6_ADDR_LINKLOCAL
) {
114 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
115 usin
->sin6_scope_id
) {
116 if (sk
->sk_bound_dev_if
&&
117 sk
->sk_bound_dev_if
!= usin
->sin6_scope_id
) {
121 sk
->sk_bound_dev_if
= usin
->sin6_scope_id
;
124 if (!sk
->sk_bound_dev_if
&& (addr_type
& IPV6_ADDR_MULTICAST
))
125 sk
->sk_bound_dev_if
= np
->mcast_oif
;
127 /* Connect to link-local address requires an interface */
128 if (!sk
->sk_bound_dev_if
) {
134 ipv6_addr_copy(&np
->daddr
, daddr
);
135 np
->flow_label
= fl
.fl6_flowlabel
;
137 inet
->inet_dport
= usin
->sin6_port
;
140 * Check for a route to destination an obtain the
141 * destination cache for it.
144 fl
.proto
= sk
->sk_protocol
;
145 ipv6_addr_copy(&fl
.fl6_dst
, &np
->daddr
);
146 ipv6_addr_copy(&fl
.fl6_src
, &np
->saddr
);
147 fl
.oif
= sk
->sk_bound_dev_if
;
148 fl
.mark
= sk
->sk_mark
;
149 fl
.fl_ip_dport
= inet
->inet_dport
;
150 fl
.fl_ip_sport
= inet
->inet_sport
;
152 if (!fl
.oif
&& (addr_type
&IPV6_ADDR_MULTICAST
))
153 fl
.oif
= np
->mcast_oif
;
155 security_sk_classify_flow(sk
, &fl
);
158 if (flowlabel
->opt
&& flowlabel
->opt
->srcrt
) {
159 struct rt0_hdr
*rt0
= (struct rt0_hdr
*) flowlabel
->opt
->srcrt
;
160 ipv6_addr_copy(&final
, &fl
.fl6_dst
);
161 ipv6_addr_copy(&fl
.fl6_dst
, rt0
->addr
);
164 } else if (np
->opt
&& np
->opt
->srcrt
) {
165 struct rt0_hdr
*rt0
= (struct rt0_hdr
*)np
->opt
->srcrt
;
166 ipv6_addr_copy(&final
, &fl
.fl6_dst
);
167 ipv6_addr_copy(&fl
.fl6_dst
, rt0
->addr
);
171 err
= ip6_dst_lookup(sk
, &dst
, &fl
);
175 ipv6_addr_copy(&fl
.fl6_dst
, final_p
);
177 err
= __xfrm_lookup(sock_net(sk
), &dst
, &fl
, sk
, XFRM_LOOKUP_WAIT
);
180 err
= ip6_dst_blackhole(sk
, &dst
, &fl
);
185 /* source address lookup done in ip6_dst_lookup */
187 if (ipv6_addr_any(&np
->saddr
))
188 ipv6_addr_copy(&np
->saddr
, &fl
.fl6_src
);
190 if (ipv6_addr_any(&np
->rcv_saddr
)) {
191 ipv6_addr_copy(&np
->rcv_saddr
, &fl
.fl6_src
);
192 inet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
195 ip6_dst_store(sk
, dst
,
196 ipv6_addr_equal(&fl
.fl6_dst
, &np
->daddr
) ?
198 #ifdef CONFIG_IPV6_SUBTREES
199 ipv6_addr_equal(&fl
.fl6_src
, &np
->saddr
) ?
204 sk
->sk_state
= TCP_ESTABLISHED
;
206 fl6_sock_release(flowlabel
);
210 void ipv6_icmp_error(struct sock
*sk
, struct sk_buff
*skb
, int err
,
211 __be16 port
, u32 info
, u8
*payload
)
213 struct ipv6_pinfo
*np
= inet6_sk(sk
);
214 struct icmp6hdr
*icmph
= icmp6_hdr(skb
);
215 struct sock_exterr_skb
*serr
;
220 skb
= skb_clone(skb
, GFP_ATOMIC
);
224 serr
= SKB_EXT_ERR(skb
);
225 serr
->ee
.ee_errno
= err
;
226 serr
->ee
.ee_origin
= SO_EE_ORIGIN_ICMP6
;
227 serr
->ee
.ee_type
= icmph
->icmp6_type
;
228 serr
->ee
.ee_code
= icmph
->icmp6_code
;
230 serr
->ee
.ee_info
= info
;
231 serr
->ee
.ee_data
= 0;
232 serr
->addr_offset
= (u8
*)&(((struct ipv6hdr
*)(icmph
+ 1))->daddr
) -
233 skb_network_header(skb
);
236 __skb_pull(skb
, payload
- skb
->data
);
237 skb_reset_transport_header(skb
);
239 if (sock_queue_err_skb(sk
, skb
))
243 void ipv6_local_error(struct sock
*sk
, int err
, struct flowi
*fl
, u32 info
)
245 struct ipv6_pinfo
*np
= inet6_sk(sk
);
246 struct sock_exterr_skb
*serr
;
253 skb
= alloc_skb(sizeof(struct ipv6hdr
), GFP_ATOMIC
);
257 skb_put(skb
, sizeof(struct ipv6hdr
));
258 skb_reset_network_header(skb
);
260 ipv6_addr_copy(&iph
->daddr
, &fl
->fl6_dst
);
262 serr
= SKB_EXT_ERR(skb
);
263 serr
->ee
.ee_errno
= err
;
264 serr
->ee
.ee_origin
= SO_EE_ORIGIN_LOCAL
;
265 serr
->ee
.ee_type
= 0;
266 serr
->ee
.ee_code
= 0;
268 serr
->ee
.ee_info
= info
;
269 serr
->ee
.ee_data
= 0;
270 serr
->addr_offset
= (u8
*)&iph
->daddr
- skb_network_header(skb
);
271 serr
->port
= fl
->fl_ip_dport
;
273 __skb_pull(skb
, skb_tail_pointer(skb
) - skb
->data
);
274 skb_reset_transport_header(skb
);
276 if (sock_queue_err_skb(sk
, skb
))
281 * Handle MSG_ERRQUEUE
283 int ipv6_recv_error(struct sock
*sk
, struct msghdr
*msg
, int len
)
285 struct ipv6_pinfo
*np
= inet6_sk(sk
);
286 struct sock_exterr_skb
*serr
;
287 struct sk_buff
*skb
, *skb2
;
288 struct sockaddr_in6
*sin
;
290 struct sock_extended_err ee
;
291 struct sockaddr_in6 offender
;
297 skb
= skb_dequeue(&sk
->sk_error_queue
);
303 msg
->msg_flags
|= MSG_TRUNC
;
306 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, copied
);
310 sock_recv_timestamp(msg
, sk
, skb
);
312 serr
= SKB_EXT_ERR(skb
);
314 sin
= (struct sockaddr_in6
*)msg
->msg_name
;
316 const unsigned char *nh
= skb_network_header(skb
);
317 sin
->sin6_family
= AF_INET6
;
318 sin
->sin6_flowinfo
= 0;
319 sin
->sin6_port
= serr
->port
;
320 sin
->sin6_scope_id
= 0;
321 if (serr
->ee
.ee_origin
== SO_EE_ORIGIN_ICMP6
) {
322 ipv6_addr_copy(&sin
->sin6_addr
,
323 (struct in6_addr
*)(nh
+ serr
->addr_offset
));
326 (*(__be32
*)(nh
+ serr
->addr_offset
- 24) &
328 if (ipv6_addr_type(&sin
->sin6_addr
) & IPV6_ADDR_LINKLOCAL
)
329 sin
->sin6_scope_id
= IP6CB(skb
)->iif
;
331 ipv6_addr_set_v4mapped(*(__be32
*)(nh
+ serr
->addr_offset
),
336 memcpy(&errhdr
.ee
, &serr
->ee
, sizeof(struct sock_extended_err
));
337 sin
= &errhdr
.offender
;
338 sin
->sin6_family
= AF_UNSPEC
;
339 if (serr
->ee
.ee_origin
!= SO_EE_ORIGIN_LOCAL
) {
340 sin
->sin6_family
= AF_INET6
;
341 sin
->sin6_flowinfo
= 0;
342 sin
->sin6_scope_id
= 0;
343 if (serr
->ee
.ee_origin
== SO_EE_ORIGIN_ICMP6
) {
344 ipv6_addr_copy(&sin
->sin6_addr
, &ipv6_hdr(skb
)->saddr
);
346 datagram_recv_ctl(sk
, msg
, skb
);
347 if (ipv6_addr_type(&sin
->sin6_addr
) & IPV6_ADDR_LINKLOCAL
)
348 sin
->sin6_scope_id
= IP6CB(skb
)->iif
;
350 struct inet_sock
*inet
= inet_sk(sk
);
352 ipv6_addr_set_v4mapped(ip_hdr(skb
)->saddr
,
354 if (inet
->cmsg_flags
)
355 ip_cmsg_recv(msg
, skb
);
359 put_cmsg(msg
, SOL_IPV6
, IPV6_RECVERR
, sizeof(errhdr
), &errhdr
);
361 /* Now we could try to dump offended packet options */
363 msg
->msg_flags
|= MSG_ERRQUEUE
;
366 /* Reset and regenerate socket error */
367 spin_lock_bh(&sk
->sk_error_queue
.lock
);
369 if ((skb2
= skb_peek(&sk
->sk_error_queue
)) != NULL
) {
370 sk
->sk_err
= SKB_EXT_ERR(skb2
)->ee
.ee_errno
;
371 spin_unlock_bh(&sk
->sk_error_queue
.lock
);
372 sk
->sk_error_report(sk
);
374 spin_unlock_bh(&sk
->sk_error_queue
.lock
);
385 int datagram_recv_ctl(struct sock
*sk
, struct msghdr
*msg
, struct sk_buff
*skb
)
387 struct ipv6_pinfo
*np
= inet6_sk(sk
);
388 struct inet6_skb_parm
*opt
= IP6CB(skb
);
389 unsigned char *nh
= skb_network_header(skb
);
391 if (np
->rxopt
.bits
.rxinfo
) {
392 struct in6_pktinfo src_info
;
394 src_info
.ipi6_ifindex
= opt
->iif
;
395 ipv6_addr_copy(&src_info
.ipi6_addr
, &ipv6_hdr(skb
)->daddr
);
396 put_cmsg(msg
, SOL_IPV6
, IPV6_PKTINFO
, sizeof(src_info
), &src_info
);
399 if (np
->rxopt
.bits
.rxhlim
) {
400 int hlim
= ipv6_hdr(skb
)->hop_limit
;
401 put_cmsg(msg
, SOL_IPV6
, IPV6_HOPLIMIT
, sizeof(hlim
), &hlim
);
404 if (np
->rxopt
.bits
.rxtclass
) {
405 int tclass
= (ntohl(*(__be32
*)ipv6_hdr(skb
)) >> 20) & 0xff;
406 put_cmsg(msg
, SOL_IPV6
, IPV6_TCLASS
, sizeof(tclass
), &tclass
);
409 if (np
->rxopt
.bits
.rxflow
&& (*(__be32
*)nh
& IPV6_FLOWINFO_MASK
)) {
410 __be32 flowinfo
= *(__be32
*)nh
& IPV6_FLOWINFO_MASK
;
411 put_cmsg(msg
, SOL_IPV6
, IPV6_FLOWINFO
, sizeof(flowinfo
), &flowinfo
);
414 /* HbH is allowed only once */
415 if (np
->rxopt
.bits
.hopopts
&& opt
->hop
) {
416 u8
*ptr
= nh
+ opt
->hop
;
417 put_cmsg(msg
, SOL_IPV6
, IPV6_HOPOPTS
, (ptr
[1]+1)<<3, ptr
);
421 (np
->rxopt
.bits
.dstopts
|| np
->rxopt
.bits
.srcrt
)) {
423 * Silly enough, but we need to reparse in order to
424 * report extension headers (except for HbH)
427 * Also note that IPV6_RECVRTHDRDSTOPTS is NOT
428 * (and WILL NOT be) defined because
429 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
431 unsigned int off
= sizeof(struct ipv6hdr
);
432 u8 nexthdr
= ipv6_hdr(skb
)->nexthdr
;
434 while (off
<= opt
->lastopt
) {
439 case IPPROTO_DSTOPTS
:
441 len
= (ptr
[1] + 1) << 3;
442 if (np
->rxopt
.bits
.dstopts
)
443 put_cmsg(msg
, SOL_IPV6
, IPV6_DSTOPTS
, len
, ptr
);
445 case IPPROTO_ROUTING
:
447 len
= (ptr
[1] + 1) << 3;
448 if (np
->rxopt
.bits
.srcrt
)
449 put_cmsg(msg
, SOL_IPV6
, IPV6_RTHDR
, len
, ptr
);
453 len
= (ptr
[1] + 2) << 2;
457 len
= (ptr
[1] + 1) << 3;
465 /* socket options in old style */
466 if (np
->rxopt
.bits
.rxoinfo
) {
467 struct in6_pktinfo src_info
;
469 src_info
.ipi6_ifindex
= opt
->iif
;
470 ipv6_addr_copy(&src_info
.ipi6_addr
, &ipv6_hdr(skb
)->daddr
);
471 put_cmsg(msg
, SOL_IPV6
, IPV6_2292PKTINFO
, sizeof(src_info
), &src_info
);
473 if (np
->rxopt
.bits
.rxohlim
) {
474 int hlim
= ipv6_hdr(skb
)->hop_limit
;
475 put_cmsg(msg
, SOL_IPV6
, IPV6_2292HOPLIMIT
, sizeof(hlim
), &hlim
);
477 if (np
->rxopt
.bits
.ohopopts
&& opt
->hop
) {
478 u8
*ptr
= nh
+ opt
->hop
;
479 put_cmsg(msg
, SOL_IPV6
, IPV6_2292HOPOPTS
, (ptr
[1]+1)<<3, ptr
);
481 if (np
->rxopt
.bits
.odstopts
&& opt
->dst0
) {
482 u8
*ptr
= nh
+ opt
->dst0
;
483 put_cmsg(msg
, SOL_IPV6
, IPV6_2292DSTOPTS
, (ptr
[1]+1)<<3, ptr
);
485 if (np
->rxopt
.bits
.osrcrt
&& opt
->srcrt
) {
486 struct ipv6_rt_hdr
*rthdr
= (struct ipv6_rt_hdr
*)(nh
+ opt
->srcrt
);
487 put_cmsg(msg
, SOL_IPV6
, IPV6_2292RTHDR
, (rthdr
->hdrlen
+1) << 3, rthdr
);
489 if (np
->rxopt
.bits
.odstopts
&& opt
->dst1
) {
490 u8
*ptr
= nh
+ opt
->dst1
;
491 put_cmsg(msg
, SOL_IPV6
, IPV6_2292DSTOPTS
, (ptr
[1]+1)<<3, ptr
);
496 int datagram_send_ctl(struct net
*net
,
497 struct msghdr
*msg
, struct flowi
*fl
,
498 struct ipv6_txoptions
*opt
,
499 int *hlimit
, int *tclass
)
501 struct in6_pktinfo
*src_info
;
502 struct cmsghdr
*cmsg
;
503 struct ipv6_rt_hdr
*rthdr
;
504 struct ipv6_opt_hdr
*hdr
;
508 for (cmsg
= CMSG_FIRSTHDR(msg
); cmsg
; cmsg
= CMSG_NXTHDR(msg
, cmsg
)) {
511 if (!CMSG_OK(msg
, cmsg
)) {
516 if (cmsg
->cmsg_level
!= SOL_IPV6
)
519 switch (cmsg
->cmsg_type
) {
521 case IPV6_2292PKTINFO
:
523 struct net_device
*dev
= NULL
;
525 if (cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct in6_pktinfo
))) {
530 src_info
= (struct in6_pktinfo
*)CMSG_DATA(cmsg
);
532 if (src_info
->ipi6_ifindex
) {
533 if (fl
->oif
&& src_info
->ipi6_ifindex
!= fl
->oif
)
535 fl
->oif
= src_info
->ipi6_ifindex
;
538 addr_type
= __ipv6_addr_type(&src_info
->ipi6_addr
);
542 dev
= dev_get_by_index_rcu(net
, fl
->oif
);
547 } else if (addr_type
& IPV6_ADDR_LINKLOCAL
) {
552 if (addr_type
!= IPV6_ADDR_ANY
) {
553 int strict
= __ipv6_addr_src_scope(addr_type
) <= IPV6_ADDR_SCOPE_LINKLOCAL
;
554 if (!ipv6_chk_addr(net
, &src_info
->ipi6_addr
,
555 strict
? dev
: NULL
, 0))
558 ipv6_addr_copy(&fl
->fl6_src
, &src_info
->ipi6_addr
);
570 if (cmsg
->cmsg_len
< CMSG_LEN(4)) {
575 if (fl
->fl6_flowlabel
&IPV6_FLOWINFO_MASK
) {
576 if ((fl
->fl6_flowlabel
^*(__be32
*)CMSG_DATA(cmsg
))&~IPV6_FLOWINFO_MASK
) {
581 fl
->fl6_flowlabel
= IPV6_FLOWINFO_MASK
& *(__be32
*)CMSG_DATA(cmsg
);
584 case IPV6_2292HOPOPTS
:
586 if (opt
->hopopt
|| cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct ipv6_opt_hdr
))) {
591 hdr
= (struct ipv6_opt_hdr
*)CMSG_DATA(cmsg
);
592 len
= ((hdr
->hdrlen
+ 1) << 3);
593 if (cmsg
->cmsg_len
< CMSG_LEN(len
)) {
597 if (!capable(CAP_NET_RAW
)) {
601 opt
->opt_nflen
+= len
;
605 case IPV6_2292DSTOPTS
:
606 if (cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct ipv6_opt_hdr
))) {
611 hdr
= (struct ipv6_opt_hdr
*)CMSG_DATA(cmsg
);
612 len
= ((hdr
->hdrlen
+ 1) << 3);
613 if (cmsg
->cmsg_len
< CMSG_LEN(len
)) {
617 if (!capable(CAP_NET_RAW
)) {
625 opt
->opt_flen
+= len
;
630 case IPV6_RTHDRDSTOPTS
:
631 if (cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct ipv6_opt_hdr
))) {
636 hdr
= (struct ipv6_opt_hdr
*)CMSG_DATA(cmsg
);
637 len
= ((hdr
->hdrlen
+ 1) << 3);
638 if (cmsg
->cmsg_len
< CMSG_LEN(len
)) {
642 if (!capable(CAP_NET_RAW
)) {
646 if (cmsg
->cmsg_type
== IPV6_DSTOPTS
) {
647 opt
->opt_flen
+= len
;
650 opt
->opt_nflen
+= len
;
657 if (cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct ipv6_rt_hdr
))) {
662 rthdr
= (struct ipv6_rt_hdr
*)CMSG_DATA(cmsg
);
664 switch (rthdr
->type
) {
665 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
666 case IPV6_SRCRT_TYPE_2
:
667 if (rthdr
->hdrlen
!= 2 ||
668 rthdr
->segments_left
!= 1) {
679 len
= ((rthdr
->hdrlen
+ 1) << 3);
681 if (cmsg
->cmsg_len
< CMSG_LEN(len
)) {
686 /* segments left must also match */
687 if ((rthdr
->hdrlen
>> 1) != rthdr
->segments_left
) {
692 opt
->opt_nflen
+= len
;
695 if (cmsg
->cmsg_type
== IPV6_2292RTHDR
&& opt
->dst1opt
) {
696 int dsthdrlen
= ((opt
->dst1opt
->hdrlen
+1)<<3);
698 opt
->opt_nflen
+= dsthdrlen
;
699 opt
->dst0opt
= opt
->dst1opt
;
701 opt
->opt_flen
-= dsthdrlen
;
706 case IPV6_2292HOPLIMIT
:
708 if (cmsg
->cmsg_len
!= CMSG_LEN(sizeof(int))) {
713 *hlimit
= *(int *)CMSG_DATA(cmsg
);
714 if (*hlimit
< -1 || *hlimit
> 0xff) {
726 if (cmsg
->cmsg_len
!= CMSG_LEN(sizeof(int))) {
730 tc
= *(int *)CMSG_DATA(cmsg
);
731 if (tc
< -1 || tc
> 0xff)
740 LIMIT_NETDEBUG(KERN_DEBUG
"invalid cmsg type: %d\n",