1 /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.6 2002/04/28 05:40:26 suz Exp $ */
2 /* $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
37 #include <sys/domain.h>
38 #include <sys/protosw.h>
39 #include <sys/socket.h>
40 #include <sys/errno.h>
42 #include <sys/kernel.h>
43 #include <sys/syslog.h>
44 #include <sys/thread2.h>
47 #include <net/route.h>
49 #include <netinet/in.h>
50 #include <netinet/in_var.h>
51 #include <netinet/ip6.h>
52 #include <netinet6/ip6_var.h>
53 #include <netinet/icmp6.h>
55 #include <net/net_osdep.h>
58 * Define it to get a correct behavior on per-interface statistics.
59 * You will need to perform an extra routing table lookup, per fragment,
60 * to do it. This may, or may not be, a performance hit.
62 #define IN6_IFSTAT_STRICT
/*
 * Forward declarations of the file-local list helpers that are defined
 * further down in this file: frag6_enq/frag6_deq operate on the chain of
 * fragments (struct ip6asfrag) hanging off one reassembly queue, while
 * frag6_insque/frag6_remque operate on the list of queues (struct ip6q).
 * frag6_freef releases one reassembly queue.
 */
64 static void frag6_enq (struct ip6asfrag
*, struct ip6asfrag
*);
65 static void frag6_deq (struct ip6asfrag
*);
66 static void frag6_insque (struct ip6q
*, struct ip6q
*);
67 static void frag6_remque (struct ip6q
*);
68 static void frag6_freef (struct ip6q
*);
70 /* XXX we eventually need splreass6, or some real semaphore */
/*
 * Flag set to 1 by the input and timer code in this file while they work
 * on the reassembly queues and reset to 0 when they finish; the drain code
 * tests it before touching the queues.
 */
71 int frag6_doing_reass
;
/*
 * Count of reassembly queues currently allocated: incremented when a new
 * queue is created and decremented when one is released.
 */
72 u_int frag6_nfragpackets
;
/*
 * Head of the doubly linked list of reassembly queues.  The initialisation
 * code makes ip6q_next and ip6q_prev point back at the head itself.
 */
74 struct ip6q ip6q
; /* ip6 reassemble queue */
/* malloc type passed to the kmalloc()/kfree() calls in this file. */
77 MALLOC_DEFINE(M_FTABLE
, "fragment", "fragment reassembly header");
80 * Initialise reassembly queue and fragment identifier.
87 ip6_maxfragpackets
= nmbclusters
/ 4;
88 ip6_maxfrags
= nmbclusters
/ 4;
91 * In many cases, krandom() here does NOT return a random number,
92 * as initialization during bootstrap time occurs in a fixed order.
95 ip6_id
= krandom() ^ tv
.tv_usec
;
96 ip6q
.ip6q_next
= ip6q
.ip6q_prev
= &ip6q
;
100 * In RFC 2460, the fragmentation and reassembly rules do not agree with
101 * each other in terms of Next Header field handling in the Fragment header.
102 * While the sender will use the same value for all of the fragmented packets,
103 * the receiver is advised not to check the consistency.
105 * fragment rule (p20):
106 * (2) A Fragment header containing:
107 * The Next Header value that identifies the first header of
108 * the Fragmentable Part of the original packet.
109 * -> next header field is same for all fragments
111 * reassembly rule (p21):
112 * The Next Header field of the last header of the Unfragmentable
113 * Part is obtained from the Next Header field of the first
114 * fragment's Fragment header.
115 * -> should grab it from the first fragment only
117 * The following note also contradicts the fragment rule - no one is going to
118 * send different fragments with different Next Header field values.
120 * additional note (p22):
121 * The Next Header values in the Fragment headers of different
122 * fragments of the same original packet may differ. Only the value
123 * from the Offset zero fragment packet is used for reassembly.
124 * -> should grab it from the first fragment only
126 * There is no explicit reason given in the RFC. Historical reason maybe?
132 frag6_input(struct mbuf
**mp
, int *offp
, int proto
)
134 struct mbuf
*m
= *mp
, *t
;
136 struct ip6_frag
*ip6f
;
138 struct ip6asfrag
*af6
, *ip6af
, *af6dwn
;
139 int offset
= *offp
, nxt
, i
, next
;
141 int fragoff
, frgpartlen
; /* must be larger than u_int16_t */
142 struct ifnet
*dstifp
;
143 #ifdef IN6_IFSTAT_STRICT
144 static struct route_in6 ro
;
145 struct sockaddr_in6
*dst
;
148 ip6
= mtod(m
, struct ip6_hdr
*);
149 #ifndef PULLDOWN_TEST
150 IP6_EXTHDR_CHECK(m
, offset
, sizeof(struct ip6_frag
), IPPROTO_DONE
);
151 ip6f
= (struct ip6_frag
*)((caddr_t
)ip6
+ offset
);
153 IP6_EXTHDR_GET(ip6f
, struct ip6_frag
*, m
, offset
, sizeof(*ip6f
));
159 #ifdef IN6_IFSTAT_STRICT
160 /* find the destination interface of the packet. */
161 dst
= (struct sockaddr_in6
*)&ro
.ro_dst
;
163 (!(ro
.ro_rt
->rt_flags
& RTF_UP
) ||
164 !IN6_ARE_ADDR_EQUAL(&dst
->sin6_addr
, &ip6
->ip6_dst
))) {
168 if (ro
.ro_rt
== NULL
) {
169 bzero(dst
, sizeof(*dst
));
170 dst
->sin6_family
= AF_INET6
;
171 dst
->sin6_len
= sizeof(struct sockaddr_in6
);
172 dst
->sin6_addr
= ip6
->ip6_dst
;
174 rtalloc((struct route
*)&ro
);
175 if (ro
.ro_rt
!= NULL
&& ro
.ro_rt
->rt_ifa
!= NULL
)
176 dstifp
= ((struct in6_ifaddr
*)ro
.ro_rt
->rt_ifa
)->ia_ifp
;
178 /* we are violating the spec, this is not the destination interface */
179 if (m
->m_flags
& M_PKTHDR
)
180 dstifp
= m
->m_pkthdr
.rcvif
;
183 /* jumbo payload can't contain a fragment header */
184 if (ip6
->ip6_plen
== 0) {
185 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
, offset
);
186 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
191 * check whether fragment packet's fragment length is
192 * multiple of 8 octets.
193 * sizeof(struct ip6_frag) == 8
194 * sizeof(struct ip6_hdr) = 40
196 if ((ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
) &&
197 (((ntohs(ip6
->ip6_plen
) - offset
) & 0x7) != 0)) {
198 icmp6_error(m
, ICMP6_PARAM_PROB
,
199 ICMP6_PARAMPROB_HEADER
,
200 offsetof(struct ip6_hdr
, ip6_plen
));
201 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
205 ip6stat
.ip6s_fragments
++;
206 in6_ifstat_inc(dstifp
, ifs6_reass_reqd
);
208 /* offset now points to data portion */
209 offset
+= sizeof(struct ip6_frag
);
211 frag6_doing_reass
= 1;
214 * Enforce upper bound on number of fragments.
215 * If ip6_maxfrags is 0, never accept fragments.
216 * If ip6_maxfrags is -1, accept all fragments without limitation.
218 if (ip6_maxfrags
< 0)
220 else if (frag6_nfrags
>= (u_int
)ip6_maxfrags
)
223 for (q6
= ip6q
.ip6q_next
; q6
!= &ip6q
; q6
= q6
->ip6q_next
)
224 if (ip6f
->ip6f_ident
== q6
->ip6q_ident
&&
225 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_src
, &q6
->ip6q_src
) &&
226 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, &q6
->ip6q_dst
))
231 * the first fragment to arrive, create a reassembly queue.
236 * Enforce upper bound on number of fragmented packets
237 * for which we attempt reassembly;
238 * If ip6_maxfragpackets is 0, never accept fragments.
239 * If ip6_maxfragpackets is -1, accept all fragments without limitation.
241 if (ip6_maxfragpackets
< 0)
243 else if (frag6_nfragpackets
>= (u_int
)ip6_maxfragpackets
)
245 frag6_nfragpackets
++;
246 q6
= (struct ip6q
*)kmalloc(sizeof(struct ip6q
), M_FTABLE
,
251 frag6_insque(q6
, &ip6q
);
253 /* ip6q_nxt will be filled afterwards, from 1st fragment */
254 q6
->ip6q_down
= q6
->ip6q_up
= (struct ip6asfrag
*)q6
;
256 q6
->ip6q_nxtp
= (u_char
*)nxtp
;
258 q6
->ip6q_ident
= ip6f
->ip6f_ident
;
259 q6
->ip6q_arrive
= 0; /* Is it used anywhere? */
260 q6
->ip6q_ttl
= IPV6_FRAGTTL
;
261 q6
->ip6q_src
= ip6
->ip6_src
;
262 q6
->ip6q_dst
= ip6
->ip6_dst
;
263 q6
->ip6q_unfrglen
= -1; /* The 1st fragment has not arrived. */
268 * If it's the 1st fragment, record the length of the
269 * unfragmentable part and the next header of the fragment header.
271 fragoff
= ntohs(ip6f
->ip6f_offlg
& IP6F_OFF_MASK
);
273 q6
->ip6q_unfrglen
= offset
- sizeof(struct ip6_hdr
)
274 - sizeof(struct ip6_frag
);
275 q6
->ip6q_nxt
= ip6f
->ip6f_nxt
;
279 * Check that the reassembled packet would not exceed 65535 bytes
281 * If it would exceed, discard the fragment and return an ICMP error.
283 frgpartlen
= sizeof(struct ip6_hdr
) + ntohs(ip6
->ip6_plen
) - offset
;
284 if (q6
->ip6q_unfrglen
>= 0) {
285 /* The 1st fragment has already arrived. */
286 if (q6
->ip6q_unfrglen
+ fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
287 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
288 offset
- sizeof(struct ip6_frag
) +
289 offsetof(struct ip6_frag
, ip6f_offlg
));
290 frag6_doing_reass
= 0;
291 return (IPPROTO_DONE
);
294 else if (fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
295 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
296 offset
- sizeof(struct ip6_frag
) +
297 offsetof(struct ip6_frag
, ip6f_offlg
));
298 frag6_doing_reass
= 0;
299 return (IPPROTO_DONE
);
302 * If it's the first fragment, do the above check for each
303 * fragment already stored in the reassembly queue.
306 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
308 af6dwn
= af6
->ip6af_down
;
310 if (q6
->ip6q_unfrglen
+ af6
->ip6af_off
+ af6
->ip6af_frglen
>
312 struct mbuf
*merr
= IP6_REASS_MBUF(af6
);
313 struct ip6_hdr
*ip6err
;
314 int erroff
= af6
->ip6af_offset
;
316 /* dequeue the fragment. */
318 kfree(af6
, M_FTABLE
);
320 /* adjust pointer. */
321 ip6err
= mtod(merr
, struct ip6_hdr
*);
324 * Restore source and destination addresses
325 * in the erroneous IPv6 header.
327 ip6err
->ip6_src
= q6
->ip6q_src
;
328 ip6err
->ip6_dst
= q6
->ip6q_dst
;
330 icmp6_error(merr
, ICMP6_PARAM_PROB
,
331 ICMP6_PARAMPROB_HEADER
,
332 erroff
- sizeof(struct ip6_frag
) +
333 offsetof(struct ip6_frag
, ip6f_offlg
));
338 ip6af
= (struct ip6asfrag
*)kmalloc(sizeof(struct ip6asfrag
), M_FTABLE
,
342 ip6af
->ip6af_head
= ip6
->ip6_flow
;
343 ip6af
->ip6af_len
= ip6
->ip6_plen
;
344 ip6af
->ip6af_nxt
= ip6
->ip6_nxt
;
345 ip6af
->ip6af_hlim
= ip6
->ip6_hlim
;
346 ip6af
->ip6af_mff
= ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
;
347 ip6af
->ip6af_off
= fragoff
;
348 ip6af
->ip6af_frglen
= frgpartlen
;
349 ip6af
->ip6af_offset
= offset
;
350 IP6_REASS_MBUF(ip6af
) = m
;
353 af6
= (struct ip6asfrag
*)q6
;
358 * Find a segment which begins after this one does.
360 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
361 af6
= af6
->ip6af_down
)
362 if (af6
->ip6af_off
> ip6af
->ip6af_off
)
366 * RFC 5722: Drop overlapping fragments
368 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
369 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
372 kfree(ip6af
, M_FTABLE
);
376 if (af6
!= (struct ip6asfrag
*)q6
) {
377 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
379 kfree(ip6af
, M_FTABLE
);
387 * Stick new segment in its place;
388 * check for complete reassembly.
389 * Move to front of packet queue, as we are
390 * the most recently active fragmented packet.
392 frag6_enq(ip6af
, af6
->ip6af_up
);
396 if (q6
!= ip6q
.ip6q_next
) {
398 frag6_insque(q6
, &ip6q
);
402 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
403 af6
= af6
->ip6af_down
) {
404 if (af6
->ip6af_off
!= next
) {
405 frag6_doing_reass
= 0;
408 next
+= af6
->ip6af_frglen
;
410 if (af6
->ip6af_up
->ip6af_mff
) {
411 frag6_doing_reass
= 0;
416 * Reassembly is complete; concatenate fragments.
418 ip6af
= q6
->ip6q_down
;
419 t
= m
= IP6_REASS_MBUF(ip6af
);
420 af6
= ip6af
->ip6af_down
;
422 while (af6
!= (struct ip6asfrag
*)q6
) {
423 af6dwn
= af6
->ip6af_down
;
427 t
->m_next
= IP6_REASS_MBUF(af6
);
428 m_adj(t
->m_next
, af6
->ip6af_offset
);
429 kfree(af6
, M_FTABLE
);
433 /* adjust offset to point where the original next header starts */
434 offset
= ip6af
->ip6af_offset
- sizeof(struct ip6_frag
);
435 kfree(ip6af
, M_FTABLE
);
436 ip6
= mtod(m
, struct ip6_hdr
*);
437 ip6
->ip6_plen
= htons((u_short
)next
+ offset
- sizeof(struct ip6_hdr
));
438 ip6
->ip6_src
= q6
->ip6q_src
;
439 ip6
->ip6_dst
= q6
->ip6q_dst
;
442 *q6
->ip6q_nxtp
= (u_char
)(nxt
& 0xff);
446 * Delete the frag6 header at as little cost as possible.
448 if (offset
< m
->m_len
) {
449 ovbcopy((caddr_t
)ip6
, (caddr_t
)ip6
+ sizeof(struct ip6_frag
),
451 m
->m_data
+= sizeof(struct ip6_frag
);
452 m
->m_len
-= sizeof(struct ip6_frag
);
454 /* this comes with no copy if the boundary is on cluster */
455 if ((t
= m_split(m
, offset
, M_NOWAIT
)) == NULL
) {
457 frag6_nfrags
-= q6
->ip6q_nfrag
;
459 frag6_nfragpackets
--;
462 m_adj(t
, sizeof(struct ip6_frag
));
467 * Store NXT to the original.
470 char *prvnxtp
= ip6_get_prevhdr(m
, offset
); /* XXX */
475 frag6_nfrags
-= q6
->ip6q_nfrag
;
477 frag6_nfragpackets
--;
479 if (m
->m_flags
& M_PKTHDR
) { /* Isn't it always true? */
481 for (t
= m
; t
; t
= t
->m_next
)
483 m
->m_pkthdr
.len
= plen
;
486 ip6stat
.ip6s_reassembled
++;
487 in6_ifstat_inc(dstifp
, ifs6_reass_ok
);
490 * Reassembly complete, return the next protocol.
491 * Be sure to clear M_HASH to force the packet
492 * to be re-characterized.
494 m
->m_flags
&= ~M_HASH
;
499 frag6_doing_reass
= 0;
503 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
504 ip6stat
.ip6s_fragdropped
++;
506 frag6_doing_reass
= 0;
511 * Free a fragment reassembly header and all
512 * associated datagrams.
/*
 * frag6_freef: tear down the reassembly queue q6.
 *
 * q6 - reassembly queue to release.  Its fragment chain is walked through
 *      ip6q_down until the walk arrives back at q6 itself (the queue head
 *      doubles as the list sentinel).
 *
 * For the fragment whose offset is 0, the source and destination addresses
 * saved in q6 are written back into its IPv6 header and the mbuf is handed
 * to icmp6_error() as ICMP6_TIME_EXCEEDED / ICMP6_TIME_EXCEED_REASSEMBLY.
 * The fragment descriptor is released with kfree(), frag6_nfrags is reduced
 * by q6->ip6q_nfrag and frag6_nfragpackets by one.
 *
 * NOTE(review): several statements of this function are not visible in this
 * view (loop increment, dequeue step, handling of the mbufs of the other
 * fragments, release of q6 itself) - confirm against the complete source.
 */
515 frag6_freef(struct ip6q
*q6
)
517 struct ip6asfrag
*af6
, *down6
;
519 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
521 struct mbuf
*m
= IP6_REASS_MBUF(af6
);
/* Save the successor now: af6 is released with kfree() further down. */
523 down6
= af6
->ip6af_down
;
527 * Return ICMP time exceeded error for the 1st fragment.
528 * Just free other fragments.
530 if (af6
->ip6af_off
== 0) {
534 ip6
= mtod(m
, struct ip6_hdr
*);
536 /* restore source and destination addresses */
537 ip6
->ip6_src
= q6
->ip6q_src
;
538 ip6
->ip6_dst
= q6
->ip6q_dst
;
540 icmp6_error(m
, ICMP6_TIME_EXCEEDED
,
541 ICMP6_TIME_EXCEED_REASSEMBLY
, 0);
544 kfree(af6
, M_FTABLE
);
/* Global accounting: drop the fragments of this queue, then the queue. */
547 frag6_nfrags
-= q6
->ip6q_nfrag
;
549 frag6_nfragpackets
--;
553 * Put an ip fragment on a reassembly chain.
554 * Like insque, but pointers in middle of structure.
/*
 * frag6_enq: link fragment af6 into a reassembly chain directly below up6.
 *
 * af6 - fragment descriptor being inserted.
 * up6 - element after which af6 is placed; callers in this file pass the
 *       ip6af_up neighbour of the first fragment with a larger offset.
 *
 * NOTE(review): the assignment of af6->ip6af_up is not visible in this
 * view - confirm that it is set to up6 in the complete source.
 */
557 frag6_enq(struct ip6asfrag
*af6
, struct ip6asfrag
*up6
)
/* af6 takes over the old successor of up6 ... */
560 af6
->ip6af_down
= up6
->ip6af_down
;
/* ... that successor now points back up at af6 ... */
561 up6
->ip6af_down
->ip6af_up
= af6
;
/* ... and only then is the down link of up6 redirected to af6. */
562 up6
->ip6af_down
= af6
;
566 * To frag6_enq as remque is to insque.
/*
 * frag6_deq: unlink fragment af6 from its reassembly chain.
 *
 * af6 - fragment descriptor to remove.  Its two neighbours are joined to
 *       each other; the visible statements leave the ip6af_up and
 *       ip6af_down fields of af6 itself unchanged.
 */
569 frag6_deq(struct ip6asfrag
*af6
)
/* The predecessor skips over af6 ... */
571 af6
->ip6af_up
->ip6af_down
= af6
->ip6af_down
;
/* ... and the successor points back at the predecessor. */
572 af6
->ip6af_down
->ip6af_up
= af6
->ip6af_up
;
/*
 * frag6_insque: insert queue `new` into the list of reassembly queues
 * immediately after `old`.
 *
 * new - queue being inserted.
 * old - existing list element; callers in this file pass the list head
 *       &ip6q, which places `new` at the front of the list.
 *
 * old->ip6q_next is read twice before it is overwritten by the last
 * statement, so the order of the assignments matters.
 */
576 frag6_insque(struct ip6q
*new, struct ip6q
*old
)
578 new->ip6q_prev
= old
;
579 new->ip6q_next
= old
->ip6q_next
;
/* The former successor of old now has new as its predecessor. */
580 old
->ip6q_next
->ip6q_prev
= new;
581 old
->ip6q_next
= new;
/*
 * frag6_remque: unlink queue p6 from the list of reassembly queues.
 *
 * p6 - queue to remove.  Its predecessor and successor are joined to each
 *      other; the visible statements leave the ip6q_prev and ip6q_next
 *      fields of p6 itself unchanged.
 */
585 frag6_remque(struct ip6q
*p6
)
/* The predecessor skips over p6 ... */
587 p6
->ip6q_prev
->ip6q_next
= p6
->ip6q_next
;
/* ... and the successor points back at the predecessor. */
588 p6
->ip6q_next
->ip6q_prev
= p6
->ip6q_prev
;
592 * IPv6 reassembling timer processing;
593 * if a timer expires on a reassembly
602 frag6_doing_reass
= 1;
605 while (q6
!= &ip6q
) {
608 if (q6
->ip6q_prev
->ip6q_ttl
== 0) {
609 ip6stat
.ip6s_fragtimeout
++;
610 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
611 frag6_freef(q6
->ip6q_prev
);
615 * If we are over the maximum number of fragmented packets
616 * (due to the limit being lowered), drain off
617 * enough to get down to the new limit.
619 while (frag6_nfragpackets
> (u_int
)ip6_maxfragpackets
&&
621 ip6stat
.ip6s_fragoverflow
++;
622 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
623 frag6_freef(ip6q
.ip6q_prev
);
625 frag6_doing_reass
= 0;
629 * Routing changes might produce a better route than we last used;
630 * make sure we notice eventually, even if forwarding only for one
631 * destination and the cache is never replaced.
633 if (ip6_forward_rt
.ro_rt
) {
634 RTFREE(ip6_forward_rt
.ro_rt
);
635 ip6_forward_rt
.ro_rt
= NULL
;
637 if (ipsrcchk_rt
.ro_rt
) {
638 RTFREE(ipsrcchk_rt
.ro_rt
);
639 ipsrcchk_rt
.ro_rt
= NULL
;
647 * Drain off all datagram fragments.
652 if (frag6_doing_reass
)
654 while (ip6q
.ip6q_next
!= &ip6q
) {
655 ip6stat
.ip6s_fragdropped
++;
656 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
657 frag6_freef(ip6q
.ip6q_next
);