2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
34 * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp
38 * Changes and additions relating to SLiRP are
39 * Copyright (c) 1995 Danny Gasparovski.
41 * Please read the file COPYRIGHT for the
42 * terms and conditions of the copyright.
56 * IP initialization: fill in IP protocol switch table.
57 * All protocols not implemented in kernel go to raw IP protocol handler.
62 ipq
.next
= ipq
.prev
= (ipqp_32
)&ipq
;
63 ip_id
= tt
.tv_sec
& 0xffff;
70 * Ip input routine. Checksum and byte swap header. If fragmented
71 * try to reassemble. Process options. Pass to next level.
74 ip_input(struct mbuf
*m
)
76 register struct ip
*ip
;
79 DEBUG_CALL("ip_input");
80 DEBUG_ARG("m = %lx", (long)m
);
81 DEBUG_ARG("m_len = %d", m
->m_len
);
85 if (m
->m_len
< sizeof (struct ip
)) {
86 ipstat
.ips_toosmall
++;
90 ip
= mtod(m
, struct ip
*);
92 if (ip
->ip_v
!= IPVERSION
) {
94 //printf("bad version %d \n",ip->ip_v);
98 hlen
= ip
->ip_hl
<< 2;
99 if (hlen
<sizeof(struct ip
) || hlen
>m
->m_len
) {/* min header length */
100 ipstat
.ips_badhlen
++; /* or packet too short */
101 //printf("bad length\n");
105 /* keep ip header intact for ICMP reply
106 * ip->ip_sum = cksum(m, hlen);
111 //printf("bad sum\n");
116 * Convert fields to host representation.
119 if (ip
->ip_len
< hlen
) {
127 * Check that the amount of data in the buffers
128 * is at least as much as the IP header would have us expect.
129 * Trim mbufs if longer than we expect.
130 * Drop packet if shorter than we expect.
132 if (m
->m_len
< ip
->ip_len
) {
133 ipstat
.ips_tooshort
++;
134 //printf("bad shortx\n");
138 /* Should drop packet if mbuf too long? hmmm... */
139 if (m
->m_len
> ip
->ip_len
)
140 m_adj(m
, ip
->ip_len
- m
->m_len
);
142 /* check ip_ttl for a correct ICMP reply */
143 if(ip
->ip_ttl
==0 || ip
->ip_ttl
==1) {
144 icmp_error(m
, ICMP_TIMXCEED
,ICMP_TIMXCEED_INTRANS
, 0,"ttl");
145 //printf("bad ttl\n");
150 * Process options and, if not destined for us,
151 * ship it on. ip_dooptions returns 1 when an
152 * error was detected (causing an icmp message
153 * to be sent and the original packet to be freed).
155 /* We do no IP options */
156 /* if (hlen > sizeof (struct ip) && ip_dooptions(m))
160 * If offset or IP_MF are set, must reassemble.
161 * Otherwise, nothing need be done.
162 * (We could look in the reassembly queue to see
163 * if the packet was previously fragmented,
164 * but it's not worth the time; just let them time out.)
166 * XXX This should fail, don't fragment yet
168 if (ip
->ip_off
&~ IP_DF
) {
169 register struct ipq
*fp
;
171 * Look for queue of fragments
172 * of this datagram.
174 for (fp
= (struct ipq
*) ipq
.next
; fp
!= &ipq
;
175 fp
= (struct ipq
*) fp
->next
)
176 if (ip
->ip_id
== fp
->ipq_id
&&
177 ip
->ip_src
.s_addr
== fp
->ipq_src
.s_addr
&&
178 ip
->ip_dst
.s_addr
== fp
->ipq_dst
.s_addr
&&
179 ip
->ip_p
== fp
->ipq_p
)
185 * Adjust ip_len to not reflect header,
186 * set ip_mff if more fragments are expected,
187 * convert offset of this to bytes.
190 if (ip
->ip_off
& IP_MF
)
191 ((struct ipasfrag
*)ip
)->ipf_mff
|= 1;
193 ((struct ipasfrag
*)ip
)->ipf_mff
&= ~1;
198 * If datagram marked as having more fragments
199 * or if this is not the first fragment,
200 * attempt reassembly; if it succeeds, proceed.
202 if (((struct ipasfrag
*)ip
)->ipf_mff
& 1 || ip
->ip_off
) {
203 ipstat
.ips_fragments
++;
204 ip
= ip_reass((struct ipasfrag
*)ip
, fp
);
207 ipstat
.ips_reassembled
++;
217 * Switch out to protocol's input routine.
219 ipstat
.ips_delivered
++;
222 tcp_input(m
, hlen
, (struct socket
*)NULL
);
231 ipstat
.ips_noproto
++;
236 //printf("bad packet!\n");
242 * Take incoming datagram fragment and try to
243 * reassemble it into whole datagram. If a chain for
244 * reassembly of this datagram already exists, then it
245 * is given as fp; otherwise have to make a chain.
249 register struct ipasfrag
*ip
;
250 register struct ipq
*fp
;
252 register struct mbuf
*m
= dtom(ip
);
253 register struct ipasfrag
*q
;
254 int hlen
= ip
->ip_hl
<< 2;
257 DEBUG_CALL("ip_reass");
258 DEBUG_ARG("ip = %lx", (long)ip
);
259 DEBUG_ARG("fp = %lx", (long)fp
);
260 DEBUG_ARG("m = %lx", (long)m
);
263 * Presence of header sizes in mbufs
264 * would confuse code below.
265 * Fragment m_data is concatenated.
271 * If first fragment to arrive, create a reassembly queue.
275 if ((t
= m_get()) == NULL
) goto dropfrag
;
276 fp
= mtod(t
, struct ipq
*);
278 fp
->ipq_ttl
= IPFRAGTTL
;
279 fp
->ipq_p
= ip
->ip_p
;
280 fp
->ipq_id
= ip
->ip_id
;
281 fp
->ipq_next
= fp
->ipq_prev
= (ipasfragp_32
)fp
;
282 fp
->ipq_src
= ((struct ip
*)ip
)->ip_src
;
283 fp
->ipq_dst
= ((struct ip
*)ip
)->ip_dst
;
284 q
= (struct ipasfrag
*)fp
;
289 * Find a segment which begins after this one does.
291 for (q
= (struct ipasfrag
*)fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
;
292 q
= (struct ipasfrag
*)q
->ipf_next
)
293 if (q
->ip_off
> ip
->ip_off
)
297 * If there is a preceding segment, it may provide some of
298 * our data already. If so, drop the data from the incoming
299 * segment. If it provides all of our data, drop us.
301 if (q
->ipf_prev
!= (ipasfragp_32
)fp
) {
302 i
= ((struct ipasfrag
*)(q
->ipf_prev
))->ip_off
+
303 ((struct ipasfrag
*)(q
->ipf_prev
))->ip_len
- ip
->ip_off
;
314 * While we overlap succeeding segments trim them or,
315 * if they are completely covered, dequeue them.
317 while (q
!= (struct ipasfrag
*)fp
&& ip
->ip_off
+ ip
->ip_len
> q
->ip_off
) {
318 i
= (ip
->ip_off
+ ip
->ip_len
) - q
->ip_off
;
325 q
= (struct ipasfrag
*) q
->ipf_next
;
326 m_freem(dtom((struct ipasfrag
*) q
->ipf_prev
));
327 ip_deq((struct ipasfrag
*) q
->ipf_prev
);
332 * Stick new segment in its place;
333 * check for complete reassembly.
335 ip_enq(ip
, (struct ipasfrag
*) q
->ipf_prev
);
337 for (q
= (struct ipasfrag
*) fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
;
338 q
= (struct ipasfrag
*) q
->ipf_next
) {
339 if (q
->ip_off
!= next
)
343 if (((struct ipasfrag
*)(q
->ipf_prev
))->ipf_mff
& 1)
347 * Reassembly is complete; concatenate fragments.
349 q
= (struct ipasfrag
*) fp
->ipq_next
;
352 q
= (struct ipasfrag
*) q
->ipf_next
;
353 while (q
!= (struct ipasfrag
*)fp
) {
357 q
= (struct ipasfrag
*) q
->ipf_next
;
361 * Create header for new ip packet by
362 * modifying header of first packet;
363 * dequeue and discard fragment reassembly header.
364 * Make header visible.
366 ip
= (struct ipasfrag
*) fp
->ipq_next
;
369 * If the fragments concatenated to an mbuf that's
370 * bigger than the total size of the fragment, then an
371 * m_ext buffer was allocated. But fp->ipq_next points to
372 * the old buffer (in the mbuf), so we must point ip
373 * into the new buffer.
375 if (m
->m_flags
& M_EXT
) {
377 delta
= (char *)ip
- m
->m_dat
;
378 ip
= (struct ipasfrag
*)(m
->m_ext
+ delta
);
381 /* DEBUG_ARG("ip = %lx", (long)ip);
382 * ip=(struct ipasfrag *)m->m_data; */
386 ((struct ip
*)ip
)->ip_src
= fp
->ipq_src
;
387 ((struct ip
*)ip
)->ip_dst
= fp
->ipq_dst
;
389 (void) m_free(dtom(fp
));
391 m
->m_len
+= (ip
->ip_hl
<< 2);
392 m
->m_data
-= (ip
->ip_hl
<< 2);
394 return ((struct ip
*)ip
);
397 ipstat
.ips_fragdropped
++;
403 * Free a fragment reassembly header and all
404 * associated datagrams.
410 register struct ipasfrag
*q
, *p
;
412 for (q
= (struct ipasfrag
*) fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
;
414 p
= (struct ipasfrag
*) q
->ipf_next
;
419 (void) m_free(dtom(fp
));
423 * Put an ip fragment on a reassembly chain.
424 * Like insque, but pointers in middle of structure.
428 register struct ipasfrag
*p
, *prev
;
430 DEBUG_CALL("ip_enq");
431 DEBUG_ARG("prev = %lx", (long)prev
);
432 p
->ipf_prev
= (ipasfragp_32
) prev
;
433 p
->ipf_next
= prev
->ipf_next
;
434 ((struct ipasfrag
*)(prev
->ipf_next
))->ipf_prev
= (ipasfragp_32
) p
;
435 prev
->ipf_next
= (ipasfragp_32
) p
;
439 * To ip_enq as remque is to insque.
443 register struct ipasfrag
*p
;
445 ((struct ipasfrag
*)(p
->ipf_prev
))->ipf_next
= p
->ipf_next
;
446 ((struct ipasfrag
*)(p
->ipf_next
))->ipf_prev
= p
->ipf_prev
;
450 * IP timer processing;
451 * if a timer expires on a reassembly
452 * queue, discard it.
457 register struct ipq
*fp
;
459 DEBUG_CALL("ip_slowtimo");
461 fp
= (struct ipq
*) ipq
.next
;
467 fp
= (struct ipq
*) fp
->next
;
468 if (((struct ipq
*)(fp
->prev
))->ipq_ttl
== 0) {
469 ipstat
.ips_fragtimeout
++;
470 ip_freef((struct ipq
*) fp
->prev
);
476 * Do option processing on a datagram,
477 * possibly discarding it if bad options are encountered,
478 * or forwarding it if source-routed.
479 * Returns 1 if packet has been forwarded/freed,
480 * 0 if the packet should be processed further.
489 register struct ip
*ip
= mtod(m
, struct ip
*);
491 register struct ip_timestamp
*ipt
;
492 register struct in_ifaddr
*ia
;
493 /* int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; */
494 int opt
, optlen
, cnt
, off
, code
, type
, forward
= 0;
495 struct in_addr
*sin
, dst
;
496 typedef u_int32_t n_time
;
500 cp
= (u_char
*)(ip
+ 1);
501 cnt
= (ip
->ip_hl
<< 2) - sizeof (struct ip
);
502 for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
503 opt
= cp
[IPOPT_OPTVAL
];
504 if (opt
== IPOPT_EOL
)
506 if (opt
== IPOPT_NOP
)
509 optlen
= cp
[IPOPT_OLEN
];
510 if (optlen
<= 0 || optlen
> cnt
) {
511 code
= &cp
[IPOPT_OLEN
] - (u_char
*)ip
;
521 * Source routing with record.
522 * Find interface with current destination address.
523 * If none on this machine then drop if strictly routed,
524 * or do nothing if loosely routed.
525 * Record interface address and bring up next address
526 * component. If strictly routed make sure next
527 * address is on directly accessible net.
531 if ((off
= cp
[IPOPT_OFFSET
]) < IPOPT_MINOFF
) {
532 code
= &cp
[IPOPT_OFFSET
] - (u_char
*)ip
;
535 ipaddr
.sin_addr
= ip
->ip_dst
;
536 ia
= (struct in_ifaddr
*)
537 ifa_ifwithaddr((struct sockaddr
*)&ipaddr
);
539 if (opt
== IPOPT_SSRR
) {
541 code
= ICMP_UNREACH_SRCFAIL
;
545 * Loose routing, and not at next destination
546 * yet; nothing to do except forward.
550 off
--; / * 0 origin
* /
551 if (off
> optlen
- sizeof(struct in_addr
)) {
553 * End of source route. Should be for us.
555 save_rte(cp
, ip
->ip_src
);
559 * locate outgoing interface
561 bcopy((caddr_t
)(cp
+ off
), (caddr_t
)&ipaddr
.sin_addr
,
562 sizeof(ipaddr
.sin_addr
));
563 if (opt
== IPOPT_SSRR
) {
564 #define INA struct in_ifaddr *
565 #define SA struct sockaddr *
566 if ((ia
= (INA
)ifa_ifwithdstaddr((SA
)&ipaddr
)) == 0)
567 ia
= (INA
)ifa_ifwithnet((SA
)&ipaddr
);
569 ia
= ip_rtaddr(ipaddr
.sin_addr
);
572 code
= ICMP_UNREACH_SRCFAIL
;
575 ip
->ip_dst
= ipaddr
.sin_addr
;
576 bcopy((caddr_t
)&(IA_SIN(ia
)->sin_addr
),
577 (caddr_t
)(cp
+ off
), sizeof(struct in_addr
));
578 cp
[IPOPT_OFFSET
] += sizeof(struct in_addr
);
580 * Let ip_intr's mcast routing check handle mcast pkts
582 forward
= !IN_MULTICAST(ntohl(ip
->ip_dst
.s_addr
));
586 if ((off
= cp
[IPOPT_OFFSET
]) < IPOPT_MINOFF
) {
587 code
= &cp
[IPOPT_OFFSET
] - (u_char
*)ip
;
591 * If no space remains, ignore.
594 if (off
> optlen
- sizeof(struct in_addr
))
596 bcopy((caddr_t
)(&ip
->ip_dst
), (caddr_t
)&ipaddr
.sin_addr
,
597 sizeof(ipaddr
.sin_addr
));
599 * locate outgoing interface; if we're the destination,
600 * use the incoming interface (should be same).
602 if ((ia
= (INA
)ifa_ifwithaddr((SA
)&ipaddr
)) == 0 &&
603 (ia
= ip_rtaddr(ipaddr
.sin_addr
)) == 0) {
605 code
= ICMP_UNREACH_HOST
;
608 bcopy((caddr_t
)&(IA_SIN(ia
)->sin_addr
),
609 (caddr_t
)(cp
+ off
), sizeof(struct in_addr
));
610 cp
[IPOPT_OFFSET
] += sizeof(struct in_addr
);
614 code
= cp
- (u_char
*)ip
;
615 ipt
= (struct ip_timestamp
*)cp
;
616 if (ipt
->ipt_len
< 5)
618 if (ipt
->ipt_ptr
> ipt
->ipt_len
- sizeof (int32_t)) {
619 if (++ipt
->ipt_oflw
== 0)
623 sin
= (struct in_addr
*)(cp
+ ipt
->ipt_ptr
- 1);
624 switch (ipt
->ipt_flg
) {
626 case IPOPT_TS_TSONLY
:
629 case IPOPT_TS_TSANDADDR
:
630 if (ipt
->ipt_ptr
+ sizeof(n_time
) +
631 sizeof(struct in_addr
) > ipt
->ipt_len
)
633 ipaddr
.sin_addr
= dst
;
634 ia
= (INA
)ifaof_ i f p
foraddr((SA
)&ipaddr
,
638 bcopy((caddr_t
)&IA_SIN(ia
)->sin_addr
,
639 (caddr_t
)sin
, sizeof(struct in_addr
));
640 ipt
->ipt_ptr
+= sizeof(struct in_addr
);
643 case IPOPT_TS_PRESPEC
:
644 if (ipt
->ipt_ptr
+ sizeof(n_time
) +
645 sizeof(struct in_addr
) > ipt
->ipt_len
)
647 bcopy((caddr_t
)sin
, (caddr_t
)&ipaddr
.sin_addr
,
648 sizeof(struct in_addr
));
649 if (ifa_ifwithaddr((SA
)&ipaddr
) == 0)
651 ipt
->ipt_ptr
+= sizeof(struct in_addr
);
658 bcopy((caddr_t
)&ntime
, (caddr_t
)cp
+ ipt
->ipt_ptr
- 1,
660 ipt
->ipt_ptr
+= sizeof(n_time
);
671 /* ip->ip_len -= ip->ip_hl << 2; XXX icmp_error adds in hdr length */
674 icmp_error(m
, type
, code
, 0, 0);
676 ipstat
.ips_badoptions
++;
683 * Strip out IP options, at higher
684 * level protocol in the kernel.
685 * Second argument is buffer to which options
686 * will be moved, and return value is their length.
687 * (XXX) should be deleted; last arg currently ignored.
690 ip_stripoptions(m
, mopt
)
691 register struct mbuf
*m
;
695 struct ip
*ip
= mtod(m
, struct ip
*);
696 register caddr_t opts
;
699 olen
= (ip
->ip_hl
<<2) - sizeof (struct ip
);
700 opts
= (caddr_t
)(ip
+ 1);
701 i
= m
->m_len
- (sizeof (struct ip
) + olen
);
702 memcpy(opts
, opts
+ olen
, (unsigned)i
);
705 ip
->ip_hl
= sizeof(struct ip
) >> 2;