5734 IPFGENITER needs to know when to hit the brakes
[illumos-gate.git] / usr / src / uts / common / inet / ipf / ip_nat.c
blobb556d0983e3acffc4b44343ec53f1fc30df73ff0
1 /*
2 * Copyright (C) 1995-2004 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
7 * Use is subject to license terms.
8 */
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define KERNEL 1
14 # define _KERNEL 1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \
22 defined(_KERNEL)
23 # include "opt_ipfilter_log.h"
24 #endif
25 #if !defined(_KERNEL)
26 # include <stdio.h>
27 # include <string.h>
28 # include <stdlib.h>
29 # define _KERNEL
30 # ifdef __OpenBSD__
31 struct file;
32 # endif
33 # include <sys/uio.h>
34 # undef _KERNEL
35 #endif
36 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
37 # include <sys/filio.h>
38 # include <sys/fcntl.h>
39 #else
40 # include <sys/ioctl.h>
41 #endif
42 #if !defined(AIX)
43 # include <sys/fcntl.h>
44 #endif
45 #if !defined(linux)
46 # include <sys/protosw.h>
47 #endif
48 #include <sys/socket.h>
49 #if defined(_KERNEL)
50 # include <sys/systm.h>
51 # if !defined(__SVR4) && !defined(__svr4__)
52 # include <sys/mbuf.h>
53 # endif
54 #endif
55 #if defined(__SVR4) || defined(__svr4__)
56 # include <sys/filio.h>
57 # include <sys/byteorder.h>
58 # ifdef _KERNEL
59 # include <sys/dditypes.h>
60 # endif
61 # include <sys/stream.h>
62 # include <sys/kmem.h>
63 #endif
64 #if __FreeBSD_version >= 300000
65 # include <sys/queue.h>
66 #endif
67 #include <net/if.h>
68 #if __FreeBSD_version >= 300000
69 # include <net/if_var.h>
70 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
71 # include "opt_ipfilter.h"
72 # endif
73 #endif
74 #ifdef sun
75 # include <net/af.h>
76 #endif
77 #include <net/route.h>
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/ip.h>
82 #ifdef RFC1825
83 # include <vpn/md5.h>
84 # include <vpn/ipsec.h>
85 extern struct ifnet vpnif;
86 #endif
88 #if !defined(linux)
89 # include <netinet/ip_var.h>
90 #endif
91 #include <netinet/tcp.h>
92 #include <netinet/udp.h>
93 #include <netinet/ip_icmp.h>
94 #include "netinet/ip_compat.h"
95 #include <netinet/tcpip.h>
96 #include "netinet/ip_fil.h"
97 #include "netinet/ip_nat.h"
98 #include "netinet/ip_frag.h"
99 #include "netinet/ip_state.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ipf_stack.h"
102 #ifdef IPFILTER_SYNC
103 #include "netinet/ip_sync.h"
104 #endif
105 #if (__FreeBSD_version >= 300000)
106 # include <sys/malloc.h>
107 #endif
108 /* END OF INCLUDES */
110 #undef SOCKADDR_IN
111 #define SOCKADDR_IN struct sockaddr_in
113 #if !defined(lint)
114 static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed";
115 static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.42 2005/08/11 19:51:36 darrenr Exp $";
116 #endif
119 /* ======================================================================== */
120 /* How the NAT is organised and works. */
121 /* */
122 /* Inside (interface y) NAT Outside (interface x) */
123 /* -------------------- -+- ------------------------------------- */
124 /* Packet going | out, processed by fr_checknatout() for x */
125 /* ------------> | ------------> */
126 /* src=10.1.1.1 | src=192.1.1.1 */
127 /* | */
128 /* | in, processed by fr_checknatin() for x */
129 /* <------------ | <------------ */
130 /* dst=10.1.1.1 | dst=192.1.1.1 */
131 /* -------------------- -+- ------------------------------------- */
132 /* fr_checknatout() - changes ip_src and if required, sport */
133 /* - creates a new mapping, if required. */
134 /* fr_checknatin() - changes ip_dst and if required, dport */
135 /* */
136 /* In the NAT table, internal source is recorded as "in" and externally */
137 /* seen as "out". */
138 /* ======================================================================== */
141 static int nat_clearlist __P((ipf_stack_t *));
142 static void nat_addnat __P((struct ipnat *, ipf_stack_t *));
143 static void nat_addrdr __P((struct ipnat *, ipf_stack_t *));
144 static int fr_natgetent __P((caddr_t, ipf_stack_t *));
145 static int fr_natgetsz __P((caddr_t, ipf_stack_t *));
146 static int fr_natputent __P((caddr_t, int, ipf_stack_t *));
147 static void nat_tabmove __P((nat_t *, ipf_stack_t *));
148 static int nat_match __P((fr_info_t *, ipnat_t *));
149 static INLINE int nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *));
150 static INLINE int nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *));
151 static hostmap_t *nat_hostmap __P((ipnat_t *, struct in_addr,
152 struct in_addr, struct in_addr, u_32_t,
153 ipf_stack_t *));
154 static INLINE int nat_icmpquerytype4 __P((int));
155 static int nat_ruleaddrinit __P((ipnat_t *));
156 static int nat_siocaddnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
157 static void nat_siocdelnat __P((ipnat_t *, ipnat_t **, int, ipf_stack_t *));
158 static INLINE int nat_icmperrortype4 __P((int));
159 static INLINE int nat_finalise __P((fr_info_t *, nat_t *, natinfo_t *,
160 tcphdr_t *, nat_t **, int));
161 static INLINE int nat_resolverule __P((ipnat_t *, ipf_stack_t *));
162 static void nat_mssclamp __P((tcphdr_t *, u_32_t, u_short *));
163 static int nat_getnext __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
164 static int nat_iterator __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
165 static int nat_flushtable __P((int, ipf_stack_t *));
/*
 * Evaluates to non-zero when the NAT entry "n" has any of the
 * IPN_TCPUDPICMP flag bits set and its nat_inport differs from its
 * nat_outport.
 */
#define	NAT_HAS_L4_CHANGED(n)	\
	((((n)->nat_flags & (IPN_TCPUDPICMP)) != 0) && \
	 ((n)->nat_inport != (n)->nat_outport))
172 /* ------------------------------------------------------------------------ */
173 /* Function: fr_natinit */
174 /* Returns: int - 0 == success, -1 == failure */
175 /* Parameters: Nil */
176 /* */
177 /* Initialise all of the NAT locks, tables and other structures. */
178 /* ------------------------------------------------------------------------ */
179 int fr_natinit(ifs)
180 ipf_stack_t *ifs;
182 int i;
184 KMALLOCS(ifs->ifs_nat_table[0], nat_t **,
185 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
186 if (ifs->ifs_nat_table[0] != NULL)
187 bzero((char *)ifs->ifs_nat_table[0],
188 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
189 else
190 return -1;
192 KMALLOCS(ifs->ifs_nat_table[1], nat_t **,
193 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
194 if (ifs->ifs_nat_table[1] != NULL)
195 bzero((char *)ifs->ifs_nat_table[1],
196 ifs->ifs_ipf_nattable_sz * sizeof(nat_t *));
197 else
198 return -2;
200 KMALLOCS(ifs->ifs_nat_rules, ipnat_t **,
201 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
202 if (ifs->ifs_nat_rules != NULL)
203 bzero((char *)ifs->ifs_nat_rules,
204 ifs->ifs_ipf_natrules_sz * sizeof(ipnat_t *));
205 else
206 return -3;
208 KMALLOCS(ifs->ifs_rdr_rules, ipnat_t **,
209 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
210 if (ifs->ifs_rdr_rules != NULL)
211 bzero((char *)ifs->ifs_rdr_rules,
212 ifs->ifs_ipf_rdrrules_sz * sizeof(ipnat_t *));
213 else
214 return -4;
216 KMALLOCS(ifs->ifs_maptable, hostmap_t **,
217 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
218 if (ifs->ifs_maptable != NULL)
219 bzero((char *)ifs->ifs_maptable,
220 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
221 else
222 return -5;
224 ifs->ifs_ipf_hm_maplist = NULL;
226 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[0], u_long *,
227 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
228 if (ifs->ifs_nat_stats.ns_bucketlen[0] == NULL)
229 return -1;
230 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[0],
231 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
233 KMALLOCS(ifs->ifs_nat_stats.ns_bucketlen[1], u_long *,
234 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
235 if (ifs->ifs_nat_stats.ns_bucketlen[1] == NULL)
236 return -1;
237 bzero((char *)ifs->ifs_nat_stats.ns_bucketlen[1],
238 ifs->ifs_ipf_nattable_sz * sizeof(u_long));
240 if (ifs->ifs_fr_nat_maxbucket == 0) {
241 for (i = ifs->ifs_ipf_nattable_sz; i > 0; i >>= 1)
242 ifs->ifs_fr_nat_maxbucket++;
243 ifs->ifs_fr_nat_maxbucket *= 2;
246 fr_sttab_init(ifs->ifs_nat_tqb, ifs);
248 * Increase this because we may have "keep state" following this too
249 * and packet storms can occur if this is removed too quickly.
251 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcplastack;
252 ifs->ifs_nat_tqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_nat_udptq;
253 ifs->ifs_nat_udptq.ifq_ttl = ifs->ifs_fr_defnatage;
254 ifs->ifs_nat_udptq.ifq_ref = 1;
255 ifs->ifs_nat_udptq.ifq_head = NULL;
256 ifs->ifs_nat_udptq.ifq_tail = &ifs->ifs_nat_udptq.ifq_head;
257 MUTEX_INIT(&ifs->ifs_nat_udptq.ifq_lock, "nat ipftq udp tab");
258 ifs->ifs_nat_udptq.ifq_next = &ifs->ifs_nat_icmptq;
259 ifs->ifs_nat_icmptq.ifq_ttl = ifs->ifs_fr_defnaticmpage;
260 ifs->ifs_nat_icmptq.ifq_ref = 1;
261 ifs->ifs_nat_icmptq.ifq_head = NULL;
262 ifs->ifs_nat_icmptq.ifq_tail = &ifs->ifs_nat_icmptq.ifq_head;
263 MUTEX_INIT(&ifs->ifs_nat_icmptq.ifq_lock, "nat icmp ipftq tab");
264 ifs->ifs_nat_icmptq.ifq_next = &ifs->ifs_nat_iptq;
265 ifs->ifs_nat_iptq.ifq_ttl = ifs->ifs_fr_defnatipage;
266 ifs->ifs_nat_iptq.ifq_ref = 1;
267 ifs->ifs_nat_iptq.ifq_head = NULL;
268 ifs->ifs_nat_iptq.ifq_tail = &ifs->ifs_nat_iptq.ifq_head;
269 MUTEX_INIT(&ifs->ifs_nat_iptq.ifq_lock, "nat ip ipftq tab");
270 ifs->ifs_nat_iptq.ifq_next = NULL;
272 for (i = 0; i < IPF_TCP_NSTATES; i++) {
273 if (ifs->ifs_nat_tqb[i].ifq_ttl < ifs->ifs_fr_defnaticmpage)
274 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnaticmpage;
275 #ifdef LARGE_NAT
276 else if (ifs->ifs_nat_tqb[i].ifq_ttl > ifs->ifs_fr_defnatage)
277 ifs->ifs_nat_tqb[i].ifq_ttl = ifs->ifs_fr_defnatage;
278 #endif
282 * Increase this because we may have "keep state" following
283 * this too and packet storms can occur if this is removed
284 * too quickly.
286 ifs->ifs_nat_tqb[IPF_TCPS_CLOSED].ifq_ttl =
287 ifs->ifs_nat_tqb[IPF_TCPS_LAST_ACK].ifq_ttl;
289 RWLOCK_INIT(&ifs->ifs_ipf_nat, "ipf IP NAT rwlock");
290 RWLOCK_INIT(&ifs->ifs_ipf_natfrag, "ipf IP NAT-Frag rwlock");
291 MUTEX_INIT(&ifs->ifs_ipf_nat_new, "ipf nat new mutex");
292 MUTEX_INIT(&ifs->ifs_ipf_natio, "ipf nat io mutex");
294 ifs->ifs_fr_nat_init = 1;
295 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
296 return 0;
300 /* ------------------------------------------------------------------------ */
301 /* Function: nat_addrdr */
302 /* Returns: Nil */
303 /* Parameters: n(I) - pointer to NAT rule to add */
304 /* */
305 /* Adds a redirect rule to the hash table of redirect rules and the list of */
306 /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */
307 /* use by redirect rules. */
308 /* ------------------------------------------------------------------------ */
309 static void nat_addrdr(n, ifs)
310 ipnat_t *n;
311 ipf_stack_t *ifs;
313 ipnat_t **np;
314 u_32_t j;
315 u_int hv;
316 int k;
318 k = count4bits(n->in_outmsk);
319 if ((k >= 0) && (k != 32))
320 ifs->ifs_rdr_masks |= 1 << k;
321 j = (n->in_outip & n->in_outmsk);
322 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_rdrrules_sz);
323 np = ifs->ifs_rdr_rules + hv;
324 while (*np != NULL)
325 np = &(*np)->in_rnext;
326 n->in_rnext = NULL;
327 n->in_prnext = np;
328 n->in_hv = hv;
329 *np = n;
333 /* ------------------------------------------------------------------------ */
334 /* Function: nat_addnat */
335 /* Returns: Nil */
336 /* Parameters: n(I) - pointer to NAT rule to add */
337 /* */
338 /* Adds a NAT map rule to the hash table of rules and the list of loaded */
339 /* NAT rules. Updates the bitmask indicating which netmasks are in use by */
340 /* redirect rules. */
341 /* ------------------------------------------------------------------------ */
342 static void nat_addnat(n, ifs)
343 ipnat_t *n;
344 ipf_stack_t *ifs;
346 ipnat_t **np;
347 u_32_t j;
348 u_int hv;
349 int k;
351 k = count4bits(n->in_inmsk);
352 if ((k >= 0) && (k != 32))
353 ifs->ifs_nat_masks |= 1 << k;
354 j = (n->in_inip & n->in_inmsk);
355 hv = NAT_HASH_FN(j, 0, ifs->ifs_ipf_natrules_sz);
356 np = ifs->ifs_nat_rules + hv;
357 while (*np != NULL)
358 np = &(*np)->in_mnext;
359 n->in_mnext = NULL;
360 n->in_pmnext = np;
361 n->in_hv = hv;
362 *np = n;
366 /* ------------------------------------------------------------------------ */
367 /* Function: nat_delrdr */
368 /* Returns: Nil */
369 /* Parameters: n(I) - pointer to NAT rule to delete */
370 /* */
371 /* Removes a redirect rule from the hash table of redirect rules. */
372 /* ------------------------------------------------------------------------ */
373 void nat_delrdr(n)
374 ipnat_t *n;
376 if (n->in_rnext)
377 n->in_rnext->in_prnext = n->in_prnext;
378 *n->in_prnext = n->in_rnext;
382 /* ------------------------------------------------------------------------ */
383 /* Function: nat_delnat */
384 /* Returns: Nil */
385 /* Parameters: n(I) - pointer to NAT rule to delete */
386 /* */
387 /* Removes a NAT map rule from the hash table of NAT map rules. */
388 /* ------------------------------------------------------------------------ */
389 void nat_delnat(n)
390 ipnat_t *n;
392 if (n->in_mnext != NULL)
393 n->in_mnext->in_pmnext = n->in_pmnext;
394 *n->in_pmnext = n->in_mnext;
398 /* ------------------------------------------------------------------------ */
399 /* Function: nat_hostmap */
400 /* Returns: struct hostmap* - NULL if no hostmap could be created, */
401 /* else a pointer to the hostmapping to use */
402 /* Parameters: np(I) - pointer to NAT rule */
403 /* real(I) - real IP address */
404 /* map(I) - mapped IP address */
405 /* port(I) - destination port number */
406 /* Write Locks: ipf_nat */
407 /* */
408 /* Check if an ip address has already been allocated for a given mapping */
409 /* that is not doing port based translation. If is not yet allocated, then */
410 /* create a new entry if a non-NULL NAT rule pointer has been supplied. */
411 /* ------------------------------------------------------------------------ */
412 static struct hostmap *nat_hostmap(np, src, dst, map, port, ifs)
413 ipnat_t *np;
414 struct in_addr src;
415 struct in_addr dst;
416 struct in_addr map;
417 u_32_t port;
418 ipf_stack_t *ifs;
420 hostmap_t *hm;
421 u_int hv;
423 hv = (src.s_addr ^ dst.s_addr);
424 hv += src.s_addr;
425 hv += dst.s_addr;
426 hv %= HOSTMAP_SIZE;
427 for (hm = ifs->ifs_maptable[hv]; hm; hm = hm->hm_next)
428 if ((hm->hm_srcip.s_addr == src.s_addr) &&
429 (hm->hm_dstip.s_addr == dst.s_addr) &&
430 ((np == NULL) || (np == hm->hm_ipnat)) &&
431 ((port == 0) || (port == hm->hm_port))) {
432 hm->hm_ref++;
433 return hm;
436 if (np == NULL)
437 return NULL;
439 KMALLOC(hm, hostmap_t *);
440 if (hm) {
441 hm->hm_hnext = ifs->ifs_ipf_hm_maplist;
442 hm->hm_phnext = &ifs->ifs_ipf_hm_maplist;
443 if (ifs->ifs_ipf_hm_maplist != NULL)
444 ifs->ifs_ipf_hm_maplist->hm_phnext = &hm->hm_hnext;
445 ifs->ifs_ipf_hm_maplist = hm;
447 hm->hm_next = ifs->ifs_maptable[hv];
448 hm->hm_pnext = ifs->ifs_maptable + hv;
449 if (ifs->ifs_maptable[hv] != NULL)
450 ifs->ifs_maptable[hv]->hm_pnext = &hm->hm_next;
451 ifs->ifs_maptable[hv] = hm;
452 hm->hm_ipnat = np;
453 hm->hm_srcip = src;
454 hm->hm_dstip = dst;
455 hm->hm_mapip = map;
456 hm->hm_ref = 1;
457 hm->hm_port = port;
458 hm->hm_v = 4;
460 return hm;
464 /* ------------------------------------------------------------------------ */
465 /* Function: fr_hostmapdel */
466 /* Returns: Nil */
467 /* Parameters: hmp(I) - pointer to pointer to hostmap structure */
468 /* Write Locks: ipf_nat */
469 /* */
470 /* Decrement the references to this hostmap structure by one. If this */
471 /* reaches zero then remove it and free it. */
472 /* ------------------------------------------------------------------------ */
473 void fr_hostmapdel(hmp)
474 struct hostmap **hmp;
476 struct hostmap *hm;
478 hm = *hmp;
479 *hmp = NULL;
481 hm->hm_ref--;
482 if (hm->hm_ref == 0) {
483 if (hm->hm_next)
484 hm->hm_next->hm_pnext = hm->hm_pnext;
485 *hm->hm_pnext = hm->hm_next;
486 if (hm->hm_hnext)
487 hm->hm_hnext->hm_phnext = hm->hm_phnext;
488 *hm->hm_phnext = hm->hm_hnext;
489 KFREE(hm);
494 /* ------------------------------------------------------------------------ */
495 /* Function: fix_outcksum */
496 /* Returns: Nil */
497 /* Parameters: sp(I) - location of 16bit checksum to update */
498 /* n((I) - amount to adjust checksum by */
499 /* */
500 /* Adjusts the 16bit checksum by "n" for packets going out. */
501 /* ------------------------------------------------------------------------ */
502 void fix_outcksum(sp, n)
503 u_short *sp;
504 u_32_t n;
506 u_short sumshort;
507 u_32_t sum1;
509 if (n == 0)
510 return;
512 sum1 = (~ntohs(*sp)) & 0xffff;
513 sum1 += (n);
514 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
515 /* Again */
516 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
517 sumshort = ~(u_short)sum1;
518 *(sp) = htons(sumshort);
522 /* ------------------------------------------------------------------------ */
523 /* Function: fix_incksum */
524 /* Returns: Nil */
525 /* Parameters: sp(I) - location of 16bit checksum to update */
526 /* n((I) - amount to adjust checksum by */
527 /* */
528 /* Adjusts the 16bit checksum by "n" for packets going in. */
529 /* ------------------------------------------------------------------------ */
530 void fix_incksum(sp, n)
531 u_short *sp;
532 u_32_t n;
534 u_short sumshort;
535 u_32_t sum1;
537 if (n == 0)
538 return;
540 sum1 = (~ntohs(*sp)) & 0xffff;
541 sum1 += ~(n) & 0xffff;
542 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
543 /* Again */
544 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
545 sumshort = ~(u_short)sum1;
546 *(sp) = htons(sumshort);
550 /* ------------------------------------------------------------------------ */
551 /* Function: fix_datacksum */
552 /* Returns: Nil */
553 /* Parameters: sp(I) - location of 16bit checksum to update */
554 /* n((I) - amount to adjust checksum by */
555 /* */
556 /* Fix_datacksum is used *only* for the adjustments of checksums in the */
557 /* data section of an IP packet. */
558 /* */
559 /* The only situation in which you need to do this is when NAT'ing an */
560 /* ICMP error message. Such a message, contains in its body the IP header */
561 /* of the original IP packet, that causes the error. */
562 /* */
563 /* You can't use fix_incksum or fix_outcksum in that case, because for the */
564 /* kernel the data section of the ICMP error is just data, and no special */
565 /* processing like hardware cksum or ntohs processing have been done by the */
566 /* kernel on the data section. */
567 /* ------------------------------------------------------------------------ */
568 void fix_datacksum(sp, n)
569 u_short *sp;
570 u_32_t n;
572 u_short sumshort;
573 u_32_t sum1;
575 if (n == 0)
576 return;
578 sum1 = (~ntohs(*sp)) & 0xffff;
579 sum1 += (n);
580 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
581 /* Again */
582 sum1 = (sum1 >> 16) + (sum1 & 0xffff);
583 sumshort = ~(u_short)sum1;
584 *(sp) = htons(sumshort);
588 /* ------------------------------------------------------------------------ */
589 /* Function: fr_nat_ioctl */
590 /* Returns: int - 0 == success, != 0 == failure */
591 /* Parameters: data(I) - pointer to ioctl data */
592 /* cmd(I) - ioctl command integer */
593 /* mode(I) - file mode bits used with open */
594 /* uid(I) - uid of caller */
595 /* ctx(I) - pointer to give the uid context */
596 /* ifs - ipf stack instance */
597 /* */
598 /* Processes an ioctl call made to operate on the IP Filter NAT device. */
599 /* ------------------------------------------------------------------------ */
600 int fr_nat_ioctl(data, cmd, mode, uid, ctx, ifs)
601 ioctlcmd_t cmd;
602 caddr_t data;
603 int mode, uid;
604 void *ctx;
605 ipf_stack_t *ifs;
607 ipnat_t *nat, *nt, *n = NULL, **np = NULL;
608 int error = 0, ret, arg, getlock;
609 ipnat_t natd;
611 #if (BSD >= 199306) && defined(_KERNEL)
612 if ((securelevel >= 2) && (mode & FWRITE))
613 return EPERM;
614 #endif
616 #if defined(__osf__) && defined(_KERNEL)
617 getlock = 0;
618 #else
619 getlock = (mode & NAT_LOCKHELD) ? 0 : 1;
620 #endif
622 nat = NULL; /* XXX gcc -Wuninitialized */
623 if (cmd == (ioctlcmd_t)SIOCADNAT) {
624 KMALLOC(nt, ipnat_t *);
625 } else {
626 nt = NULL;
629 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
630 if (mode & NAT_SYSSPACE) {
631 bcopy(data, (char *)&natd, sizeof(natd));
632 error = 0;
633 } else {
634 error = fr_inobj(data, &natd, IPFOBJ_IPNAT);
639 if (error != 0)
640 goto done;
643 * For add/delete, look to see if the NAT entry is already present
645 if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT)) {
646 nat = &natd;
647 if (nat->in_v == 0) /* For backward compat. */
648 nat->in_v = 4;
649 nat->in_flags &= IPN_USERFLAGS;
650 if ((nat->in_redir & NAT_MAPBLK) == 0) {
651 if ((nat->in_flags & IPN_SPLIT) == 0)
652 nat->in_inip &= nat->in_inmsk;
653 if ((nat->in_flags & IPN_IPRANGE) == 0)
654 nat->in_outip &= nat->in_outmsk;
656 MUTEX_ENTER(&ifs->ifs_ipf_natio);
657 for (np = &ifs->ifs_nat_list; ((n = *np) != NULL);
658 np = &n->in_next)
659 if (bcmp((char *)&nat->in_flags, (char *)&n->in_flags,
660 IPN_CMPSIZ) == 0) {
661 if (nat->in_redir == NAT_REDIRECT &&
662 nat->in_pnext != n->in_pnext)
663 continue;
664 break;
668 switch (cmd)
670 case SIOCGENITER :
672 ipfgeniter_t iter;
673 ipftoken_t *token;
675 error = fr_inobj(data, &iter, IPFOBJ_GENITER);
676 if (error != 0)
677 break;
679 token = ipf_findtoken(iter.igi_type, uid, ctx, ifs);
680 if (token != NULL)
681 error = nat_iterator(token, &iter, ifs);
682 else
683 error = ESRCH;
684 RWLOCK_EXIT(&ifs->ifs_ipf_tokens);
685 break;
687 #ifdef IPFILTER_LOG
688 case SIOCIPFFB :
690 int tmp;
692 if (!(mode & FWRITE))
693 error = EPERM;
694 else {
695 tmp = ipflog_clear(IPL_LOGNAT, ifs);
696 error = BCOPYOUT((char *)&tmp, (char *)data,
697 sizeof(tmp));
698 if (error != 0)
699 error = EFAULT;
701 break;
703 case SIOCSETLG :
704 if (!(mode & FWRITE)) {
705 error = EPERM;
706 } else {
707 error = BCOPYIN((char *)data,
708 (char *)&ifs->ifs_nat_logging,
709 sizeof(ifs->ifs_nat_logging));
710 if (error != 0)
711 error = EFAULT;
713 break;
714 case SIOCGETLG :
715 error = BCOPYOUT((char *)&ifs->ifs_nat_logging, (char *)data,
716 sizeof(ifs->ifs_nat_logging));
717 if (error != 0)
718 error = EFAULT;
719 break;
720 case FIONREAD :
721 arg = ifs->ifs_iplused[IPL_LOGNAT];
722 error = BCOPYOUT(&arg, data, sizeof(arg));
723 if (error != 0)
724 error = EFAULT;
725 break;
726 #endif
727 case SIOCADNAT :
728 if (!(mode & FWRITE)) {
729 error = EPERM;
730 } else if (n != NULL) {
731 error = EEXIST;
732 } else if (nt == NULL) {
733 error = ENOMEM;
735 if (error != 0) {
736 MUTEX_EXIT(&ifs->ifs_ipf_natio);
737 break;
739 bcopy((char *)nat, (char *)nt, sizeof(*n));
740 error = nat_siocaddnat(nt, np, getlock, ifs);
741 MUTEX_EXIT(&ifs->ifs_ipf_natio);
742 if (error == 0)
743 nt = NULL;
744 break;
745 case SIOCRMNAT :
746 if (!(mode & FWRITE)) {
747 error = EPERM;
748 n = NULL;
749 } else if (n == NULL) {
750 error = ESRCH;
753 if (error != 0) {
754 MUTEX_EXIT(&ifs->ifs_ipf_natio);
755 break;
757 nat_siocdelnat(n, np, getlock, ifs);
759 MUTEX_EXIT(&ifs->ifs_ipf_natio);
760 n = NULL;
761 break;
762 case SIOCGNATS :
763 ifs->ifs_nat_stats.ns_table[0] = ifs->ifs_nat_table[0];
764 ifs->ifs_nat_stats.ns_table[1] = ifs->ifs_nat_table[1];
765 ifs->ifs_nat_stats.ns_list = ifs->ifs_nat_list;
766 ifs->ifs_nat_stats.ns_maptable = ifs->ifs_maptable;
767 ifs->ifs_nat_stats.ns_maplist = ifs->ifs_ipf_hm_maplist;
768 ifs->ifs_nat_stats.ns_nattab_max = ifs->ifs_ipf_nattable_max;
769 ifs->ifs_nat_stats.ns_nattab_sz = ifs->ifs_ipf_nattable_sz;
770 ifs->ifs_nat_stats.ns_rultab_sz = ifs->ifs_ipf_natrules_sz;
771 ifs->ifs_nat_stats.ns_rdrtab_sz = ifs->ifs_ipf_rdrrules_sz;
772 ifs->ifs_nat_stats.ns_hostmap_sz = ifs->ifs_ipf_hostmap_sz;
773 ifs->ifs_nat_stats.ns_instances = ifs->ifs_nat_instances;
774 ifs->ifs_nat_stats.ns_apslist = ifs->ifs_ap_sess_list;
775 error = fr_outobj(data, &ifs->ifs_nat_stats, IPFOBJ_NATSTAT);
776 break;
777 case SIOCGNATL :
779 natlookup_t nl;
781 if (getlock) {
782 READ_ENTER(&ifs->ifs_ipf_nat);
784 error = fr_inobj(data, &nl, IPFOBJ_NATLOOKUP);
785 if (nl.nl_v != 6)
786 nl.nl_v = 4;
787 if (error == 0) {
788 void *ptr;
790 switch (nl.nl_v)
792 case 4:
793 ptr = nat_lookupredir(&nl, ifs);
794 break;
795 #ifdef USE_INET6
796 case 6:
797 ptr = nat6_lookupredir(&nl, ifs);
798 break;
799 #endif
800 default:
801 ptr = NULL;
802 break;
805 if (ptr != NULL) {
806 error = fr_outobj(data, &nl, IPFOBJ_NATLOOKUP);
807 } else {
808 error = ESRCH;
811 if (getlock) {
812 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
814 break;
816 case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */
817 if (!(mode & FWRITE)) {
818 error = EPERM;
819 break;
821 if (getlock) {
822 WRITE_ENTER(&ifs->ifs_ipf_nat);
824 error = BCOPYIN(data, &arg, sizeof(arg));
825 if (error != 0) {
826 error = EFAULT;
827 } else {
828 if (arg == FLUSH_LIST)
829 ret = nat_clearlist(ifs);
830 else if (VALID_TABLE_FLUSH_OPT(arg))
831 ret = nat_flushtable(arg, ifs);
832 else
833 error = EINVAL;
835 if (getlock) {
836 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
838 if (error == 0) {
839 error = BCOPYOUT(&ret, data, sizeof(ret));
840 if (error != 0)
841 error = EFAULT;
843 break;
844 case SIOCPROXY :
845 error = appr_ioctl(data, cmd, mode, ifs);
846 break;
847 case SIOCSTLCK :
848 if (!(mode & FWRITE)) {
849 error = EPERM;
850 } else {
851 error = fr_lock(data, &ifs->ifs_fr_nat_lock);
853 break;
854 case SIOCSTPUT :
855 if ((mode & FWRITE) != 0) {
856 error = fr_natputent(data, getlock, ifs);
857 } else {
858 error = EACCES;
860 break;
861 case SIOCSTGSZ :
862 if (ifs->ifs_fr_nat_lock) {
863 if (getlock) {
864 READ_ENTER(&ifs->ifs_ipf_nat);
866 error = fr_natgetsz(data, ifs);
867 if (getlock) {
868 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
870 } else
871 error = EACCES;
872 break;
873 case SIOCSTGET :
874 if (ifs->ifs_fr_nat_lock) {
875 if (getlock) {
876 READ_ENTER(&ifs->ifs_ipf_nat);
878 error = fr_natgetent(data, ifs);
879 if (getlock) {
880 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
882 } else
883 error = EACCES;
884 break;
885 case SIOCIPFDELTOK :
886 error = BCOPYIN((caddr_t)data, (caddr_t)&arg, sizeof(arg));
887 if (error != 0) {
888 error = EFAULT;
889 } else {
890 error = ipf_deltoken(arg, uid, ctx, ifs);
892 break;
893 default :
894 error = EINVAL;
895 break;
897 done:
898 if (nt)
899 KFREE(nt);
900 return error;
904 /* ------------------------------------------------------------------------ */
905 /* Function: nat_siocaddnat */
906 /* Returns: int - 0 == success, != 0 == failure */
907 /* Parameters: n(I) - pointer to new NAT rule */
908 /* np(I) - pointer to where to insert new NAT rule */
909 /* getlock(I) - flag indicating if lock on ipf_nat is held */
910 /* Mutex Locks: ipf_natio */
911 /* */
912 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
913 /* from information passed to the kernel, then add it to the appropriate */
914 /* NAT rule table(s). */
915 /* ------------------------------------------------------------------------ */
916 static int nat_siocaddnat(n, np, getlock, ifs)
917 ipnat_t *n, **np;
918 int getlock;
919 ipf_stack_t *ifs;
921 int error = 0, i, j;
923 if (nat_resolverule(n, ifs) != 0)
924 return ENOENT;
926 if ((n->in_age[0] == 0) && (n->in_age[1] != 0))
927 return EINVAL;
929 n->in_use = 0;
930 if (n->in_redir & NAT_MAPBLK)
931 n->in_space = USABLE_PORTS * ~ntohl(n->in_outmsk);
932 else if (n->in_flags & IPN_AUTOPORTMAP)
933 n->in_space = USABLE_PORTS * ~ntohl(n->in_inmsk);
934 else if (n->in_flags & IPN_IPRANGE)
935 n->in_space = ntohl(n->in_outmsk) - ntohl(n->in_outip);
936 else if (n->in_flags & IPN_SPLIT)
937 n->in_space = 2;
938 else if (n->in_outmsk != 0)
939 n->in_space = ~ntohl(n->in_outmsk);
940 else
941 n->in_space = 1;
942 if ((n->in_flags & NAT_TCPUDPICMPQ) && (n->in_redir != NAT_REDIRECT)) {
943 if (ntohs(n->in_pmax) < ntohs(n->in_pmin))
944 return EINVAL;
948 * Calculate the number of valid IP addresses in the output
949 * mapping range. In all cases, the range is inclusive of
950 * the start and ending IP addresses.
951 * If to a CIDR address, lose 2: broadcast + network address
952 * (so subtract 1)
953 * If to a range, add one.
954 * If to a single IP address, set to 1.
956 if (n->in_space) {
957 if ((n->in_flags & IPN_IPRANGE) != 0)
958 n->in_space += 1;
959 else
960 n->in_space -= 1;
961 } else
962 n->in_space = 1;
964 #ifdef USE_INET6
965 if (n->in_v == 6 && (n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0 &&
966 !IP6_ISONES(&n->in_out[1]) && !IP6_ISZERO(&n->in_out[1]))
967 IP6_ADD(&n->in_out[0], 1, &n->in_next6)
968 else if (n->in_v == 6 &&
969 (n->in_flags & IPN_SPLIT) && (n->in_redir & NAT_REDIRECT))
970 n->in_next6 = n->in_in[0];
971 else if (n->in_v == 6)
972 n->in_next6 = n->in_out[0];
973 else
974 #endif
975 if ((n->in_outmsk != 0xffffffff) && (n->in_outmsk != 0) &&
976 ((n->in_flags & (IPN_IPRANGE|IPN_SPLIT)) == 0))
977 n->in_nip = ntohl(n->in_outip) + 1;
978 else if ((n->in_flags & IPN_SPLIT) &&
979 (n->in_redir & NAT_REDIRECT))
980 n->in_nip = ntohl(n->in_inip);
981 else
982 n->in_nip = ntohl(n->in_outip);
984 if (n->in_redir & NAT_MAP) {
985 n->in_pnext = ntohs(n->in_pmin);
987 * Multiply by the number of ports made available.
989 if (ntohs(n->in_pmax) >= ntohs(n->in_pmin)) {
990 n->in_space *= (ntohs(n->in_pmax) -
991 ntohs(n->in_pmin) + 1);
993 * Because two different sources can map to
994 * different destinations but use the same
995 * local IP#/port #.
996 * If the result is smaller than in_space, then
997 * we may have wrapped around 32bits.
999 i = n->in_inmsk;
1000 if ((i != 0) && (i != 0xffffffff)) {
1001 j = n->in_space * (~ntohl(i) + 1);
1002 if (j >= n->in_space)
1003 n->in_space = j;
1004 else
1005 n->in_space = 0xffffffff;
1009 * If no protocol is specified, multiple by 256 to allow for
1010 * at least one IP:IP mapping per protocol.
1012 if ((n->in_flags & IPN_TCPUDPICMP) == 0) {
1013 j = n->in_space * 256;
1014 if (j >= n->in_space)
1015 n->in_space = j;
1016 else
1017 n->in_space = 0xffffffff;
1021 /* Otherwise, these fields are preset */
1023 if (getlock) {
1024 WRITE_ENTER(&ifs->ifs_ipf_nat);
1026 n->in_next = NULL;
1027 *np = n;
1029 if (n->in_age[0] != 0)
1030 n->in_tqehead[0] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1031 n->in_age[0], ifs);
1033 if (n->in_age[1] != 0)
1034 n->in_tqehead[1] = fr_addtimeoutqueue(&ifs->ifs_nat_utqe,
1035 n->in_age[1], ifs);
1037 if (n->in_redir & NAT_REDIRECT) {
1038 n->in_flags &= ~IPN_NOTDST;
1039 switch (n->in_v)
1041 case 4 :
1042 nat_addrdr(n, ifs);
1043 break;
1044 #ifdef USE_INET6
1045 case 6 :
1046 nat6_addrdr(n, ifs);
1047 break;
1048 #endif
1049 default :
1050 break;
1053 if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) {
1054 n->in_flags &= ~IPN_NOTSRC;
1055 switch (n->in_v)
1057 case 4 :
1058 nat_addnat(n, ifs);
1059 break;
1060 #ifdef USE_INET6
1061 case 6 :
1062 nat6_addnat(n, ifs);
1063 break;
1064 #endif
1065 default :
1066 break;
1069 n = NULL;
1070 ifs->ifs_nat_stats.ns_rules++;
1071 if (getlock) {
1072 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* WRITE */
1075 return error;
1079 /* ------------------------------------------------------------------------ */
1080 /* Function: nat_resolvrule */
1081 /* Returns: int - 0 == success, -1 == failure */
1082 /* Parameters: n(I) - pointer to NAT rule */
1083 /* */
1084 /* Resolve some of the details inside the NAT rule. Includes resolving */
1085 /* any specified interfaces and proxy labels, and determines whether or not */
1086 /* all proxy labels are correctly specified. */
1087 /* */
1088 /* Called by nat_siocaddnat() (SIOCADNAT) and fr_natputent (SIOCSTPUT). */
1089 /* ------------------------------------------------------------------------ */
1090 static int nat_resolverule(n, ifs)
1091 ipnat_t *n;
1092 ipf_stack_t *ifs;
1094 n->in_ifnames[0][LIFNAMSIZ - 1] = '\0';
1095 n->in_ifps[0] = fr_resolvenic(n->in_ifnames[0], n->in_v, ifs);
1097 n->in_ifnames[1][LIFNAMSIZ - 1] = '\0';
1098 if (n->in_ifnames[1][0] == '\0') {
1099 (void) strncpy(n->in_ifnames[1], n->in_ifnames[0], LIFNAMSIZ);
1100 n->in_ifps[1] = n->in_ifps[0];
1101 } else {
1102 n->in_ifps[1] = fr_resolvenic(n->in_ifnames[1], n->in_v, ifs);
1105 if (n->in_plabel[0] != '\0') {
1106 n->in_apr = appr_lookup(n->in_p, n->in_plabel, ifs);
1107 if (n->in_apr == NULL)
1108 return -1;
1110 return 0;
1114 /* ------------------------------------------------------------------------ */
1115 /* Function: nat_siocdelnat */
1116 /* Returns: int - 0 == success, != 0 == failure */
1117 /* Parameters: n(I) - pointer to new NAT rule */
1118 /* np(I) - pointer to where to insert new NAT rule */
1119 /* getlock(I) - flag indicating if lock on ipf_nat is held */
1120 /* Mutex Locks: ipf_natio */
1121 /* */
1122 /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */
1123 /* from information passed to the kernel, then add it to the appropriate */
1124 /* NAT rule table(s). */
1125 /* ------------------------------------------------------------------------ */
1126 static void nat_siocdelnat(n, np, getlock, ifs)
1127 ipnat_t *n, **np;
1128 int getlock;
1129 ipf_stack_t *ifs;
1131 int i;
1133 if (getlock) {
1134 WRITE_ENTER(&ifs->ifs_ipf_nat);
1136 if (n->in_redir & NAT_REDIRECT)
1137 nat_delrdr(n);
1138 if (n->in_redir & (NAT_MAPBLK|NAT_MAP))
1139 nat_delnat(n);
1140 if (ifs->ifs_nat_list == NULL) {
1141 ifs->ifs_nat_masks = 0;
1142 ifs->ifs_rdr_masks = 0;
1143 for (i = 0; i < 4; i++) {
1144 ifs->ifs_nat6_masks[i] = 0;
1145 ifs->ifs_rdr6_masks[i] = 0;
1149 if (n->in_tqehead[0] != NULL) {
1150 if (fr_deletetimeoutqueue(n->in_tqehead[0]) == 0) {
1151 fr_freetimeoutqueue(n->in_tqehead[0], ifs);
1155 if (n->in_tqehead[1] != NULL) {
1156 if (fr_deletetimeoutqueue(n->in_tqehead[1]) == 0) {
1157 fr_freetimeoutqueue(n->in_tqehead[1], ifs);
1161 *np = n->in_next;
1163 if (n->in_use == 0) {
1164 if (n->in_apr)
1165 appr_free(n->in_apr);
1166 KFREE(n);
1167 ifs->ifs_nat_stats.ns_rules--;
1168 } else {
1169 n->in_flags |= IPN_DELETE;
1170 n->in_next = NULL;
1172 if (getlock) {
1173 RWLOCK_EXIT(&ifs->ifs_ipf_nat); /* READ/WRITE */
1178 /* ------------------------------------------------------------------------ */
1179 /* Function: fr_natgetsz */
1180 /* Returns: int - 0 == success, != 0 is the error value. */
1181 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1182 /* get the size of. */
1183 /* */
1184 /* Handle SIOCSTGSZ. */
1185 /* Return the size of the nat list entry to be copied back to user space. */
1186 /* The size of the entry is stored in the ng_sz field and the enture natget */
1187 /* structure is copied back to the user. */
1188 /* ------------------------------------------------------------------------ */
1189 static int fr_natgetsz(data, ifs)
1190 caddr_t data;
1191 ipf_stack_t *ifs;
1193 ap_session_t *aps;
1194 nat_t *nat, *n;
1195 natget_t ng;
1196 int err;
1198 err = BCOPYIN(data, &ng, sizeof(ng));
1199 if (err != 0)
1200 return EFAULT;
1202 nat = ng.ng_ptr;
1203 if (!nat) {
1204 nat = ifs->ifs_nat_instances;
1205 ng.ng_sz = 0;
1207 * Empty list so the size returned is 0. Simple.
1209 if (nat == NULL) {
1210 err = BCOPYOUT(&ng, data, sizeof(ng));
1211 if (err != 0) {
1212 return EFAULT;
1213 } else {
1214 return 0;
1217 } else {
1219 * Make sure the pointer we're copying from exists in the
1220 * current list of entries. Security precaution to prevent
1221 * copying of random kernel data.
1223 for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1224 if (n == nat)
1225 break;
1226 if (!n)
1227 return ESRCH;
1231 * Incluse any space required for proxy data structures.
1233 ng.ng_sz = sizeof(nat_save_t);
1234 aps = nat->nat_aps;
1235 if (aps != NULL) {
1236 ng.ng_sz += sizeof(ap_session_t) - 4;
1237 if (aps->aps_data != 0)
1238 ng.ng_sz += aps->aps_psiz;
1241 err = BCOPYOUT(&ng, data, sizeof(ng));
1242 if (err != 0)
1243 return EFAULT;
1244 return 0;
1248 /* ------------------------------------------------------------------------ */
1249 /* Function: fr_natgetent */
1250 /* Returns: int - 0 == success, != 0 is the error value. */
1251 /* Parameters: data(I) - pointer to natget structure with kernel pointer */
1252 /* to NAT structure to copy out. */
1253 /* */
1254 /* Handle SIOCSTGET. */
1255 /* Copies out NAT entry to user space. Any additional data held for a */
1256 /* proxy is also copied, as to is the NAT rule which was responsible for it */
1257 /* ------------------------------------------------------------------------ */
1258 static int fr_natgetent(data, ifs)
1259 caddr_t data;
1260 ipf_stack_t *ifs;
1262 int error, outsize;
1263 ap_session_t *aps;
1264 nat_save_t *ipn, ipns;
1265 nat_t *n, *nat;
1267 error = fr_inobj(data, &ipns, IPFOBJ_NATSAVE);
1268 if (error != 0)
1269 return error;
1271 if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920))
1272 return EINVAL;
1274 KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize);
1275 if (ipn == NULL)
1276 return ENOMEM;
1278 ipn->ipn_dsize = ipns.ipn_dsize;
1279 nat = ipns.ipn_next;
1280 if (nat == NULL) {
1281 nat = ifs->ifs_nat_instances;
1282 if (nat == NULL) {
1283 if (ifs->ifs_nat_instances == NULL)
1284 error = ENOENT;
1285 goto finished;
1287 } else {
1289 * Make sure the pointer we're copying from exists in the
1290 * current list of entries. Security precaution to prevent
1291 * copying of random kernel data.
1293 for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1294 if (n == nat)
1295 break;
1296 if (n == NULL) {
1297 error = ESRCH;
1298 goto finished;
1301 ipn->ipn_next = nat->nat_next;
1304 * Copy the NAT structure.
1306 bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat));
1309 * If we have a pointer to the NAT rule it belongs to, save that too.
1311 if (nat->nat_ptr != NULL)
1312 bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat,
1313 sizeof(ipn->ipn_ipnat));
1316 * If we also know the NAT entry has an associated filter rule,
1317 * save that too.
1319 if (nat->nat_fr != NULL)
1320 bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr,
1321 sizeof(ipn->ipn_fr));
1324 * Last but not least, if there is an application proxy session set
1325 * up for this NAT entry, then copy that out too, including any
1326 * private data saved along side it by the proxy.
1328 aps = nat->nat_aps;
1329 outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data);
1330 if (aps != NULL) {
1331 char *s;
1333 if (outsize < sizeof(*aps)) {
1334 error = ENOBUFS;
1335 goto finished;
1338 s = ipn->ipn_data;
1339 bcopy((char *)aps, s, sizeof(*aps));
1340 s += sizeof(*aps);
1341 outsize -= sizeof(*aps);
1342 if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz))
1343 bcopy(aps->aps_data, s, aps->aps_psiz);
1344 else
1345 error = ENOBUFS;
1347 if (error == 0) {
1348 error = fr_outobjsz(data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize);
1351 finished:
1352 if (ipn != NULL) {
1353 KFREES(ipn, ipns.ipn_dsize);
1355 return error;
1358 /* ------------------------------------------------------------------------ */
1359 /* Function: nat_calc_chksum_diffs */
1360 /* Returns: void */
1361 /* Parameters: nat - pointer to NAT table entry */
1362 /* */
1363 /* Function calculates chksum deltas for IP header (nat_ipsumd) and TCP/UDP */
1364 /* headers (nat_sumd). The things for L4 (UDP/TCP) get complicated when */
1365 /* we are dealing with partial chksum offload. For these cases we need to */
1366 /* compute a 'partial chksum delta'. The 'partial chksum delta'is stored */
1367 /* into nat_sumd[1], while ordinary chksum delta for TCP/UDP is in */
1368 /* nat_sumd[0]. */
1369 /* */
1370 /* The function accepts initialized NAT table entry and computes the deltas */
1371 /* from nat_inip/nat_outip members. The function is called right before */
1372 /* the new entry is inserted into the table. */
1373 /* */
1374 /* The ipsumd (IP hedaer chksum delta adjustment) is computed as a chksum */
1375 /* of delta between original and new IP addresses. */
1376 /* */
1377 /* the nat_sumd[0] (TCP/UDP header chksum delta adjustment) is computed as */
1378 /* a chkusm of delta between original an new IP addrress:port tupples. */
1379 /* */
1380 /* Some facts about chksum, we should remember: */
1381 /* IP header chksum covers IP header only */
1382 /* */
1383 /* TCP/UDP chksum covers data payload and so called pseudo header */
1384 /* SRC, DST IP address */
1385 /* SRC, DST Port */
1386 /* length of payload */
1387 /* */
1388 /* The partial chksum delta (nat_sumd[1] is used to adjust db_ckusm16 */
1389 /* member of dblk_t structure. The db_ckusm16 member is not part of */
1390 /* IP/UDP/TCP header it is 16 bit value computed by NIC driver with partial */
1391 /* chksum offload capacbility for every inbound packet. The db_cksum16 is */
1392 /* stored along with other IP packet data in dblk_t structure and used in */
1393 /* for IP/UDP/TCP chksum validation later in ip.c. */
1394 /* */
1395 /* The partial chksum delta (adjustment, nat_sumd[1]) is computed as chksum */
1396 /* of delta between new and orig address. NOTE: the order of operands for */
1397 /* partial delta operation is swapped compared to computing the IP/TCP/UDP */
1398 /* header adjustment. It is by design see (IP_CKSUM_RECV() macro in ip.c). */
1399 /* */
1400 /* ------------------------------------------------------------------------ */
1401 void nat_calc_chksum_diffs(nat)
1402 nat_t *nat;
1404 u_32_t sum_orig = 0;
1405 u_32_t sum_changed = 0;
1406 u_32_t sumd;
1407 u_32_t ipsum_orig = 0;
1408 u_32_t ipsum_changed = 0;
1410 if (nat->nat_v != 4 && nat->nat_v != 6)
1411 return;
1414 * the switch calculates operands for CALC_SUMD(),
1415 * which will compute the partial chksum delta.
1417 switch (nat->nat_dir)
1419 case NAT_INBOUND:
1421 * we are dealing with RDR rule (DST address gets
1422 * modified on packet from client)
1424 if (nat->nat_v == 4) {
1425 sum_changed = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1426 sum_orig = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1427 } else {
1428 sum_changed = LONG_SUM6(&nat->nat_inip6);
1429 sum_orig = LONG_SUM6(&nat->nat_outip6);
1431 break;
1432 case NAT_OUTBOUND:
1434 * we are dealing with MAP rule (SRC address gets
1435 * modified on packet from client)
1437 if (nat->nat_v == 4) {
1438 sum_changed = LONG_SUM(ntohl(nat->nat_outip.s_addr));
1439 sum_orig = LONG_SUM(ntohl(nat->nat_inip.s_addr));
1440 } else {
1441 sum_changed = LONG_SUM6(&nat->nat_outip6);
1442 sum_orig = LONG_SUM6(&nat->nat_inip6);
1444 break;
1445 default: ;
1446 break;
1450 * we also preserve CALC_SUMD() operands here, for IP chksum delta
1451 * calculation, which happens at the end of function.
1453 ipsum_changed = sum_changed;
1454 ipsum_orig = sum_orig;
1456 * NOTE: the order of operands for partial chksum adjustment
1457 * computation has to be swapped!
1459 CALC_SUMD(sum_changed, sum_orig, sumd);
1460 nat->nat_sumd[1] = (sumd & 0xffff) + (sumd >> 16);
1462 if (nat->nat_flags & (IPN_TCPUDP | IPN_ICMPQUERY)) {
1465 * switch calculates operands for CALC_SUMD(), which will
1466 * compute the full chksum delta.
1468 switch (nat->nat_dir)
1470 case NAT_INBOUND:
1471 if (nat->nat_v == 4) {
1472 sum_changed = LONG_SUM(
1473 ntohl(nat->nat_inip.s_addr) +
1474 ntohs(nat->nat_inport));
1475 sum_orig = LONG_SUM(
1476 ntohl(nat->nat_outip.s_addr) +
1477 ntohs(nat->nat_outport));
1478 } else {
1479 sum_changed = LONG_SUM6(&nat->nat_inip6) +
1480 ntohs(nat->nat_inport);
1481 sum_orig = LONG_SUM6(&nat->nat_outip6) +
1482 ntohs(nat->nat_outport);
1484 break;
1485 case NAT_OUTBOUND:
1486 if (nat->nat_v == 4) {
1487 sum_changed = LONG_SUM(
1488 ntohl(nat->nat_outip.s_addr) +
1489 ntohs(nat->nat_outport));
1490 sum_orig = LONG_SUM(
1491 ntohl(nat->nat_inip.s_addr) +
1492 ntohs(nat->nat_inport));
1493 } else {
1494 sum_changed = LONG_SUM6(&nat->nat_outip6) +
1495 ntohs(nat->nat_outport);
1496 sum_orig = LONG_SUM6(&nat->nat_inip6) +
1497 ntohs(nat->nat_inport);
1499 break;
1500 default: ;
1501 break;
1504 CALC_SUMD(sum_orig, sum_changed, sumd);
1505 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
1507 if (!(nat->nat_flags & IPN_TCPUDP)) {
1509 * partial HW chksum offload works for TCP/UDP headers only,
1510 * so we need to enforce full chksum adjustment for ICMP
1512 nat->nat_sumd[1] = nat->nat_sumd[0];
1515 else
1516 nat->nat_sumd[0] = nat->nat_sumd[1];
1519 * we may reuse the already computed nat_sumd[0] for IP header chksum
1520 * adjustment in case the L4 (TCP/UDP header) is not changed by NAT.
1522 if (nat->nat_v == 4) {
1523 if (NAT_HAS_L4_CHANGED(nat)) {
1525 * bad luck, NAT changes also the L4 header, use IP
1526 * addresses to compute chksum adjustment for IP header.
1528 CALC_SUMD(ipsum_orig, ipsum_changed, sumd);
1529 nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16);
1530 } else {
1532 * the NAT does not change L4 hdr -> reuse chksum
1533 * adjustment for IP hdr.
1535 nat->nat_ipsumd = nat->nat_sumd[0];
1538 * if L4 header does not use chksum - zero out deltas
1540 if (!(nat->nat_flags & IPN_TCPUDP)) {
1541 nat->nat_sumd[0] = 0;
1542 nat->nat_sumd[1] = 0;
1547 return;
1550 /* ------------------------------------------------------------------------ */
1551 /* Function: fr_natputent */
1552 /* Returns: int - 0 == success, != 0 is the error value. */
1553 /* Parameters: data(I) - pointer to natget structure with NAT */
1554 /* structure information to load into the kernel */
1555 /* getlock(I) - flag indicating whether or not a write lock */
1556 /* on ipf_nat is already held. */
1557 /* ifs - ipf stack instance */
1558 /* */
1559 /* Handle SIOCSTPUT. */
1560 /* Loads a NAT table entry from user space, including a NAT rule, proxy and */
1561 /* firewall rule data structures, if pointers to them indicate so. */
1562 /* ------------------------------------------------------------------------ */
1563 static int fr_natputent(data, getlock, ifs)
1564 caddr_t data;
1565 int getlock;
1566 ipf_stack_t *ifs;
1568 nat_save_t ipn, *ipnn;
1569 ap_session_t *aps;
1570 nat_t *n, *nat;
1571 frentry_t *fr;
1572 fr_info_t fin;
1573 ipnat_t *in;
1574 int error;
1576 error = fr_inobj(data, &ipn, IPFOBJ_NATSAVE);
1577 if (error != 0)
1578 return error;
1581 * Trigger automatic call to nat_flushtable() if the
1582 * table has reached capcity specified by hi watermark.
1584 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
1585 ifs->ifs_nat_doflush = 1;
1588 * If automatic flushing did not do its job, and the table
1589 * has filled up, don't try to create a new entry.
1591 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
1592 ifs->ifs_nat_stats.ns_memfail++;
1593 return ENOMEM;
1597 * Initialise early because of code at junkput label.
1599 in = NULL;
1600 aps = NULL;
1601 nat = NULL;
1602 ipnn = NULL;
1605 * New entry, copy in the rest of the NAT entry if it's size is more
1606 * than just the nat_t structure.
1608 fr = NULL;
1609 if (ipn.ipn_dsize > sizeof(ipn)) {
1610 if (ipn.ipn_dsize > 81920) {
1611 error = ENOMEM;
1612 goto junkput;
1615 KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize);
1616 if (ipnn == NULL)
1617 return ENOMEM;
1619 error = fr_inobjsz(data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize);
1620 if (error != 0) {
1621 error = EFAULT;
1622 goto junkput;
1624 } else
1625 ipnn = &ipn;
1627 KMALLOC(nat, nat_t *);
1628 if (nat == NULL) {
1629 error = ENOMEM;
1630 goto junkput;
1633 bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat));
1635 * Initialize all these so that nat_delete() doesn't cause a crash.
1637 bzero((char *)nat, offsetof(struct nat, nat_tqe));
1638 nat->nat_tqe.tqe_pnext = NULL;
1639 nat->nat_tqe.tqe_next = NULL;
1640 nat->nat_tqe.tqe_ifq = NULL;
1641 nat->nat_tqe.tqe_parent = nat;
1644 * Restore the rule associated with this nat session
1646 in = ipnn->ipn_nat.nat_ptr;
1647 if (in != NULL) {
1648 KMALLOC(in, ipnat_t *);
1649 nat->nat_ptr = in;
1650 if (in == NULL) {
1651 error = ENOMEM;
1652 goto junkput;
1654 bzero((char *)in, offsetof(struct ipnat, in_next6));
1655 bcopy((char *)&ipnn->ipn_ipnat, (char *)in, sizeof(*in));
1656 in->in_use = 1;
1657 in->in_flags |= IPN_DELETE;
1659 ATOMIC_INC(ifs->ifs_nat_stats.ns_rules);
1661 if (nat_resolverule(in, ifs) != 0) {
1662 error = ESRCH;
1663 goto junkput;
1668 * Check that the NAT entry doesn't already exist in the kernel.
1670 if (nat->nat_v != 6)
1671 nat->nat_v = 4;
1672 bzero((char *)&fin, sizeof(fin));
1673 fin.fin_p = nat->nat_p;
1674 fin.fin_ifs = ifs;
1675 if (nat->nat_dir == NAT_OUTBOUND) {
1676 fin.fin_data[0] = ntohs(nat->nat_oport);
1677 fin.fin_data[1] = ntohs(nat->nat_outport);
1678 fin.fin_ifp = nat->nat_ifps[0];
1679 if (getlock) {
1680 READ_ENTER(&ifs->ifs_ipf_nat);
1683 switch (nat->nat_v)
1685 case 4:
1686 fin.fin_v = nat->nat_v;
1687 n = nat_inlookup(&fin, nat->nat_flags, fin.fin_p,
1688 nat->nat_oip, nat->nat_outip);
1689 break;
1690 #ifdef USE_INET6
1691 case 6:
1692 n = nat6_inlookup(&fin, nat->nat_flags, fin.fin_p,
1693 &nat->nat_oip6.in6, &nat->nat_outip6.in6);
1694 break;
1695 #endif
1696 default:
1697 n = NULL;
1698 break;
1701 if (getlock) {
1702 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1704 if (n != NULL) {
1705 error = EEXIST;
1706 goto junkput;
1708 } else if (nat->nat_dir == NAT_INBOUND) {
1709 fin.fin_data[0] = ntohs(nat->nat_inport);
1710 fin.fin_data[1] = ntohs(nat->nat_oport);
1711 fin.fin_ifp = nat->nat_ifps[1];
1712 if (getlock) {
1713 READ_ENTER(&ifs->ifs_ipf_nat);
1716 switch (nat->nat_v)
1718 case 4:
1719 n = nat_outlookup(&fin, nat->nat_flags, fin.fin_p,
1720 nat->nat_inip, nat->nat_oip);
1721 break;
1722 #ifdef USE_INET6
1723 case 6:
1724 n = nat6_outlookup(&fin, nat->nat_flags, fin.fin_p,
1725 &nat->nat_inip6.in6, &nat->nat_oip6.in6);
1726 break;
1727 #endif
1728 default:
1729 n = NULL;
1730 break;
1733 if (getlock) {
1734 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1736 if (n != NULL) {
1737 error = EEXIST;
1738 goto junkput;
1740 } else {
1741 error = EINVAL;
1742 goto junkput;
1746 * Restore ap_session_t structure. Include the private data allocated
1747 * if it was there.
1749 aps = nat->nat_aps;
1750 if (aps != NULL) {
1751 KMALLOC(aps, ap_session_t *);
1752 nat->nat_aps = aps;
1753 if (aps == NULL) {
1754 error = ENOMEM;
1755 goto junkput;
1757 bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps));
1758 if (in != NULL)
1759 aps->aps_apr = in->in_apr;
1760 else
1761 aps->aps_apr = NULL;
1762 if (aps->aps_psiz != 0) {
1763 if (aps->aps_psiz > 81920) {
1764 error = ENOMEM;
1765 goto junkput;
1767 KMALLOCS(aps->aps_data, void *, aps->aps_psiz);
1768 if (aps->aps_data == NULL) {
1769 error = ENOMEM;
1770 goto junkput;
1772 bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data,
1773 aps->aps_psiz);
1774 } else {
1775 aps->aps_psiz = 0;
1776 aps->aps_data = NULL;
1781 * If there was a filtering rule associated with this entry then
1782 * build up a new one.
1784 fr = nat->nat_fr;
1785 if (fr != NULL) {
1786 if ((nat->nat_flags & SI_NEWFR) != 0) {
1787 KMALLOC(fr, frentry_t *);
1788 nat->nat_fr = fr;
1789 if (fr == NULL) {
1790 error = ENOMEM;
1791 goto junkput;
1793 ipnn->ipn_nat.nat_fr = fr;
1794 (void) fr_outobj(data, ipnn, IPFOBJ_NATSAVE);
1795 bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr));
1797 fr->fr_ref = 1;
1798 fr->fr_dsize = 0;
1799 fr->fr_data = NULL;
1800 fr->fr_type = FR_T_NONE;
1802 MUTEX_NUKE(&fr->fr_lock);
1803 MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock");
1804 } else {
1805 if (getlock) {
1806 READ_ENTER(&ifs->ifs_ipf_nat);
1808 for (n = ifs->ifs_nat_instances; n; n = n->nat_next)
1809 if (n->nat_fr == fr)
1810 break;
1812 if (n != NULL) {
1813 MUTEX_ENTER(&fr->fr_lock);
1814 fr->fr_ref++;
1815 MUTEX_EXIT(&fr->fr_lock);
1817 if (getlock) {
1818 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1820 if (!n) {
1821 error = ESRCH;
1822 goto junkput;
1827 if (ipnn != &ipn) {
1828 KFREES(ipnn, ipn.ipn_dsize);
1829 ipnn = NULL;
1832 nat_calc_chksum_diffs(nat);
1834 if (getlock) {
1835 WRITE_ENTER(&ifs->ifs_ipf_nat);
1838 nat_calc_chksum_diffs(nat);
1840 switch (nat->nat_v)
1842 case 4 :
1843 error = nat_insert(nat, nat->nat_rev, ifs);
1844 break;
1845 #ifdef USE_INET6
1846 case 6 :
1847 error = nat6_insert(nat, nat->nat_rev, ifs);
1848 break;
1849 #endif
1850 default :
1851 break;
1854 if ((error == 0) && (aps != NULL)) {
1855 aps->aps_next = ifs->ifs_ap_sess_list;
1856 ifs->ifs_ap_sess_list = aps;
1858 if (getlock) {
1859 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
1862 if (error == 0)
1863 return 0;
1865 error = ENOMEM;
1867 junkput:
1868 if (fr != NULL)
1869 (void) fr_derefrule(&fr, ifs);
1871 if ((ipnn != NULL) && (ipnn != &ipn)) {
1872 KFREES(ipnn, ipn.ipn_dsize);
1874 if (nat != NULL) {
1875 if (aps != NULL) {
1876 if (aps->aps_data != NULL) {
1877 KFREES(aps->aps_data, aps->aps_psiz);
1879 KFREE(aps);
1881 if (in != NULL) {
1882 if (in->in_apr)
1883 appr_free(in->in_apr);
1884 KFREE(in);
1886 KFREE(nat);
1888 return error;
1892 /* ------------------------------------------------------------------------ */
1893 /* Function: nat_delete */
1894 /* Returns: int - 0 if entry deleted. Otherwise, ref count on entry */
1895 /* Parameters: nat - pointer to the NAT entry to delete */
1896 /* logtype - type of LOG record to create before deleting */
1897 /* ifs - ipf stack instance */
1898 /* Write Lock: ipf_nat */
1899 /* */
1900 /* Delete a nat entry from the various lists and table. If NAT logging is */
1901 /* enabled then generate a NAT log record for this event. */
1902 /* ------------------------------------------------------------------------ */
1903 int nat_delete(nat, logtype, ifs)
1904 struct nat *nat;
1905 int logtype;
1906 ipf_stack_t *ifs;
1908 struct ipnat *ipn;
1909 int removed = 0;
1911 if (logtype != 0 && ifs->ifs_nat_logging != 0)
1912 nat_log(nat, logtype, ifs);
1915 * Start by removing the entry from the hash table of nat entries
1916 * so it will not be "used" again.
1918 * It will remain in the "list" of nat entries until all references
1919 * have been accounted for.
1921 if ((nat->nat_phnext[0] != NULL) && (nat->nat_phnext[1] != NULL)) {
1922 removed = 1;
1924 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
1925 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
1927 *nat->nat_phnext[0] = nat->nat_hnext[0];
1928 if (nat->nat_hnext[0] != NULL) {
1929 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
1930 nat->nat_hnext[0] = NULL;
1932 nat->nat_phnext[0] = NULL;
1934 *nat->nat_phnext[1] = nat->nat_hnext[1];
1935 if (nat->nat_hnext[1] != NULL) {
1936 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
1937 nat->nat_hnext[1] = NULL;
1939 nat->nat_phnext[1] = NULL;
1941 if ((nat->nat_flags & SI_WILDP) != 0)
1942 ifs->ifs_nat_stats.ns_wilds--;
1946 * Next, remove it from the timeout queue it is in.
1948 fr_deletequeueentry(&nat->nat_tqe);
1950 if (nat->nat_me != NULL) {
1951 *nat->nat_me = NULL;
1952 nat->nat_me = NULL;
1955 MUTEX_ENTER(&nat->nat_lock);
1956 if (nat->nat_ref > 1) {
1957 nat->nat_ref--;
1958 MUTEX_EXIT(&nat->nat_lock);
1959 if (removed)
1960 ifs->ifs_nat_stats.ns_orphans++;
1961 return (nat->nat_ref);
1963 MUTEX_EXIT(&nat->nat_lock);
1965 nat->nat_ref = 0;
1968 * If entry had already been removed,
1969 * it means we're cleaning up an orphan.
1971 if (!removed)
1972 ifs->ifs_nat_stats.ns_orphans--;
1974 #ifdef IPFILTER_SYNC
1975 if (nat->nat_sync)
1976 ipfsync_del(nat->nat_sync);
1977 #endif
1980 * Now remove it from master list of nat table entries
1982 if (nat->nat_pnext != NULL) {
1983 *nat->nat_pnext = nat->nat_next;
1984 if (nat->nat_next != NULL) {
1985 nat->nat_next->nat_pnext = nat->nat_pnext;
1986 nat->nat_next = NULL;
1988 nat->nat_pnext = NULL;
1991 if (nat->nat_fr != NULL)
1992 (void)fr_derefrule(&nat->nat_fr, ifs);
1994 if (nat->nat_hm != NULL)
1995 fr_hostmapdel(&nat->nat_hm);
1998 * If there is an active reference from the nat entry to its parent
1999 * rule, decrement the rule's reference count and free it too if no
2000 * longer being used.
2002 ipn = nat->nat_ptr;
2003 if (ipn != NULL) {
2004 ipn->in_space++;
2005 ipn->in_use--;
2006 if (ipn->in_use == 0 && (ipn->in_flags & IPN_DELETE)) {
2007 if (ipn->in_apr)
2008 appr_free(ipn->in_apr);
2009 KFREE(ipn);
2010 ifs->ifs_nat_stats.ns_rules--;
2014 MUTEX_DESTROY(&nat->nat_lock);
2016 aps_free(nat->nat_aps, ifs);
2017 ifs->ifs_nat_stats.ns_inuse--;
2020 * If there's a fragment table entry too for this nat entry, then
2021 * dereference that as well. This is after nat_lock is released
2022 * because of Tru64.
2024 fr_forgetnat((void *)nat, ifs);
2026 KFREE(nat);
2028 return (0);
2032 /* ------------------------------------------------------------------------ */
2033 /* Function: nat_clearlist */
2034 /* Returns: int - number of NAT/RDR rules deleted */
2035 /* Parameters: Nil */
2036 /* */
2037 /* Delete all rules in the current list of rules. There is nothing elegant */
2038 /* about this cleanup: simply free all entries on the list of rules and */
2039 /* clear out the tables used for hashed NAT rule lookups. */
2040 /* ------------------------------------------------------------------------ */
2041 static int nat_clearlist(ifs)
2042 ipf_stack_t *ifs;
2044 ipnat_t *n, **np = &ifs->ifs_nat_list;
2045 int i = 0;
2047 if (ifs->ifs_nat_rules != NULL)
2048 bzero((char *)ifs->ifs_nat_rules,
2049 sizeof(*ifs->ifs_nat_rules) * ifs->ifs_ipf_natrules_sz);
2050 if (ifs->ifs_rdr_rules != NULL)
2051 bzero((char *)ifs->ifs_rdr_rules,
2052 sizeof(*ifs->ifs_rdr_rules) * ifs->ifs_ipf_rdrrules_sz);
2054 while ((n = *np) != NULL) {
2055 *np = n->in_next;
2056 if (n->in_use == 0) {
2057 if (n->in_apr != NULL)
2058 appr_free(n->in_apr);
2059 KFREE(n);
2060 ifs->ifs_nat_stats.ns_rules--;
2061 } else {
2062 n->in_flags |= IPN_DELETE;
2063 n->in_next = NULL;
2065 i++;
2067 ifs->ifs_nat_masks = 0;
2068 ifs->ifs_rdr_masks = 0;
2069 for (i = 0; i < 4; i++) {
2070 ifs->ifs_nat6_masks[i] = 0;
2071 ifs->ifs_rdr6_masks[i] = 0;
2073 return i;
2077 /* ------------------------------------------------------------------------ */
2078 /* Function: nat_newmap */
2079 /* Returns: int - -1 == error, 0 == success */
2080 /* Parameters: fin(I) - pointer to packet information */
2081 /* nat(I) - pointer to NAT entry */
2082 /* ni(I) - pointer to structure with misc. information needed */
2083 /* to create new NAT entry. */
2084 /* */
2085 /* Given an empty NAT structure, populate it with new information about a */
2086 /* new NAT session, as defined by the matching NAT rule. */
2087 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2088 /* to the new IP address for the translation. */
2089 /* ------------------------------------------------------------------------ */
2090 static INLINE int nat_newmap(fin, nat, ni)
2091 fr_info_t *fin;
2092 nat_t *nat;
2093 natinfo_t *ni;
2095 u_short st_port, dport, sport, port, sp, dp;
2096 struct in_addr in, inb;
2097 hostmap_t *hm;
2098 u_32_t flags;
2099 u_32_t st_ip;
2100 ipnat_t *np;
2101 nat_t *natl;
2102 int l;
2103 ipf_stack_t *ifs = fin->fin_ifs;
2106 * If it's an outbound packet which doesn't match any existing
2107 * record, then create a new port
2109 l = 0;
2110 hm = NULL;
2111 np = ni->nai_np;
2112 st_ip = np->in_nip;
2113 st_port = np->in_pnext;
2114 flags = ni->nai_flags;
2115 sport = ni->nai_sport;
2116 dport = ni->nai_dport;
2119 * Do a loop until we either run out of entries to try or we find
2120 * a NAT mapping that isn't currently being used. This is done
2121 * because the change to the source is not (usually) being fixed.
2123 do {
2124 port = 0;
2125 in.s_addr = htonl(np->in_nip);
2126 if (l == 0) {
2128 * Check to see if there is an existing NAT
2129 * setup for this IP address pair.
2131 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2132 in, 0, ifs);
2133 if (hm != NULL)
2134 in.s_addr = hm->hm_mapip.s_addr;
2135 } else if ((l == 1) && (hm != NULL)) {
2136 fr_hostmapdel(&hm);
2138 in.s_addr = ntohl(in.s_addr);
2140 nat->nat_hm = hm;
2142 if ((np->in_outmsk == 0xffffffff) && (np->in_pnext == 0)) {
2143 if (l > 0)
2144 return -1;
2147 if (np->in_redir == NAT_BIMAP &&
2148 np->in_inmsk == np->in_outmsk) {
2150 * map the address block in a 1:1 fashion
2152 in.s_addr = np->in_outip;
2153 in.s_addr |= fin->fin_saddr & ~np->in_inmsk;
2154 in.s_addr = ntohl(in.s_addr);
2156 } else if (np->in_redir & NAT_MAPBLK) {
2157 if ((l >= np->in_ppip) || ((l > 0) &&
2158 !(flags & IPN_TCPUDP)))
2159 return -1;
2161 * map-block - Calculate destination address.
2163 in.s_addr = ntohl(fin->fin_saddr);
2164 in.s_addr &= ntohl(~np->in_inmsk);
2165 inb.s_addr = in.s_addr;
2166 in.s_addr /= np->in_ippip;
2167 in.s_addr &= ntohl(~np->in_outmsk);
2168 in.s_addr += ntohl(np->in_outip);
2170 * Calculate destination port.
2172 if ((flags & IPN_TCPUDP) &&
2173 (np->in_ppip != 0)) {
2174 port = ntohs(sport) + l;
2175 port %= np->in_ppip;
2176 port += np->in_ppip *
2177 (inb.s_addr % np->in_ippip);
2178 port += MAPBLK_MINPORT;
2179 port = htons(port);
2182 } else if ((np->in_outip == 0) &&
2183 (np->in_outmsk == 0xffffffff)) {
2185 * 0/32 - use the interface's IP address.
2187 if ((l > 0) ||
2188 fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp,
2189 &in, NULL, fin->fin_ifs) == -1)
2190 return -1;
2191 in.s_addr = ntohl(in.s_addr);
2193 } else if ((np->in_outip == 0) && (np->in_outmsk == 0)) {
2195 * 0/0 - use the original source address/port.
2197 if (l > 0)
2198 return -1;
2199 in.s_addr = ntohl(fin->fin_saddr);
2201 } else if ((np->in_outmsk != 0xffffffff) &&
2202 (np->in_pnext == 0) && ((l > 0) || (hm == NULL)))
2203 np->in_nip++;
2205 natl = NULL;
2207 if ((flags & IPN_TCPUDP) &&
2208 ((np->in_redir & NAT_MAPBLK) == 0) &&
2209 (np->in_flags & IPN_AUTOPORTMAP)) {
2211 * "ports auto" (without map-block)
2213 if ((l > 0) && (l % np->in_ppip == 0)) {
2214 if (l > np->in_space) {
2215 return -1;
2216 } else if ((l > np->in_ppip) &&
2217 np->in_outmsk != 0xffffffff)
2218 np->in_nip++;
2220 if (np->in_ppip != 0) {
2221 port = ntohs(sport);
2222 port += (l % np->in_ppip);
2223 port %= np->in_ppip;
2224 port += np->in_ppip *
2225 (ntohl(fin->fin_saddr) %
2226 np->in_ippip);
2227 port += MAPBLK_MINPORT;
2228 port = htons(port);
2231 } else if (((np->in_redir & NAT_MAPBLK) == 0) &&
2232 (flags & IPN_TCPUDPICMP) && (np->in_pnext != 0)) {
2234 * Standard port translation. Select next port.
2236 if (np->in_flags & IPN_SEQUENTIAL) {
2237 port = np->in_pnext;
2238 } else {
2239 port = ipf_random() % (ntohs(np->in_pmax) -
2240 ntohs(np->in_pmin) + 1);
2241 port += ntohs(np->in_pmin);
2243 port = htons(port);
2244 np->in_pnext++;
2246 if (np->in_pnext > ntohs(np->in_pmax)) {
2247 np->in_pnext = ntohs(np->in_pmin);
2248 if (np->in_outmsk != 0xffffffff)
2249 np->in_nip++;
2253 if (np->in_flags & IPN_IPRANGE) {
2254 if (np->in_nip > ntohl(np->in_outmsk))
2255 np->in_nip = ntohl(np->in_outip);
2256 } else {
2257 if ((np->in_outmsk != 0xffffffff) &&
2258 ((np->in_nip + 1) & ntohl(np->in_outmsk)) >
2259 ntohl(np->in_outip))
2260 np->in_nip = ntohl(np->in_outip) + 1;
2263 if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY)))
2264 port = sport;
2267 * Here we do a lookup of the connection as seen from
2268 * the outside. If an IP# pair already exists, try
2269 * again. So if you have A->B becomes C->B, you can
2270 * also have D->E become C->E but not D->B causing
2271 * another C->B. Also take protocol and ports into
2272 * account when determining whether a pre-existing
2273 * NAT setup will cause an external conflict where
2274 * this is appropriate.
2276 inb.s_addr = htonl(in.s_addr);
2277 sp = fin->fin_data[0];
2278 dp = fin->fin_data[1];
2279 fin->fin_data[0] = fin->fin_data[1];
2280 fin->fin_data[1] = htons(port);
2281 natl = nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2282 (u_int)fin->fin_p, fin->fin_dst, inb);
2283 fin->fin_data[0] = sp;
2284 fin->fin_data[1] = dp;
2287 * Has the search wrapped around and come back to the
2288 * start ?
2290 if ((natl != NULL) &&
2291 (np->in_pnext != 0) && (st_port == np->in_pnext) &&
2292 (np->in_nip != 0) && (st_ip == np->in_nip))
2293 return -1;
2294 l++;
2295 } while (natl != NULL);
2297 if (np->in_space > 0)
2298 np->in_space--;
2300 /* Setup the NAT table */
2301 nat->nat_inip = fin->fin_src;
2302 nat->nat_outip.s_addr = htonl(in.s_addr);
2303 nat->nat_oip = fin->fin_dst;
2304 if (nat->nat_hm == NULL)
2305 nat->nat_hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2306 nat->nat_outip, 0, ifs);
2308 if (flags & IPN_TCPUDP) {
2309 nat->nat_inport = sport;
2310 nat->nat_outport = port; /* sport */
2311 nat->nat_oport = dport;
2312 ((tcphdr_t *)fin->fin_dp)->th_sport = port;
2313 } else if (flags & IPN_ICMPQUERY) {
2314 ((icmphdr_t *)fin->fin_dp)->icmp_id = port;
2315 nat->nat_inport = port;
2316 nat->nat_outport = port;
2319 ni->nai_ip.s_addr = in.s_addr;
2320 ni->nai_port = port;
2321 ni->nai_nport = dport;
2322 return 0;
2326 /* ------------------------------------------------------------------------ */
2327 /* Function: nat_newrdr */
2328 /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */
2329 /* allow rule to be moved if IPN_ROUNDR is set. */
2330 /* Parameters: fin(I) - pointer to packet information */
2331 /* nat(I) - pointer to NAT entry */
2332 /* ni(I) - pointer to structure with misc. information needed */
2333 /* to create new NAT entry. */
2334 /* */
2335 /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/
2336 /* to the new IP address for the translation. */
2337 /* ------------------------------------------------------------------------ */
2338 static INLINE int nat_newrdr(fin, nat, ni)
2339 fr_info_t *fin;
2340 nat_t *nat;
2341 natinfo_t *ni;
2343 u_short nport, dport, sport;
2344 struct in_addr in, inb;
2345 u_short sp, dp;
2346 hostmap_t *hm;
2347 u_32_t flags;
2348 ipnat_t *np;
2349 nat_t *natl;
2350 int move;
2351 ipf_stack_t *ifs = fin->fin_ifs;
2353 move = 1;
2354 hm = NULL;
2355 in.s_addr = 0;
2356 np = ni->nai_np;
2357 flags = ni->nai_flags;
2358 sport = ni->nai_sport;
2359 dport = ni->nai_dport;
2362 * If the matching rule has IPN_STICKY set, then we want to have the
2363 * same rule kick in as before. Why would this happen? If you have
2364 * a collection of rdr rules with "round-robin sticky", the current
2365 * packet might match a different one to the previous connection but
2366 * we want the same destination to be used.
2368 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) ==
2369 (IPN_ROUNDR|IPN_STICKY)) {
2370 hm = nat_hostmap(NULL, fin->fin_src, fin->fin_dst, in,
2371 (u_32_t)dport, ifs);
2372 if (hm != NULL) {
2373 in.s_addr = ntohl(hm->hm_mapip.s_addr);
2374 np = hm->hm_ipnat;
2375 ni->nai_np = np;
2376 move = 0;
2381 * Otherwise, it's an inbound packet. Most likely, we don't
2382 * want to rewrite source ports and source addresses. Instead,
2383 * we want to rewrite to a fixed internal address and fixed
2384 * internal port.
2386 if (np->in_flags & IPN_SPLIT) {
2387 in.s_addr = np->in_nip;
2389 if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) {
2390 hm = nat_hostmap(np, fin->fin_src, fin->fin_dst,
2391 in, (u_32_t)dport, ifs);
2392 if (hm != NULL) {
2393 in.s_addr = hm->hm_mapip.s_addr;
2394 move = 0;
2398 if (hm == NULL || hm->hm_ref == 1) {
2399 if (np->in_inip == htonl(in.s_addr)) {
2400 np->in_nip = ntohl(np->in_inmsk);
2401 move = 0;
2402 } else {
2403 np->in_nip = ntohl(np->in_inip);
2407 } else if ((np->in_inip == 0) && (np->in_inmsk == 0xffffffff)) {
2409 * 0/32 - use the interface's IP address.
2411 if (fr_ifpaddr(4, FRI_NORMAL, fin->fin_ifp, &in, NULL,
2412 fin->fin_ifs) == -1)
2413 return -1;
2414 in.s_addr = ntohl(in.s_addr);
2416 } else if ((np->in_inip == 0) && (np->in_inmsk== 0)) {
2418 * 0/0 - use the original destination address/port.
2420 in.s_addr = ntohl(fin->fin_daddr);
2422 } else if (np->in_redir == NAT_BIMAP &&
2423 np->in_inmsk == np->in_outmsk) {
2425 * map the address block in a 1:1 fashion
2427 in.s_addr = np->in_inip;
2428 in.s_addr |= fin->fin_daddr & ~np->in_inmsk;
2429 in.s_addr = ntohl(in.s_addr);
2430 } else {
2431 in.s_addr = ntohl(np->in_inip);
2434 if ((np->in_pnext == 0) || ((flags & NAT_NOTRULEPORT) != 0))
2435 nport = dport;
2436 else {
2438 * Whilst not optimized for the case where
2439 * pmin == pmax, the gain is not significant.
2441 if (((np->in_flags & IPN_FIXEDDPORT) == 0) &&
2442 (np->in_pmin != np->in_pmax)) {
2443 nport = ntohs(dport) - ntohs(np->in_pmin) +
2444 ntohs(np->in_pnext);
2445 nport = htons(nport);
2446 } else
2447 nport = np->in_pnext;
2451 * When the redirect-to address is set to 0.0.0.0, just
2452 * assume a blank `forwarding' of the packet. We don't
2453 * setup any translation for this either.
2455 if (in.s_addr == 0) {
2456 if (nport == dport)
2457 return -1;
2458 in.s_addr = ntohl(fin->fin_daddr);
2462 * Check to see if this redirect mapping already exists and if
2463 * it does, return "failure" (allowing it to be created will just
2464 * cause one or both of these "connections" to stop working.)
2466 inb.s_addr = htonl(in.s_addr);
2467 sp = fin->fin_data[0];
2468 dp = fin->fin_data[1];
2469 fin->fin_data[1] = fin->fin_data[0];
2470 fin->fin_data[0] = ntohs(nport);
2471 natl = nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH),
2472 (u_int)fin->fin_p, inb, fin->fin_src);
2473 fin->fin_data[0] = sp;
2474 fin->fin_data[1] = dp;
2475 if (natl != NULL)
2476 return (-1);
2478 nat->nat_inip.s_addr = htonl(in.s_addr);
2479 nat->nat_outip = fin->fin_dst;
2480 nat->nat_oip = fin->fin_src;
2482 ni->nai_ip.s_addr = in.s_addr;
2483 ni->nai_nport = nport;
2484 ni->nai_port = sport;
2486 if (flags & IPN_TCPUDP) {
2487 nat->nat_inport = nport;
2488 nat->nat_outport = dport;
2489 nat->nat_oport = sport;
2490 ((tcphdr_t *)fin->fin_dp)->th_dport = nport;
2491 } else if (flags & IPN_ICMPQUERY) {
2492 ((icmphdr_t *)fin->fin_dp)->icmp_id = nport;
2493 nat->nat_inport = nport;
2494 nat->nat_outport = nport;
2497 return move;
2500 /* ------------------------------------------------------------------------ */
2501 /* Function: nat_new */
2502 /* Returns: nat_t* - NULL == failure to create new NAT structure, */
2503 /* else pointer to new NAT structure */
2504 /* Parameters: fin(I) - pointer to packet information */
2505 /* np(I) - pointer to NAT rule */
2506 /* natsave(I) - pointer to where to store NAT struct pointer */
2507 /* flags(I) - flags describing the current packet */
2508 /* direction(I) - direction of packet (in/out) */
2509 /* Write Lock: ipf_nat */
2510 /* */
2511 /* Attempts to create a new NAT entry. Does not actually change the packet */
2512 /* in any way. */
2513 /* */
2514 /* This fucntion is in three main parts: (1) deal with creating a new NAT */
2515 /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */
2516 /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */
2517 /* and (3) building that structure and putting it into the NAT table(s). */
2518 /* ------------------------------------------------------------------------ */
2519 nat_t *nat_new(fin, np, natsave, flags, direction)
2520 fr_info_t *fin;
2521 ipnat_t *np;
2522 nat_t **natsave;
2523 u_int flags;
2524 int direction;
2526 tcphdr_t *tcp = NULL;
2527 hostmap_t *hm = NULL;
2528 nat_t *nat, *natl;
2529 u_int nflags;
2530 natinfo_t ni;
2531 int move;
2532 ipf_stack_t *ifs = fin->fin_ifs;
2535 * Trigger automatic call to nat_flushtable() if the
2536 * table has reached capcity specified by hi watermark.
2538 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
2539 ifs->ifs_nat_doflush = 1;
2542 * If automatic flushing did not do its job, and the table
2543 * has filled up, don't try to create a new entry.
2545 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
2546 ifs->ifs_nat_stats.ns_memfail++;
2547 return NULL;
2550 move = 1;
2551 nflags = np->in_flags & flags;
2552 nflags &= NAT_FROMRULE;
2554 ni.nai_np = np;
2555 ni.nai_nflags = nflags;
2556 ni.nai_flags = flags;
2558 /* Give me a new nat */
2559 KMALLOC(nat, nat_t *);
2560 if (nat == NULL) {
2561 ifs->ifs_nat_stats.ns_memfail++;
2563 * Try to automatically tune the max # of entries in the
2564 * table allowed to be less than what will cause kmem_alloc()
2565 * to fail and try to eliminate panics due to out of memory
2566 * conditions arising.
2568 if (ifs->ifs_ipf_nattable_max > ifs->ifs_ipf_nattable_sz) {
2569 ifs->ifs_ipf_nattable_max = ifs->ifs_nat_stats.ns_inuse - 100;
2570 printf("ipf_nattable_max reduced to %d\n",
2571 ifs->ifs_ipf_nattable_max);
2573 return NULL;
2576 if (flags & IPN_TCPUDP) {
2577 tcp = fin->fin_dp;
2578 ni.nai_sport = htons(fin->fin_sport);
2579 ni.nai_dport = htons(fin->fin_dport);
2580 } else if (flags & IPN_ICMPQUERY) {
2582 * In the ICMP query NAT code, we translate the ICMP id fields
2583 * to make them unique. This is indepedent of the ICMP type
2584 * (e.g. in the unlikely event that a host sends an echo and
2585 * an tstamp request with the same id, both packets will have
2586 * their ip address/id field changed in the same way).
2588 /* The icmp_id field is used by the sender to identify the
2589 * process making the icmp request. (the receiver justs
2590 * copies it back in its response). So, it closely matches
2591 * the concept of source port. We overlay sport, so we can
2592 * maximally reuse the existing code.
2594 ni.nai_sport = ((icmphdr_t *)fin->fin_dp)->icmp_id;
2595 ni.nai_dport = ni.nai_sport;
2598 bzero((char *)nat, sizeof(*nat));
2599 nat->nat_flags = flags;
2600 nat->nat_redir = np->in_redir;
2602 if ((flags & NAT_SLAVE) == 0) {
2603 MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
2607 * Search the current table for a match.
2609 if (direction == NAT_OUTBOUND) {
2611 * We can now arrange to call this for the same connection
2612 * because ipf_nat_new doesn't protect the code path into
2613 * this function.
2615 natl = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
2616 fin->fin_src, fin->fin_dst);
2617 if (natl != NULL) {
2618 KFREE(nat);
2619 nat = natl;
2620 goto done;
2623 move = nat_newmap(fin, nat, &ni);
2624 if (move == -1)
2625 goto badnat;
2627 np = ni.nai_np;
2628 } else {
2630 * NAT_INBOUND is used only for redirects rules
2632 natl = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
2633 fin->fin_src, fin->fin_dst);
2634 if (natl != NULL) {
2635 KFREE(nat);
2636 nat = natl;
2637 goto done;
2640 move = nat_newrdr(fin, nat, &ni);
2641 if (move == -1)
2642 goto badnat;
2644 np = ni.nai_np;
2647 if ((move == 1) && (np->in_flags & IPN_ROUNDR)) {
2648 if (np->in_redir == NAT_REDIRECT) {
2649 nat_delrdr(np);
2650 nat_addrdr(np, ifs);
2651 } else if (np->in_redir == NAT_MAP) {
2652 nat_delnat(np);
2653 nat_addnat(np, ifs);
2657 if (nat_finalise(fin, nat, &ni, tcp, natsave, direction) == -1) {
2658 goto badnat;
2661 nat_calc_chksum_diffs(nat);
2663 if (flags & SI_WILDP)
2664 ifs->ifs_nat_stats.ns_wilds++;
2665 fin->fin_flx |= FI_NEWNAT;
2666 goto done;
2667 badnat:
2668 ifs->ifs_nat_stats.ns_badnat++;
2669 if ((hm = nat->nat_hm) != NULL)
2670 fr_hostmapdel(&hm);
2671 KFREE(nat);
2672 nat = NULL;
2673 done:
2674 if ((flags & NAT_SLAVE) == 0) {
2675 MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
2677 return nat;
2681 /* ------------------------------------------------------------------------ */
2682 /* Function: nat_finalise */
2683 /* Returns: int - 0 == sucess, -1 == failure */
2684 /* Parameters: fin(I) - pointer to packet information */
2685 /* nat(I) - pointer to NAT entry */
2686 /* ni(I) - pointer to structure with misc. information needed */
2687 /* to create new NAT entry. */
2688 /* Write Lock: ipf_nat */
2689 /* */
2690 /* This is the tail end of constructing a new NAT entry and is the same */
2691 /* for both IPv4 and IPv6. */
2692 /* ------------------------------------------------------------------------ */
2693 /*ARGSUSED*/
2694 static INLINE int nat_finalise(fin, nat, ni, tcp, natsave, direction)
2695 fr_info_t *fin;
2696 nat_t *nat;
2697 natinfo_t *ni;
2698 tcphdr_t *tcp;
2699 nat_t **natsave;
2700 int direction;
2702 frentry_t *fr;
2703 ipnat_t *np;
2704 ipf_stack_t *ifs = fin->fin_ifs;
2706 np = ni->nai_np;
2708 COPYIFNAME(fin->fin_ifp, nat->nat_ifnames[0], fin->fin_v);
2710 #ifdef IPFILTER_SYNC
2711 if ((nat->nat_flags & SI_CLONE) == 0)
2712 nat->nat_sync = ipfsync_new(SMC_NAT, fin, nat);
2713 #endif
2715 nat->nat_me = natsave;
2716 nat->nat_dir = direction;
2717 nat->nat_ifps[0] = np->in_ifps[0];
2718 nat->nat_ifps[1] = np->in_ifps[1];
2719 nat->nat_ptr = np;
2720 nat->nat_p = fin->fin_p;
2721 nat->nat_v = fin->fin_v;
2722 nat->nat_mssclamp = np->in_mssclamp;
2723 fr = fin->fin_fr;
2724 nat->nat_fr = fr;
2726 if ((np->in_apr != NULL) && ((ni->nai_flags & NAT_SLAVE) == 0))
2727 if (appr_new(fin, nat) == -1)
2728 return -1;
2730 if (nat_insert(nat, fin->fin_rev, ifs) == 0) {
2731 if (ifs->ifs_nat_logging)
2732 nat_log(nat, (u_int)np->in_redir, ifs);
2733 np->in_use++;
2734 if (fr != NULL) {
2735 MUTEX_ENTER(&fr->fr_lock);
2736 fr->fr_ref++;
2737 MUTEX_EXIT(&fr->fr_lock);
2739 return 0;
2743 * nat_insert failed, so cleanup time...
2745 return -1;
2749 /* ------------------------------------------------------------------------ */
2750 /* Function: nat_insert */
2751 /* Returns: int - 0 == sucess, -1 == failure */
2752 /* Parameters: nat(I) - pointer to NAT structure */
2753 /* rev(I) - flag indicating forward/reverse direction of packet */
2754 /* Write Lock: ipf_nat */
2755 /* */
2756 /* Insert a NAT entry into the hash tables for searching and add it to the */
2757 /* list of active NAT entries. Adjust global counters when complete. */
2758 /* ------------------------------------------------------------------------ */
2759 int nat_insert(nat, rev, ifs)
2760 nat_t *nat;
2761 int rev;
2762 ipf_stack_t *ifs;
2764 u_int hv1, hv2;
2765 nat_t **natp;
2768 * Try and return an error as early as possible, so calculate the hash
2769 * entry numbers first and then proceed.
2771 if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) {
2772 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport,
2773 0xffffffff);
2774 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1 + nat->nat_oport,
2775 ifs->ifs_ipf_nattable_sz);
2776 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport,
2777 0xffffffff);
2778 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2 + nat->nat_oport,
2779 ifs->ifs_ipf_nattable_sz);
2780 } else {
2781 hv1 = NAT_HASH_FN(nat->nat_inip.s_addr, 0, 0xffffffff);
2782 hv1 = NAT_HASH_FN(nat->nat_oip.s_addr, hv1,
2783 ifs->ifs_ipf_nattable_sz);
2784 hv2 = NAT_HASH_FN(nat->nat_outip.s_addr, 0, 0xffffffff);
2785 hv2 = NAT_HASH_FN(nat->nat_oip.s_addr, hv2,
2786 ifs->ifs_ipf_nattable_sz);
2789 if (ifs->ifs_nat_stats.ns_bucketlen[0][hv1] >= ifs->ifs_fr_nat_maxbucket ||
2790 ifs->ifs_nat_stats.ns_bucketlen[1][hv2] >= ifs->ifs_fr_nat_maxbucket) {
2791 return -1;
2794 nat->nat_hv[0] = hv1;
2795 nat->nat_hv[1] = hv2;
2797 MUTEX_INIT(&nat->nat_lock, "nat entry lock");
2799 nat->nat_rev = rev;
2800 nat->nat_ref = 1;
2801 nat->nat_bytes[0] = 0;
2802 nat->nat_pkts[0] = 0;
2803 nat->nat_bytes[1] = 0;
2804 nat->nat_pkts[1] = 0;
2806 nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0';
2807 nat->nat_ifps[0] = fr_resolvenic(nat->nat_ifnames[0], 4, ifs);
2809 if (nat->nat_ifnames[1][0] !='\0') {
2810 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2811 nat->nat_ifps[1] = fr_resolvenic(nat->nat_ifnames[1], 4, ifs);
2812 } else {
2813 (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0],
2814 LIFNAMSIZ);
2815 nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0';
2816 nat->nat_ifps[1] = nat->nat_ifps[0];
2819 nat->nat_next = ifs->ifs_nat_instances;
2820 nat->nat_pnext = &ifs->ifs_nat_instances;
2821 if (ifs->ifs_nat_instances)
2822 ifs->ifs_nat_instances->nat_pnext = &nat->nat_next;
2823 ifs->ifs_nat_instances = nat;
2825 natp = &ifs->ifs_nat_table[0][hv1];
2826 if (*natp)
2827 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
2828 nat->nat_phnext[0] = natp;
2829 nat->nat_hnext[0] = *natp;
2830 *natp = nat;
2831 ifs->ifs_nat_stats.ns_bucketlen[0][hv1]++;
2833 natp = &ifs->ifs_nat_table[1][hv2];
2834 if (*natp)
2835 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
2836 nat->nat_phnext[1] = natp;
2837 nat->nat_hnext[1] = *natp;
2838 *natp = nat;
2839 ifs->ifs_nat_stats.ns_bucketlen[1][hv2]++;
2841 fr_setnatqueue(nat, rev, ifs);
2843 ifs->ifs_nat_stats.ns_added++;
2844 ifs->ifs_nat_stats.ns_inuse++;
2845 return 0;
2849 /* ------------------------------------------------------------------------ */
2850 /* Function: nat_icmperrorlookup */
2851 /* Returns: nat_t* - point to matching NAT structure */
2852 /* Parameters: fin(I) - pointer to packet information */
2853 /* dir(I) - direction of packet (in/out) */
2854 /* */
2855 /* Check if the ICMP error message is related to an existing TCP, UDP or */
2856 /* ICMP query nat entry. It is assumed that the packet is already of the */
2857 /* the required length. */
2858 /* ------------------------------------------------------------------------ */
2859 nat_t *nat_icmperrorlookup(fin, dir)
2860 fr_info_t *fin;
2861 int dir;
2863 int flags = 0, minlen;
2864 icmphdr_t *orgicmp;
2865 tcphdr_t *tcp = NULL;
2866 u_short data[2];
2867 nat_t *nat;
2868 ip_t *oip;
2869 u_int p;
2872 * Does it at least have the return (basic) IP header ?
2873 * Only a basic IP header (no options) should be with an ICMP error
2874 * header. Also, if it's not an error type, then return.
2876 if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR))
2877 return NULL;
2880 * Check packet size
2882 oip = (ip_t *)((char *)fin->fin_dp + 8);
2883 minlen = IP_HL(oip) << 2;
2884 if ((minlen < sizeof(ip_t)) ||
2885 (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen))
2886 return NULL;
2888 * Is the buffer big enough for all of it ? It's the size of the IP
2889 * header claimed in the encapsulated part which is of concern. It
2890 * may be too big to be in this buffer but not so big that it's
2891 * outside the ICMP packet, leading to TCP deref's causing problems.
2892 * This is possible because we don't know how big oip_hl is when we
2893 * do the pullup early in fr_check() and thus can't gaurantee it is
2894 * all here now.
2896 #ifdef _KERNEL
2898 mb_t *m;
2900 m = fin->fin_m;
2901 # if defined(MENTAT)
2902 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr)
2903 return NULL;
2904 # else
2905 if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN >
2906 (char *)fin->fin_ip + M_LEN(m))
2907 return NULL;
2908 # endif
2910 #endif
2912 if (fin->fin_daddr != oip->ip_src.s_addr)
2913 return NULL;
2915 p = oip->ip_p;
2916 if (p == IPPROTO_TCP)
2917 flags = IPN_TCP;
2918 else if (p == IPPROTO_UDP)
2919 flags = IPN_UDP;
2920 else if (p == IPPROTO_ICMP) {
2921 orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2923 /* see if this is related to an ICMP query */
2924 if (nat_icmpquerytype4(orgicmp->icmp_type)) {
2925 data[0] = fin->fin_data[0];
2926 data[1] = fin->fin_data[1];
2927 fin->fin_data[0] = 0;
2928 fin->fin_data[1] = orgicmp->icmp_id;
2930 flags = IPN_ICMPERR|IPN_ICMPQUERY;
2932 * NOTE : dir refers to the direction of the original
2933 * ip packet. By definition the icmp error
2934 * message flows in the opposite direction.
2936 if (dir == NAT_INBOUND)
2937 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2938 oip->ip_src);
2939 else
2940 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2941 oip->ip_src);
2942 fin->fin_data[0] = data[0];
2943 fin->fin_data[1] = data[1];
2944 return nat;
2948 if (flags & IPN_TCPUDP) {
2949 minlen += 8; /* + 64bits of data to get ports */
2950 if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)
2951 return NULL;
2953 data[0] = fin->fin_data[0];
2954 data[1] = fin->fin_data[1];
2955 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2956 fin->fin_data[0] = ntohs(tcp->th_dport);
2957 fin->fin_data[1] = ntohs(tcp->th_sport);
2959 if (dir == NAT_INBOUND) {
2960 nat = nat_inlookup(fin, flags, p, oip->ip_dst,
2961 oip->ip_src);
2962 } else {
2963 nat = nat_outlookup(fin, flags, p, oip->ip_dst,
2964 oip->ip_src);
2966 fin->fin_data[0] = data[0];
2967 fin->fin_data[1] = data[1];
2968 return nat;
2970 if (dir == NAT_INBOUND)
2971 return nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2972 else
2973 return nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src);
2977 /* ------------------------------------------------------------------------ */
2978 /* Function: nat_icmperror */
2979 /* Returns: nat_t* - point to matching NAT structure */
2980 /* Parameters: fin(I) - pointer to packet information */
2981 /* nflags(I) - NAT flags for this packet */
2982 /* dir(I) - direction of packet (in/out) */
2983 /* */
2984 /* Fix up an ICMP packet which is an error message for an existing NAT */
2985 /* session. This will correct both packet header data and checksums. */
2986 /* */
2987 /* This should *ONLY* be used for incoming ICMP error packets to make sure */
2988 /* a NAT'd ICMP packet gets correctly recognised. */
2989 /* ------------------------------------------------------------------------ */
2990 nat_t *nat_icmperror(fin, nflags, dir)
2991 fr_info_t *fin;
2992 u_int *nflags;
2993 int dir;
2995 u_32_t sum1, sum2, sumd, psum1, psum2, psumd, sumd2;
2996 struct in_addr in;
2997 icmphdr_t *icmp, *orgicmp;
2998 int dlen;
2999 udphdr_t *udp;
3000 tcphdr_t *tcp;
3001 nat_t *nat;
3002 ip_t *oip;
3003 if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY)))
3004 return NULL;
3007 * nat_icmperrorlookup() looks up nat entry associated with the
3008 * offending IP packet and returns pointer to the entry, or NULL
3009 * if packet wasn't natted or for `defective' packets.
3012 if ((fin->fin_v != 4) || !(nat = nat_icmperrorlookup(fin, dir)))
3013 return NULL;
3015 sumd2 = 0;
3016 *nflags = IPN_ICMPERR;
3017 icmp = fin->fin_dp;
3018 oip = (ip_t *)&icmp->icmp_ip;
3019 udp = (udphdr_t *)((((char *)oip) + (IP_HL(oip) << 2)));
3020 tcp = (tcphdr_t *)udp;
3021 dlen = fin->fin_plen - ((char *)udp - (char *)fin->fin_ip);
3024 * Need to adjust ICMP header to include the real IP#'s and
3025 * port #'s. There are three steps required.
3027 * Step 1
3028 * Fix the IP addresses in the offending IP packet and update
3029 * ip header checksum to compensate for the change.
3031 * No update needed here for icmp_cksum because the ICMP checksum
3032 * is calculated over the complete ICMP packet, which includes the
3033 * changed oip IP addresses and oip->ip_sum. These two changes
3034 * cancel each other out (if the delta for the IP address is x,
3035 * then the delta for ip_sum is minus x).
3038 if (oip->ip_dst.s_addr == nat->nat_oip.s_addr) {
3039 sum1 = LONG_SUM(ntohl(oip->ip_src.s_addr));
3040 in = nat->nat_inip;
3041 oip->ip_src = in;
3042 } else {
3043 sum1 = LONG_SUM(ntohl(oip->ip_dst.s_addr));
3044 in = nat->nat_outip;
3045 oip->ip_dst = in;
3048 sum2 = LONG_SUM(ntohl(in.s_addr));
3049 CALC_SUMD(sum1, sum2, sumd);
3050 fix_datacksum(&oip->ip_sum, sumd);
3053 * Step 2
3054 * Perform other adjustments based on protocol of offending packet.
3057 switch (oip->ip_p) {
3058 case IPPROTO_TCP :
3059 case IPPROTO_UDP :
3062 * For offending TCP/UDP IP packets, translate the ports
3063 * based on the NAT specification.
3065 * Advance notice : Now it becomes complicated :-)
3067 * Since the port and IP addresse fields are both part
3068 * of the TCP/UDP checksum of the offending IP packet,
3069 * we need to adjust that checksum as well.
3071 * To further complicate things, the TCP/UDP checksum
3072 * may not be present. We must check to see if the
3073 * length of the data portion is big enough to hold
3074 * the checksum. In the UDP case, a test to determine
3075 * if the checksum is even set is also required.
3077 * Any changes to an IP address, port or checksum within
3078 * the ICMP packet requires a change to icmp_cksum.
3080 * Be extremely careful here ... The change is dependent
3081 * upon whether or not the TCP/UPD checksum is present.
3083 * If TCP/UPD checksum is present, the icmp_cksum must
3084 * compensate for checksum modification resulting from
3085 * IP address change only. Port change and resulting
3086 * data checksum adjustments cancel each other out.
3088 * If TCP/UDP checksum is not present, icmp_cksum must
3089 * compensate for port change only. The IP address
3090 * change does not modify anything else in this case.
3093 psum1 = 0;
3094 psum2 = 0;
3095 psumd = 0;
3097 if ((tcp->th_dport == nat->nat_oport) &&
3098 (tcp->th_sport != nat->nat_inport)) {
3101 * Translate the source port.
3104 psum1 = ntohs(tcp->th_sport);
3105 psum2 = ntohs(nat->nat_inport);
3106 tcp->th_sport = nat->nat_inport;
3108 } else if ((tcp->th_sport == nat->nat_oport) &&
3109 (tcp->th_dport != nat->nat_outport)) {
3112 * Translate the destination port.
3115 psum1 = ntohs(tcp->th_dport);
3116 psum2 = ntohs(nat->nat_outport);
3117 tcp->th_dport = nat->nat_outport;
3120 if ((oip->ip_p == IPPROTO_TCP) && (dlen >= 18)) {
3123 * TCP checksum present.
3125 * Adjust data checksum and icmp checksum to
3126 * compensate for any IP address change.
3129 sum1 = ntohs(tcp->th_sum);
3130 fix_datacksum(&tcp->th_sum, sumd);
3131 sum2 = ntohs(tcp->th_sum);
3132 sumd2 = sumd << 1;
3133 CALC_SUMD(sum1, sum2, sumd);
3134 sumd2 += sumd;
3137 * Also make data checksum adjustment to
3138 * compensate for any port change.
3141 if (psum1 != psum2) {
3142 CALC_SUMD(psum1, psum2, psumd);
3143 fix_datacksum(&tcp->th_sum, psumd);
3146 } else if ((oip->ip_p == IPPROTO_UDP) &&
3147 (dlen >= 8) && (udp->uh_sum != 0)) {
3150 * The UDP checksum is present and set.
3152 * Adjust data checksum and icmp checksum to
3153 * compensate for any IP address change.
3156 sum1 = ntohs(udp->uh_sum);
3157 fix_datacksum(&udp->uh_sum, sumd);
3158 sum2 = ntohs(udp->uh_sum);
3159 sumd2 = sumd << 1;
3160 CALC_SUMD(sum1, sum2, sumd);
3161 sumd2 += sumd;
3164 * Also make data checksum adjustment to
3165 * compensate for any port change.
3168 if (psum1 != psum2) {
3169 CALC_SUMD(psum1, psum2, psumd);
3170 fix_datacksum(&udp->uh_sum, psumd);
3173 } else {
3176 * Data checksum was not present.
3178 * Compensate for any port change.
3181 CALC_SUMD(psum2, psum1, psumd);
3182 sumd2 += psumd;
3184 break;
3186 case IPPROTO_ICMP :
3188 orgicmp = (icmphdr_t *)udp;
3190 if ((nat->nat_dir == NAT_OUTBOUND) &&
3191 (orgicmp->icmp_id != nat->nat_inport) &&
3192 (dlen >= 8)) {
3195 * Fix ICMP checksum (of the offening ICMP
3196 * query packet) to compensate the change
3197 * in the ICMP id of the offending ICMP
3198 * packet.
3200 * Since you modify orgicmp->icmp_id with
3201 * a delta (say x) and you compensate that
3202 * in origicmp->icmp_cksum with a delta
3203 * minus x, you don't have to adjust the
3204 * overall icmp->icmp_cksum
3207 sum1 = ntohs(orgicmp->icmp_id);
3208 sum2 = ntohs(nat->nat_inport);
3209 CALC_SUMD(sum1, sum2, sumd);
3210 orgicmp->icmp_id = nat->nat_inport;
3211 fix_datacksum(&orgicmp->icmp_cksum, sumd);
3213 } /* nat_dir can't be NAT_INBOUND for icmp queries */
3215 break;
3217 default :
3219 break;
3221 } /* switch (oip->ip_p) */
3224 * Step 3
3225 * Make the adjustments to icmp checksum.
3228 if (sumd2 != 0) {
3229 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3230 sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16);
3231 fix_incksum(&icmp->icmp_cksum, sumd2);
3233 return nat;
3238 * NB: these lookups don't lock access to the list; it is assumed that this has
3239 * already been done!
3242 /* ------------------------------------------------------------------------ */
3243 /* Function: nat_inlookup */
3244 /* Returns: nat_t* - NULL == no match, */
3245 /* else pointer to matching NAT entry */
3246 /* Parameters: fin(I) - pointer to packet information */
3247 /* flags(I) - NAT flags for this packet */
3248 /* p(I) - protocol for this packet */
3249 /* src(I) - source IP address */
3250 /* mapdst(I) - destination IP address */
3251 /* */
3252 /* Lookup a nat entry based on the mapped destination ip address/port and */
3253 /* real source address/port. We use this lookup when receiving a packet, */
3254 /* we're looking for a table entry, based on the destination address. */
3255 /* */
3256 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3257 /* */
3258 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3259 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */
3260 /* */
3261 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3262 /* the packet is of said protocol */
3263 /* ------------------------------------------------------------------------ */
3264 nat_t *nat_inlookup(fin, flags, p, src, mapdst)
3265 fr_info_t *fin;
3266 u_int flags, p;
3267 struct in_addr src , mapdst;
3269 u_short sport, dport;
3270 ipnat_t *ipn;
3271 u_int sflags;
3272 nat_t *nat;
3273 int nflags;
3274 u_32_t dst;
3275 void *ifp;
3276 u_int hv;
3277 ipf_stack_t *ifs = fin->fin_ifs;
3279 if (fin != NULL)
3280 ifp = fin->fin_ifp;
3281 else
3282 ifp = NULL;
3283 sport = 0;
3284 dport = 0;
3285 dst = mapdst.s_addr;
3286 sflags = flags & NAT_TCPUDPICMP;
3288 switch (p)
3290 case IPPROTO_TCP :
3291 case IPPROTO_UDP :
3292 sport = htons(fin->fin_data[0]);
3293 dport = htons(fin->fin_data[1]);
3294 break;
3295 case IPPROTO_ICMP :
3296 if (flags & IPN_ICMPERR)
3297 sport = fin->fin_data[1];
3298 else
3299 dport = fin->fin_data[1];
3300 break;
3301 default :
3302 break;
3306 if ((flags & SI_WILDP) != 0)
3307 goto find_in_wild_ports;
3309 hv = NAT_HASH_FN(dst, dport, 0xffffffff);
3310 hv = NAT_HASH_FN(src.s_addr, hv + sport, ifs->ifs_ipf_nattable_sz);
3311 nat = ifs->ifs_nat_table[1][hv];
3312 for (; nat; nat = nat->nat_hnext[1]) {
3313 if (nat->nat_v != 4)
3314 continue;
3316 if (nat->nat_ifps[0] != NULL) {
3317 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3318 continue;
3319 } else if (ifp != NULL)
3320 nat->nat_ifps[0] = ifp;
3322 nflags = nat->nat_flags;
3324 if (nat->nat_oip.s_addr == src.s_addr &&
3325 nat->nat_outip.s_addr == dst &&
3326 (((p == 0) &&
3327 (sflags == (nat->nat_flags & IPN_TCPUDPICMP)))
3328 || (p == nat->nat_p))) {
3329 switch (p)
3331 #if 0
3332 case IPPROTO_GRE :
3333 if (nat->nat_call[1] != fin->fin_data[0])
3334 continue;
3335 break;
3336 #endif
3337 case IPPROTO_ICMP :
3338 if ((flags & IPN_ICMPERR) != 0) {
3339 if (nat->nat_outport != sport)
3340 continue;
3341 } else {
3342 if (nat->nat_outport != dport)
3343 continue;
3345 break;
3346 case IPPROTO_TCP :
3347 case IPPROTO_UDP :
3348 if (nat->nat_oport != sport)
3349 continue;
3350 if (nat->nat_outport != dport)
3351 continue;
3352 break;
3353 default :
3354 break;
3357 ipn = nat->nat_ptr;
3358 if ((ipn != NULL) && (nat->nat_aps != NULL))
3359 if (appr_match(fin, nat) != 0)
3360 continue;
3361 return nat;
3366 * So if we didn't find it but there are wildcard members in the hash
3367 * table, go back and look for them. We do this search and update here
3368 * because it is modifying the NAT table and we want to do this only
3369 * for the first packet that matches. The exception, of course, is
3370 * for "dummy" (FI_IGNORE) lookups.
3372 find_in_wild_ports:
3373 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3374 return NULL;
3375 if (ifs->ifs_nat_stats.ns_wilds == 0)
3376 return NULL;
3378 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3380 hv = NAT_HASH_FN(dst, 0, 0xffffffff);
3381 hv = NAT_HASH_FN(src.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3383 WRITE_ENTER(&ifs->ifs_ipf_nat);
3385 nat = ifs->ifs_nat_table[1][hv];
3386 for (; nat; nat = nat->nat_hnext[1]) {
3387 if (nat->nat_v != 4)
3388 continue;
3390 if (nat->nat_ifps[0] != NULL) {
3391 if ((ifp != NULL) && (ifp != nat->nat_ifps[0]))
3392 continue;
3393 } else if (ifp != NULL)
3394 nat->nat_ifps[0] = ifp;
3396 if (nat->nat_p != fin->fin_p)
3397 continue;
3398 if (nat->nat_oip.s_addr != src.s_addr ||
3399 nat->nat_outip.s_addr != dst)
3400 continue;
3402 nflags = nat->nat_flags;
3403 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3404 continue;
3406 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3407 NAT_INBOUND) == 1) {
3408 if ((fin->fin_flx & FI_IGNORE) != 0)
3409 break;
3410 if ((nflags & SI_CLONE) != 0) {
3411 nat = fr_natclone(fin, nat);
3412 if (nat == NULL)
3413 break;
3414 } else {
3415 MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3416 ifs->ifs_nat_stats.ns_wilds--;
3417 MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3419 nat->nat_oport = sport;
3420 nat->nat_outport = dport;
3421 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3422 nat_tabmove(nat, ifs);
3423 break;
3427 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3429 return nat;
3433 /* ------------------------------------------------------------------------ */
3434 /* Function: nat_tabmove */
3435 /* Returns: Nil */
3436 /* Parameters: nat(I) - pointer to NAT structure */
3437 /* Write Lock: ipf_nat */
3438 /* */
3439 /* This function is only called for TCP/UDP NAT table entries where the */
3440 /* original was placed in the table without hashing on the ports and we now */
3441 /* want to include hashing on port numbers. */
3442 /* ------------------------------------------------------------------------ */
3443 static void nat_tabmove(nat, ifs)
3444 nat_t *nat;
3445 ipf_stack_t *ifs;
3447 nat_t **natp;
3448 u_int hv;
3450 if (nat->nat_flags & SI_CLONE)
3451 return;
3454 * Remove the NAT entry from the old location
3456 if (nat->nat_hnext[0])
3457 nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0];
3458 *nat->nat_phnext[0] = nat->nat_hnext[0];
3459 ifs->ifs_nat_stats.ns_bucketlen[0][nat->nat_hv[0]]--;
3461 if (nat->nat_hnext[1])
3462 nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1];
3463 *nat->nat_phnext[1] = nat->nat_hnext[1];
3464 ifs->ifs_nat_stats.ns_bucketlen[1][nat->nat_hv[1]]--;
3467 * Add into the NAT table in the new position
3469 hv = NAT_HASH_FN(nat->nat_inip.s_addr, nat->nat_inport, 0xffffffff);
3470 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3471 ifs->ifs_ipf_nattable_sz);
3472 nat->nat_hv[0] = hv;
3473 natp = &ifs->ifs_nat_table[0][hv];
3474 if (*natp)
3475 (*natp)->nat_phnext[0] = &nat->nat_hnext[0];
3476 nat->nat_phnext[0] = natp;
3477 nat->nat_hnext[0] = *natp;
3478 *natp = nat;
3479 ifs->ifs_nat_stats.ns_bucketlen[0][hv]++;
3481 hv = NAT_HASH_FN(nat->nat_outip.s_addr, nat->nat_outport, 0xffffffff);
3482 hv = NAT_HASH_FN(nat->nat_oip.s_addr, hv + nat->nat_oport,
3483 ifs->ifs_ipf_nattable_sz);
3484 nat->nat_hv[1] = hv;
3485 natp = &ifs->ifs_nat_table[1][hv];
3486 if (*natp)
3487 (*natp)->nat_phnext[1] = &nat->nat_hnext[1];
3488 nat->nat_phnext[1] = natp;
3489 nat->nat_hnext[1] = *natp;
3490 *natp = nat;
3491 ifs->ifs_nat_stats.ns_bucketlen[1][hv]++;
3495 /* ------------------------------------------------------------------------ */
3496 /* Function: nat_outlookup */
3497 /* Returns: nat_t* - NULL == no match, */
3498 /* else pointer to matching NAT entry */
3499 /* Parameters: fin(I) - pointer to packet information */
3500 /* flags(I) - NAT flags for this packet */
3501 /* p(I) - protocol for this packet */
3502 /* src(I) - source IP address */
3503 /* dst(I) - destination IP address */
3504 /* (there is no rw argument; lock expectations are in the NOTE below) */
3505 /* */
3506 /* Lookup a nat entry based on the source 'real' ip address/port and */
3507 /* destination address/port. We use this lookup when sending a packet out, */
3508 /* we're looking for a table entry, based on the source address. */
3509 /* */
3510 /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */
3511 /* */
3512 /* NOTE: IT IS ASSUMED THAT ipf_nat IS ONLY HELD WITH A READ LOCK WHEN */
3513 /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN flags. */
3514 /* */
3515 /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */
3516 /* the packet is of said protocol */
3517 /* ------------------------------------------------------------------------ */
/*
 * Review summary: an exact-match pass over ifs_nat_table[0] runs first.  If
 * it finds nothing, and flags carries both a NAT_TCPUDP bit and NAT_SEARCH,
 * and ns_wilds is non-zero, a second pass looks for wildcard-port entries
 * with ipf_nat released and re-entered through WRITE_ENTER.
 * NOTE(review): this listing has lost its brace-only and comment-delimiter
 * lines; block structure has to be inferred from the embedded line numbers.
 */
3518 nat_t *nat_outlookup(fin, flags, p, src, dst)
3519 fr_info_t *fin;
3520 u_int flags, p;
3521 struct in_addr src , dst;
3523 u_short sport, dport;
3524 u_int sflags;
3525 ipnat_t *ipn;
3526 u_32_t srcip;
3527 nat_t *nat;
3528 int nflags;
3529 void *ifp;
3530 u_int hv;
3531 ipf_stack_t *ifs = fin->fin_ifs;
3533 ifp = fin->fin_ifp;
3535 srcip = src.s_addr;
3536 sflags = flags & IPN_TCPUDPICMP;
3537 sport = 0;
3538 dport = 0;
/*
 * Derive the port pair used for hashing and matching: TCP/UDP take both
 * values from fin_data[] through htons(); ICMP uses fin_data[1] as the
 * source (error packets) or destination (everything else) value.
 */
3540 switch (p)
3542 case IPPROTO_TCP :
3543 case IPPROTO_UDP :
3544 sport = htons(fin->fin_data[0]);
3545 dport = htons(fin->fin_data[1]);
3546 break;
3547 case IPPROTO_ICMP :
3548 if (flags & IPN_ICMPERR)
3549 sport = fin->fin_data[1];
3550 else
3551 dport = fin->fin_data[1];
3552 break;
3553 default :
3554 break;
/* Caller set a wildcard-port bit in flags: skip the exact-match pass. */
3557 if ((flags & SI_WILDP) != 0)
3558 goto find_out_wild_ports;
/* Exact-match pass: the bucket is chosen from both addresses and both ports. */
3560 hv = NAT_HASH_FN(srcip, sport, 0xffffffff);
3561 hv = NAT_HASH_FN(dst.s_addr, hv + dport, ifs->ifs_ipf_nattable_sz);
3562 nat = ifs->ifs_nat_table[0][hv];
3563 for (; nat; nat = nat->nat_hnext[0]) {
3564 if (nat->nat_v != 4)
3565 continue;
/*
 * Interface check: skip entries bound to a different interface; an entry
 * with no interface in slot [1] adopts this packet's interface.
 */
3567 if (nat->nat_ifps[1] != NULL) {
3568 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3569 continue;
3570 } else if (ifp != NULL)
3571 nat->nat_ifps[1] = ifp;
3573 nflags = nat->nat_flags;
/*
 * Addresses must match; the protocol matches either by number, or, when
 * p is 0, by comparing the protocol bits of flags with those of the entry.
 */
3575 if (nat->nat_inip.s_addr == srcip &&
3576 nat->nat_oip.s_addr == dst.s_addr &&
3577 (((p == 0) && (sflags == (nflags & NAT_TCPUDPICMP)))
3578 || (p == nat->nat_p))) {
3579 switch (p)
3581 #if 0
3582 case IPPROTO_GRE :
3583 if (nat->nat_call[1] != fin->fin_data[0])
3584 continue;
3585 break;
3586 #endif
3587 case IPPROTO_TCP :
3588 case IPPROTO_UDP :
3589 if (nat->nat_oport != dport)
3590 continue;
3591 if (nat->nat_inport != sport)
3592 continue;
3593 break;
3594 default :
3595 break;
/* Entries with both a rule and a proxy session must also pass appr_match(). */
3598 ipn = nat->nat_ptr;
3599 if ((ipn != NULL) && (nat->nat_aps != NULL))
3600 if (appr_match(fin, nat) != 0)
3601 continue;
3602 return nat;
3607 * So if we didn't find it but there are wildcard members in the hash
3608 * table, go back and look for them. We do this search and update here
3609 * because it is modifying the NAT table and we want to do this only
3610 * for the first packet that matches. The exception, of course, is
3611 * for "dummy" (FI_IGNORE) lookups.
3613 find_out_wild_ports:
/* The wildcard pass runs only for TCP/UDP lookups made with NAT_SEARCH. */
3614 if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH))
3615 return NULL;
3616 if (ifs->ifs_nat_stats.ns_wilds == 0)
3617 return NULL;
/*
 * Give up the lock held on entry and re-enter as a writer, because the
 * loop below may clone an entry or rehash one with nat_tabmove().
 */
3619 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
/* This pass hashes on the addresses only, with 0 in place of the ports. */
3621 hv = NAT_HASH_FN(srcip, 0, 0xffffffff);
3622 hv = NAT_HASH_FN(dst.s_addr, hv, ifs->ifs_ipf_nattable_sz);
3624 WRITE_ENTER(&ifs->ifs_ipf_nat);
3626 nat = ifs->ifs_nat_table[0][hv];
3627 for (; nat; nat = nat->nat_hnext[0]) {
3628 if (nat->nat_v != 4)
3629 continue;
3631 if (nat->nat_ifps[1] != NULL) {
3632 if ((ifp != NULL) && (ifp != nat->nat_ifps[1]))
3633 continue;
3634 } else if (ifp != NULL)
3635 nat->nat_ifps[1] = ifp;
3637 if (nat->nat_p != fin->fin_p)
3638 continue;
3639 if ((nat->nat_inip.s_addr != srcip) ||
3640 (nat->nat_oip.s_addr != dst.s_addr))
3641 continue;
3643 nflags = nat->nat_flags;
3644 if (!(nflags & (NAT_TCPUDP|SI_WILDP)))
3645 continue;
3647 if (nat_wildok(nat, (int)sport, (int)dport, nflags,
3648 NAT_OUTBOUND) == 1) {
/* FI_IGNORE lookups return the match without touching the table. */
3649 if ((fin->fin_flx & FI_IGNORE) != 0)
3650 break;
/* SI_CLONE entries stay as templates; the clone (or NULL) is returned. */
3651 if ((nflags & SI_CLONE) != 0) {
3652 nat = fr_natclone(fin, nat);
3653 if (nat == NULL)
3654 break;
3655 } else {
/*
 * Otherwise convert the wildcard entry in place: decrement the wildcard
 * count, fill in the ports, clear the wildcard-port flags and rehash.
 */
3656 MUTEX_ENTER(&ifs->ifs_ipf_nat_new);
3657 ifs->ifs_nat_stats.ns_wilds--;
3658 MUTEX_EXIT(&ifs->ifs_ipf_nat_new);
3660 nat->nat_inport = sport;
3661 nat->nat_oport = dport;
3662 if (nat->nat_outport == 0)
3663 nat->nat_outport = sport;
3664 nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT);
3665 nat_tabmove(nat, ifs);
3666 break;
/* NOTE(review): presumably turns the write lock back into a read lock - confirm. */
3670 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3672 return nat;
3676 /* ------------------------------------------------------------------------ */
3677 /* Function: nat_lookupredir */
3678 /* Returns: nat_t* - NULL == no match, */
3679 /* else pointer to matching NAT entry */
3680 /* Parameters: np(I) - pointer to description of packet to find NAT table */
3681 /* entry for. */
3682 /* */
3683 /* Lookup the NAT tables to search for a matching redirect */
3684 /* ------------------------------------------------------------------------ */
3685 nat_t *nat_lookupredir(np, ifs)
3686 natlookup_t *np;
3687 ipf_stack_t *ifs;
3689 fr_info_t fi;
3690 nat_t *nat;
3692 bzero((char *)&fi, sizeof(fi));
3693 if (np->nl_flags & IPN_IN) {
3694 fi.fin_data[0] = ntohs(np->nl_realport);
3695 fi.fin_data[1] = ntohs(np->nl_outport);
3696 } else {
3697 fi.fin_data[0] = ntohs(np->nl_inport);
3698 fi.fin_data[1] = ntohs(np->nl_outport);
3700 if (np->nl_flags & IPN_TCP)
3701 fi.fin_p = IPPROTO_TCP;
3702 else if (np->nl_flags & IPN_UDP)
3703 fi.fin_p = IPPROTO_UDP;
3704 else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY))
3705 fi.fin_p = IPPROTO_ICMP;
3707 fi.fin_ifs = ifs;
3709 * We can do two sorts of lookups:
3710 * - IPN_IN: we have the `real' and `out' address, look for `in'.
3711 * - default: we have the `in' and `out' address, look for `real'.
3713 if (np->nl_flags & IPN_IN) {
3714 if ((nat = nat_inlookup(&fi, np->nl_flags, fi.fin_p,
3715 np->nl_realip, np->nl_outip))) {
3716 np->nl_inip = nat->nat_inip;
3717 np->nl_inport = nat->nat_inport;
3719 } else {
3721 * If nl_inip is non null, this is a lookup based on the real
3722 * ip address. Else, we use the fake.
3724 if ((nat = nat_outlookup(&fi, np->nl_flags, fi.fin_p,
3725 np->nl_inip, np->nl_outip))) {
3727 if ((np->nl_flags & IPN_FINDFORWARD) != 0) {
3728 fr_info_t fin;
3729 bzero((char *)&fin, sizeof(fin));
3730 fin.fin_p = nat->nat_p;
3731 fin.fin_data[0] = ntohs(nat->nat_outport);
3732 fin.fin_data[1] = ntohs(nat->nat_oport);
3733 fin.fin_ifs = ifs;
3734 if (nat_inlookup(&fin, np->nl_flags, fin.fin_p,
3735 nat->nat_outip,
3736 nat->nat_oip) != NULL) {
3737 np->nl_flags &= ~IPN_FINDFORWARD;
3741 np->nl_realip = nat->nat_outip;
3742 np->nl_realport = nat->nat_outport;
3746 return nat;
3750 /* ------------------------------------------------------------------------ */
3751 /* Function: nat_match */
3752 /* Returns: int - 0 == no match, 1 == match */
3753 /* Parameters: fin(I) - pointer to packet information */
3754 /* np(I) - pointer to NAT rule */
3755 /* */
3756 /* Pull the matching of a packet against a NAT rule out of that complex */
3757 /* loop inside fr_checknatin() and lay it out properly in its own function. */
3758 /* ------------------------------------------------------------------------ */
3759 static int nat_match(fin, np)
3760 fr_info_t *fin;
3761 ipnat_t *np;
3763 frtuc_t *ft;
3765 if (fin->fin_v != 4)
3766 return 0;
3768 if (np->in_p && fin->fin_p != np->in_p)
3769 return 0;
3771 if (fin->fin_out) {
3772 if (!(np->in_redir & (NAT_MAP|NAT_MAPBLK)))
3773 return 0;
3774 if (((fin->fin_fi.fi_saddr & np->in_inmsk) != np->in_inip)
3775 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3776 return 0;
3777 if (((fin->fin_fi.fi_daddr & np->in_srcmsk) != np->in_srcip)
3778 ^ ((np->in_flags & IPN_NOTDST) != 0))
3779 return 0;
3780 } else {
3781 if (!(np->in_redir & NAT_REDIRECT))
3782 return 0;
3783 if (((fin->fin_fi.fi_saddr & np->in_srcmsk) != np->in_srcip)
3784 ^ ((np->in_flags & IPN_NOTSRC) != 0))
3785 return 0;
3786 if (((fin->fin_fi.fi_daddr & np->in_outmsk) != np->in_outip)
3787 ^ ((np->in_flags & IPN_NOTDST) != 0))
3788 return 0;
3791 ft = &np->in_tuc;
3792 if (!(fin->fin_flx & FI_TCPUDP) ||
3793 (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) {
3794 if (ft->ftu_scmp || ft->ftu_dcmp)
3795 return 0;
3796 return 1;
3799 return fr_tcpudpchk(fin, ft);
3803 /* ------------------------------------------------------------------------ */
3804 /* Function: nat_update */
3805 /* Returns: Nil */
3806 /* Parameters: fin(I) - pointer to packet information */
3807 /* nat(I) - pointer to NAT structure */
3808 /* np(I) - pointer to NAT rule */
3809 /* Locks: nat_lock */
3810 /* */
3811 /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */
3812 /* called with fin_rev updated - i.e. after calling nat_proto(). */
3813 /* ------------------------------------------------------------------------ */
3814 void nat_update(fin, nat, np)
3815 fr_info_t *fin;
3816 nat_t *nat;
3817 ipnat_t *np;
3819 ipftq_t *ifq, *ifq2;
3820 ipftqent_t *tqe;
3821 ipf_stack_t *ifs = fin->fin_ifs;
3823 tqe = &nat->nat_tqe;
3824 ifq = tqe->tqe_ifq;
3827 * We allow over-riding of NAT timeouts from NAT rules, even for
3828 * TCP, however, if it is TCP and there is no rule timeout set,
3829 * then do not update the timeout here.
3831 if (np != NULL)
3832 ifq2 = np->in_tqehead[fin->fin_rev];
3833 else
3834 ifq2 = NULL;
3836 if (nat->nat_p == IPPROTO_TCP && ifq2 == NULL) {
3837 (void) fr_tcp_age(&nat->nat_tqe, fin, ifs->ifs_nat_tqb, 0);
3838 } else {
3839 if (ifq2 == NULL) {
3840 if (nat->nat_p == IPPROTO_UDP)
3841 ifq2 = &ifs->ifs_nat_udptq;
3842 else if (nat->nat_p == IPPROTO_ICMP)
3843 ifq2 = &ifs->ifs_nat_icmptq;
3844 else
3845 ifq2 = &ifs->ifs_nat_iptq;
3848 fr_movequeue(tqe, ifq, ifq2, ifs);
3853 /* ------------------------------------------------------------------------ */
3854 /* Function: fr_checknatout */
3855 /* Returns: int - -1 == packet failed NAT checks so block it, */
3856 /* 0 == no packet translation occurred, */
3857 /* 1 == packet was successfully translated. */
3858 /* Parameters: fin(I) - pointer to packet information */
3859 /* passp(I) - pointer to filtering result flags */
3860 /* */
3861 /* Check to see if an outcoming packet should be changed. ICMP packets are */
3862 /* first checked to see if they match an existing entry (if an error), */
3863 /* otherwise a search of the current NAT table is made. If neither results */
3864 /* in a match then a search for a matching NAT rule is made. Create a new */
3865 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
3866 /* packet header(s) as required. */
3867 /* ------------------------------------------------------------------------ */
/*
 * NOTE(review): this listing has lost its brace-only and comment-delimiter
 * lines; block structure has to be inferred from the embedded line numbers.
 */
3868 int fr_checknatout(fin, passp)
3869 fr_info_t *fin;
3870 u_32_t *passp;
3872 ipnat_t *np = NULL, *npnext;
3873 struct ifnet *ifp, *sifp;
3874 icmphdr_t *icmp = NULL;
3875 tcphdr_t *tcp = NULL;
3876 int rval, natfailed;
3877 u_int nflags = 0;
3878 u_32_t ipa, iph;
3879 int natadd = 1;
3880 frentry_t *fr;
3881 nat_t *nat;
3882 ipf_stack_t *ifs = fin->fin_ifs;
/*
 * Return 0 (no translation) straight away when ifs_fr_nat_lock is non-zero,
 * or when there are neither NAT rules nor NAT instances.
 */
3884 if (ifs->ifs_fr_nat_lock != 0)
3885 return 0;
3886 if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
3887 return 0;
3889 natfailed = 0;
3890 fr = fin->fin_fr;
/*
 * If the matched filter rule (not an FR_DUP rule) names an interface in
 * fr_tifs[fin_rev], use that interface for the NAT checks.  The original
 * fin_ifp is saved in sifp and restored before returning.
 */
3891 sifp = fin->fin_ifp;
3892 if ((fr != NULL) && !(fr->fr_flags & FR_DUP) &&
3893 fr->fr_tifs[fin->fin_rev].fd_ifp &&
3894 fr->fr_tifs[fin->fin_rev].fd_ifp != (void *)-1)
3895 fin->fin_ifp = fr->fr_tifs[fin->fin_rev].fd_ifp;
3896 ifp = fin->fin_ifp;
/* Classify the packet into nflags; only done for unfragmented/first-fragment, non-short packets. */
3898 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
3899 switch (fin->fin_p)
3901 case IPPROTO_TCP :
3902 nflags = IPN_TCP;
3903 break;
3904 case IPPROTO_UDP :
3905 nflags = IPN_UDP;
3906 break;
3907 case IPPROTO_ICMP :
3908 icmp = fin->fin_dp;
/*
 * NOTE(review): the two comment lines below say "incoming", but this
 * function is the outbound path (it calls nat_outlookup() and fr_natout());
 * the text looks copied from fr_checknatin().
 */
3911 * This is an incoming packet, so the destination is
3912 * the icmp_id and the source port equals 0
3914 if (nat_icmpquerytype4(icmp->icmp_type))
3915 nflags = IPN_ICMPQUERY;
3916 break;
3917 default :
3918 break;
3921 if ((nflags & IPN_TCPUDP))
3922 tcp = fin->fin_dp;
/* Rules are matched against the packet's source address. */
3925 ipa = fin->fin_saddr;
3927 READ_ENTER(&ifs->ifs_ipf_nat);
/*
 * Find an existing entry, trying in order: ICMP error matching, the
 * fragment cache (in which case no new fragment entry is added later),
 * then the NAT table itself.
 */
3929 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
3930 (nat = nat_icmperror(fin, &nflags, NAT_OUTBOUND)))
3931 /*EMPTY*/;
3932 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
3933 natadd = 0;
3934 else if ((nat = nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
3935 fin->fin_src, fin->fin_dst))) {
3936 nflags = nat->nat_flags;
3937 } else {
3938 u_32_t hv, msk, nmsk;
3941 * There is no current entry in the nat table for this packet.
3943 * If the packet is a fragment, but not the first fragment,
3944 * then don't do anything. Otherwise, if there is a matching
3945 * nat rule, try to create a new nat entry.
3947 if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
3948 goto nonatfrag;
/*
 * Search the rule hash with the source address masked by msk, starting
 * with all bits set; msk is shifted left between passes under the control
 * of the bits in ifs_nat_masks (see the loop after the rule walk).
 */
3950 msk = 0xffffffff;
3951 nmsk = ifs->ifs_nat_masks;
3952 maskloop:
3953 iph = ipa & htonl(msk);
3954 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_natrules_sz);
3955 for (np = ifs->ifs_nat_rules[hv]; np; np = npnext) {
/* Fetch the successor first: np may be dereferenced away further down. */
3956 npnext = np->in_mnext;
3957 if ((np->in_ifps[1] && (np->in_ifps[1] != ifp)))
3958 continue;
3959 if (np->in_v != fin->fin_v)
3960 continue;
3961 if (np->in_p && (np->in_p != fin->fin_p))
3962 continue;
3963 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
3964 continue;
3965 if (np->in_flags & IPN_FILTER) {
3966 if (!nat_match(fin, np))
3967 continue;
3968 } else if ((ipa & np->in_inmsk) != np->in_inip)
3969 continue;
3971 if ((fr != NULL) &&
3972 !fr_matchtag(&np->in_tag, &fr->fr_nattag))
3973 continue;
/*
 * Rules carrying a proxy label also need the proxy's approval.
 * NOTE(review): tcp is only set for TCP/UDP packets; presumably the
 * earlier IPN_RF/in_p checks keep other packets away from here - confirm.
 */
3975 if (*np->in_plabel != '\0') {
3976 if (((np->in_flags & IPN_FILTER) == 0) &&
3977 (np->in_dport != tcp->th_dport))
3978 continue;
3979 if (appr_ok(fin, tcp, np) == 0)
3980 continue;
/*
 * Rule matched.  Bump in_use before giving up the read lock, then take
 * the write lock for nat_new().  On success the count is dropped again
 * and the lock downgraded; on failure natfailed is set to -1, the rule is
 * released through fr_ipnatderef() and the walk continues.
 */
3983 ATOMIC_INC32(np->in_use);
3984 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
3985 WRITE_ENTER(&ifs->ifs_ipf_nat);
3986 nat = nat_new(fin, np, NULL, nflags, NAT_OUTBOUND);
3987 if (nat != NULL) {
3988 np->in_use--;
3989 np->in_hits++;
3990 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
3991 break;
3993 natfailed = -1;
3994 npnext = np->in_mnext;
3995 fr_ipnatderef(&np, ifs);
3996 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
/*
 * No rule produced an entry: shift msk left until the next set bit of
 * nmsk has been consumed, then retry from maskloop with the shorter mask.
 */
3998 if ((np == NULL) && (nmsk != 0)) {
3999 while (nmsk) {
4000 msk <<= 1;
4001 if (nmsk & 0x80000000)
4002 break;
4003 nmsk <<= 1;
4005 if (nmsk != 0) {
4006 nmsk <<= 1;
4007 goto maskloop;
4012 nonatfrag:
/*
 * With an entry in hand, translate the packet and, if that returned 1,
 * update the entry's timeout and outbound counters under nat_lock.
 * Without one, the result is natfailed (0, or -1 if nat_new() failed).
 */
4013 if (nat != NULL) {
4014 rval = fr_natout(fin, nat, natadd, nflags);
4015 if (rval == 1) {
4016 MUTEX_ENTER(&nat->nat_lock);
4017 nat_update(fin, nat, nat->nat_ptr);
4018 nat->nat_bytes[1] += fin->fin_plen;
4019 nat->nat_pkts[1]++;
4020 fin->fin_pktnum = nat->nat_pkts[1];
4021 MUTEX_EXIT(&nat->nat_lock);
4023 } else
4024 rval = natfailed;
4025 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
/* On failure report FR_BLOCK through passp (if given) and flag the packet. */
4027 if (rval == -1) {
4028 if (passp != NULL)
4029 *passp = FR_BLOCK;
4030 fin->fin_flx |= FI_BADNAT;
/* Restore the interface saved on entry. */
4032 fin->fin_ifp = sifp;
4033 return rval;
4036 /* ------------------------------------------------------------------------ */
4037 /* Function: fr_natout */
4038 /* Returns: int - -1 == packet failed NAT checks so block it, */
4039 /* 1 == packet was successfully translated. */
4040 /* Parameters: fin(I) - pointer to packet information */
4041 /* nat(I) - pointer to NAT structure */
4042 /* natadd(I) - flag indicating if it is safe to add frag cache */
4043 /* nflags(I) - NAT flags set for this packet */
4044 /* */
4045 /* Translate a packet coming "out" on an interface. */
4046 /* ------------------------------------------------------------------------ */
4047 int fr_natout(fin, nat, natadd, nflags)
4048 fr_info_t *fin;
4049 nat_t *nat;
4050 int natadd;
4051 u_32_t nflags;
4053 icmphdr_t *icmp;
4054 u_short *csump;
4055 u_32_t sumd;
4056 tcphdr_t *tcp;
4057 ipnat_t *np;
4058 int i;
4059 ipf_stack_t *ifs = fin->fin_ifs;
4061 if (fin->fin_v == 6) {
4062 #ifdef USE_INET6
4063 return fr_nat6out(fin, nat, natadd, nflags);
4064 #else
4065 return NULL;
4066 #endif
4069 #if SOLARIS && defined(_KERNEL)
4070 net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4071 #endif
4073 tcp = NULL;
4074 icmp = NULL;
4075 csump = NULL;
4076 np = nat->nat_ptr;
4078 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4079 (void) fr_nat_newfrag(fin, 0, nat);
4082 * Fix up checksums, not by recalculating them, but
4083 * simply computing adjustments.
4084 * This is only done for STREAMS based IP implementations where the
4085 * checksum has already been calculated by IP. In all other cases,
4086 * IPFilter is called before the checksum needs calculating so there
4087 * is no call to modify whatever is in the header now.
4089 ASSERT(fin->fin_m != NULL);
4090 if (fin->fin_v == 4 && !NET_IS_HCK_L3_FULL(net_data_p, fin->fin_m)) {
4091 if (nflags == IPN_ICMPERR) {
4092 u_32_t s1, s2;
4094 s1 = LONG_SUM(ntohl(fin->fin_saddr));
4095 s2 = LONG_SUM(ntohl(nat->nat_outip.s_addr));
4096 CALC_SUMD(s1, s2, sumd);
4098 fix_outcksum(&fin->fin_ip->ip_sum, sumd);
4100 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4101 defined(linux) || defined(BRIDGE_IPF)
4102 else {
4104 * Strictly speaking, this isn't necessary on BSD
4105 * kernels because they do checksum calculation after
4106 * this code has run BUT if ipfilter is being used
4107 * to do NAT as a bridge, that code doesn't exist.
4109 if (nat->nat_dir == NAT_OUTBOUND)
4110 fix_outcksum(&fin->fin_ip->ip_sum,
4111 nat->nat_ipsumd);
4112 else
4113 fix_incksum(&fin->fin_ip->ip_sum,
4114 nat->nat_ipsumd);
4116 #endif
4119 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4120 if ((nat->nat_outport != 0) && (nflags & IPN_TCPUDP)) {
4121 tcp = fin->fin_dp;
4123 tcp->th_sport = nat->nat_outport;
4124 fin->fin_data[0] = ntohs(nat->nat_outport);
4127 if ((nat->nat_outport != 0) && (nflags & IPN_ICMPQUERY)) {
4128 icmp = fin->fin_dp;
4129 icmp->icmp_id = nat->nat_outport;
4132 csump = nat_proto(fin, nat, nflags);
4135 fin->fin_ip->ip_src = nat->nat_outip;
4138 * The above comments do not hold for layer 4 (or higher) checksums...
4140 if (csump != NULL && !NET_IS_HCK_L4_FULL(net_data_p, fin->fin_m)) {
4141 if (nflags & IPN_TCPUDP &&
4142 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m))
4143 sumd = nat->nat_sumd[1];
4144 else
4145 sumd = nat->nat_sumd[0];
4147 if (nat->nat_dir == NAT_OUTBOUND)
4148 fix_outcksum(csump, sumd);
4149 else
4150 fix_incksum(csump, sumd);
4152 #ifdef IPFILTER_SYNC
4153 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4154 #endif
4155 /* ------------------------------------------------------------- */
4156 /* A few quick notes: */
4157 /* Following are test conditions prior to calling the */
4158 /* appr_check routine. */
4159 /* */
4160 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4161 /* with a redirect rule, we attempt to match the packet's */
4162 /* source port against in_dport, otherwise we'd compare the */
4163 /* packet's destination. */
4164 /* ------------------------------------------------------------- */
4165 if ((np != NULL) && (np->in_apr != NULL)) {
4166 i = appr_check(fin, nat);
4167 if (i == 0)
4168 i = 1;
4169 } else
4170 i = 1;
4171 ifs->ifs_nat_stats.ns_mapped[1]++;
4172 fin->fin_flx |= FI_NATED;
4173 return i;
4177 /* ------------------------------------------------------------------------ */
4178 /* Function: fr_checknatin */
4179 /* Returns: int - -1 == packet failed NAT checks so block it, */
4180 /* 0 == no packet translation occurred, */
4181 /* 1 == packet was successfully translated. */
4182 /* Parameters: fin(I) - pointer to packet information */
4183 /* passp(I) - pointer to filtering result flags */
4184 /* */
4185 /* Check to see if an incoming packet should be changed. ICMP packets are */
4186 /* first checked to see if they match an existing entry (if an error), */
4187 /* otherwise a search of the current NAT table is made. If neither results */
4188 /* in a match then a search for a matching NAT rule is made. Create a new */
4189 /* NAT entry if a we matched a NAT rule. Lastly, actually change the */
4190 /* packet header(s) as required. */
4191 /* ------------------------------------------------------------------------ */
/*
 * NOTE(review): this listing has lost its brace-only and comment-delimiter
 * lines; block structure has to be inferred from the embedded line numbers.
 */
4192 int fr_checknatin(fin, passp)
4193 fr_info_t *fin;
4194 u_32_t *passp;
4196 u_int nflags, natadd;
4197 ipnat_t *np, *npnext;
4198 int rval, natfailed;
4199 struct ifnet *ifp;
4200 struct in_addr in;
4201 icmphdr_t *icmp;
4202 tcphdr_t *tcp;
4203 u_short dport;
4204 nat_t *nat;
4205 u_32_t iph;
4206 ipf_stack_t *ifs = fin->fin_ifs;
/*
 * Return 0 (no translation) straight away when ifs_fr_nat_lock is non-zero,
 * or when there are neither NAT rules nor NAT instances.
 */
4208 if (ifs->ifs_fr_nat_lock != 0)
4209 return 0;
4210 if (ifs->ifs_nat_stats.ns_rules == 0 && ifs->ifs_nat_instances == NULL)
4211 return 0;
4213 tcp = NULL;
4214 icmp = NULL;
4215 dport = 0;
4216 natadd = 1;
4217 nflags = 0;
4218 natfailed = 0;
4219 ifp = fin->fin_ifp;
/*
 * Classify the packet into nflags and pick up the value used as the
 * "destination port" for rule matching: th_dport for TCP/UDP, icmp_id for
 * ICMP queries.  Skipped for short packets and trailing fragments.
 */
4221 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4222 switch (fin->fin_p)
4224 case IPPROTO_TCP :
4225 nflags = IPN_TCP;
4226 break;
4227 case IPPROTO_UDP :
4228 nflags = IPN_UDP;
4229 break;
4230 case IPPROTO_ICMP :
4231 icmp = fin->fin_dp;
4234 * This is an incoming packet, so the destination is
4235 * the icmp_id and the source port equals 0
4237 if (nat_icmpquerytype4(icmp->icmp_type)) {
4238 nflags = IPN_ICMPQUERY;
4239 dport = icmp->icmp_id;
4240 } break;
4241 default :
4242 break;
4245 if ((nflags & IPN_TCPUDP)) {
4246 tcp = fin->fin_dp;
4247 dport = tcp->th_dport;
/* Rules are matched against the packet's destination address. */
4251 in = fin->fin_dst;
4253 READ_ENTER(&ifs->ifs_ipf_nat);
/*
 * Find an existing entry, trying in order: ICMP error matching, the
 * fragment cache (in which case no new fragment entry is added later),
 * then the NAT table itself.
 */
4255 if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) &&
4256 (nat = nat_icmperror(fin, &nflags, NAT_INBOUND)))
4257 /*EMPTY*/;
4258 else if ((fin->fin_flx & FI_FRAG) && (nat = fr_nat_knownfrag(fin)))
4259 natadd = 0;
4260 else if ((nat = nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p,
4261 fin->fin_src, in))) {
4262 nflags = nat->nat_flags;
4263 } else {
4264 u_32_t hv, msk, rmsk;
4267 * There is no current entry in the nat table for this packet.
4269 * If the packet is a fragment, but not the first fragment,
4270 * then don't do anything. Otherwise, if there is a matching
4271 * nat rule, try to create a new nat entry.
4273 if ((fin->fin_off != 0) && (fin->fin_flx & FI_TCPUDP))
4274 goto nonatfrag;
/*
 * Search the redirect rule hash with the destination address masked by
 * msk, starting with all bits set; msk is shifted left between passes
 * under the control of the bits in ifs_rdr_masks.
 */
4276 rmsk = ifs->ifs_rdr_masks;
4277 msk = 0xffffffff;
4278 maskloop:
4279 iph = in.s_addr & htonl(msk);
4280 hv = NAT_HASH_FN(iph, 0, ifs->ifs_ipf_rdrrules_sz);
4281 for (np = ifs->ifs_rdr_rules[hv]; np; np = npnext) {
/* Fetch the successor first: np may be dereferenced away further down. */
4282 npnext = np->in_rnext;
4283 if (np->in_ifps[0] && (np->in_ifps[0] != ifp))
4284 continue;
4285 if (np->in_v != fin->fin_v)
4286 continue;
4287 if (np->in_p && (np->in_p != fin->fin_p))
4288 continue;
4289 if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags))
4290 continue;
4291 if (np->in_flags & IPN_FILTER) {
4292 if (!nat_match(fin, np))
4293 continue;
4294 } else {
/* Plain rule: destination address must match and, when in_pmin is set, dport must lie in [in_pmin, in_pmax]. */
4295 if ((in.s_addr & np->in_outmsk) != np->in_outip)
4296 continue;
4297 if (np->in_pmin &&
4298 ((ntohs(np->in_pmax) < ntohs(dport)) ||
4299 (ntohs(dport) < ntohs(np->in_pmin))))
4300 continue;
/* Rules carrying a proxy label also need the proxy's approval. */
4303 if (*np->in_plabel != '\0') {
4304 if (!appr_ok(fin, tcp, np)) {
4305 continue;
/*
 * Rule matched.  Bump in_use before giving up the read lock, then take
 * the write lock for nat_new().  On success the count is dropped again
 * and the lock downgraded; on failure natfailed is set to -1, the rule is
 * released through fr_ipnatderef() and the walk continues.
 */
4309 ATOMIC_INC32(np->in_use);
4310 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4311 WRITE_ENTER(&ifs->ifs_ipf_nat);
4312 nat = nat_new(fin, np, NULL, nflags, NAT_INBOUND);
4313 if (nat != NULL) {
4314 np->in_use--;
4315 np->in_hits++;
4316 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
4317 break;
4319 natfailed = -1;
4320 npnext = np->in_rnext;
4321 fr_ipnatderef(&np, ifs);
4322 MUTEX_DOWNGRADE(&ifs->ifs_ipf_nat);
/*
 * No rule produced an entry: shift msk left until the next set bit of
 * rmsk has been consumed, then retry from maskloop with the shorter mask.
 */
4325 if ((np == NULL) && (rmsk != 0)) {
4326 while (rmsk) {
4327 msk <<= 1;
4328 if (rmsk & 0x80000000)
4329 break;
4330 rmsk <<= 1;
4332 if (rmsk != 0) {
4333 rmsk <<= 1;
4334 goto maskloop;
4339 nonatfrag:
/*
 * With an entry in hand, translate the packet and, if that returned 1,
 * update the entry's timeout and inbound counters under nat_lock.
 * Without one, the result is natfailed (0, or -1 if nat_new() failed).
 */
4340 if (nat != NULL) {
4341 rval = fr_natin(fin, nat, natadd, nflags);
4342 if (rval == 1) {
4343 MUTEX_ENTER(&nat->nat_lock);
4344 nat_update(fin, nat, nat->nat_ptr);
4345 nat->nat_bytes[0] += fin->fin_plen;
4346 nat->nat_pkts[0]++;
4347 fin->fin_pktnum = nat->nat_pkts[0];
4348 MUTEX_EXIT(&nat->nat_lock);
4350 } else
4351 rval = natfailed;
4352 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
/* On failure report FR_BLOCK through passp (if given) and flag the packet. */
4354 if (rval == -1) {
4355 if (passp != NULL)
4356 *passp = FR_BLOCK;
4357 fin->fin_flx |= FI_BADNAT;
4359 return rval;
4363 /* ------------------------------------------------------------------------ */
4364 /* Function: fr_natin */
4365 /* Returns: int - -1 == packet failed NAT checks so block it, */
4366 /* 1 == packet was successfully translated. */
4367 /* Parameters: fin(I) - pointer to packet information */
4368 /* nat(I) - pointer to NAT structure */
4369 /* natadd(I) - flag indicating if it is safe to add frag cache */
4370 /* nflags(I) - NAT flags set for this packet */
4371 /* Locks Held: ipf_nat (READ) */
4372 /* */
4373 /* Translate a packet coming "in" on an interface. */
4374 /* ------------------------------------------------------------------------ */
4375 int fr_natin(fin, nat, natadd, nflags)
4376 fr_info_t *fin;
4377 nat_t *nat;
4378 int natadd;
4379 u_32_t nflags;
4381 icmphdr_t *icmp;
4382 u_short *csump;
4383 tcphdr_t *tcp;
4384 ipnat_t *np;
4385 int i;
4386 ipf_stack_t *ifs = fin->fin_ifs;
4388 if (fin->fin_v == 6) {
4389 #ifdef USE_INET6
4390 return fr_nat6in(fin, nat, natadd, nflags);
4391 #else
4392 return NULL;
4393 #endif
4396 #if SOLARIS && defined(_KERNEL)
4397 net_handle_t net_data_p = ifs->ifs_ipf_ipv4;
4398 #endif
4400 tcp = NULL;
4401 csump = NULL;
4402 np = nat->nat_ptr;
4403 fin->fin_fr = nat->nat_fr;
4405 if ((natadd != 0) && (fin->fin_flx & FI_FRAG))
4406 (void) fr_nat_newfrag(fin, 0, nat);
4408 if (np != NULL) {
4410 /* ------------------------------------------------------------- */
4411 /* A few quick notes: */
4412 /* Following are test conditions prior to calling the */
4413 /* appr_check routine. */
4414 /* */
4415 /* A NULL tcp indicates a non TCP/UDP packet. When dealing */
4416 /* with a map rule, we attempt to match the packet's */
4417 /* source port against in_dport, otherwise we'd compare the */
4418 /* packet's destination. */
4419 /* ------------------------------------------------------------- */
4420 if (np->in_apr != NULL) {
4421 i = appr_check(fin, nat);
4422 if (i == -1) {
4423 return -1;
4428 #ifdef IPFILTER_SYNC
4429 ipfsync_update(SMC_NAT, fin, nat->nat_sync);
4430 #endif
4432 fin->fin_ip->ip_dst = nat->nat_inip;
4433 fin->fin_fi.fi_daddr = nat->nat_inip.s_addr;
4434 if (nflags & IPN_TCPUDP)
4435 tcp = fin->fin_dp;
4438 * Fix up checksums, not by recalculating them, but
4439 * simply computing adjustments.
4440 * Why only do this for some platforms on inbound packets ?
4441 * Because for those that it is done, IP processing is yet to happen
4442 * and so the IPv4 header checksum has not yet been evaluated.
4443 * Perhaps it should always be done for the benefit of things like
4444 * fast forwarding (so that it doesn't need to be recomputed) but with
4445 * header checksum offloading, perhaps it is a moot point.
4447 #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \
4448 defined(__osf__) || defined(linux)
4449 if (nat->nat_dir == NAT_OUTBOUND)
4450 fix_incksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4451 else
4452 fix_outcksum(&fin->fin_ip->ip_sum, nat->nat_ipsumd);
4453 #endif
4455 if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) {
4456 if ((nat->nat_inport != 0) && (nflags & IPN_TCPUDP)) {
4457 tcp->th_dport = nat->nat_inport;
4458 fin->fin_data[1] = ntohs(nat->nat_inport);
4462 if ((nat->nat_inport != 0) && (nflags & IPN_ICMPQUERY)) {
4463 icmp = fin->fin_dp;
4465 icmp->icmp_id = nat->nat_inport;
4468 csump = nat_proto(fin, nat, nflags);
4472 * In case they are being forwarded, inbound packets always need to have
4473 * their checksum adjusted even if hardware checksum validation said OK.
4475 if (csump != NULL) {
4476 if (nat->nat_dir == NAT_OUTBOUND)
4477 fix_incksum(csump, nat->nat_sumd[0]);
4478 else
4479 fix_outcksum(csump, nat->nat_sumd[0]);
4482 #if SOLARIS && defined(_KERNEL)
4483 if (nflags & IPN_TCPUDP &&
4484 NET_IS_HCK_L4_PART(net_data_p, fin->fin_m)) {
4486 * Need to adjust the partial checksum result stored in
4487 * db_cksum16, which will be used for validation in IP.
4488 * See IP_CKSUM_RECV().
4489 * Adjustment data should be the inverse of the IP address
4490 * changes, because db_cksum16 is supposed to be the complement
4491 * of the pesudo header.
4493 csump = &fin->fin_m->b_datap->db_cksum16;
4494 if (nat->nat_dir == NAT_OUTBOUND)
4495 fix_outcksum(csump, nat->nat_sumd[1]);
4496 else
4497 fix_incksum(csump, nat->nat_sumd[1]);
4499 #endif
4501 ifs->ifs_nat_stats.ns_mapped[0]++;
4502 fin->fin_flx |= FI_NATED;
4503 if (np != NULL && np->in_tag.ipt_num[0] != 0)
4504 fin->fin_nattag = &np->in_tag;
4505 return 1;
4509 /* ------------------------------------------------------------------------ */
4510 /* Function: nat_proto */
4511 /* Returns: u_short* - pointer to transport header checksum to update, */
4512 /* NULL if the transport protocol is not recognised */
4513 /* as needing a checksum update. */
4514 /* Parameters: fin(I) - pointer to packet information */
4515 /* nat(I) - pointer to NAT structure */
4516 /* nflags(I) - NAT flags set for this packet */
4517 /* */
4518 /* Return the pointer to the checksum field for each protocol so understood.*/
4519 /* If support for making other changes to a protocol header is required, */
4520 /* that is not strictly 'address' translation, such as clamping the MSS in */
4521 /* TCP down to a specific value, then do it from here. */
4522 /* ------------------------------------------------------------------------ */
4523 u_short *nat_proto(fin, nat, nflags)
4524 fr_info_t *fin;
4525 nat_t *nat;
4526 u_int nflags;
4528 icmphdr_t *icmp;
4529 struct icmp6_hdr *icmp6;
4530 u_short *csump;
4531 tcphdr_t *tcp;
4532 udphdr_t *udp;
4534 csump = NULL;
4535 if (fin->fin_out == 0) {
4536 fin->fin_rev = (nat->nat_dir == NAT_OUTBOUND);
4537 } else {
4538 fin->fin_rev = (nat->nat_dir == NAT_INBOUND);
4541 switch (fin->fin_p)
4543 case IPPROTO_TCP :
4544 tcp = fin->fin_dp;
4546 csump = &tcp->th_sum;
4549 * Do a MSS CLAMPING on a SYN packet,
4550 * only deal IPv4 for now.
4552 if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0)
4553 nat_mssclamp(tcp, nat->nat_mssclamp, csump);
4555 break;
4557 case IPPROTO_UDP :
4558 udp = fin->fin_dp;
4560 if (udp->uh_sum)
4561 csump = &udp->uh_sum;
4562 break;
4564 case IPPROTO_ICMP :
4565 icmp = fin->fin_dp;
4567 if ((nflags & IPN_ICMPQUERY) != 0) {
4568 if (icmp->icmp_cksum != 0)
4569 csump = &icmp->icmp_cksum;
4571 break;
4573 case IPPROTO_ICMPV6 :
4574 icmp6 = fin->fin_dp;
4576 if ((nflags & IPN_ICMPQUERY) != 0) {
4577 if (icmp6->icmp6_cksum != 0)
4578 csump = &icmp6->icmp6_cksum;
4580 break;
4582 return csump;
4586 /* ------------------------------------------------------------------------ */
4587 /* Function: fr_natunload */
4588 /* Returns: Nil */
4589 /* Parameters: ifs - ipf stack instance */
4590 /* */
4591 /* Free all memory used by NAT structures allocated at runtime. */
4592 /* ------------------------------------------------------------------------ */
4593 void fr_natunload(ifs)
4594 ipf_stack_t *ifs;
4596 ipftq_t *ifq, *ifqnext;
4598 (void) nat_clearlist(ifs);
4599 (void) nat_flushtable(FLUSH_TABLE_ALL, ifs);
4602 * Proxy timeout queues are not cleaned here because although they
4603 * exist on the NAT list, appr_unload is called after fr_natunload
4604 * and the proxies actually are responsible for them being created.
4605 * Should the proxy timeouts have their own list? There's no real
4606 * justification as this is the only complication.
4608 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4609 ifqnext = ifq->ifq_next;
4610 if (((ifq->ifq_flags & IFQF_PROXY) == 0) &&
4611 (fr_deletetimeoutqueue(ifq) == 0))
4612 fr_freetimeoutqueue(ifq, ifs);
4615 if (ifs->ifs_nat_table[0] != NULL) {
4616 KFREES(ifs->ifs_nat_table[0],
4617 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4618 ifs->ifs_nat_table[0] = NULL;
4620 if (ifs->ifs_nat_table[1] != NULL) {
4621 KFREES(ifs->ifs_nat_table[1],
4622 sizeof(nat_t *) * ifs->ifs_ipf_nattable_sz);
4623 ifs->ifs_nat_table[1] = NULL;
4625 if (ifs->ifs_nat_rules != NULL) {
4626 KFREES(ifs->ifs_nat_rules,
4627 sizeof(ipnat_t *) * ifs->ifs_ipf_natrules_sz);
4628 ifs->ifs_nat_rules = NULL;
4630 if (ifs->ifs_rdr_rules != NULL) {
4631 KFREES(ifs->ifs_rdr_rules,
4632 sizeof(ipnat_t *) * ifs->ifs_ipf_rdrrules_sz);
4633 ifs->ifs_rdr_rules = NULL;
4635 if (ifs->ifs_maptable != NULL) {
4636 KFREES(ifs->ifs_maptable,
4637 sizeof(hostmap_t *) * ifs->ifs_ipf_hostmap_sz);
4638 ifs->ifs_maptable = NULL;
4640 if (ifs->ifs_nat_stats.ns_bucketlen[0] != NULL) {
4641 KFREES(ifs->ifs_nat_stats.ns_bucketlen[0],
4642 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4643 ifs->ifs_nat_stats.ns_bucketlen[0] = NULL;
4645 if (ifs->ifs_nat_stats.ns_bucketlen[1] != NULL) {
4646 KFREES(ifs->ifs_nat_stats.ns_bucketlen[1],
4647 sizeof(u_long *) * ifs->ifs_ipf_nattable_sz);
4648 ifs->ifs_nat_stats.ns_bucketlen[1] = NULL;
4651 if (ifs->ifs_fr_nat_maxbucket_reset == 1)
4652 ifs->ifs_fr_nat_maxbucket = 0;
4654 if (ifs->ifs_fr_nat_init == 1) {
4655 ifs->ifs_fr_nat_init = 0;
4656 fr_sttab_destroy(ifs->ifs_nat_tqb);
4658 RW_DESTROY(&ifs->ifs_ipf_natfrag);
4659 RW_DESTROY(&ifs->ifs_ipf_nat);
4661 MUTEX_DESTROY(&ifs->ifs_ipf_nat_new);
4662 MUTEX_DESTROY(&ifs->ifs_ipf_natio);
4664 MUTEX_DESTROY(&ifs->ifs_nat_udptq.ifq_lock);
4665 MUTEX_DESTROY(&ifs->ifs_nat_icmptq.ifq_lock);
4666 MUTEX_DESTROY(&ifs->ifs_nat_iptq.ifq_lock);
4671 /* ------------------------------------------------------------------------ */
4672 /* Function: fr_natexpire */
4673 /* Returns: Nil */
4674 /* Parameters: ifs - ipf stack instance */
4675 /* */
4676 /* Check all of the timeout queues for entries at the top which need to be */
4677 /* expired. */
4678 /* ------------------------------------------------------------------------ */
4679 void fr_natexpire(ifs)
4680 ipf_stack_t *ifs;
4682 ipftq_t *ifq, *ifqnext;
4683 ipftqent_t *tqe, *tqn;
4684 int i;
4685 SPL_INT(s);
4687 SPL_NET(s);
4688 WRITE_ENTER(&ifs->ifs_ipf_nat);
4689 for (ifq = ifs->ifs_nat_tqb, i = 0; ifq != NULL; ifq = ifq->ifq_next) {
4690 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4691 if (tqe->tqe_die > ifs->ifs_fr_ticks)
4692 break;
4693 tqn = tqe->tqe_next;
4694 (void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4698 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4699 ifqnext = ifq->ifq_next;
4701 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) {
4702 if (tqe->tqe_die > ifs->ifs_fr_ticks)
4703 break;
4704 tqn = tqe->tqe_next;
4705 (void) nat_delete(tqe->tqe_parent, NL_EXPIRE, ifs);
4709 for (ifq = ifs->ifs_nat_utqe; ifq != NULL; ifq = ifqnext) {
4710 ifqnext = ifq->ifq_next;
4712 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
4713 (ifq->ifq_ref == 0)) {
4714 fr_freetimeoutqueue(ifq, ifs);
4718 if (ifs->ifs_nat_doflush != 0) {
4719 (void) nat_flushtable(FLUSH_TABLE_EXTRA, ifs);
4720 ifs->ifs_nat_doflush = 0;
4723 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4724 SPL_X(s);
4728 /* ------------------------------------------------------------------------ */
4729 /* Function: fr_nataddrsync */
4730 /* Returns: Nil */
4731 /* Parameters: ifp(I) - pointer to network interface */
4732 /* addr(I) - pointer to new network address */
4733 /* */
4734 /* Walk through all of the currently active NAT sessions, looking for those */
4735 /* which need to have their translated address updated (where the interface */
4736 /* matches the one passed in) and change it, recalculating the checksum sum */
4737 /* difference too. */
4738 /* ------------------------------------------------------------------------ */
4739 void fr_nataddrsync(v, ifp, addr, ifs)
4740 int v;
4741 void *ifp;
4742 void *addr;
4743 ipf_stack_t *ifs;
4745 u_32_t sum1, sum2, sumd;
4746 nat_t *nat;
4747 ipnat_t *np;
4748 SPL_INT(s);
4750 if (ifs->ifs_fr_running <= 0)
4751 return;
4753 SPL_NET(s);
4754 WRITE_ENTER(&ifs->ifs_ipf_nat);
4756 if (ifs->ifs_fr_running <= 0) {
4757 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4758 return;
4762 * Change IP addresses for NAT sessions for any protocol except TCP
4763 * since it will break the TCP connection anyway. The only rules
4764 * which will get changed are those which are "map ... -> 0/32",
4765 * where the rule specifies the address is taken from the interface.
4767 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4768 if (addr != NULL) {
4769 if (((ifp != NULL) && ifp != (nat->nat_ifps[0])) ||
4770 ((nat->nat_flags & IPN_TCP) != 0))
4771 continue;
4772 if ((np = nat->nat_ptr) == NULL)
4773 continue;
4774 if (v == 4 && np->in_v == 4) {
4775 if (np->in_nip || np->in_outmsk != 0xffffffff)
4776 continue;
4778 * Change the map-to address to be the same as
4779 * the new one.
4781 sum1 = nat->nat_outip.s_addr;
4782 nat->nat_outip = *(struct in_addr *)addr;
4783 sum2 = nat->nat_outip.s_addr;
4784 } else if (v == 6 && np->in_v == 6) {
4785 if (!IP6_ISZERO(&np->in_next6.in6) ||
4786 !IP6_ISONES(&np->in_out[1].in6))
4787 continue;
4789 * Change the map-to address to be the same as
4790 * the new one.
4792 nat->nat_outip6.in6 = *(struct in6_addr *)addr;
4793 } else
4794 continue;
4796 } else if (((ifp == NULL) || (ifp == nat->nat_ifps[0])) &&
4797 !(nat->nat_flags & IPN_TCP) && (np = nat->nat_ptr)) {
4798 if (np->in_v == 4 && (v == 4 || v == 0)) {
4799 struct in_addr in;
4800 if (np->in_outmsk != 0xffffffff || np->in_nip)
4801 continue;
4803 * Change the map-to address to be the same as
4804 * the new one.
4806 sum1 = nat->nat_outip.s_addr;
4807 if (fr_ifpaddr(4, FRI_NORMAL, nat->nat_ifps[0],
4808 &in, NULL, ifs) != -1)
4809 nat->nat_outip = in;
4810 sum2 = nat->nat_outip.s_addr;
4811 } else if (np->in_v == 6 && (v == 6 || v == 0)) {
4812 struct in6_addr in6;
4813 if (!IP6_ISZERO(&np->in_next6.in6) ||
4814 !IP6_ISONES(&np->in_out[1].in6))
4815 continue;
4817 * Change the map-to address to be the same as
4818 * the new one.
4820 if (fr_ifpaddr(6, FRI_NORMAL, nat->nat_ifps[0],
4821 (void *)&in6, NULL, ifs) != -1)
4822 nat->nat_outip6.in6 = in6;
4823 } else
4824 continue;
4825 } else {
4826 continue;
4829 if (sum1 == sum2)
4830 continue;
4832 * Readjust the checksum adjustment to take into
4833 * account the new IP#.
4835 CALC_SUMD(sum1, sum2, sumd);
4836 /* XXX - dont change for TCP when solaris does
4837 * hardware checksumming.
4839 sumd += nat->nat_sumd[0];
4840 nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16);
4841 nat->nat_sumd[1] = nat->nat_sumd[0];
4844 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4845 SPL_X(s);
4849 /* ------------------------------------------------------------------------ */
4850 /* Function: fr_natifpsync */
4851 /* Returns: Nil */
4852 /* Parameters: action(I) - how we are syncing */
4853 /* ifp(I) - pointer to network interface */
4854 /* name(I) - name of interface to sync to */
4855 /* */
4856 /* This function is used to resync the mapping of interface names and their */
4857 /* respective 'pointers'. For "action == IPFSYNC_RESYNC", resync all */
4858 /* interfaces by doing a new lookup of name to 'pointer'. For "action == */
4859 /* IPFSYNC_NEWIFP", treat ifp as the new pointer value associated with */
4860 /* "name" and for "action == IPFSYNC_OLDIFP", ifp is a pointer for which */
4861 /* there is no longer any interface associated with it. */
4862 /* ------------------------------------------------------------------------ */
4863 void fr_natifpsync(action, v, ifp, name, ifs)
4864 int action, v;
4865 void *ifp;
4866 char *name;
4867 ipf_stack_t *ifs;
4869 #if defined(_KERNEL) && !defined(MENTAT) && defined(USE_SPL)
4870 int s;
4871 #endif
4872 nat_t *nat;
4873 ipnat_t *n;
4874 int nv;
4876 if (ifs->ifs_fr_running <= 0)
4877 return;
4879 SPL_NET(s);
4880 WRITE_ENTER(&ifs->ifs_ipf_nat);
4882 if (ifs->ifs_fr_running <= 0) {
4883 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4884 return;
4887 switch (action)
4889 case IPFSYNC_RESYNC :
4890 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4891 nv = (v == 0) ? nat->nat_v : v;
4892 if (nat->nat_v != nv)
4893 continue;
4894 if ((ifp == nat->nat_ifps[0]) ||
4895 (nat->nat_ifps[0] == (void *)-1)) {
4896 nat->nat_ifps[0] =
4897 fr_resolvenic(nat->nat_ifnames[0], nv, ifs);
4900 if ((ifp == nat->nat_ifps[1]) ||
4901 (nat->nat_ifps[1] == (void *)-1)) {
4902 nat->nat_ifps[1] =
4903 fr_resolvenic(nat->nat_ifnames[1], nv, ifs);
4907 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4908 nv = (v == 0) ? (int)n->in_v : v;
4909 if ((int)n->in_v != nv)
4910 continue;
4911 if (n->in_ifps[0] == ifp ||
4912 n->in_ifps[0] == (void *)-1) {
4913 n->in_ifps[0] =
4914 fr_resolvenic(n->in_ifnames[0], nv, ifs);
4916 if (n->in_ifps[1] == ifp ||
4917 n->in_ifps[1] == (void *)-1) {
4918 n->in_ifps[1] =
4919 fr_resolvenic(n->in_ifnames[1], nv, ifs);
4922 break;
4923 case IPFSYNC_NEWIFP :
4924 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4925 if (nat->nat_v != v)
4926 continue;
4927 if (!strncmp(name, nat->nat_ifnames[0],
4928 sizeof(nat->nat_ifnames[0])))
4929 nat->nat_ifps[0] = ifp;
4930 if (!strncmp(name, nat->nat_ifnames[1],
4931 sizeof(nat->nat_ifnames[1])))
4932 nat->nat_ifps[1] = ifp;
4934 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4935 if ((int)n->in_v != v)
4936 continue;
4937 if (!strncmp(name, n->in_ifnames[0],
4938 sizeof(n->in_ifnames[0])))
4939 n->in_ifps[0] = ifp;
4940 if (!strncmp(name, n->in_ifnames[1],
4941 sizeof(n->in_ifnames[1])))
4942 n->in_ifps[1] = ifp;
4944 break;
4945 case IPFSYNC_OLDIFP :
4946 for (nat = ifs->ifs_nat_instances; nat; nat = nat->nat_next) {
4947 if (nat->nat_v != v)
4948 continue;
4949 if (ifp == nat->nat_ifps[0])
4950 nat->nat_ifps[0] = (void *)-1;
4951 if (ifp == nat->nat_ifps[1])
4952 nat->nat_ifps[1] = (void *)-1;
4954 for (n = ifs->ifs_nat_list; (n != NULL); n = n->in_next) {
4955 if ((int)n->in_v != v)
4956 continue;
4957 if (n->in_ifps[0] == ifp)
4958 n->in_ifps[0] = (void *)-1;
4959 if (n->in_ifps[1] == ifp)
4960 n->in_ifps[1] = (void *)-1;
4962 break;
4964 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
4965 SPL_X(s);
#if SOLARIS2 >= 10
/*
 * Function:    fr_natifindexsync
 * Returns:     void
 * Parameters:  ifp    - interface, which is being sync'd
 *              newifp - new ifindex value for interface
 *              ifs    - IPF's stack
 *
 * Write Locks: assumes ipf_mutex is locked
 *
 * Replace every interface reference equal to ifp with newifp, in both the
 * active NAT entries and the NAT rules.  Only slots whose current value
 * matches ifp are touched.
 */
void fr_natifindexsync(ifp, newifp, ifs)
void *ifp;
void *newifp;
ipf_stack_t *ifs;
{
	nat_t *entry;
	ipnat_t *rule;
	int slot;

	WRITE_ENTER(&ifs->ifs_ipf_nat);

	entry = ifs->ifs_nat_instances;
	while (entry != NULL) {
		for (slot = 0; slot < 2; slot++) {
			if (entry->nat_ifps[slot] == ifp)
				entry->nat_ifps[slot] = newifp;
		}
		entry = entry->nat_next;
	}

	rule = ifs->ifs_nat_list;
	while (rule != NULL) {
		for (slot = 0; slot < 2; slot++) {
			if (rule->in_ifps[slot] == ifp)
				rule->in_ifps[slot] = newifp;
		}
		rule = rule->in_next;
	}

	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
}
#endif
5011 /* ------------------------------------------------------------------------ */
5012 /* Function: nat_icmpquerytype4 */
5013 /* Returns: int - 1 == success, 0 == failure */
5014 /* Parameters: icmptype(I) - ICMP type number */
5015 /* */
5016 /* Tests to see if the ICMP type number passed is a query/response type or */
5017 /* not. */
5018 /* ------------------------------------------------------------------------ */
5019 static INLINE int nat_icmpquerytype4(icmptype)
5020 int icmptype;
5024 * For the ICMP query NAT code, it is essential that both the query
5025 * and the reply match on the NAT rule. Because the NAT structure
5026 * does not keep track of the icmptype, and a single NAT structure
5027 * is used for all icmp types with the same src, dest and id, we
5028 * simply define the replies as queries as well. The funny thing is,
5029 * altough it seems silly to call a reply a query, this is exactly
5030 * as it is defined in the IPv4 specification
5033 switch (icmptype)
5036 case ICMP_ECHOREPLY:
5037 case ICMP_ECHO:
5038 /* route aedvertisement/solliciation is currently unsupported: */
5039 /* it would require rewriting the ICMP data section */
5040 case ICMP_TSTAMP:
5041 case ICMP_TSTAMPREPLY:
5042 case ICMP_IREQ:
5043 case ICMP_IREQREPLY:
5044 case ICMP_MASKREQ:
5045 case ICMP_MASKREPLY:
5046 return 1;
5047 default:
5048 return 0;
5053 /* ------------------------------------------------------------------------ */
5054 /* Function: nat_log */
5055 /* Returns: Nil */
5056 /* Parameters: nat(I) - pointer to NAT structure */
5057 /* type(I) - type of log entry to create */
5058 /* */
5059 /* Creates a NAT log entry. */
5060 /* ------------------------------------------------------------------------ */
5061 void nat_log(nat, type, ifs)
5062 struct nat *nat;
5063 u_int type;
5064 ipf_stack_t *ifs;
5066 #ifdef IPFILTER_LOG
5067 # ifndef LARGE_NAT
5068 struct ipnat *np;
5069 int rulen;
5070 # endif
5071 struct natlog natl;
5072 void *items[1];
5073 size_t sizes[1];
5074 int types[1];
5076 natl.nlg_inip = nat->nat_inip6;
5077 natl.nlg_outip = nat->nat_outip6;
5078 natl.nlg_origip = nat->nat_oip6;
5079 natl.nlg_bytes[0] = nat->nat_bytes[0];
5080 natl.nlg_bytes[1] = nat->nat_bytes[1];
5081 natl.nlg_pkts[0] = nat->nat_pkts[0];
5082 natl.nlg_pkts[1] = nat->nat_pkts[1];
5083 natl.nlg_origport = nat->nat_oport;
5084 natl.nlg_inport = nat->nat_inport;
5085 natl.nlg_outport = nat->nat_outport;
5086 natl.nlg_p = nat->nat_p;
5087 natl.nlg_type = type;
5088 natl.nlg_rule = -1;
5089 natl.nlg_v = nat->nat_v;
5090 # ifndef LARGE_NAT
5091 if (nat->nat_ptr != NULL) {
5092 for (rulen = 0, np = ifs->ifs_nat_list; np;
5093 np = np->in_next, rulen++)
5094 if (np == nat->nat_ptr) {
5095 natl.nlg_rule = rulen;
5096 break;
5099 # endif
5100 items[0] = &natl;
5101 sizes[0] = sizeof(natl);
5102 types[0] = 0;
5104 (void) ipllog(IPL_LOGNAT, NULL, items, sizes, types, 1, ifs);
5105 #endif
#ifdef __OpenBSD__
/*
 * Function:    nat_ifdetach
 * Returns:     Nil
 * Parameters:  ifp(I) - pointer to network interface
 *              ifs    - ipf stack instance
 *
 * Compatibility entry point for OpenBSD: simply forwards to frsync() so
 * that interface references within IPFilter are updated.
 */
void nat_ifdetach(ifp, ifs)
void *ifp;
ipf_stack_t *ifs;
{
	frsync(ifp, ifs);
}
#endif
5128 /* ------------------------------------------------------------------------ */
5129 /* Function: fr_ipnatderef */
5130 /* Returns: Nil */
5131 /* Parameters: inp(I) - pointer to pointer to NAT rule */
5132 /* Write Locks: ipf_nat */
5133 /* */
5134 /* ------------------------------------------------------------------------ */
5135 void fr_ipnatderef(inp, ifs)
5136 ipnat_t **inp;
5137 ipf_stack_t *ifs;
5139 ipnat_t *in;
5141 in = *inp;
5142 *inp = NULL;
5143 in->in_use--;
5144 if (in->in_use == 0 && (in->in_flags & IPN_DELETE)) {
5145 if (in->in_apr)
5146 appr_free(in->in_apr);
5147 KFREE(in);
5148 ifs->ifs_nat_stats.ns_rules--;
5149 #ifdef notdef
5150 #if SOLARIS
5151 if (ifs->ifs_nat_stats.ns_rules == 0)
5152 ifs->ifs_pfil_delayed_copy = 1;
5153 #endif
5154 #endif
5159 /* ------------------------------------------------------------------------ */
5160 /* Function: fr_natderef */
5161 /* Returns: Nil */
5162 /* Parameters: natp - pointer to pointer to NAT table entry */
5163 /* ifs - ipf stack instance */
5164 /* */
5165 /* Decrement the reference counter for this NAT table entry and free it if */
5166 /* there are no more things using it. */
5167 /* */
5168 /* IF nat_ref == 1 when this function is called, then we have an orphan nat */
5169 /* structure *because* it only gets called on paths _after_ nat_ref has been*/
5170 /* incremented. If nat_ref == 1 then we shouldn't decrement it here */
5171 /* because nat_delete() will do that and send nat_ref to -1. */
5172 /* */
5173 /* Holding the lock on nat_lock is required to serialise nat_delete() being */
5174 /* called from a NAT flush ioctl with a deref happening because of a packet.*/
5175 /* ------------------------------------------------------------------------ */
5176 void fr_natderef(natp, ifs)
5177 nat_t **natp;
5178 ipf_stack_t *ifs;
5180 nat_t *nat;
5182 nat = *natp;
5183 *natp = NULL;
5185 MUTEX_ENTER(&nat->nat_lock);
5186 if (nat->nat_ref > 1) {
5187 nat->nat_ref--;
5188 MUTEX_EXIT(&nat->nat_lock);
5189 return;
5191 MUTEX_EXIT(&nat->nat_lock);
5193 WRITE_ENTER(&ifs->ifs_ipf_nat);
5194 (void) nat_delete(nat, NL_EXPIRE, ifs);
5195 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5199 /* ------------------------------------------------------------------------ */
5200 /* Function: fr_natclone */
5201 /* Returns: ipstate_t* - NULL == cloning failed, */
5202 /* else pointer to new NAT structure */
5203 /* Parameters: fin(I) - pointer to packet information */
5204 /* nat(I) - pointer to master NAT structure */
5205 /* Write Lock: ipf_nat */
5206 /* */
5207 /* Create a "duplicate" NAT table entry from the master. */
5208 /* ------------------------------------------------------------------------ */
5209 nat_t *fr_natclone(fin, nat)
5210 fr_info_t *fin;
5211 nat_t *nat;
5213 frentry_t *fr;
5214 nat_t *clone;
5215 ipnat_t *np;
5216 ipf_stack_t *ifs = fin->fin_ifs;
5219 * Trigger automatic call to nat_flushtable() if the
5220 * table has reached capcity specified by hi watermark.
5222 if (NAT_TAB_WATER_LEVEL(ifs) > ifs->ifs_nat_flush_level_hi)
5223 ifs->ifs_nat_doflush = 1;
5226 * If automatic flushing did not do its job, and the table
5227 * has filled up, don't try to create a new entry.
5229 if (ifs->ifs_nat_stats.ns_inuse >= ifs->ifs_ipf_nattable_max) {
5230 ifs->ifs_nat_stats.ns_memfail++;
5231 return NULL;
5234 KMALLOC(clone, nat_t *);
5235 if (clone == NULL)
5236 return NULL;
5237 bcopy((char *)nat, (char *)clone, sizeof(*clone));
5239 MUTEX_NUKE(&clone->nat_lock);
5241 clone->nat_aps = NULL;
5243 * Initialize all these so that nat_delete() doesn't cause a crash.
5245 clone->nat_tqe.tqe_pnext = NULL;
5246 clone->nat_tqe.tqe_next = NULL;
5247 clone->nat_tqe.tqe_ifq = NULL;
5248 clone->nat_tqe.tqe_parent = clone;
5250 clone->nat_flags &= ~SI_CLONE;
5251 clone->nat_flags |= SI_CLONED;
5253 if (clone->nat_hm)
5254 clone->nat_hm->hm_ref++;
5256 if (nat_insert(clone, fin->fin_rev, ifs) == -1) {
5257 KFREE(clone);
5258 return NULL;
5260 np = clone->nat_ptr;
5261 if (np != NULL) {
5262 if (ifs->ifs_nat_logging)
5263 nat_log(clone, (u_int)np->in_redir, ifs);
5264 np->in_use++;
5266 fr = clone->nat_fr;
5267 if (fr != NULL) {
5268 MUTEX_ENTER(&fr->fr_lock);
5269 fr->fr_ref++;
5270 MUTEX_EXIT(&fr->fr_lock);
5274 * Because the clone is created outside the normal loop of things and
5275 * TCP has special needs in terms of state, initialise the timeout
5276 * state of the new NAT from here.
5278 if (clone->nat_p == IPPROTO_TCP) {
5279 (void) fr_tcp_age(&clone->nat_tqe, fin, ifs->ifs_nat_tqb,
5280 clone->nat_flags);
5282 #ifdef IPFILTER_SYNC
5283 clone->nat_sync = ipfsync_new(SMC_NAT, fin, clone);
5284 #endif
5285 if (ifs->ifs_nat_logging)
5286 nat_log(clone, NL_CLONE, ifs);
5287 return clone;
5291 /* ------------------------------------------------------------------------ */
5292 /* Function: nat_wildok */
5293 /* Returns: int - 1 == packet's ports match wildcards */
5294 /* 0 == packet's ports don't match wildcards */
5295 /* Parameters: nat(I) - NAT entry */
5296 /* sport(I) - source port */
5297 /* dport(I) - destination port */
5298 /* flags(I) - wildcard flags */
5299 /* dir(I) - packet direction */
5300 /* */
5301 /* Use NAT entry and packet direction to determine which combination of */
5302 /* wildcard flags should be used. */
5303 /* ------------------------------------------------------------------------ */
5304 int nat_wildok(nat, sport, dport, flags, dir)
5305 nat_t *nat;
5306 int sport;
5307 int dport;
5308 int flags;
5309 int dir;
5312 * When called by dir is set to
5313 * nat_inlookup NAT_INBOUND (0)
5314 * nat_outlookup NAT_OUTBOUND (1)
5316 * We simply combine the packet's direction in dir with the original
5317 * "intended" direction of that NAT entry in nat->nat_dir to decide
5318 * which combination of wildcard flags to allow.
5321 switch ((dir << 1) | nat->nat_dir)
5323 case 3: /* outbound packet / outbound entry */
5324 if (((nat->nat_inport == sport) ||
5325 (flags & SI_W_SPORT)) &&
5326 ((nat->nat_oport == dport) ||
5327 (flags & SI_W_DPORT)))
5328 return 1;
5329 break;
5330 case 2: /* outbound packet / inbound entry */
5331 if (((nat->nat_outport == sport) ||
5332 (flags & SI_W_DPORT)) &&
5333 ((nat->nat_oport == dport) ||
5334 (flags & SI_W_SPORT)))
5335 return 1;
5336 break;
5337 case 1: /* inbound packet / outbound entry */
5338 if (((nat->nat_oport == sport) ||
5339 (flags & SI_W_DPORT)) &&
5340 ((nat->nat_outport == dport) ||
5341 (flags & SI_W_SPORT)))
5342 return 1;
5343 break;
5344 case 0: /* inbound packet / inbound entry */
5345 if (((nat->nat_oport == sport) ||
5346 (flags & SI_W_SPORT)) &&
5347 ((nat->nat_outport == dport) ||
5348 (flags & SI_W_DPORT)))
5349 return 1;
5350 break;
5351 default:
5352 break;
5355 return(0);
5359 /* ------------------------------------------------------------------------ */
5360 /* Function: nat_mssclamp */
5361 /* Returns: Nil */
5362 /* Parameters: tcp(I) - pointer to TCP header */
5363 /* maxmss(I) - value to clamp the TCP MSS to */
5364 /* csump(I) - pointer to TCP checksum */
5365 /* */
5366 /* Check for MSS option and clamp it if necessary. If found and changed, */
5367 /* then the TCP header checksum will be updated to reflect the change in */
5368 /* the MSS. */
5369 /* ------------------------------------------------------------------------ */
5370 static void nat_mssclamp(tcp, maxmss, csump)
5371 tcphdr_t *tcp;
5372 u_32_t maxmss;
5373 u_short *csump;
5375 u_char *cp, *ep, opt;
5376 int hlen, advance;
5377 u_32_t mss, sumd;
5379 hlen = TCP_OFF(tcp) << 2;
5380 if (hlen > sizeof(*tcp)) {
5381 cp = (u_char *)tcp + sizeof(*tcp);
5382 ep = (u_char *)tcp + hlen;
5384 while (cp < ep) {
5385 opt = cp[0];
5386 if (opt == TCPOPT_EOL)
5387 break;
5388 else if (opt == TCPOPT_NOP) {
5389 cp++;
5390 continue;
5393 if (cp + 1 >= ep)
5394 break;
5395 advance = cp[1];
5396 if ((cp + advance > ep) || (advance <= 0))
5397 break;
5398 switch (opt)
5400 case TCPOPT_MAXSEG:
5401 if (advance != 4)
5402 break;
5403 mss = cp[2] * 256 + cp[3];
5404 if (mss > maxmss) {
5405 cp[2] = maxmss / 256;
5406 cp[3] = maxmss & 0xff;
5407 CALC_SUMD(mss, maxmss, sumd);
5408 fix_outcksum(csump, sumd);
5410 break;
5411 default:
5412 /* ignore unknown options */
5413 break;
5416 cp += advance;
5422 /* ------------------------------------------------------------------------ */
5423 /* Function: fr_setnatqueue */
5424 /* Returns: Nil */
5425 /* Parameters: nat(I)- pointer to NAT structure */
5426 /* rev(I) - forward(0) or reverse(1) direction */
5427 /* Locks: ipf_nat (read or write) */
5428 /* */
5429 /* Put the NAT entry on its default queue entry, using rev as a helped in */
5430 /* determining which queue it should be placed on. */
5431 /* ------------------------------------------------------------------------ */
5432 void fr_setnatqueue(nat, rev, ifs)
5433 nat_t *nat;
5434 int rev;
5435 ipf_stack_t *ifs;
5437 ipftq_t *oifq, *nifq;
5439 if (nat->nat_ptr != NULL)
5440 nifq = nat->nat_ptr->in_tqehead[rev];
5441 else
5442 nifq = NULL;
5444 if (nifq == NULL) {
5445 switch (nat->nat_p)
5447 case IPPROTO_UDP :
5448 nifq = &ifs->ifs_nat_udptq;
5449 break;
5450 case IPPROTO_ICMP :
5451 nifq = &ifs->ifs_nat_icmptq;
5452 break;
5453 case IPPROTO_TCP :
5454 nifq = ifs->ifs_nat_tqb + nat->nat_tqe.tqe_state[rev];
5455 break;
5456 default :
5457 nifq = &ifs->ifs_nat_iptq;
5458 break;
5462 oifq = nat->nat_tqe.tqe_ifq;
5464 * If it's currently on a timeout queue, move it from one queue to
5465 * another, else put it on the end of the newly determined queue.
5467 if (oifq != NULL)
5468 fr_movequeue(&nat->nat_tqe, oifq, nifq, ifs);
5469 else
5470 fr_queueappend(&nat->nat_tqe, nifq, nat, ifs);
5471 return;
5474 /* ------------------------------------------------------------------------ */
5475 /* Function: nat_getnext */
5476 /* Returns: int - 0 == ok, else error */
5477 /* Parameters: t(I) - pointer to ipftoken structure */
5478 /* itp(I) - pointer to ipfgeniter_t structure */
5479 /* ifs - ipf stack instance */
5480 /* */
5481 /* Fetch the next nat/ipnat/hostmap structure pointer from the linked list */
5482 /* and copy it out to the storage space pointed to by itp. The next item */
5483 /* in the list to look at is put back in the ipftoken struture. */
5484 /* ------------------------------------------------------------------------ */
/*
 * nat_getnext: copy out up to itp->igi_nitems consecutive hostmap, NAT
 * rule or NAT entry structures (selected by itp->igi_type) to the buffer
 * itp->igi_data, resuming after the entry remembered in t->ipt_data.
 * Returns 0 on success, EINVAL for a zero item count or an unknown type,
 * and EFAULT if a copy-out fails.  When the end of the list is reached
 * the token is released with ipf_freetoken() and the loop stops.
 */
5485 static int nat_getnext(t, itp, ifs)
5486 ipftoken_t *t;
5487 ipfgeniter_t *itp;
5488 ipf_stack_t *ifs;
/*
 * hm/ipn/nat hold the "previous" entry (the one the token referenced),
 * next* the entry about to be copied out, and zero* are stack copies
 * used as the all-zero terminating record.
 */
5490 hostmap_t *hm, *nexthm = NULL, zerohm;
5491 ipnat_t *ipn, *nextipnat = NULL, zeroipn;
5492 nat_t *nat, *nextnat = NULL, zeronat;
5493 int error = 0, count;
5494 char *dst;
5496 if (itp->igi_nitems == 0)
5497 return EINVAL;
5499 READ_ENTER(&ifs->ifs_ipf_nat);
5502 * Get "previous" entry from the token and find the next entry.
/* A NULL token entry means "start from the head of the list". */
5504 switch (itp->igi_type)
5506 case IPFGENITER_HOSTMAP :
5507 hm = t->ipt_data;
5508 if (hm == NULL) {
5509 nexthm = ifs->ifs_ipf_hm_maplist;
5510 } else {
5511 nexthm = hm->hm_next;
5513 break;
5515 case IPFGENITER_IPNAT :
5516 ipn = t->ipt_data;
5517 if (ipn == NULL) {
5518 nextipnat = ifs->ifs_nat_list;
5519 } else {
5520 nextipnat = ipn->in_next;
5522 break;
5524 case IPFGENITER_NAT :
5525 nat = t->ipt_data;
5526 if (nat == NULL) {
5527 nextnat = ifs->ifs_nat_instances;
5528 } else {
5529 nextnat = nat->nat_next;
5531 break;
5532 default :
/* Unknown iterator type: drop the read lock before failing. */
5533 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5534 return EINVAL;
5538 * Note, this loop is based on the number of items that a user
5539 * requested. The user can request any number, potentially far more than
5540 * the number of items that actually exist. If a user does that, we'll
5541 * break out of this by setting the value of count to 1 which terminates
5542 * the loop. This should be fine from an ioctl perspective, because the
5543 * last entry that we insert will be the zero entry which terminates the
5544 * chain.
/* The read lock is held on entry to every iteration of this loop. */
5546 dst = itp->igi_data;
5547 for (count = itp->igi_nitems; count > 0; count--) {
5549 * If we found an entry, add a reference to it and update the token.
5550 * Otherwise, zero out data to be returned and NULL out token.
5552 switch (itp->igi_type)
5554 case IPFGENITER_HOSTMAP :
5555 if (nexthm != NULL) {
5556 ATOMIC_INC32(nexthm->hm_ref);
5557 t->ipt_data = nexthm;
5558 } else {
5559 bzero(&zerohm, sizeof(zerohm));
5560 nexthm = &zerohm;
5561 t->ipt_data = NULL;
5563 break;
5564 case IPFGENITER_IPNAT :
5565 if (nextipnat != NULL) {
5566 ATOMIC_INC32(nextipnat->in_use);
5567 t->ipt_data = nextipnat;
5568 } else {
5569 bzero(&zeroipn, sizeof(zeroipn));
5570 nextipnat = &zeroipn;
5571 t->ipt_data = NULL;
5573 break;
5574 case IPFGENITER_NAT :
5575 if (nextnat != NULL) {
/* nat_ref is changed under the entry's own nat_lock. */
5576 MUTEX_ENTER(&nextnat->nat_lock);
5577 nextnat->nat_ref++;
5578 MUTEX_EXIT(&nextnat->nat_lock);
5579 t->ipt_data = nextnat;
5580 } else {
5581 bzero(&zeronat, sizeof(zeronat));
5582 nextnat = &zeronat;
5583 t->ipt_data = NULL;
5585 break;
5586 default :
5587 break;
5591 * Now that we have ref, it's save to give up lock.
5593 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5596 * Copy out data and clean up references and token as needed.
/*
 * The three cases below share one shape: copy the record out (any
 * COPYOUT failure is reported as EFAULT); if the zero record was just
 * written (token data is NULL) free the token and force the loop to
 * end by setting count to 1; otherwise release the previous entry,
 * stop likewise if the current entry has no successor, or else
 * advance dst and step to the successor.
 */
5598 switch (itp->igi_type)
5600 case IPFGENITER_HOSTMAP :
5601 error = COPYOUT(nexthm, dst, sizeof(*nexthm));
5602 if (error != 0)
5603 error = EFAULT;
5604 if (t->ipt_data == NULL) {
5605 ipf_freetoken(t, ifs);
5606 count = 1;
5607 break;
5608 } else {
5609 if (hm != NULL) {
/* The previous hostmap is released under the write lock. */
5610 WRITE_ENTER(&ifs->ifs_ipf_nat);
5611 fr_hostmapdel(&hm);
5612 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5614 if (nexthm->hm_next == NULL) {
5615 ipf_freetoken(t, ifs);
5616 count = 1;
5617 break;
5619 dst += sizeof(*nexthm);
5620 hm = nexthm;
5621 nexthm = nexthm->hm_next;
5623 break;
5625 case IPFGENITER_IPNAT :
5626 error = COPYOUT(nextipnat, dst, sizeof(*nextipnat));
5627 if (error != 0)
5628 error = EFAULT;
5629 if (t->ipt_data == NULL) {
5630 ipf_freetoken(t, ifs);
5631 count = 1;
5632 break;
5633 } else {
5634 if (ipn != NULL) {
/* The previous rule is dereferenced under the write lock. */
5635 WRITE_ENTER(&ifs->ifs_ipf_nat);
5636 fr_ipnatderef(&ipn, ifs);
5637 RWLOCK_EXIT(&ifs->ifs_ipf_nat);
5639 if (nextipnat->in_next == NULL) {
5640 ipf_freetoken(t, ifs);
5641 count = 1;
5642 break;
5644 dst += sizeof(*nextipnat);
5645 ipn = nextipnat;
5646 nextipnat = nextipnat->in_next;
5648 break;
5650 case IPFGENITER_NAT :
5651 error = COPYOUT(nextnat, dst, sizeof(*nextnat));
5652 if (error != 0)
5653 error = EFAULT;
5654 if (t->ipt_data == NULL) {
5655 ipf_freetoken(t, ifs);
5656 count = 1;
5657 break;
5658 } else {
/* fr_natderef() is called here without ipf_nat held. */
5659 if (nat != NULL)
5660 fr_natderef(&nat, ifs);
5661 if (nextnat->nat_next == NULL) {
5662 ipf_freetoken(t, ifs);
5663 count = 1;
5664 break;
5666 dst += sizeof(*nextnat);
5667 nat = nextnat;
5668 nextnat = nextnat->nat_next;
5670 break;
5671 default :
5672 break;
/*
 * Leave without re-taking the read lock when the end of the list was
 * reached (or this was the last requested item) or a copy-out failed.
 */
5675 if ((count == 1) || (error != 0))
5676 break;
5678 READ_ENTER(&ifs->ifs_ipf_nat);
5681 return error;
5685 /* ------------------------------------------------------------------------ */
5686 /* Function: nat_iterator */
5687 /* Returns: int - 0 == ok, else error */
5688 /* Parameters: token(I) - pointer to ipftoken structure */
5689 /* itp(I) - pointer to ipfgeniter_t structure */
5690 /* */
5691 /* This function acts as a handler for the SIOCGENITER ioctls that use a */
5692 /* generic structure to iterate through a list. There are three different */
5693 /* linked lists of NAT related information to go through: NAT rules, active */
5694 /* NAT mappings and the NAT fragment cache. */
5695 /* ------------------------------------------------------------------------ */
5696 static int nat_iterator(token, itp, ifs)
5697 ipftoken_t *token;
5698 ipfgeniter_t *itp;
5699 ipf_stack_t *ifs;
5701 int error;
5703 if (itp->igi_data == NULL)
5704 return EFAULT;
5706 token->ipt_subtype = itp->igi_type;
5708 switch (itp->igi_type)
5710 case IPFGENITER_HOSTMAP :
5711 case IPFGENITER_IPNAT :
5712 case IPFGENITER_NAT :
5713 error = nat_getnext(token, itp, ifs);
5714 break;
5715 case IPFGENITER_NATFRAG :
5716 error = fr_nextfrag(token, itp, &ifs->ifs_ipfr_natlist,
5717 &ifs->ifs_ipfr_nattail,
5718 &ifs->ifs_ipf_natfrag, ifs);
5719 break;
5720 default :
5721 error = EINVAL;
5722 break;
5725 return error;
5729 /* ---------------------------------------------------------------------- */
5730 /* Function: nat_flushtable */
5731 /* Returns: int - 0 == success, -1 == failure */
5732 /* Parameters: flush_option - how to flush the active NAT table */
5733 /* ifs - ipf stack instance */
5734 /* Write Locks: ipf_nat */
5735 /* */
5736 /* Flush NAT tables. Three actions currently defined: */
5737 /* */
5738 /* FLUSH_TABLE_ALL : Flush all NAT table entries */
5739 /* */
5740 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */
5741 /* have started to close on both ends using */
5742 /* ipf_flushclosing(). */
5743 /* */
5744 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */
5745 /* Then, if needed, flush entries with TCP */
5746 /* connections which have been idle for a long */
5747 /* time with ipf_extraflush(). */
5748 /* ---------------------------------------------------------------------- */
5749 static int nat_flushtable(flush_option, ifs)
5750 int flush_option;
5751 ipf_stack_t *ifs;
5753 nat_t *nat, *natn;
5754 int removed;
5755 SPL_INT(s);
5757 removed = 0;
5759 SPL_NET(s);
5760 switch (flush_option)
5762 case FLUSH_TABLE_ALL:
5763 natn = ifs->ifs_nat_instances;
5764 while ((nat = natn) != NULL) {
5765 natn = nat->nat_next;
5766 if (nat_delete(nat, NL_FLUSH, ifs) == 0)
5767 removed++;
5769 break;
5771 case FLUSH_TABLE_CLOSING:
5772 removed = ipf_flushclosing(NAT_FLUSH,
5773 IPF_TCPS_CLOSE_WAIT,
5774 ifs->ifs_nat_tqb,
5775 ifs->ifs_nat_utqe,
5776 ifs);
5777 break;
5779 case FLUSH_TABLE_EXTRA:
5780 removed = ipf_flushclosing(NAT_FLUSH,
5781 IPF_TCPS_FIN_WAIT_2,
5782 ifs->ifs_nat_tqb,
5783 ifs->ifs_nat_utqe,
5784 ifs);
5787 * Be sure we haven't done this in the last 10 seconds.
5789 if (ifs->ifs_fr_ticks - ifs->ifs_nat_last_force_flush <
5790 IPF_TTLVAL(10))
5791 break;
5792 ifs->ifs_nat_last_force_flush = ifs->ifs_fr_ticks;
5793 removed += ipf_extraflush(NAT_FLUSH,
5794 &ifs->ifs_nat_tqb[IPF_TCPS_ESTABLISHED],
5795 ifs->ifs_nat_utqe,
5796 ifs);
5797 break;
5799 default: /* Flush Nothing */
5800 break;
5803 SPL_X(s);
5804 return (removed);
5808 /* ------------------------------------------------------------------------ */
5809 /* Function: nat_uncreate */
5810 /* Returns: Nil */
5811 /* Parameters: fin(I) - pointer to packet information */
5812 /* */
5813 /* This function is used to remove a NAT entry from the NAT table when we */
5814 /* decide that the create was actually in error. It is thus assumed that */
5815 /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */
5816 /* with the translated packet (not the original), we have to reverse the */
5817 /* lookup. Although doing the lookup is expensive (relatively speaking), it */
5818 /* is not anticipated that this will be a frequent occurance for normal */
5819 /* traffic patterns. */
5820 /* ------------------------------------------------------------------------ */
5821 void nat_uncreate(fin)
5822 fr_info_t *fin;
5824 ipf_stack_t *ifs = fin->fin_ifs;
5825 int nflags;
5826 nat_t *nat;
5828 switch (fin->fin_p)
5830 case IPPROTO_TCP :
5831 nflags = IPN_TCP;
5832 break;
5833 case IPPROTO_UDP :
5834 nflags = IPN_UDP;
5835 break;
5836 default :
5837 nflags = 0;
5838 break;
5841 WRITE_ENTER(&ifs->ifs_ipf_nat);
5843 if (fin->fin_out == 0) {
5844 nat = nat_outlookup(fin, nflags, (u_int)fin->fin_p,
5845 fin->fin_dst, fin->fin_src);
5846 } else {
5847 nat = nat_inlookup(fin, nflags, (u_int)fin->fin_p,
5848 fin->fin_src, fin->fin_dst);
5851 if (nat != NULL) {
5852 ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][0]++;
5853 (void) nat_delete(nat, NL_DESTROY, ifs);
5854 } else {
5855 ifs->ifs_nat_stats.ns_uncreate[fin->fin_out][1]++;
5858 RWLOCK_EXIT(&ifs->ifs_ipf_nat);