/* NAT for netfilter; shared with compatibility layer. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/timer.h>
14 #include <linux/skbuff.h>
15 #include <linux/vmalloc.h>
16 #include <net/checksum.h>
19 #include <net/tcp.h> /* For tcp_prot in getorigdst */
20 #include <linux/icmp.h>
21 #include <linux/udp.h>
22 #include <linux/jhash.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_nat.h>
28 #include <net/netfilter/nf_nat_protocol.h>
29 #include <net/netfilter/nf_nat_core.h>
30 #include <net/netfilter/nf_nat_helper.h>
31 #include <net/netfilter/nf_conntrack_helper.h>
32 #include <net/netfilter/nf_conntrack_l3proto.h>
33 #include <net/netfilter/nf_conntrack_l4proto.h>
34 #include <linux/netfilter_ipv4/ipt_cone.h>
38 #include <linux/if_vlan.h>
41 #include <ctf/hndctf.h>
43 #define NFC_CTF_ENABLED (1 << 31)
49 #define DEBUGP(format, args...)
52 static DEFINE_RWLOCK(nf_nat_lock
);
54 static struct nf_conntrack_l3proto
*l3proto
= NULL
;
56 /* Calculated at init based on memory size */
57 static unsigned int nf_nat_htable_size
;
59 static struct list_head
*bysource
;
61 #define MAX_IP_NAT_PROTO 256
62 static struct nf_nat_protocol
*nf_nat_protos
[MAX_IP_NAT_PROTO
];
64 static inline struct nf_nat_protocol
*
65 __nf_nat_proto_find(u_int8_t protonum
)
67 return rcu_dereference(nf_nat_protos
[protonum
]);
70 struct nf_nat_protocol
*
71 nf_nat_proto_find_get(u_int8_t protonum
)
73 struct nf_nat_protocol
*p
;
76 p
= __nf_nat_proto_find(protonum
);
77 if (!try_module_get(p
->me
))
78 p
= &nf_nat_unknown_protocol
;
83 EXPORT_SYMBOL_GPL(nf_nat_proto_find_get
);
86 nf_nat_proto_put(struct nf_nat_protocol
*p
)
90 EXPORT_SYMBOL_GPL(nf_nat_proto_put
);
92 /* We keep an extra hash for each conntrack, for fast searching. */
93 static inline unsigned int
94 hash_by_src(const struct nf_conntrack_tuple
*tuple
)
98 /* Original src, to ensure we map it consistently if poss. */
99 hash
= jhash_3words((__force u32
)tuple
->src
.u3
.ip
,
100 (__force u32
)tuple
->src
.u
.all
,
101 tuple
->dst
.protonum
, 0);
102 return ((u64
)hash
* nf_nat_htable_size
) >> 32;
106 extern int ipv4_conntrack_fastnat
;
109 ip_conntrack_is_ipc_allowed(struct sk_buff
*skb
, u_int32_t hooknum
)
111 struct net_device
*dev
;
113 if (!ipv4_conntrack_fastnat
|| !CTF_ENAB(kcih
))
116 if (hooknum
== NF_IP_PRE_ROUTING
|| hooknum
== NF_IP_POST_ROUTING
) {
118 if (dev
->priv_flags
& IFF_802_1Q_VLAN
)
119 dev
= VLAN_DEV_INFO(dev
)->real_dev
;
121 /* Add ipc entry if packet is received on ctf enabled interface
122 * and the packet is not a defrag'd one.
124 if (ctf_isenabled(kcih
, dev
) && (skb
->len
<= dev
->mtu
))
125 skb
->nfcache
|= NFC_CTF_ENABLED
;
128 /* Add the cache entries only if the device has registered and
131 if (skb
->nfcache
& NFC_CTF_ENABLED
)
136 #ifdef CONFIG_BCM_NAT_MODULE
137 EXPORT_SYMBOL(ip_conntrack_is_ipc_allowed
);
141 ip_conntrack_ipct_add(struct sk_buff
*skb
, u_int32_t hooknum
,
142 struct nf_conn
*ct
, enum ip_conntrack_info ci
,
143 struct nf_conntrack_tuple
*manip
)
152 struct nf_conn_help
*help
;
153 enum ip_conntrack_dir dir
;
155 if ((skb
== NULL
) || (ct
== NULL
))
158 /* Check CTF enabled */
159 if (!ip_conntrack_is_ipc_allowed(skb
, hooknum
))
162 /* We only add cache entires for non-helper connections and at
163 * pre or post routing hooks.
165 help
= nfct_help(ct
);
166 if ((help
&& help
->helper
) || (ct
->ctf_flags
& CTF_FLAGS_EXCLUDED
) ||
167 ((hooknum
!= NF_IP_PRE_ROUTING
) && (hooknum
!= NF_IP_POST_ROUTING
)))
170 /* Add ipc entries for connections in established state only */
171 if ((ci
!= IP_CT_ESTABLISHED
) && (ci
!= (IP_CT_ESTABLISHED
+IP_CT_IS_REPLY
)))
175 if (iph
->version
!= 4 ||
176 (iph
->protocol
!= IPPROTO_TCP
&& iph
->protocol
!= IPPROTO_UDP
))
179 if (iph
->protocol
== IPPROTO_TCP
&&
180 ct
->proto
.tcp
.state
>= TCP_CONNTRACK_FIN_WAIT
&&
181 ct
->proto
.tcp
.state
<= TCP_CONNTRACK_TIME_WAIT
)
184 dir
= CTINFO2DIR(ci
);
185 if (ct
->ctf_flags
& (1 << dir
))
188 /* Do route lookup for alias address if we are doing DNAT in this
192 if ((manip
!= NULL
) && (HOOK2MANIP(hooknum
) == IP_NAT_MANIP_DST
))
193 daddr
= manip
->dst
.u3
.ip
;
195 /* Find the destination interface */
196 if (skb
->dst
== NULL
)
197 ip_route_input(skb
, daddr
, iph
->saddr
, iph
->tos
, skb
->dev
);
199 /* Ensure the packet belongs to a forwarding connection and it is
200 * destined to an unicast address.
202 rt
= (struct rtable
*)skb
->dst
;
203 if ((rt
== NULL
) || (rt
->u
.dst
.input
!= ip_forward
) ||
204 (rt
->rt_type
!= RTN_UNICAST
) || (rt
->u
.dst
.neighbour
== NULL
) ||
205 ((rt
->u
.dst
.neighbour
->nud_state
&
206 (NUD_PERMANENT
|NUD_REACHABLE
|NUD_STALE
|NUD_DELAY
|NUD_PROBE
)) == 0))
209 memset(&ipc_entry
, 0, sizeof(ipc_entry
));
211 /* Init the neighboring sender address */
212 memcpy(ipc_entry
.sa
.octet
, eth_hdr(skb
)->h_source
, ETH_ALEN
);
214 /* If the packet is received on a bridge device then save
215 * the bridge cache entry pointer in the ip cache entry.
216 * This will be referenced in the data path to update the
217 * live counter of brc entry whenever a received packet
218 * matches corresponding ipc entry matches.
220 if ((skb
->dev
!= NULL
) && ctf_isbridge(kcih
, skb
->dev
))
221 ipc_entry
.brcp
= ctf_brc_lkup(kcih
, eth_hdr(skb
)->h_source
);
225 eth
= (struct ethhdr
*)(((unsigned char *)hh
->hh_data
) + 2);
226 memcpy(ipc_entry
.dhost
.octet
, eth
->h_dest
, ETH_ALEN
);
227 memcpy(ipc_entry
.shost
.octet
, eth
->h_source
, ETH_ALEN
);
229 memcpy(ipc_entry
.dhost
.octet
, rt
->u
.dst
.neighbour
->ha
, ETH_ALEN
);
230 memcpy(ipc_entry
.shost
.octet
, skb
->dst
->dev
->dev_addr
, ETH_ALEN
);
233 tcph
= ((struct tcphdr
*)(((__u8
*)iph
) + (iph
->ihl
<< 2)));
235 /* Add ctf ipc entry for this direction */
236 ipc_entry
.tuple
.sip
= iph
->saddr
;
237 ipc_entry
.tuple
.dip
= iph
->daddr
;
238 ipc_entry
.tuple
.proto
= iph
->protocol
;
239 ipc_entry
.tuple
.sp
= tcph
->source
;
240 ipc_entry
.tuple
.dp
= tcph
->dest
;
242 ipc_entry
.next
= NULL
;
244 /* For vlan interfaces fill the vlan id and the tag/untag actions */
245 if (skb
->dst
->dev
->priv_flags
& IFF_802_1Q_VLAN
) {
246 ipc_entry
.txif
= (void *)(VLAN_DEV_INFO(skb
->dst
->dev
)->real_dev
);
247 ipc_entry
.vid
= VLAN_DEV_INFO(skb
->dst
->dev
)->vlan_id
;
248 ipc_entry
.action
= ((VLAN_DEV_INFO(skb
->dst
->dev
)->flags
& 1) ?
249 CTF_ACTION_TAG
: CTF_ACTION_UNTAG
);
251 ipc_entry
.txif
= skb
->dst
->dev
;
252 ipc_entry
.action
= CTF_ACTION_UNTAG
;
255 /* Update the manip ip and port */
257 if (HOOK2MANIP(hooknum
) == IP_NAT_MANIP_SRC
) {
258 ipc_entry
.nat
.ip
= manip
->src
.u3
.ip
;
259 ipc_entry
.nat
.port
= manip
->src
.u
.tcp
.port
;
260 ipc_entry
.action
|= CTF_ACTION_SNAT
;
262 ipc_entry
.nat
.ip
= manip
->dst
.u3
.ip
;
263 ipc_entry
.nat
.port
= manip
->dst
.u
.tcp
.port
;
264 ipc_entry
.action
|= CTF_ACTION_DNAT
;
268 /* Do bridge cache lookup to determine outgoing interface
269 * and any vlan tagging actions if needed.
271 if (ctf_isbridge(kcih
, ipc_entry
.txif
)) {
274 brcp
= ctf_brc_lkup(kcih
, ipc_entry
.dhost
.octet
);
279 ipc_entry
.action
|= brcp
->action
;
280 ipc_entry
.txif
= brcp
->txifp
;
281 ipc_entry
.vid
= brcp
->vid
;
286 printk("%s: Adding ipc entry for [%d]%u.%u.%u.%u:%u - %u.%u.%u.%u:%u\n", __FUNCTION__
,
287 ipc_entry
.tuple
.proto
,
288 NIPQUAD(ipc_entry
.tuple
.sip
), ntohs(ipc_entry
.tuple
.sp
),
289 NIPQUAD(ipc_entry
.tuple
.dip
), ntohs(ipc_entry
.tuple
.dp
));
290 printk("sa %02x:%02x:%02x:%02x:%02x:%02x\n",
291 ipc_entry
.shost
.octet
[0], ipc_entry
.shost
.octet
[1],
292 ipc_entry
.shost
.octet
[2], ipc_entry
.shost
.octet
[3],
293 ipc_entry
.shost
.octet
[4], ipc_entry
.shost
.octet
[5]);
294 printk("da %02x:%02x:%02x:%02x:%02x:%02x\n",
295 ipc_entry
.dhost
.octet
[0], ipc_entry
.dhost
.octet
[1],
296 ipc_entry
.dhost
.octet
[2], ipc_entry
.dhost
.octet
[3],
297 ipc_entry
.dhost
.octet
[4], ipc_entry
.dhost
.octet
[5]);
298 printk("[%d] vid: %d action %x\n", hooknum
, ipc_entry
.vid
, ipc_entry
.action
);
300 printk("manip_ip: %u.%u.%u.%u manip_port %u\n",
301 NIPQUAD(ipc_entry
.nat
.ip
), ntohs(ipc_entry
.nat
.port
));
302 printk("txif: %s\n", ((struct net_device
*)ipc_entry
.txif
)->name
);
305 ctf_ipc_add(kcih
, &ipc_entry
);
307 /* Update the attributes flag to indicate a CTF conn */
308 ct
->ctf_flags
|= (CTF_FLAGS_CACHED
| (1 << dir
));
311 #ifdef CONFIG_BCM_NAT_MODULE
312 EXPORT_SYMBOL(ip_conntrack_ipct_add
);
316 ip_conntrack_ipct_delete(struct nf_conn
*ct
, int ct_timeout
)
319 struct nf_conntrack_tuple
*orig
, *repl
;
324 orig
= &ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
;
326 if ((orig
->dst
.protonum
!= IPPROTO_TCP
) && (orig
->dst
.protonum
!= IPPROTO_UDP
))
329 repl
= &ct
->tuplehash
[IP_CT_DIR_REPLY
].tuple
;
331 /* If the refresh counter of ipc entry is non zero, it indicates
332 * that the packet transfer is active and we should not delete
333 * the conntrack entry.
336 ipct
= ctf_ipc_lkup(kcih
, orig
->src
.u3
.ip
, orig
->dst
.u3
.ip
,
337 orig
->dst
.protonum
, orig
->src
.u
.tcp
.port
,
338 orig
->dst
.u
.tcp
.port
);
340 /* Postpone the deletion of ct entry if there are frames
341 * flowing in this direction.
343 if ((ipct
!= NULL
) && (ipct
->live
> 0)) {
345 ct
->timeout
.expires
= jiffies
+ ct
->expire_jiffies
;
346 add_timer(&ct
->timeout
);
350 ipct
= ctf_ipc_lkup(kcih
, repl
->src
.u3
.ip
, repl
->dst
.u3
.ip
,
351 repl
->dst
.protonum
, repl
->src
.u
.tcp
.port
,
352 repl
->dst
.u
.tcp
.port
);
354 if ((ipct
!= NULL
) && (ipct
->live
> 0)) {
356 ct
->timeout
.expires
= jiffies
+ ct
->expire_jiffies
;
357 add_timer(&ct
->timeout
);
362 /* If there are no packets over this connection for timeout period
363 * delete the entries.
365 ctf_ipc_delete(kcih
, orig
->src
.u3
.ip
, orig
->dst
.u3
.ip
, orig
->dst
.protonum
,
366 orig
->src
.u
.tcp
.port
, orig
->dst
.u
.tcp
.port
);
368 ctf_ipc_delete(kcih
, repl
->src
.u3
.ip
, repl
->dst
.u3
.ip
, repl
->dst
.protonum
,
369 repl
->src
.u
.tcp
.port
, repl
->dst
.u
.tcp
.port
);
372 printk("%s: Deleting the tuple %x %x %d %d %d\n",
373 __FUNCTION__
, orig
->src
.u3
.ip
, orig
->dst
.u3
.ip
, orig
->dst
.protonum
,
374 orig
->src
.u
.tcp
.port
, orig
->dst
.u
.tcp
.port
);
375 printk("%s: Deleting the tuple %x %x %d %d %d\n",
376 __FUNCTION__
, repl
->dst
.u3
.ip
, repl
->src
.u3
.ip
, repl
->dst
.protonum
,
377 repl
->dst
.u
.tcp
.port
, repl
->src
.u
.tcp
.port
);
384 /* Noone using conntrack by the time this called. */
385 static void nf_nat_cleanup_conntrack(struct nf_conn
*conn
)
387 struct nf_conn_nat
*nat
;
388 if (!(conn
->status
& IPS_NAT_DONE_MASK
))
391 nat
= nfct_nat(conn
);
392 write_lock_bh(&nf_nat_lock
);
393 list_del(&nat
->info
.bysource
);
394 write_unlock_bh(&nf_nat_lock
);
396 /* Detach from cone list */
397 ipt_cone_cleanup_conntrack(nat
);
400 /* Is this tuple already taken? (not by us) */
402 nf_nat_used_tuple(const struct nf_conntrack_tuple
*tuple
,
403 const struct nf_conn
*ignored_conntrack
)
405 /* Conntrack tracking doesn't keep track of outgoing tuples; only
406 incoming ones. NAT means they don't have a fixed mapping,
407 so we invert the tuple and look for the incoming reply.
409 We could keep a separate hash if this proves too slow. */
410 struct nf_conntrack_tuple reply
;
412 nf_ct_invert_tuplepr(&reply
, tuple
);
413 return nf_conntrack_tuple_taken(&reply
, ignored_conntrack
);
415 EXPORT_SYMBOL(nf_nat_used_tuple
);
417 /* If we source map this tuple so reply looks like reply_tuple, will
418 * that meet the constraints of range. */
420 in_range(const struct nf_conntrack_tuple
*tuple
,
421 const struct nf_nat_range
*range
)
423 struct nf_nat_protocol
*proto
;
426 /* If we are supposed to map IPs, then we must be in the
427 range specified, otherwise let this drag us onto a new src IP. */
428 if (range
->flags
& IP_NAT_RANGE_MAP_IPS
) {
429 if (ntohl(tuple
->src
.u3
.ip
) < ntohl(range
->min_ip
) ||
430 ntohl(tuple
->src
.u3
.ip
) > ntohl(range
->max_ip
))
435 proto
= __nf_nat_proto_find(tuple
->dst
.protonum
);
436 if (!(range
->flags
& IP_NAT_RANGE_PROTO_SPECIFIED
) ||
437 proto
->in_range(tuple
, IP_NAT_MANIP_SRC
,
438 &range
->min
, &range
->max
))
446 same_src(const struct nf_conn
*ct
,
447 const struct nf_conntrack_tuple
*tuple
)
449 const struct nf_conntrack_tuple
*t
;
451 t
= &ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
;
452 return (t
->dst
.protonum
== tuple
->dst
.protonum
&&
453 t
->src
.u3
.ip
== tuple
->src
.u3
.ip
&&
454 t
->src
.u
.all
== tuple
->src
.u
.all
);
457 /* Only called for SRC manip */
459 find_appropriate_src(const struct nf_conntrack_tuple
*tuple
,
460 struct nf_conntrack_tuple
*result
,
461 const struct nf_nat_range
*range
)
463 unsigned int h
= hash_by_src(tuple
);
464 struct nf_conn_nat
*nat
;
467 read_lock_bh(&nf_nat_lock
);
468 list_for_each_entry(nat
, &bysource
[h
], info
.bysource
) {
469 ct
= (struct nf_conn
*)((char *)nat
- offsetof(struct nf_conn
, data
));
470 if (same_src(ct
, tuple
)) {
471 /* Copy source part from reply tuple. */
472 nf_ct_invert_tuplepr(result
,
473 &ct
->tuplehash
[IP_CT_DIR_REPLY
].tuple
);
474 result
->dst
= tuple
->dst
;
476 if (in_range(result
, range
)) {
477 read_unlock_bh(&nf_nat_lock
);
482 read_unlock_bh(&nf_nat_lock
);
486 /* For [FUTURE] fragmentation handling, we want the least-used
487 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
488 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
489 1-65535, we don't do pro-rata allocation based on ports; we choose
490 the ip with the lowest src-ip/dst-ip/proto usage.
493 find_best_ips_proto(struct nf_conntrack_tuple
*tuple
,
494 const struct nf_nat_range
*range
,
495 const struct nf_conn
*ct
,
496 enum nf_nat_manip_type maniptype
)
500 u_int32_t minip
, maxip
, j
;
502 /* No IP mapping? Do nothing. */
503 if (!(range
->flags
& IP_NAT_RANGE_MAP_IPS
))
506 if (maniptype
== IP_NAT_MANIP_SRC
)
507 var_ipp
= &tuple
->src
.u3
.ip
;
509 var_ipp
= &tuple
->dst
.u3
.ip
;
511 /* Fast path: only one choice. */
512 if (range
->min_ip
== range
->max_ip
) {
513 *var_ipp
= range
->min_ip
;
517 /* Hashing source and destination IPs gives a fairly even
518 * spread in practice (if there are a small number of IPs
519 * involved, there usually aren't that many connections
520 * anyway). The consistency means that servers see the same
521 * client coming from the same IP (some Internet Banking sites
522 * like this), even across reboots. */
523 minip
= ntohl(range
->min_ip
);
524 maxip
= ntohl(range
->max_ip
);
525 j
= jhash_2words((__force u32
)tuple
->src
.u3
.ip
,
526 (__force u32
)tuple
->dst
.u3
.ip
, 0);
527 j
= ((u64
)j
* (maxip
- minip
+ 1)) >> 32;
528 *var_ipp
= htonl(minip
+ j
);
531 /* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
532 * we change the source to map into the range. For NF_IP_PRE_ROUTING
533 * and NF_IP_LOCAL_OUT, we change the destination to map into the
534 * range. It might not be possible to get a unique tuple, but we try.
535 * At worst (or if we race), we will end up with a final duplicate in
536 * __ip_conntrack_confirm and drop the packet. */
538 get_unique_tuple(struct nf_conntrack_tuple
*tuple
,
539 const struct nf_conntrack_tuple
*orig_tuple
,
540 const struct nf_nat_range
*range
,
542 enum nf_nat_manip_type maniptype
)
544 struct nf_nat_protocol
*proto
;
546 /* 1) If this srcip/proto/src-proto-part is currently mapped,
547 and that same mapping gives a unique tuple within the given
550 This is only required for source (ie. NAT/masq) mappings.
551 So far, we don't do local source mappings, so multiple
552 manips not an issue. */
553 if (maniptype
== IP_NAT_MANIP_SRC
) {
554 if (find_appropriate_src(orig_tuple
, tuple
, range
)) {
555 DEBUGP("get_unique_tuple: Found current src map\n");
556 if (!(range
->flags
& IP_NAT_RANGE_PROTO_RANDOM
))
557 if (!nf_nat_used_tuple(tuple
, ct
))
562 /* 2) Select the least-used IP/proto combination in the given
564 *tuple
= *orig_tuple
;
565 find_best_ips_proto(tuple
, range
, ct
, maniptype
);
567 /* 3) The per-protocol part of the manip is made to map into
568 the range to make a unique tuple. */
571 proto
= __nf_nat_proto_find(orig_tuple
->dst
.protonum
);
573 /* Only bother mapping if it's not already in range and unique */
574 if (!(range
->flags
& IP_NAT_RANGE_PROTO_RANDOM
)) {
575 if (range
->flags
& IP_NAT_RANGE_PROTO_SPECIFIED
) {
576 if (proto
->in_range(tuple
, maniptype
, &range
->min
,
578 (range
->min
.all
== range
->max
.all
||
579 !nf_nat_used_tuple(tuple
, ct
)))
581 } else if (!nf_nat_used_tuple(tuple
, ct
)) {
586 /* Last change: get protocol to try to obtain unique tuple. */
587 proto
->unique_tuple(tuple
, range
, maniptype
, ct
);
593 nf_nat_setup_info(struct nf_conn
*ct
,
594 const struct nf_nat_range
*range
,
595 unsigned int hooknum
)
597 struct nf_conntrack_tuple curr_tuple
, new_tuple
;
598 struct nf_conn_nat
*nat
= nfct_nat(ct
);
599 struct nf_nat_info
*info
= &nat
->info
;
600 int have_to_hash
= !(ct
->status
& IPS_NAT_DONE_MASK
);
601 enum nf_nat_manip_type maniptype
= HOOK2MANIP(hooknum
);
603 NF_CT_ASSERT(hooknum
== NF_IP_PRE_ROUTING
||
604 hooknum
== NF_IP_POST_ROUTING
||
605 hooknum
== NF_IP_LOCAL_IN
||
606 hooknum
== NF_IP_LOCAL_OUT
);
607 BUG_ON(nf_nat_initialized(ct
, maniptype
));
609 /* What we've got will look like inverse of reply. Normally
610 this is what is in the conntrack, except for prior
611 manipulations (future optimization: if num_manips == 0,
613 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
614 nf_ct_invert_tuplepr(&curr_tuple
,
615 &ct
->tuplehash
[IP_CT_DIR_REPLY
].tuple
);
617 get_unique_tuple(&new_tuple
, &curr_tuple
, range
, ct
, maniptype
);
619 if (!nf_ct_tuple_equal(&new_tuple
, &curr_tuple
)) {
620 struct nf_conntrack_tuple reply
;
622 /* Alter conntrack table so will recognize replies. */
623 nf_ct_invert_tuplepr(&reply
, &new_tuple
);
624 nf_conntrack_alter_reply(ct
, &reply
);
626 /* Non-atomic: we own this at the moment. */
627 if (maniptype
== IP_NAT_MANIP_SRC
)
628 ct
->status
|= IPS_SRC_NAT
;
630 ct
->status
|= IPS_DST_NAT
;
633 /* Place in source hash if this is the first time. */
635 unsigned int srchash
;
637 srchash
= hash_by_src(&ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
);
638 write_lock_bh(&nf_nat_lock
);
639 list_add(&info
->bysource
, &bysource
[srchash
]);
640 write_unlock_bh(&nf_nat_lock
);
644 if (maniptype
== IP_NAT_MANIP_DST
)
645 set_bit(IPS_DST_NAT_DONE_BIT
, &ct
->status
);
647 set_bit(IPS_SRC_NAT_DONE_BIT
, &ct
->status
);
651 EXPORT_SYMBOL(nf_nat_setup_info
);
653 /* Returns true if succeeded. */
655 manip_pkt(u_int16_t proto
,
657 unsigned int iphdroff
,
658 const struct nf_conntrack_tuple
*target
,
659 enum nf_nat_manip_type maniptype
)
662 struct nf_nat_protocol
*p
;
664 if (!skb_make_writable(skb
, iphdroff
+ sizeof(*iph
)))
667 iph
= (void *)skb
->data
+ iphdroff
;
669 /* Manipulate protcol part. */
671 /* rcu_read_lock()ed by nf_hook_slow */
672 p
= __nf_nat_proto_find(proto
);
673 if (!p
->manip_pkt(skb
, iphdroff
, target
, maniptype
))
676 iph
= (void *)skb
->data
+ iphdroff
;
678 if (maniptype
== IP_NAT_MANIP_SRC
) {
679 nf_csum_replace4(&iph
->check
, iph
->saddr
, target
->src
.u3
.ip
);
680 iph
->saddr
= target
->src
.u3
.ip
;
682 nf_csum_replace4(&iph
->check
, iph
->daddr
, target
->dst
.u3
.ip
);
683 iph
->daddr
= target
->dst
.u3
.ip
;
688 #if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
689 #ifndef CONFIG_BCM_NAT_MODULE
692 int bcm_manip_pkt(u_int16_t proto
,
694 unsigned int iphdroff
,
695 const struct nf_conntrack_tuple
*target
,
696 enum nf_nat_manip_type maniptype
)
698 return manip_pkt(proto
, skb
, iphdroff
, target
, maniptype
);
700 #ifdef CONFIG_BCM_NAT_MODULE
701 EXPORT_SYMBOL(bcm_manip_pkt
);
705 /* Do packet manipulations according to nf_nat_setup_info. */
706 unsigned int nf_nat_packet(struct nf_conn
*ct
,
707 enum ip_conntrack_info ctinfo
,
708 unsigned int hooknum
,
711 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
712 unsigned long statusbit
;
713 enum nf_nat_manip_type mtype
= HOOK2MANIP(hooknum
);
715 if (mtype
== IP_NAT_MANIP_SRC
)
716 statusbit
= IPS_SRC_NAT
;
718 statusbit
= IPS_DST_NAT
;
720 /* Invert if this is reply dir. */
721 if (dir
== IP_CT_DIR_REPLY
)
722 statusbit
^= IPS_NAT_MASK
;
724 /* Non-atomic: these bits don't change. */
725 if (ct
->status
& statusbit
) {
726 struct nf_conntrack_tuple target
;
728 /* We are aiming to look like inverse of other direction. */
729 nf_ct_invert_tuplepr(&target
, &ct
->tuplehash
[!dir
].tuple
);
731 ip_conntrack_ipct_add(skb
, hooknum
, ct
, ctinfo
, &target
);
733 if (!manip_pkt(target
.dst
.protonum
, skb
, 0, &target
, mtype
))
742 EXPORT_SYMBOL_GPL(nf_nat_packet
);
744 /* Dir is direction ICMP is coming from (opposite to packet it contains) */
745 int nf_nat_icmp_reply_translation(struct nf_conn
*ct
,
746 enum ip_conntrack_info ctinfo
,
747 unsigned int hooknum
,
754 struct nf_conntrack_l4proto
*l4proto
;
755 struct nf_conntrack_tuple inner
, target
;
756 int hdrlen
= ip_hdrlen(skb
);
757 enum ip_conntrack_dir dir
= CTINFO2DIR(ctinfo
);
758 unsigned long statusbit
;
759 enum nf_nat_manip_type manip
= HOOK2MANIP(hooknum
);
761 if (!skb_make_writable(skb
, hdrlen
+ sizeof(*inside
)))
764 inside
= (void *)skb
->data
+ hdrlen
;
766 /* We're actually going to mangle it beyond trivial checksum
767 adjustment, so make sure the current checksum is correct. */
768 if (nf_ip_checksum(skb
, hooknum
, hdrlen
, 0))
771 /* Must be RELATED */
772 NF_CT_ASSERT(skb
->nfctinfo
== IP_CT_RELATED
||
773 skb
->nfctinfo
== IP_CT_RELATED
+IP_CT_IS_REPLY
);
775 /* Redirects on non-null nats must be dropped, else they'll
776 start talking to each other without our translation, and be
778 if (inside
->icmp
.type
== ICMP_REDIRECT
) {
779 /* If NAT isn't finished, assume it and drop. */
780 if ((ct
->status
& IPS_NAT_DONE_MASK
) != IPS_NAT_DONE_MASK
)
783 if (ct
->status
& IPS_NAT_MASK
)
787 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
788 skb
, manip
, dir
== IP_CT_DIR_ORIGINAL
? "ORIG" : "REPLY");
790 /* rcu_read_lock()ed by nf_hook_slow */
791 l4proto
= __nf_ct_l4proto_find(PF_INET
, inside
->ip
.protocol
);
793 if (!nf_ct_get_tuple(skb
, hdrlen
+ sizeof(struct icmphdr
),
795 sizeof(struct icmphdr
) + inside
->ip
.ihl
* 4),
796 (u_int16_t
)AF_INET
, inside
->ip
.protocol
,
797 &inner
, l3proto
, l4proto
))
800 /* Change inner back to look like incoming packet. We do the
801 opposite manip on this hook to normal, because it might not
802 pass all hooks (locally-generated ICMP). Consider incoming
803 packet: PREROUTING (DST manip), routing produces ICMP, goes
804 through POSTROUTING (which must correct the DST manip). */
805 if (!manip_pkt(inside
->ip
.protocol
, skb
, hdrlen
+ sizeof(inside
->icmp
),
806 &ct
->tuplehash
[!dir
].tuple
, !manip
))
809 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
810 /* Reloading "inside" here since manip_pkt inner. */
811 inside
= (void *)skb
->data
+ hdrlen
;
812 inside
->icmp
.checksum
= 0;
813 inside
->icmp
.checksum
=
814 csum_fold(skb_checksum(skb
, hdrlen
,
815 skb
->len
- hdrlen
, 0));
818 /* Change outer to look the reply to an incoming packet
819 * (proto 0 means don't invert per-proto part). */
820 if (manip
== IP_NAT_MANIP_SRC
)
821 statusbit
= IPS_SRC_NAT
;
823 statusbit
= IPS_DST_NAT
;
825 /* Invert if this is reply dir. */
826 if (dir
== IP_CT_DIR_REPLY
)
827 statusbit
^= IPS_NAT_MASK
;
829 if (ct
->status
& statusbit
) {
830 nf_ct_invert_tuplepr(&target
, &ct
->tuplehash
[!dir
].tuple
);
831 if (!manip_pkt(0, skb
, 0, &target
, manip
))
837 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation
);
839 /* Protocol registration. */
840 int nf_nat_protocol_register(struct nf_nat_protocol
*proto
)
844 write_lock_bh(&nf_nat_lock
);
845 if (nf_nat_protos
[proto
->protonum
] != &nf_nat_unknown_protocol
) {
849 rcu_assign_pointer(nf_nat_protos
[proto
->protonum
], proto
);
851 write_unlock_bh(&nf_nat_lock
);
854 EXPORT_SYMBOL(nf_nat_protocol_register
);
856 /* Noone stores the protocol anywhere; simply delete it. */
857 void nf_nat_protocol_unregister(struct nf_nat_protocol
*proto
)
859 write_lock_bh(&nf_nat_lock
);
860 rcu_assign_pointer(nf_nat_protos
[proto
->protonum
],
861 &nf_nat_unknown_protocol
);
862 write_unlock_bh(&nf_nat_lock
);
865 EXPORT_SYMBOL(nf_nat_protocol_unregister
);
867 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
869 nf_nat_port_range_to_nfattr(struct sk_buff
*skb
,
870 const struct nf_nat_range
*range
)
872 NFA_PUT(skb
, CTA_PROTONAT_PORT_MIN
, sizeof(__be16
),
873 &range
->min
.tcp
.port
);
874 NFA_PUT(skb
, CTA_PROTONAT_PORT_MAX
, sizeof(__be16
),
875 &range
->max
.tcp
.port
);
882 EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range
);
885 nf_nat_port_nfattr_to_range(struct nfattr
*tb
[], struct nf_nat_range
*range
)
889 /* we have to return whether we actually parsed something or not */
891 if (tb
[CTA_PROTONAT_PORT_MIN
-1]) {
893 range
->min
.tcp
.port
=
894 *(__be16
*)NFA_DATA(tb
[CTA_PROTONAT_PORT_MIN
-1]);
897 if (!tb
[CTA_PROTONAT_PORT_MAX
-1]) {
899 range
->max
.tcp
.port
= range
->min
.tcp
.port
;
902 range
->max
.tcp
.port
=
903 *(__be16
*)NFA_DATA(tb
[CTA_PROTONAT_PORT_MAX
-1]);
908 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr
);
911 static int __init
nf_nat_init(void)
915 /* Leave them the same for the moment. */
916 nf_nat_htable_size
= nf_conntrack_htable_size
;
918 /* One vmalloc for both hash tables */
919 bysource
= vmalloc(sizeof(struct list_head
) * nf_nat_htable_size
);
923 /* Sew in builtin protocols. */
924 write_lock_bh(&nf_nat_lock
);
925 for (i
= 0; i
< MAX_IP_NAT_PROTO
; i
++)
926 rcu_assign_pointer(nf_nat_protos
[i
], &nf_nat_unknown_protocol
);
927 rcu_assign_pointer(nf_nat_protos
[IPPROTO_TCP
], &nf_nat_protocol_tcp
);
928 rcu_assign_pointer(nf_nat_protos
[IPPROTO_UDP
], &nf_nat_protocol_udp
);
929 rcu_assign_pointer(nf_nat_protos
[IPPROTO_ICMP
], &nf_nat_protocol_icmp
);
930 write_unlock_bh(&nf_nat_lock
);
932 for (i
= 0; i
< nf_nat_htable_size
; i
++) {
933 INIT_LIST_HEAD(&bysource
[i
]);
936 /* FIXME: Man, this is a hack. <SIGH> */
937 NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed
) == NULL
);
938 rcu_assign_pointer(nf_conntrack_destroyed
, nf_nat_cleanup_conntrack
);
940 NF_CT_ASSERT(rcu_dereference(nf_ct_nat_offset
) == NULL
);
941 rcu_assign_pointer(nf_ct_nat_offset
, nf_nat_get_offset
);
943 /* Initialize fake conntrack so that NAT will skip it */
944 nf_conntrack_untracked
.status
|= IPS_NAT_DONE_MASK
;
946 l3proto
= nf_ct_l3proto_find_get((u_int16_t
)AF_INET
);
950 /* Clear NAT section of all conntracks, in case we're loaded again. */
951 static int clean_nat(struct nf_conn
*i
, void *data
)
953 struct nf_conn_nat
*nat
= nfct_nat(i
);
957 memset(nat
, 0, sizeof(*nat
));
958 i
->status
&= ~(IPS_NAT_MASK
| IPS_NAT_DONE_MASK
| IPS_SEQ_ADJUST
);
962 static void __exit
nf_nat_cleanup(void)
964 nf_ct_iterate_cleanup(&clean_nat
, NULL
);
965 rcu_assign_pointer(nf_conntrack_destroyed
, NULL
);
966 rcu_assign_pointer(nf_ct_nat_offset
, NULL
);
969 nf_ct_l3proto_put(l3proto
);
972 MODULE_LICENSE("GPL");
974 module_init(nf_nat_init
);
975 module_exit(nf_nat_cleanup
);