Merge Broadcom SDK and wireless driver 5.10.147.0 update
[tomato.git] release/src-rt/linux/linux-2.6/net/ipv4/netfilter/nf_nat_core.c
/* NAT for netfilter; shared with compatibility layer. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>  /* For tcp_prot in getorigdst */
#include <linux/icmp.h>
#include <linux/udp.h>
#include <linux/jhash.h>

#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_protocol.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <linux/netfilter_ipv4/ipt_cone.h>
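/*
 * The HNDCTF block below hooks conntrack/NAT into the Broadcom CTF
 * (Cut-Through Forwarding) fast path shipped with the Broadcom SDK:
 * established TCP/UDP flows are mirrored into a ctf_ipc_t table so that
 * subsequent packets of the flow can be forwarded without traversing the
 * full netfilter stack.  See ctf/hndctf.h in the Broadcom SDK for the
 * authoritative interface.
 */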
#ifdef HNDCTF
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <typedefs.h>
#include <osl.h>
#include <ctf/hndctf.h>

#define NFC_CTF_ENABLED	(1 << 31)
extern ctf_t *kcih;
#endif /* HNDCTF */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
static DEFINE_RWLOCK(nf_nat_lock);

static struct nf_conntrack_l3proto *l3proto = NULL;

/* Calculated at init based on memory size */
static unsigned int nf_nat_htable_size;

static struct list_head *bysource;

#define MAX_IP_NAT_PROTO 256
static struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO];
static inline struct nf_nat_protocol *
__nf_nat_proto_find(u_int8_t protonum)
{
	return rcu_dereference(nf_nat_protos[protonum]);
}
struct nf_nat_protocol *
nf_nat_proto_find_get(u_int8_t protonum)
{
	struct nf_nat_protocol *p;

	rcu_read_lock();
	p = __nf_nat_proto_find(protonum);
	if (!try_module_get(p->me))
		p = &nf_nat_unknown_protocol;
	rcu_read_unlock();

	return p;
}
EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
void
nf_nat_proto_put(struct nf_nat_protocol *p)
{
	module_put(p->me);
}
EXPORT_SYMBOL_GPL(nf_nat_proto_put);
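/*
 * Note on the bucket computation below: ((u64)hash * nf_nat_htable_size) >> 32
 * scales the 32-bit jhash result into [0, nf_nat_htable_size) using a multiply
 * and a shift, avoiding a per-packet division.
 */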
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	/* Original src, to ensure we map it consistently if poss. */
	hash = jhash_3words((__force u32)tuple->src.u3.ip,
			    (__force u32)tuple->src.u.all,
			    tuple->dst.protonum, 0);
	return ((u64)hash * nf_nat_htable_size) >> 32;
}
#ifdef HNDCTF
bool
ip_conntrack_is_ipc_allowed(struct sk_buff *skb, u_int32_t hooknum)
{
	struct net_device *dev;

	if (!CTF_ENAB(kcih))
		return FALSE;

	if (hooknum == NF_IP_PRE_ROUTING) {
		dev = skb->dev;
		if (dev->priv_flags & IFF_802_1Q_VLAN)
			dev = VLAN_DEV_INFO(dev)->real_dev;

		/* Add ipc entry if packet is received on ctf enabled interface
		 * and the packet is not a defrag'd one.
		 */
		if (ctf_isenabled(kcih, dev) && (skb->len <= dev->mtu))
			skb->nfcache |= NFC_CTF_ENABLED;
	}

	/* Add the cache entries only if the device has registered and
	 * enabled ctf.
	 */
	if (skb->nfcache & NFC_CTF_ENABLED)
		return TRUE;

	return FALSE;
}
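/*
 * Build a CTF ipc entry for an established, forwarded TCP/UDP flow and hand
 * it to the CTF engine so that later packets of the flow can bypass the
 * netfilter path.  The NAT manip tuple, if any, is recorded in the entry so
 * CTF can rewrite addresses and ports itself.
 */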
void
ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum,
		      struct nf_conn *ct, enum ip_conntrack_info ci,
		      struct nf_conntrack_tuple *manip)
{
	ctf_ipc_t ipc_entry;
	struct hh_cache *hh;
	struct ethhdr *eth;
	struct iphdr *iph;
	struct tcphdr *tcph;
	u_int32_t daddr;
	struct rtable *rt;
	struct nf_conn_help *help;

	if ((skb == NULL) || (ct == NULL))
		return;

	/* We only add cache entries for non-helper connections and at
	 * pre or post routing hooks.
	 */
	help = nfct_help(ct);
	if ((help && help->helper) ||
	    ((hooknum != NF_IP_PRE_ROUTING) && (hooknum != NF_IP_POST_ROUTING)))
		return;

	/* Add ipc entries for connections in established state only */
	if ((ci != IP_CT_ESTABLISHED) && (ci != (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)))
		return;

	iph = ip_hdr(skb);
	if (((iph->protocol != IPPROTO_TCP) ||
	     ((ct->proto.tcp.state >= TCP_CONNTRACK_FIN_WAIT) &&
	      (ct->proto.tcp.state <= TCP_CONNTRACK_LAST_ACK))) &&
	    (iph->protocol != IPPROTO_UDP))
		return;

	/* Do route lookup for alias address if we are doing DNAT in this
	 * direction.
	 */
	daddr = iph->daddr;
	if ((manip != NULL) && (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST))
		daddr = manip->dst.u3.ip;

	/* Find the destination interface */
	if (skb->dst == NULL)
		ip_route_input(skb, daddr, iph->saddr, iph->tos, skb->dev);

	/* Ensure the packet belongs to a forwarding connection and it is
	 * destined to a unicast address.
	 */
	rt = (struct rtable *)skb->dst;
	if ((rt == NULL) || (rt->u.dst.input != ip_forward) ||
	    (rt->rt_type != RTN_UNICAST) || (rt->u.dst.neighbour == NULL) ||
	    ((rt->u.dst.neighbour->nud_state &
	     (NUD_PERMANENT|NUD_REACHABLE|NUD_STALE|NUD_DELAY|NUD_PROBE)) == 0))
		return;

	skb->dev = skb->dst->dev;

	memset(&ipc_entry, 0, sizeof(ipc_entry));

	hh = skb->dst->hh;
	if (hh != NULL) {
		eth = (struct ethhdr *)(((unsigned char *)hh->hh_data) + 2);
		memcpy(ipc_entry.dhost.octet, eth->h_dest, ETH_ALEN);
		memcpy(ipc_entry.shost.octet, eth->h_source, ETH_ALEN);
	} else {
		memcpy(ipc_entry.dhost.octet, rt->u.dst.neighbour->ha, ETH_ALEN);
		memcpy(ipc_entry.shost.octet, skb->dst->dev->dev_addr, ETH_ALEN);
	}

	tcph = ((struct tcphdr *)(((__u8 *)iph) + (iph->ihl << 2)));

	/* Add ctf ipc entry for this direction */
	ipc_entry.tuple.sip = iph->saddr;
	ipc_entry.tuple.dip = iph->daddr;
	ipc_entry.tuple.proto = iph->protocol;
	ipc_entry.tuple.sp = tcph->source;
	ipc_entry.tuple.dp = tcph->dest;

	ipc_entry.live = 0;
	ipc_entry.hits = 0;
	ipc_entry.next = NULL;

	/* For vlan interfaces fill the vlan id and the tag/untag actions */
	if (skb->dst->dev->priv_flags & IFF_802_1Q_VLAN) {
		ipc_entry.txif = (void *)(VLAN_DEV_INFO(skb->dst->dev)->real_dev);
		ipc_entry.vid = VLAN_DEV_INFO(skb->dst->dev)->vlan_id;
		ipc_entry.action = ((VLAN_DEV_INFO(skb->dst->dev)->flags & 1) ?
		                    CTF_ACTION_TAG : CTF_ACTION_UNTAG);
	} else {
		ipc_entry.txif = skb->dst->dev;
		ipc_entry.action = CTF_ACTION_UNTAG;
	}

	/* Update the manip ip and port */
	if (manip != NULL) {
		if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
			ipc_entry.nat[0].ip = manip->src.u3.ip;
			ipc_entry.nat[0].port = manip->src.u.tcp.port;
			ipc_entry.action |= CTF_ACTION_SNAT;
		} else {
			ipc_entry.nat[1].ip = manip->dst.u3.ip;
			ipc_entry.nat[1].port = manip->dst.u.tcp.port;
			ipc_entry.action |= CTF_ACTION_DNAT;
		}
	}

#ifdef DEBUG
	printk("%s: Adding ipc entry for %x %x %d %d %d\n", __FUNCTION__,
	       ipc_entry.tuple.sip, ipc_entry.tuple.dip,
	       ipc_entry.tuple.proto, ipc_entry.tuple.sp,
	       ipc_entry.tuple.dp);
	printk("sa %02x:%02x:%02x:%02x:%02x:%02x\n",
	       ipc_entry.shost.octet[0], ipc_entry.shost.octet[1],
	       ipc_entry.shost.octet[2], ipc_entry.shost.octet[3],
	       ipc_entry.shost.octet[4], ipc_entry.shost.octet[5]);
	printk("da %02x:%02x:%02x:%02x:%02x:%02x\n",
	       ipc_entry.dhost.octet[0], ipc_entry.dhost.octet[1],
	       ipc_entry.dhost.octet[2], ipc_entry.dhost.octet[3],
	       ipc_entry.dhost.octet[4], ipc_entry.dhost.octet[5]);
	printk("vid: %d action %x\n", ipc_entry.vid, ipc_entry.action);
	if (manip != NULL)
		printk("manip_ip: %x manip_port %x\n",
		       ipc_entry.nat[HOOK2MANIP(hooknum)].ip,
		       ipc_entry.nat[HOOK2MANIP(hooknum)].port);
	printk("txif: %s\n", ((struct net_device *)ipc_entry.txif)->name);
#endif

	ctf_ipc_add(kcih, &ipc_entry);

	/* Update the attributes flag to indicate a CTF conn */
	ct->ctf_flags |= CTF_FLAGS_CACHED;
}
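/*
 * Called when a cached conntrack is being torn down or has timed out
 * (ct_timeout is non-zero on timer expiry).  If the corresponding CTF entry
 * saw traffic since the last check (ipct->live > 0), the conntrack timer is
 * re-armed and deletion is postponed by returning -1; otherwise the CTF
 * entries for both directions are removed and 0 is returned.
 */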
int
ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout)
{
	ctf_ipc_t *ipct;
	struct nf_conntrack_tuple *orig, *repl;

	if (!CTF_ENAB(kcih))
		return (0);

	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	if ((orig->dst.protonum != IPPROTO_TCP) && (orig->dst.protonum != IPPROTO_UDP))
		return (0);

	repl = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	/* If the refresh counter of ipc entry is non zero, it indicates
	 * that the packet transfer is active and we should not delete
	 * the conntrack entry.
	 */
	if (ct_timeout) {
		ipct = ctf_ipc_lkup(kcih, orig->src.u3.ip, orig->dst.u3.ip,
		                    orig->dst.protonum, orig->src.u.tcp.port,
		                    orig->dst.u.tcp.port);

		/* Postpone the deletion of ct entry if there are frames
		 * flowing in this direction.
		 */
		if ((ipct != NULL) && (ipct->live > 0)) {
			ipct->live = 0;
			ct->timeout.expires = jiffies + ct->expire_jiffies;
			add_timer(&ct->timeout);
			return (-1);
		}

		ipct = ctf_ipc_lkup(kcih, repl->src.u3.ip, repl->dst.u3.ip,
		                    repl->dst.protonum, repl->src.u.tcp.port,
		                    repl->dst.u.tcp.port);

		if ((ipct != NULL) && (ipct->live > 0)) {
			ipct->live = 0;
			ct->timeout.expires = jiffies + ct->expire_jiffies;
			add_timer(&ct->timeout);
			return (-1);
		}
	}

	/* If there are no packets over this connection for timeout period
	 * delete the entries.
	 */
	ctf_ipc_delete(kcih, orig->src.u3.ip, orig->dst.u3.ip, orig->dst.protonum,
	               orig->src.u.tcp.port, orig->dst.u.tcp.port);

	ctf_ipc_delete(kcih, repl->src.u3.ip, repl->dst.u3.ip, repl->dst.protonum,
	               repl->src.u.tcp.port, repl->dst.u.tcp.port);

#ifdef DEBUG
	printk("%s: Deleting the tuple %x %x %d %d %d\n",
	       __FUNCTION__, orig->src.u3.ip, orig->dst.u3.ip, orig->dst.protonum,
	       orig->src.u.tcp.port, orig->dst.u.tcp.port);
	printk("%s: Deleting the tuple %x %x %d %d %d\n",
	       __FUNCTION__, repl->dst.u3.ip, repl->src.u3.ip, repl->dst.protonum,
	       repl->dst.u.tcp.port, repl->src.u.tcp.port);
#endif

	return (0);
}
#endif /* HNDCTF */
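/*
 * The cleanup hook below is wired up via nf_conntrack_destroyed in
 * nf_nat_init(), so every conntrack that carried a NAT binding drops out of
 * the bysource hash (and the cone-NAT list) exactly once, when it is
 * destroyed.
 */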
/* No one is using the conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *conn)
{
	struct nf_conn_nat *nat;
	if (!(conn->status & IPS_NAT_DONE_MASK))
		return;

	nat = nfct_nat(conn);
	write_lock_bh(&nf_nat_lock);
	list_del(&nat->info.bysource);
	write_unlock_bh(&nf_nat_lock);

	/* Detach from cone list */
	ipt_cone_cleanup_conntrack(nat);
}
/* Is this tuple already taken? (not by us) */
int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
		  const struct nf_conn *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	   incoming ones.  NAT means they don't have a fixed mapping,
	   so we invert the tuple and look for the incoming reply.

	   We could keep a separate hash if this proves too slow. */
	struct nf_conntrack_tuple reply;

	nf_ct_invert_tuplepr(&reply, tuple);
	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
EXPORT_SYMBOL(nf_nat_used_tuple);
/* If we source map this tuple so reply looks like reply_tuple, will
 * that meet the constraints of range. */
static int
in_range(const struct nf_conntrack_tuple *tuple,
	 const struct nf_nat_range *range)
{
	struct nf_nat_protocol *proto;
	int ret = 0;

	/* If we are supposed to map IPs, then we must be in the
	   range specified, otherwise let this drag us onto a new src IP. */
	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
			return 0;
	}

	rcu_read_lock();
	proto = __nf_nat_proto_find(tuple->dst.protonum);
	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
	    proto->in_range(tuple, IP_NAT_MANIP_SRC,
			    &range->min, &range->max))
		ret = 1;
	rcu_read_unlock();

	return ret;
}
static inline int
same_src(const struct nf_conn *ct,
	 const struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_tuple *t;

	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	return (t->dst.protonum == tuple->dst.protonum &&
		t->src.u3.ip == tuple->src.u3.ip &&
		t->src.u.all == tuple->src.u.all);
}
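/*
 * find_appropriate_src() is what keeps source NAT consistent: if another
 * connection from the same internal address/port already has a mapping, we
 * try to reuse it (subject to the requested range), so a host keeps
 * presenting the same external identity.
 */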
/* Only called for SRC manip */
static int
find_appropriate_src(const struct nf_conntrack_tuple *tuple,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range *range)
{
	unsigned int h = hash_by_src(tuple);
	struct nf_conn_nat *nat;
	struct nf_conn *ct;

	read_lock_bh(&nf_nat_lock);
	list_for_each_entry(nat, &bysource[h], info.bysource) {
		ct = (struct nf_conn *)((char *)nat - offsetof(struct nf_conn, data));
		if (same_src(ct, tuple)) {
			/* Copy source part from reply tuple. */
			nf_ct_invert_tuplepr(result,
					     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(result, range)) {
				read_unlock_bh(&nf_nat_lock);
				return 1;
			}
		}
	}
	read_unlock_bh(&nf_nat_lock);
	return 0;
}
/* For [FUTURE] fragmentation handling, we want the least-used
   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
   1-65535, we don't do pro-rata allocation based on ports; we choose
   the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
find_best_ips_proto(struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	__be32 *var_ipp;
	/* Host order */
	u_int32_t minip, maxip, j;

	/* No IP mapping? Do nothing. */
	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
		return;

	if (maniptype == IP_NAT_MANIP_SRC)
		var_ipp = &tuple->src.u3.ip;
	else
		var_ipp = &tuple->dst.u3.ip;

	/* Fast path: only one choice. */
	if (range->min_ip == range->max_ip) {
		*var_ipp = range->min_ip;
		return;
	}

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots. */
	minip = ntohl(range->min_ip);
	maxip = ntohl(range->max_ip);
	j = jhash_2words((__force u32)tuple->src.u3.ip,
			 (__force u32)tuple->dst.u3.ip, 0);
	j = ((u64)j * (maxip - minip + 1)) >> 32;
	*var_ipp = htonl(minip + j);
}
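/*
 * Worked example for the scaling above: with min_ip 10.0.0.1 and max_ip
 * 10.0.0.4 the span is 4 addresses, so j is mapped into 0..3 and a given
 * src/dst pair always lands on the same one of the four candidate addresses.
 */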
/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
 * we change the source to map into the range.  For NF_IP_PRE_ROUTING
 * and NF_IP_LOCAL_OUT, we change the destination to map into the
 * range.  It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	struct nf_nat_protocol *proto;

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	   and that same mapping gives a unique tuple within the given
	   range, use that.

	   This is only required for source (ie. NAT/masq) mappings.
	   So far, we don't do local source mappings, so multiple
	   manips not an issue. */
	if (maniptype == IP_NAT_MANIP_SRC) {
		if (find_appropriate_src(orig_tuple, tuple, range)) {
			DEBUGP("get_unique_tuple: Found current src map\n");
			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
				if (!nf_nat_used_tuple(tuple, ct))
					return;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given
	   range. */
	*tuple = *orig_tuple;
	find_best_ips_proto(tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	   the range to make a unique tuple. */

	rcu_read_lock();
	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);

	/* Change protocol info to have some randomization */
	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
		proto->unique_tuple(tuple, range, maniptype, ct);
		goto out;
	}

	/* Only bother mapping if it's not already in range and unique */
	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
	    !nf_nat_used_tuple(tuple, ct))
		goto out;

	/* Last chance: get protocol to try to obtain unique tuple. */
	proto->unique_tuple(tuple, range, maniptype, ct);
out:
	rcu_read_unlock();
}
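/*
 * nf_nat_setup_info() is invoked once per manip type when a new connection
 * first needs a NAT binding (e.g. from the SNAT/DNAT/MASQUERADE rule
 * targets): it picks a unique tuple via get_unique_tuple(), rewrites the
 * conntrack's expected reply, sets the IPS_*_NAT status bits and, the first
 * time through, links the conntrack into the bysource hash consulted by
 * find_appropriate_src().
 */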
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  unsigned int hooknum)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;
	struct nf_conn_nat *nat = nfct_nat(ct);
	struct nf_nat_info *info = &nat->info;
	int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
		     hooknum == NF_IP_POST_ROUTING ||
		     hooknum == NF_IP_LOCAL_IN ||
		     hooknum == NF_IP_LOCAL_OUT);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply.  Normally
	   this is what is in the conntrack, except for prior
	   manipulations (future optimization: if num_manips == 0,
	   orig_tp =
	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == IP_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;
	}

	/* Place in source hash if this is the first time. */
	if (have_to_hash) {
		unsigned int srchash;

		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		write_lock_bh(&nf_nat_lock);
		list_add(&info->bysource, &bysource[srchash]);
		write_unlock_bh(&nf_nat_lock);
	}

	/* It's done. */
	if (maniptype == IP_NAT_MANIP_DST)
		set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
	else
		set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);
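/*
 * manip_pkt() below performs the actual header rewrite for one packet: the
 * layer-4 handler adjusts its part (ports, ICMP id and its own checksum),
 * then the IPv4 address is replaced and the IP header checksum is fixed up
 * incrementally with nf_csum_replace4() rather than being recomputed.
 */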
/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
	  struct sk_buff **pskb,
	  unsigned int iphdroff,
	  const struct nf_conntrack_tuple *target,
	  enum nf_nat_manip_type maniptype)
{
	struct iphdr *iph;
	struct nf_nat_protocol *p;

	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
		return 0;

	iph = (void *)(*pskb)->data + iphdroff;

	/* Manipulate protocol part. */

	/* rcu_read_lock()ed by nf_hook_slow */
	p = __nf_nat_proto_find(proto);
	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
		return 0;

	iph = (void *)(*pskb)->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
		iph->saddr = target->src.u3.ip;
	} else {
		nf_csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
		iph->daddr = target->dst.u3.ip;
	}
	return 1;
}
#if defined(CONFIG_BCM_NAT) || defined(CONFIG_BCM_NAT_MODULE)
int
bcm_manip_pkt(u_int16_t proto,
	      struct sk_buff **pskb,
	      unsigned int iphdroff,
	      const struct nf_conntrack_tuple *target,
	      enum nf_nat_manip_type maniptype)
{
	return manip_pkt(proto, pskb, iphdroff, target, maniptype);
}
#endif
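/*
 * bcm_manip_pkt() simply re-exposes the static manip_pkt() so that the
 * Broadcom NAT code selected by CONFIG_BCM_NAT / CONFIG_BCM_NAT_MODULE can
 * call the same rewrite routine instead of duplicating it.
 */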
/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff **pskb)
{
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
#ifdef HNDCTF
	bool enabled = ip_conntrack_is_ipc_allowed(*pskb, hooknum);
#endif /* HNDCTF */

	if (mtype == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
#ifdef HNDCTF
		if (enabled)
			ip_conntrack_ipct_add(*pskb, hooknum, ct, ctinfo, &target);
#endif /* HNDCTF */
		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
			return NF_DROP;
	} else {
#ifdef HNDCTF
		if (enabled)
			ip_conntrack_ipct_add(*pskb, hooknum, ct, ctinfo, NULL);
#endif /* HNDCTF */
	}

	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
int nf_nat_icmp_reply_translation(struct nf_conn *ct,
				  enum ip_conntrack_info ctinfo,
				  unsigned int hooknum,
				  struct sk_buff **pskb)
{
	struct {
		struct icmphdr icmp;
		struct iphdr ip;
	} *inside;
	struct nf_conntrack_l4proto *l4proto;
	struct nf_conntrack_tuple inner, target;
	int hdrlen = ip_hdrlen(*pskb);
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);

	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
		return 0;

	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);

	/* We're actually going to mangle it beyond trivial checksum
	   adjustment, so make sure the current checksum is correct. */
	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
		return 0;

	/* Must be RELATED */
	NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);

	/* Redirects on non-null nats must be dropped, else they'll
	   start talking to each other without our translation, and be
	   confused... --RR */
	if (inside->icmp.type == ICMP_REDIRECT) {
		/* If NAT isn't finished, assume it and drop. */
		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
			return 0;

		if (ct->status & IPS_NAT_MASK)
			return 0;
	}

	DEBUGP("icmp_reply_translation: translating error %p manip %u dir %s\n",
	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");

	/* rcu_read_lock()ed by nf_hook_slow */
	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);

	if (!nf_ct_get_tuple(*pskb,
			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
			     (ip_hdrlen(*pskb) +
			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
			     (u_int16_t)AF_INET,
			     inside->ip.protocol,
			     &inner, l3proto, l4proto))
		return 0;

	/* Change inner back to look like incoming packet.  We do the
	   opposite manip on this hook to normal, because it might not
	   pass all hooks (locally-generated ICMP).  Consider incoming
	   packet: PREROUTING (DST manip), routing produces ICMP, goes
	   through POSTROUTING (which must correct the DST manip). */
	if (!manip_pkt(inside->ip.protocol, pskb,
		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
		       &ct->tuplehash[!dir].tuple,
		       !manip))
		return 0;

	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		/* Reloading "inside" here since manip_pkt inner. */
		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
		inside->icmp.checksum = 0;
		inside->icmp.checksum =
			csum_fold(skb_checksum(*pskb, hdrlen,
					       (*pskb)->len - hdrlen, 0));
	}

	/* Change outer to look like the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */
	if (manip == IP_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	if (ct->status & statusbit) {
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
		if (!manip_pkt(0, pskb, 0, &target, manip))
			return 0;
	}

	return 1;
}
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
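/*
 * Per-protocol NAT handlers register themselves below.  A rough usage sketch
 * (the built-in TCP/UDP/ICMP handlers are wired up the same way from
 * nf_nat_init() further down): a module fills in a struct nf_nat_protocol
 * with its in_range()/unique_tuple()/manip_pkt() callbacks, calls
 * nf_nat_protocol_register() at init and nf_nat_protocol_unregister() on exit.
 */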
/* Protocol registration. */
int nf_nat_protocol_register(struct nf_nat_protocol *proto)
{
	int ret = 0;

	write_lock_bh(&nf_nat_lock);
	if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
		ret = -EBUSY;
		goto out;
	}
	rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
 out:
	write_unlock_bh(&nf_nat_lock);
	return ret;
}
EXPORT_SYMBOL(nf_nat_protocol_register);
/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_protocol_unregister(struct nf_nat_protocol *proto)
{
	write_lock_bh(&nf_nat_lock);
	rcu_assign_pointer(nf_nat_protos[proto->protonum],
			   &nf_nat_unknown_protocol);
	write_unlock_bh(&nf_nat_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
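/*
 * The synchronize_rcu() above matters: __nf_nat_proto_find() readers run
 * under rcu_read_lock() only, so unregistration must wait for all of them to
 * finish before the caller may free or unload the protocol handler.
 */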
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
int
nf_nat_port_range_to_nfattr(struct sk_buff *skb,
			    const struct nf_nat_range *range)
{
	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
		&range->min.tcp.port);
	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
		&range->max.tcp.port);

	return 0;

nfattr_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);

int
nf_nat_port_nfattr_to_range(struct nfattr *tb[], struct nf_nat_range *range)
{
	int ret = 0;

	/* we have to return whether we actually parsed something or not */

	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
		ret = 1;
		range->min.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
	}

	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
		if (ret)
			range->max.tcp.port = range->min.tcp.port;
	} else {
		ret = 1;
		range->max.tcp.port =
			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_port_nfattr_to_range);
#endif
static int __init nf_nat_init(void)
{
	size_t i;

	/* Leave them the same for the moment. */
	nf_nat_htable_size = nf_conntrack_htable_size;

	/* One vmalloc for the bysource hash table */
	bysource = vmalloc(sizeof(struct list_head) * nf_nat_htable_size);
	if (!bysource)
		return -ENOMEM;

	/* Sew in builtin protocols. */
	write_lock_bh(&nf_nat_lock);
	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
		rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
	rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
	write_unlock_bh(&nf_nat_lock);

	for (i = 0; i < nf_nat_htable_size; i++) {
		INIT_LIST_HEAD(&bysource[i]);
	}

	/* FIXME: Man, this is a hack. <SIGH> */
	NF_CT_ASSERT(rcu_dereference(nf_conntrack_destroyed) == NULL);
	rcu_assign_pointer(nf_conntrack_destroyed, nf_nat_cleanup_conntrack);

	/* Initialize fake conntrack so that NAT will skip it */
	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;

	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
	return 0;
}
/* Clear NAT section of all conntracks, in case we're loaded again. */
static int clean_nat(struct nf_conn *i, void *data)
{
	struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;
	memset(nat, 0, sizeof(*nat));
	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
	return 0;
}
static void __exit nf_nat_cleanup(void)
{
	nf_ct_iterate_cleanup(&clean_nat, NULL);
	rcu_assign_pointer(nf_conntrack_destroyed, NULL);
	synchronize_rcu();
	vfree(bysource);
	nf_ct_l3proto_put(l3proto);
}
MODULE_LICENSE("GPL");

module_init(nf_nat_init);
module_exit(nf_nat_cleanup);