/* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */
/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <linux/nsproxy.h>
#include <linux/rculist_nulls.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#define NF_CONNTRACK_VERSION	"0.5.0"
#include <linux/if_vlan.h>
#include <linux/if_pppox.h>
#include <linux/tcp.h>
#ifdef CONFIG_IPV6
#include <linux/ipv6.h>
#include <net/ip6_route.h>
#define IPVERSION_IS_4(ipver)	((ipver) == 4)
#else
#define IPVERSION_IS_4(ipver)	1
#endif /* CONFIG_IPV6 */
#include <net/route.h>
#include <ctf/hndctf.h>

#define NFC_CTF_ENABLED	(1 << 31)
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
				      enum nf_nat_manip_type manip,
				      const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);

DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
/*
 * Display an IP address in readable format.
 */
#define NIPQUAD(addr) \
	((unsigned char *)&addr)[0], \
	((unsigned char *)&addr)[1], \
	((unsigned char *)&addr)[2], \
	((unsigned char *)&addr)[3]

bool
ip_conntrack_is_ipc_allowed(struct sk_buff *skb, u_int32_t hooknum)
{
	struct net_device *dev;

	if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_POST_ROUTING) {
		dev = skb->dev;
		if (dev->priv_flags & IFF_802_1Q_VLAN)
			dev = vlan_dev_real_dev(dev);

		/* Add ipc entry if packet is received on ctf enabled interface
		 * and the packet is not a defrag'd one.
		 */
		if (ctf_isenabled(kcih, dev) && (skb->len <= dev->mtu))
			skb->nfcache |= NFC_CTF_ENABLED;
	}

	/* Add the cache entries only if the device has registered and
	 * the connection is in the established state.
	 */
	if (skb->nfcache & NFC_CTF_ENABLED)
		return true;

	return false;
}
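/*
 * Overview (summary comment; details inferred from the code below):
 * ip_conntrack_ipct_add() builds a CTF ip-cache (ipc) entry that mirrors one
 * direction of an established TCP/UDP flow -- L2 addresses, the 5-tuple,
 * VLAN/PPPoE actions and any NAT rewrite -- and installs it via
 * ctf_ipc_add(). Later packets of the flow can then be forwarded by the
 * Broadcom CTF fast path without traversing the full netfilter stack.
 */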
void
ip_conntrack_ipct_add(struct sk_buff *skb, u_int32_t hooknum,
		      struct nf_conn *ct, enum ip_conntrack_info ci,
		      struct nf_conntrack_tuple *manip)
{
	ctf_ipc_t ipc_entry;
	struct hh_cache *hh;
	struct ethhdr *eth;
	struct iphdr *iph;
	struct tcphdr *tcph;
	struct rtable *rt;
	struct nf_conn_help *help;
	enum ip_conntrack_dir dir;
	uint8 ipver, protocol;
#ifdef CONFIG_IPV6
	struct ipv6hdr *ip6h = NULL;
#endif /* CONFIG_IPV6 */
	uint32 nud_flags;

	if ((skb == NULL) || (ct == NULL))
		return;

	/* Check CTF enabled */
	if (!ip_conntrack_is_ipc_allowed(skb, hooknum))
		return;

	/* We only add cache entries for non-helper connections and at
	 * pre or post routing hooks.
	 */
	help = nfct_help(ct);
	if ((help && help->helper) || (ct->ctf_flags & CTF_FLAGS_EXCLUDED) ||
	    ((hooknum != NF_INET_PRE_ROUTING) && (hooknum != NF_INET_POST_ROUTING)))
		return;

	iph = ip_hdr(skb);
	ipver = iph->version;

	/* Support both IPv4 and IPv6 */
	if (ipver == 4) {
		tcph = ((struct tcphdr *)(((__u8 *)iph) + (iph->ihl << 2)));
		protocol = iph->protocol;
	}
#ifdef CONFIG_IPV6
	else if (ipver == 6) {
		ip6h = (struct ipv6hdr *)iph;
		tcph = (struct tcphdr *)ctf_ipc_lkup_l4proto(kcih, ip6h, &protocol);
		if (tcph == NULL)
			return;
	}
#endif /* CONFIG_IPV6 */
	else
		return;

	/* Only TCP and UDP are supported */
	if (protocol == IPPROTO_TCP) {
		/* Add ipc entries for connections in established state only */
		if ((ci != IP_CT_ESTABLISHED) && (ci != (IP_CT_ESTABLISHED + IP_CT_IS_REPLY)))
			return;

		/* Don't cache connections that have started to close */
		if (ct->proto.tcp.state >= TCP_CONNTRACK_FIN_WAIT &&
		    ct->proto.tcp.state <= TCP_CONNTRACK_TIME_WAIT)
			return;
	} else if (protocol != IPPROTO_UDP)
		return;

	dir = CTINFO2DIR(ci);
	if (ct->ctf_flags & (1 << dir))
		return;
	/* Do route lookup for alias address if we are doing DNAT in this
	 * direction.
	 */
	if (skb_dst(skb) == NULL) {
		/* Find the destination interface */
		if (IPVERSION_IS_4(ipver)) {
			u_int32_t daddr;

			if ((manip != NULL) && (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST))
				daddr = manip->dst.u3.ip;
			else
				daddr = iph->daddr;
			ip_route_input(skb, daddr, iph->saddr, iph->tos, skb->dev);
		}
#ifdef CONFIG_IPV6
		else
			ip6_route_input(skb);
#endif /* CONFIG_IPV6 */
	}
	/* Ensure the packet belongs to a forwarding connection and it is
	 * destined to a unicast address.
	 */
	rt = (struct rtable *)skb_dst(skb);

	nud_flags = NUD_PERMANENT | NUD_REACHABLE | NUD_STALE | NUD_DELAY | NUD_PROBE;
	if ((skb_dst(skb) != NULL) && (skb_dst(skb)->dev != NULL) &&
	    (skb_dst(skb)->dev->flags & IFF_POINTOPOINT))
		nud_flags |= NUD_NOARP;

	if ((rt == NULL) || (
#ifdef CONFIG_IPV6
	    !IPVERSION_IS_4(ipver) ?
	    ((rt->dst.input != ip6_forward) ||
	     !(ipv6_addr_type(&ip6h->daddr) & IPV6_ADDR_UNICAST)) :
#endif /* CONFIG_IPV6 */
	    ((rt->dst.input != ip_forward) || (rt->rt_type != RTN_UNICAST))) ||
	    (rt->dst.neighbour == NULL) ||
	    ((rt->dst.neighbour->nud_state & nud_flags) == 0))
		return;
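	/* Note on the check above (explanatory): the flow is cached only when
	 * the route resolved to a forwarding path (ip_forward/ip6_forward),
	 * the destination is unicast, and the neighbour is in a usable NUD
	 * state (NOARP is accepted for point-to-point links), i.e. the L2
	 * destination address copied below is known to be valid.
	 */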
	memset(&ipc_entry, 0, sizeof(ipc_entry));

	/* Init the neighboring sender address */
	memcpy(ipc_entry.sa.octet, eth_hdr(skb)->h_source, ETH_ALEN);

	/* If the packet is received on a bridge device then save
	 * the bridge cache entry pointer in the ip cache entry.
	 * This will be referenced in the data path to update the
	 * live counter of the brc entry whenever a received packet
	 * matches the corresponding ipc entry.
	 */
	if ((skb->dev != NULL) && ctf_isbridge(kcih, skb->dev))
		ipc_entry.brcp = ctf_brc_lkup(kcih, eth_hdr(skb)->h_source);

	hh = skb_dst(skb)->hh;
	if (hh != NULL) {
		eth = (struct ethhdr *)(((unsigned char *)hh->hh_data) + 2);
		memcpy(ipc_entry.dhost.octet, eth->h_dest, ETH_ALEN);
		memcpy(ipc_entry.shost.octet, eth->h_source, ETH_ALEN);
	} else {
		memcpy(ipc_entry.dhost.octet, rt->dst.neighbour->ha, ETH_ALEN);
		memcpy(ipc_entry.shost.octet, skb_dst(skb)->dev->dev_addr, ETH_ALEN);
	}
	/* Add ctf ipc entry for this direction */
	if (IPVERSION_IS_4(ipver)) {
		ipc_entry.tuple.sip[0] = iph->saddr;
		ipc_entry.tuple.dip[0] = iph->daddr;
#ifdef CONFIG_IPV6
	} else {
		memcpy(ipc_entry.tuple.sip, &ip6h->saddr, sizeof(ipc_entry.tuple.sip));
		memcpy(ipc_entry.tuple.dip, &ip6h->daddr, sizeof(ipc_entry.tuple.dip));
#endif /* CONFIG_IPV6 */
	}
	ipc_entry.tuple.proto = protocol;
	ipc_entry.tuple.sp = tcph->source;
	ipc_entry.tuple.dp = tcph->dest;

	ipc_entry.next = NULL;
	/* For vlan interfaces fill the vlan id and the tag/untag actions */
	if (!CTFQOS_ULDL_DIFFIF(kcih)) {
		if (skb_dst(skb)->dev->priv_flags & IFF_802_1Q_VLAN) {
			ipc_entry.txif = (void *)vlan_dev_real_dev(skb_dst(skb)->dev);
			ipc_entry.vid = vlan_dev_vlan_id(skb_dst(skb)->dev);
			ipc_entry.action = ((vlan_dev_vlan_flags(skb_dst(skb)->dev) & 1) ?
					    CTF_ACTION_TAG : CTF_ACTION_UNTAG);
		} else {
			ipc_entry.txif = skb_dst(skb)->dev;
			ipc_entry.action = CTF_ACTION_UNTAG;
		}
	} else {
		ipc_entry.txif = skb_dst(skb)->dev;
		ipc_entry.action = CTF_ACTION_UNTAG;
	}
#ifdef CTF_PPPOE
	{
		const char *vars = NULL, *dev_name = NULL;

		/* For pppoe interfaces fill the session id and header add/del actions */
		if (skb_dst(skb)->dev->flags & IFF_POINTOPOINT) {
			/* Transmit interface and sid will be populated by pppoe module */
			ipc_entry.ppp_ifp = skb_dst(skb)->dev;
			dev_name = skb_dst(skb)->dev->name;
		} else if (skb->dev->flags & IFF_POINTOPOINT) {
			ipc_entry.ppp_ifp = skb->dev;
			dev_name = skb->dev->name;
		} else {
			ipc_entry.ppp_ifp = NULL;
			ipc_entry.pppoe_sid = 0xffff;
		}

		if (ipc_entry.ppp_ifp) {
			struct net_device *pppox_tx_dev = NULL;
			ctf_ppp_t ctfppp;

			/* Read the PPP session info from the pppoe module */
			if (ppp_get_conn_pkt_info(ipc_entry.ppp_ifp, &ctfppp))
				return;

			if (ctfppp.psk.pppox_protocol == PX_PROTO_OE) {
				if (skb_dst(skb)->dev->flags & IFF_POINTOPOINT) {
					ipc_entry.action |= CTF_ACTION_PPPOE_ADD;
					pppox_tx_dev = ctfppp.psk.po->pppoe_dev;
					memcpy(ipc_entry.dhost.octet,
					       ctfppp.psk.dhost.octet, ETH_ALEN);
					memcpy(ipc_entry.shost.octet,
					       ctfppp.psk.po->pppoe_dev->dev_addr, ETH_ALEN);
				} else {
					ipc_entry.action |= CTF_ACTION_PPPOE_DEL;
					/* assignment inferred from elided source */
					pppox_tx_dev = ctfppp.psk.po->pppoe_dev;
				}
				ipc_entry.pppoe_sid = ctfppp.pppox_id;
			} else
				return;

			/* For vlan interfaces fill the vlan id and the tag/untag actions */
			if (!CTFQOS_ULDL_DIFFIF(kcih)) {
				if (pppox_tx_dev->priv_flags & IFF_802_1Q_VLAN) {
					ipc_entry.txif = (void *)vlan_dev_real_dev(pppox_tx_dev);
					ipc_entry.vid = vlan_dev_vlan_id(pppox_tx_dev);
					ipc_entry.action |= ((vlan_dev_vlan_flags(pppox_tx_dev) & 1) ?
							     CTF_ACTION_TAG : CTF_ACTION_UNTAG);
				} else {
					ipc_entry.txif = pppox_tx_dev;
					ipc_entry.action |= CTF_ACTION_UNTAG;
				}
			} else {
				ipc_entry.txif = pppox_tx_dev;
				ipc_entry.action |= CTF_ACTION_UNTAG;
			}
		}
	}
#endif /* CTF_PPPOE */
	if (kcih->ipc_suspend) {
		/* The default action is suspend */
		ipc_entry.action |= CTF_ACTION_SUSPEND;
	}

	/* Copy the DSCP value. ECN bits must be cleared. */
	if (IPVERSION_IS_4(ipver))
		ipc_entry.tos = IPV4_TOS(iph);
#ifdef CONFIG_IPV6
	else
		ipc_entry.tos = IPV6_TRAFFIC_CLASS(ip6h);
#endif /* CONFIG_IPV6 */
	ipc_entry.tos &= IPV4_TOS_DSCP_MASK;
	if (ipc_entry.tos)
		ipc_entry.action |= CTF_ACTION_TOS;

#ifdef CONFIG_NF_CONNTRACK_MARK
	/* Initialize the mark for this connection */
	if (ct->mark != 0) {
		ipc_entry.mark.value = ct->mark;
		ipc_entry.action |= CTF_ACTION_MARK;
	}
#endif /* CONFIG_NF_CONNTRACK_MARK */
	/* Update the manip ip and port */
	if (manip != NULL) {
		if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
			ipc_entry.nat.ip = manip->src.u3.ip;
			ipc_entry.nat.port = manip->src.u.tcp.port;
			ipc_entry.action |= CTF_ACTION_SNAT;
		} else {
			ipc_entry.nat.ip = manip->dst.u3.ip;
			ipc_entry.nat.port = manip->dst.u.tcp.port;
			ipc_entry.action |= CTF_ACTION_DNAT;
		}
	}
	/* Do bridge cache lookup to determine outgoing interface
	 * and any vlan tagging actions if needed.
	 */
	if (!CTFQOS_ULDL_DIFFIF(kcih)) {
		if (ctf_isbridge(kcih, ipc_entry.txif)) {
			ctf_brc_t *brcp;

			brcp = ctf_brc_lkup(kcih, ipc_entry.dhost.octet);
			if (brcp == NULL)
				return;

			ipc_entry.action |= brcp->action;
			ipc_entry.txif = brcp->txifp;
			ipc_entry.vid = brcp->vid;
		}
	} else {
		if (ctf_isbridge(kcih, ipc_entry.txif)) {
			ctf_brc_t *brcp;

			brcp = ctf_brc_lkup(kcih, ipc_entry.dhost.octet);
			if (brcp == NULL)
				return;

			ipc_entry.action |= brcp->action;
			if (brcp->txvifp != NULL) {	/* guard inferred from elided source */
				ipc_entry.txif = brcp->txvifp;
				ipc_entry.action &= ~CTF_ACTION_TAG;
				ipc_entry.action |= CTF_ACTION_UNTAG;
			} else {
				ipc_entry.txif = brcp->txifp;
				ipc_entry.vid = brcp->vid;
			}
		}
	}
#ifdef DEBUG	/* debug-only dump; guard inferred */
	if (IPVERSION_IS_4(ipver))
		printk("%s: Adding ipc entry for [%d]%u.%u.%u.%u:%u - %u.%u.%u.%u:%u\n",
		       __FUNCTION__,
		       ipc_entry.tuple.proto,
		       NIPQUAD(ipc_entry.tuple.sip[0]), ntohs(ipc_entry.tuple.sp),
		       NIPQUAD(ipc_entry.tuple.dip[0]), ntohs(ipc_entry.tuple.dp));
#ifdef CONFIG_IPV6
	else
		printk("\n%s: Adding ipc entry for [%d]\n"
		       "%08x.%08x.%08x.%08x:%u => %08x.%08x.%08x.%08x:%u\n",
		       __FUNCTION__, ipc_entry.tuple.proto,
		       ntohl(ipc_entry.tuple.sip[0]), ntohl(ipc_entry.tuple.sip[1]),
		       ntohl(ipc_entry.tuple.sip[2]), ntohl(ipc_entry.tuple.sip[3]),
		       ntohs(ipc_entry.tuple.sp),
		       ntohl(ipc_entry.tuple.dip[0]), ntohl(ipc_entry.tuple.dip[1]),
		       ntohl(ipc_entry.tuple.dip[2]), ntohl(ipc_entry.tuple.dip[3]),
		       ntohs(ipc_entry.tuple.dp));
#endif /* CONFIG_IPV6 */
	printk("sa %02x:%02x:%02x:%02x:%02x:%02x\n",
	       ipc_entry.shost.octet[0], ipc_entry.shost.octet[1],
	       ipc_entry.shost.octet[2], ipc_entry.shost.octet[3],
	       ipc_entry.shost.octet[4], ipc_entry.shost.octet[5]);
	printk("da %02x:%02x:%02x:%02x:%02x:%02x\n",
	       ipc_entry.dhost.octet[0], ipc_entry.dhost.octet[1],
	       ipc_entry.dhost.octet[2], ipc_entry.dhost.octet[3],
	       ipc_entry.dhost.octet[4], ipc_entry.dhost.octet[5]);
	printk("[%d] vid: %d action %x\n", hooknum, ipc_entry.vid, ipc_entry.action);
	if (manip != NULL)
		printk("manip_ip: %u.%u.%u.%u manip_port %u\n",
		       NIPQUAD(ipc_entry.nat.ip), ntohs(ipc_entry.nat.port));
	printk("txif: %s\n", ((struct net_device *)ipc_entry.txif)->name);
	if (ipc_entry.ppp_ifp)
		printk("pppif: %s\n", ((struct net_device *)ipc_entry.ppp_ifp)->name);
#endif /* DEBUG */
	ctf_ipc_add(kcih, &ipc_entry, !IPVERSION_IS_4(ipver));

	/* Update the attributes flag to indicate a CTF conn */
	ct->ctf_flags |= (CTF_FLAGS_CACHED | (1 << dir));
}
int
ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout)
{
	ctf_ipc_t *ipct;
	struct nf_conntrack_tuple *orig, *repl;
	ctf_ipc_t orig_ipct, repl_ipct;
	int ipaddr_sz;
	bool v6;

	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	if ((orig->dst.protonum != IPPROTO_TCP) && (orig->dst.protonum != IPPROTO_UDP))
		return 0;

	repl = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

#ifdef CONFIG_IPV6
	v6 = (orig->src.l3num == AF_INET6);
	ipaddr_sz = (v6) ? sizeof(struct in6_addr) : sizeof(struct in_addr);
#else
	v6 = false;
	ipaddr_sz = sizeof(struct in_addr);
#endif /* CONFIG_IPV6 */

	memset(&orig_ipct, 0, sizeof(orig_ipct));
	memcpy(orig_ipct.tuple.sip, &orig->src.u3.ip, ipaddr_sz);
	memcpy(orig_ipct.tuple.dip, &orig->dst.u3.ip, ipaddr_sz);
	orig_ipct.tuple.proto = orig->dst.protonum;
	orig_ipct.tuple.sp = orig->src.u.tcp.port;
	orig_ipct.tuple.dp = orig->dst.u.tcp.port;

	memset(&repl_ipct, 0, sizeof(repl_ipct));
	memcpy(repl_ipct.tuple.sip, &repl->src.u3.ip, ipaddr_sz);
	memcpy(repl_ipct.tuple.dip, &repl->dst.u3.ip, ipaddr_sz);
	repl_ipct.tuple.proto = repl->dst.protonum;
	repl_ipct.tuple.sp = repl->src.u.tcp.port;
	repl_ipct.tuple.dp = repl->dst.u.tcp.port;

	/* If the refresh counter of the ipc entry is non zero, it indicates
	 * that the packet transfer is active and we should not delete
	 * the conntrack entry.
	 */
	if (ct_timeout) {
		ipct = ctf_ipc_lkup(kcih, &orig_ipct, v6);

		/* Postpone the deletion of the ct entry if there are frames
		 * flowing in this direction.
		 */
		if ((ipct != NULL) && (ipct->live > 0)) {
			ipct->live = 0;
			ct->timeout.expires = jiffies + ct->expire_jiffies;
			add_timer(&ct->timeout);
			return -1;
		}

		ipct = ctf_ipc_lkup(kcih, &repl_ipct, v6);

		if ((ipct != NULL) && (ipct->live > 0)) {
			ipct->live = 0;
			ct->timeout.expires = jiffies + ct->expire_jiffies;
			add_timer(&ct->timeout);
			return -1;
		}
	}

	/* If there are no packets over this connection for the timeout period
	 * delete the entries.
	 */
	ctf_ipc_delete(kcih, &orig_ipct, v6);
	ctf_ipc_delete(kcih, &repl_ipct, v6);

#ifdef DEBUG	/* debug-only dump; guard inferred */
	printk("%s: Deleting the tuple %x %x %d %d %d\n",
	       __FUNCTION__, orig->src.u3.ip, orig->dst.u3.ip, orig->dst.protonum,
	       orig->src.u.tcp.port, orig->dst.u.tcp.port);
	printk("%s: Deleting the tuple %x %x %d %d %d\n",
	       __FUNCTION__, repl->dst.u3.ip, repl->src.u3.ip, repl->dst.protonum,
	       repl->dst.u.tcp.port, repl->src.u.tcp.port);
#endif /* DEBUG */

	return 0;
}
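/*
 * Note (explanatory): ip_conntrack_ipct_delete() cooperates with the
 * conntrack timer. While a CTF entry's 'live' counter shows forwarded
 * traffic, the conntrack timeout is re-armed instead of letting the entry
 * die, since fast-path packets never reach nf_conntrack_in() to refresh
 * the conntrack themselves.
 */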
static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t BCMFASTPATH_HOST
__hash_conntrack(const struct nf_conntrack_tuple *tuple,
		 u16 zone, unsigned int size, unsigned int rnd)
{
	unsigned int n;
	u_int32_t h;

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	h = jhash2((u32 *)tuple, n,
		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
				 tuple->dst.protonum));

	return ((u64)h * size) >> 32;
}

static inline u_int32_t
hash_conntrack(const struct net *net, u16 zone,
	       const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, zone, net->ct.htable_size,
				nf_conntrack_hash_rnd);
}
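/*
 * Note: "((u64)h * size) >> 32" maps the 32-bit jhash value h uniformly
 * onto [0, size) without a modulo; conceptually bucket = h * size / 2^32.
 * E.g. with size = 16384 and h = 0x80000000 the result is bucket 8192.
 * This works for any table size, not just powers of two.
 */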
bool
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_l4proto *l4proto)
{
	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return false;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num, struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int protoff;
	u_int8_t protonum;
	int ret;

	rcu_read_lock();

	l3proto = __nf_ct_l3proto_find(l3num);
	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
	if (ret != NF_ACCEPT) {
		rcu_read_unlock();
		return false;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
			      l3proto, l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return false;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return l4proto->invert_tuple(inverse, orig);
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_ipct_delete(ct, 0);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	rcu_read_lock();
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto && l4proto->destroy)
		l4proto->destroy(ct);

	rcu_read_unlock();

	spin_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
		hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	}

	NF_CT_STAT_INC(net, delete);
	spin_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
void nf_ct_delete_from_lists(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);

	nf_ct_helper_destroy(ct);
	spin_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(net, delete_list);
	clean_from_lists(ct);
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;
	struct net *net = nf_ct_net(ct);

	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
		/* bad luck, let's retry again */
		ct->timeout.expires = jiffies +
			(random32() % net->ct.sysctl_events_retry_timeout);
		add_timer(&ct->timeout);
		return;
	}
	/* we've got the event delivered, now it's dying */
	set_bit(IPS_DYING_BIT, &ct->status);
	spin_lock(&nf_conntrack_lock);
	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&nf_conntrack_lock);
	nf_ct_put(ct);
}
void nf_ct_insert_dying_list(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);

	/* add this conntrack to the dying list */
	spin_lock_bh(&nf_conntrack_lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &net->ct.dying);
	spin_unlock_bh(&nf_conntrack_lock);
	/* set a new timer to retry event delivery */
	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
	ct->timeout.expires = jiffies +
		(random32() % net->ct.sysctl_events_retry_timeout);
	add_timer(&ct->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
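/*
 * Note: the dying list exists only for reliable event delivery. When the
 * IPCT_DESTROY event cannot be delivered, the conntrack is parked on
 * net->ct.dying and death_by_event() retries after a randomized delay, so
 * userspace listeners do not miss destroy notifications.
 */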
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;

	/* If negative error is returned it means the entry hasn't
	 * expired in the CTF cache yet; the delete routine has re-armed
	 * the timer, so leave the conntrack alone.
	 */
	if (ip_conntrack_ipct_delete(ct, jiffies >= ct->timeout.expires ? 1 : 0) != 0)
		return;

	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
		/* destroy event was not delivered */
		nf_ct_delete_from_lists(ct);
		nf_ct_insert_dying_list(ct);
		return;
	}
	set_bit(IPS_DYING_BIT, &ct->status);
	nf_ct_delete_from_lists(ct);
	nf_ct_put(ct);
}
/*
 * Warning :
 * - Caller must take a reference on returned object
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 * OR
 * - Caller must lock nf_conntrack_lock before calling this function
 */
struct nf_conntrack_tuple_hash * BCMFASTPATH_HOST
__nf_conntrack_find(struct net *net, u16 zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int hash = hash_conntrack(net, zone, tuple);

	/* Disable BHs the entire time since we normally need to disable them
	 * at least once for the stats anyway.
	 */
	local_bh_disable();
begin:
	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
			NF_CT_STAT_INC(net, found);
			local_bh_enable();
			return h;
		}
		NF_CT_STAT_INC(net, searched);
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(n) != hash) {
		NF_CT_STAT_INC(net, search_restart);
		goto begin;
	}
	local_bh_enable();

	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);
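/*
 * Note: each hash chain is terminated by a "nulls" marker that encodes the
 * bucket number (see nf_ct_alloc_hashtable()). Because conntracks are
 * allocated with SLAB_DESTROY_BY_RCU, an entry may be freed and reused in
 * another chain while we traverse it; ending the walk on a marker with the
 * wrong bucket number is how such a move is detected, and the lookup above
 * then restarts.
 */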
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash * BCMFASTPATH_HOST
nf_conntrack_find_get(struct net *net, u16 zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
begin:
	h = __nf_conntrack_find(net, zone, tuple);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(nf_ct_is_dying(ct) ||
			     !atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
		else {
			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
				     nf_ct_zone(ct) != zone)) {
				nf_ct_put(ct);
				goto begin;
			}
		}
	}
	rcu_read_unlock();

	return h;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	struct net *net = nf_ct_net(ct);

	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
				 &net->ct.hash[hash]);
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
				 &net->ct.hash[repl_hash]);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int hash, repl_hash;
	u16 zone;

	zone = nf_ct_zone(ct);
	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	__nf_conntrack_hash_insert(ct, hash, repl_hash);
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	unsigned int hash, repl_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	u16 zone;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction. Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);

	spin_lock_bh(&nf_conntrack_lock);

	/* We have to check the DYING flag inside the lock to prevent
	   a race against nf_ct_get_next_corpse() possibly called from
	   user context, else we insert an already 'dead' hash, blocking
	   further use of that particular connection -JM */
	if (unlikely(nf_ct_is_dying(ct))) {
		spin_unlock_bh(&nf_conntrack_lock);
		return NF_ACCEPT;
	}

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple) &&
		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
			goto out;
	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple) &&
		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
			goto out;

	/* Remove from unconfirmed list */
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);

	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);

	/* Since the lookup is lockless, hash insertion must be done after
	 * starting the timer and setting the CONFIRMED bit. The RCU barriers
	 * guarantee that no other CPU can find the conntrack before the above
	 * stores are visible.
	 */
	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	NF_CT_STAT_INC(net, insert);
	spin_unlock_bh(&nf_conntrack_lock);

	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	NF_CT_STAT_INC(net, insert_failed);
	spin_unlock_bh(&nf_conntrack_lock);
	return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	struct nf_conn *ct;
	u16 zone = nf_ct_zone(ignored_conntrack);
	unsigned int hash = hash_conntrack(net, zone, tuple);

	/* Disable BHs the entire time since we need to disable them at
	 * least once for the stats anyway.
	 */
	rcu_read_lock_bh();
	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (ct != ignored_conntrack &&
		    nf_ct_tuple_equal(tuple, &h->tuple) &&
		    nf_ct_zone(ct) == zone) {
			NF_CT_STAT_INC(net, found);
			rcu_read_unlock_bh();
			return 1;
		}
		NF_CT_STAT_INC(net, searched);
	}
	rcu_read_unlock_bh();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
#define NF_CT_EVICTION_RANGE	8

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static noinline int early_drop(struct net *net, unsigned int hash)
{
	/* Use oldest entry, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL, *tmp;
	struct hlist_nulls_node *n;
	unsigned int i, cnt = 0;
	int dropped = 0;

	rcu_read_lock();
	for (i = 0; i < net->ct.htable_size; i++) {
		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
					       hnnode) {
			tmp = nf_ct_tuplehash_to_ctrack(h);
			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
				ct = tmp;
			cnt++;
		}

		if (ct != NULL) {
			if (likely(!nf_ct_is_dying(ct) &&
				   atomic_inc_not_zero(&ct->ct_general.use)))
				break;
			else
				ct = NULL;
		}

		if (cnt >= NF_CT_EVICTION_RANGE)
			break;

		hash = (hash + 1) % net->ct.htable_size;
	}
	rcu_read_unlock();

	if (!ct)
		return dropped;

	ip_conntrack_ipct_delete(ct, 0);

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC_ATOMIC(net, early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}
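/*
 * Note: early_drop() above implements a bounded, approximately-LRU
 * eviction: it scans at most NF_CT_EVICTION_RANGE entries starting at the
 * bucket of the tuple being inserted and evicts the last non-ASSURED
 * conntrack it saw, so a full table degrades service only for unassured
 * flows.
 */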
struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
				   const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl,
				   gfp_t gfp)
{
	struct nf_conn *ct;

	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
		get_random_bytes(&nf_conntrack_hash_rnd,
				 sizeof(nf_conntrack_hash_rnd));
		nf_conntrack_hash_rnd_initted = 1;
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&net->ct.count);

	if (nf_conntrack_max &&
	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
		unsigned int hash = hash_conntrack(net, zone, orig);
		if (!early_drop(net, hash)) {
			atomic_dec(&net->ct.count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	/*
	 * Do not use kmem_cache_zalloc(), as this cache uses
	 * SLAB_DESTROY_BY_RCU.
	 */
	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
	if (ct == NULL) {
		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
		atomic_dec(&net->ct.count);
		return ERR_PTR(-ENOMEM);
	}
	/*
	 * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next
	 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
	 */
	memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
	       sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
	spin_lock_init(&ct->lock);
	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
	/* Don't set timer yet: wait for confirmation */
	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
	write_pnet(&ct->ct_net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
	if (zone) {
		struct nf_conntrack_zone *nf_ct_zone;

		nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
		if (!nf_ct_zone)
			goto out_free;
		nf_ct_zone->id = zone;
	}
#endif
	/*
	 * changes to lookup keys must be done before setting refcnt to 1
	 */
	smp_wmb();
	atomic_set(&ct->ct_general.use, 1);
	return ct;

#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
	atomic_dec(&net->ct.count);
	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
	return ERR_PTR(-ENOMEM);
#endif
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
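/*
 * Note on the allocation above: with SLAB_DESTROY_BY_RCU an object may be
 * reused while RCU readers still traverse it, which is why the memset
 * deliberately preserves hnnode.next of both tuplehash entries (so a
 * concurrent chain walk stays on a valid list) and why the refcount is set
 * to 1 only after all lookup keys are written, with smp_wmb() in between.
 */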
void nf_conntrack_free(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);

	nf_ct_ext_destroy(ct);
	atomic_dec(&net->ct.count);
	nf_ct_ext_free(ct);
	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
	       const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_l4proto *l4proto,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_ecache *ecache;
	struct nf_conntrack_expect *exp;
	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
		pr_debug("Can't invert tuple.\n");
		return NULL;
	}

	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
	if (IS_ERR(ct)) {
		pr_debug("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)ct;
	}

	if (!l4proto->new(ct, skb, dataoff)) {
		nf_conntrack_free(ct);
		pr_debug("init conntrack: can't track with proto module\n");
		return NULL;
	}

	nf_ct_acct_ext_add(ct, GFP_ATOMIC);

	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
			     ecache ? ecache->expmask : 0,
			     GFP_ATOMIC);

	spin_lock_bh(&nf_conntrack_lock);
	exp = nf_ct_find_expectation(net, zone, tuple);
	if (exp) {
		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
			 ct, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &ct->status);
		ct->master = exp->master;
		if (exp->helper) {
			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
			if (help)
				rcu_assign_pointer(help->helper, exp->helper);
		}

#ifdef CONFIG_NF_CONNTRACK_MARK
		ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
		ct->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&ct->master->ct_general);
		NF_CT_STAT_INC(net, expect_new);
	} else {
		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
		NF_CT_STAT_INC(net, new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
				 &net->ct.unconfirmed);

	spin_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(ct, exp);
		nf_ct_expect_put(exp);
	}

	return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
		  struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     l4proto)) {
		pr_debug("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(net, zone, &tuple);
	if (!h) {
		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
				   skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: related packet for %p\n",
				 ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			pr_debug("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}

unsigned int BCMFASTPATH_HOST
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
		struct sk_buff *skb)
{
	struct nf_conn *ct, *tmpl = NULL;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;

	if (skb->nfct) {
		/* Previously seen (loopback or untracked)?  Ignore. */
		tmpl = (struct nf_conn *)skb->nfct;
		if (!nf_ct_is_template(tmpl)) {
			NF_CT_STAT_INC_ATOMIC(net, ignore);
			return NF_ACCEPT;
		}
		skb->nfct = NULL;
	}

	/* rcu_read_lock()ed by nf_hook_slow */
	l3proto = __nf_ct_l3proto_find(pf);
	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
				   &dataoff, &protonum);
	if (ret <= 0) {
		pr_debug("not prepared to track yet or error occurred\n");
		NF_CT_STAT_INC_ATOMIC(net, error);
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		ret = -ret;
		goto out;
	}

	l4proto = __nf_ct_l4proto_find(pf, protonum);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	if (l4proto->error != NULL) {
		ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
				     pf, hooknum);
		if (ret <= 0) {
			NF_CT_STAT_INC_ATOMIC(net, error);
			NF_CT_STAT_INC_ATOMIC(net, invalid);
			ret = -ret;
			goto out;
		}
	}

	ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
			       l3proto, l4proto, &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		ret = NF_ACCEPT;
		goto out;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(net, drop);
		ret = NF_DROP;
		goto out;
	}

	NF_CT_ASSERT(skb->nfct);

	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
	if (ret <= 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		pr_debug("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		if (ret == -NF_DROP)
			NF_CT_STAT_INC_ATOMIC(net, drop);
		ret = -ret;
		goto out;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
	if (tmpl)
		nf_ct_put(tmpl);

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
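/*
 * Note: nf_conntrack_in() above is the per-packet pipeline: find the
 * l3proto and locate the L4 header; give l4proto->error() a chance to
 * handle special packets (e.g. ICMP errors); resolve or create the
 * conntrack via resolve_normal_ct(); then run l4proto->packet() to advance
 * protocol state. Negative l4proto return values are inverted into the
 * netfilter verdict.
 */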
bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			  const struct nf_conntrack_tuple *orig)
{
	bool ret;

	rcu_read_lock();
	ret = nf_ct_invert_tuple(inverse, orig,
				 __nf_ct_l3proto_find(orig->src.l3num),
				 __nf_ct_l4proto_find(orig->src.l3num,
						      orig->dst.protonum));
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __nf_conntrack_confirm */
void nf_conntrack_alter_reply(struct nf_conn *ct,
			      const struct nf_conntrack_tuple *newreply)
{
	struct nf_conn_help *help = nfct_help(ct);

	/* Should be unconfirmed, so not in hash table yet */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));

	pr_debug("Altering reply tuple of %p to ", ct);
	nf_ct_dump_tuple(newreply);

	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (ct->master || (help && !hlist_empty(&help->expectations)))
		return;

	rcu_read_lock();
	__nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
		goto acct;

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
		ct->expire_jiffies = extra_jiffies;
		ct->timeout.expires = extra_jiffies;
	} else {
		unsigned long newtime = jiffies + extra_jiffies;

		/* Only update the timeout if the new timeout is at least
		   HZ jiffies from the old timeout. Need del_timer for race
		   avoidance (may already be dying). */
		if (newtime - ct->timeout.expires >= HZ) {
			ct->expire_jiffies = extra_jiffies;
			mod_timer_pending(&ct->timeout, newtime);
		}
	}

acct:
	if (do_acct) {
		struct nf_conn_counter *acct;

		acct = nf_conn_acct_find(ct);
		if (acct) {
			spin_lock_bh(&ct->lock);
			acct[CTINFO2DIR(ctinfo)].packets++;
			acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
			spin_unlock_bh(&ct->lock);
		}
	}
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
bool __nf_ct_kill_acct(struct nf_conn *ct,
		       enum ip_conntrack_info ctinfo,
		       const struct sk_buff *skb,
		       int do_acct)
{
	if (do_acct) {
		struct nf_conn_counter *acct;

		acct = nf_conn_acct_find(ct);
		if (acct) {
			spin_lock_bh(&ct->lock);
			acct[CTINFO2DIR(ctinfo)].packets++;
			acct[CTINFO2DIR(ctinfo)].bytes +=
				skb->len - skb_network_offset(skb);
			spin_unlock_bh(&ct->lock);
		}
	}

	if (del_timer(&ct->timeout)) {
		ct->timeout.function((unsigned long)ct);
		return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
#ifdef CONFIG_NF_CONNTRACK_ZONES
static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
	.len	= sizeof(struct nf_conntrack_zone),
	.align	= __alignof__(struct nf_conntrack_zone),
	.id	= NF_CT_EXT_ZONE,
};
#endif /* CONFIG_NF_CONNTRACK_ZONES */
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
	NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);

const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
	[CTA_PROTO_SRC_PORT]  = { .type = NLA_U16 },
	[CTA_PROTO_DST_PORT]  = { .type = NLA_U16 },
};
EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);

int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
		return -EINVAL;

	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);

int nf_ct_port_nlattr_tuple_size(void)
{
	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_nulls_node *n;

	spin_lock_bh(&nf_conntrack_lock);
	for (; *bucket < net->ct.htable_size; (*bucket)++) {
		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
			ct = nf_ct_tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (iter(ct, data))
			set_bit(IPS_DYING_BIT, &ct->status);
	}
	spin_unlock_bh(&nf_conntrack_lock);
	return NULL;
found:
	atomic_inc(&ct->ct_general.use);
	spin_unlock_bh(&nf_conntrack_lock);
	return ct;
}
void nf_ct_iterate_cleanup(struct net *net,
			   int (*iter)(struct nf_conn *i, void *data),
			   void *data)
{
	struct nf_conn *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
		ip_conntrack_ipct_delete(ct, 0);

		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
struct __nf_ct_flush_report {
	u32 pid;
	int report;
};

static int kill_report(struct nf_conn *i, void *data)
{
	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;

	/* If we fail to deliver the event, death_by_timeout() will retry */
	if (nf_conntrack_event_report(IPCT_DESTROY, i,
				      fr->pid, fr->report) < 0)
		return 1;

	/* Avoid the delivery of the destroy event in death_by_timeout(). */
	set_bit(IPS_DYING_BIT, &i->status);
	return 1;
}
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct hlist_head) * size));
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
{
	struct __nf_ct_flush_report fr = {
		.pid	= pid,
		.report	= report,
	};
	nf_ct_iterate_cleanup(net, kill_report, &fr);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
static void nf_ct_release_dying_list(struct net *net)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_nulls_node *n;

	spin_lock_bh(&nf_conntrack_lock);
	hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		/* never fails to remove them, no listeners at this point */
		nf_ct_kill(ct);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
static int untrack_refs(void)
{
	int cnt = 0, cpu;

	for_each_possible_cpu(cpu) {
		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);

		cnt += atomic_read(&ct->ct_general.use) - 1;
	}
	return cnt;
}

static void nf_conntrack_cleanup_init_net(void)
{
	while (untrack_refs() > 0)
		schedule();

	nf_conntrack_helper_fini();
	nf_conntrack_proto_fini();
#ifdef CONFIG_NF_CONNTRACK_ZONES
	nf_ct_extend_unregister(&nf_ct_zone_extend);
#endif
}
static void nf_conntrack_cleanup_net(struct net *net)
{
 i_see_dead_people:
	nf_ct_iterate_cleanup(net, kill_all, NULL);
	nf_ct_release_dying_list(net);
	if (atomic_read(&net->ct.count) != 0) {
		schedule();
		goto i_see_dead_people;
	}

	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
			     net->ct.htable_size);
	nf_conntrack_ecache_fini(net);
	nf_conntrack_acct_fini(net);
	nf_conntrack_expect_fini(net);
	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
	kfree(net->ct.slabname);
	free_percpu(net->ct.stat);
}
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(struct net *net)
{
	if (net_eq(net, &init_net))
		rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_conntrack_cleanup_net(net);

	if (net_eq(net, &init_net)) {
		rcu_assign_pointer(nf_ct_destroy, NULL);
		nf_conntrack_cleanup_init_net();
	}
}
void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
{
	struct hlist_nulls_head *hash;
	unsigned int nr_slots, i;
	size_t sz;

	*vmalloced = 0;

	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
	sz = nr_slots * sizeof(struct hlist_nulls_head);
	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
					get_order(sz));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
				 PAGE_KERNEL);
	}

	if (hash && nulls)
		for (i = 0; i < nr_slots; i++)
			INIT_HLIST_NULLS_HEAD(&hash[i], i);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
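/*
 * Note: INIT_HLIST_NULLS_HEAD() above stores the bucket index i in the
 * chain's "nulls" terminator. Lockless lookups compare that value against
 * the bucket they started from to detect entries that moved chains under
 * them (see __nf_conntrack_find()).
 */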
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, vmalloced, old_vmalloced;
	unsigned int hashsize, old_size;
	struct hlist_nulls_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (current->nsproxy->net_ns != &init_net)
		return -EOPNOTSUPP;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtoul(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
	if (!hash)
		return -ENOMEM;

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that required taking the lock.
	 */
	spin_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < init_net.ct.htable_size; i++) {
		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
			h = hlist_nulls_entry(init_net.ct.hash[i].first,
					      struct nf_conntrack_tuple_hash, hnnode);
			ct = nf_ct_tuplehash_to_ctrack(h);
			hlist_nulls_del_rcu(&h->hnnode);
			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
						  hashsize,
						  nf_conntrack_hash_rnd);
			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
		}
	}
	old_size = init_net.ct.htable_size;
	old_vmalloced = init_net.ct.hash_vmalloc;
	old_hash = init_net.ct.hash;

	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
	init_net.ct.hash_vmalloc = vmalloced;
	init_net.ct.hash = hash;
	spin_unlock_bh(&nf_conntrack_lock);

	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);

module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
void nf_ct_untracked_status_or(unsigned long bits)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
}
EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
static int nf_conntrack_init_init_net(void)
{
	int max_factor = 8;
	int ret, cpu;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((totalram_pages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_proto;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto err_helper;

#ifdef CONFIG_NF_CONNTRACK_ZONES
	ret = nf_ct_extend_register(&nf_ct_zone_extend);
	if (ret < 0)
		goto err_extend;
#endif
	/* Set up fake conntrack: to never be deleted, not in any hashes */
	for_each_possible_cpu(cpu) {
		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
		write_pnet(&ct->ct_net, &init_net);
		atomic_set(&ct->ct_general.use, 1);
	}
	/* - and make it look like a confirmed connection */
	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
	return 0;

#ifdef CONFIG_NF_CONNTRACK_ZONES
err_extend:
	nf_conntrack_helper_fini();
#endif
err_helper:
	nf_conntrack_proto_fini();
err_proto:
	return ret;
}
/*
 * We need to use special "null" values, not used in hash table
 */
#define UNCONFIRMED_NULLS_VAL	((1<<30)+0)
#define DYING_NULLS_VAL		((1<<30)+1)

static int nf_conntrack_init_net(struct net *net)
{
	int ret;

	atomic_set(&net->ct.count, 0);
	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
	INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
	if (!net->ct.stat) {
		ret = -ENOMEM;
		goto err_stat;
	}

	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
	if (!net->ct.slabname) {
		ret = -ENOMEM;
		goto err_slabname;
	}

	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
							sizeof(struct nf_conn), 0,
							SLAB_DESTROY_BY_RCU, NULL);
	if (!net->ct.nf_conntrack_cachep) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		ret = -ENOMEM;
		goto err_cache;
	}

	net->ct.htable_size = nf_conntrack_htable_size;
	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
					     &net->ct.hash_vmalloc, 1);
	if (!net->ct.hash) {
		ret = -ENOMEM;
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_hash;
	}
	ret = nf_conntrack_expect_init(net);
	if (ret < 0)
		goto err_expect;
	ret = nf_conntrack_acct_init(net);
	if (ret < 0)
		goto err_acct;
	ret = nf_conntrack_ecache_init(net);
	if (ret < 0)
		goto err_ecache;

	return 0;

err_ecache:
	nf_conntrack_acct_fini(net);
err_acct:
	nf_conntrack_expect_fini(net);
err_expect:
	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
			     net->ct.htable_size);
err_hash:
	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
err_cache:
	kfree(net->ct.slabname);
err_slabname:
	free_percpu(net->ct.stat);
err_stat:
	return ret;
}
s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
			enum ip_conntrack_dir dir,
			u32 seq);
EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
int nf_conntrack_init(struct net *net)
{
	int ret;

	if (net_eq(net, &init_net)) {
		ret = nf_conntrack_init_init_net();
		if (ret < 0)
			goto out_init_net;
	}
	ret = nf_conntrack_init_net(net);
	if (ret < 0)
		goto out_net;

	if (net_eq(net, &init_net)) {
		/* For use by REJECT target */
		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

		/* Howto get NAT offsets */
		rcu_assign_pointer(nf_ct_nat_offset, NULL);
	}
	return 0;

out_net:
	if (net_eq(net, &init_net))
		nf_conntrack_cleanup_init_net();
out_init_net:
	return ret;
}