/* net/netfilter/nf_flow_table.c */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

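/*
 * Internal wrapper around the public flow_offload object: it also pins
 * the conntrack entry that originated the flow and carries the RCU head
 * used to free the entry after it has been removed from the table.
 */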
struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

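/*
 * Allocate a flow table entry from a conntrack entry and the cached
 * routes in @route. Takes a reference on the conntrack and on both
 * dst entries; on failure, everything acquired so far is released and
 * NULL is returned.
 */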
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
	case NFPROTO_IPV4:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
		break;
	case NFPROTO_IPV6:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
		break;
	}

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
		FLOW_OFFLOAD_DIR_ORIGINAL;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
		FLOW_OFFLOAD_DIR_REPLY;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	else if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

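/* Drop the dst references held by both directions and free the entry. */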
void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	kfree(e);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

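/* Mark a flow for teardown; the garbage collector removes it later. */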
void flow_offload_dead(struct flow_offload *flow)
{
	flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);

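/*
 * Insert a flow into the table. Each direction gets its own rhashtable
 * node, so a lookup succeeds with either the original or the reply
 * tuple. A minimal caller sketch (variable names are hypothetical; a
 * real caller must hold a conntrack entry and a populated
 * nf_flow_route):
 *
 *	flow = flow_offload_alloc(ct, &route);
 *	if (!flow)
 *		return -ENOMEM;
 *	flow_offload_add(flowtable, flow);
 */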
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	flow->timeout = (u32)jiffies;

	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

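/*
 * Remove both direction nodes from the table and free the entry after
 * an RCU grace period, since lookups may still be traversing it.
 */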
void flow_offload_del(struct nf_flowtable *flow_table,
		      struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);

	e = container_of(flow, struct flow_offload_entry, flow);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_del);

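/*
 * Look up a flow from a packet tuple. Only the fields up to 'dir' form
 * the key; the returned tuplehash tells which direction matched.
 */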
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
				      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

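/* Delete the backing conntrack entry and drop the flow's reference on it. */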
static void nf_flow_release_ct(const struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);
	nf_ct_delete(e->ct, 0, 0);
	nf_ct_put(e->ct);
}

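/*
 * Walk the whole table and call @iter once per flow. Each flow is keyed
 * twice (once per direction), so reply-direction nodes are skipped to
 * avoid visiting a flow twice. -EAGAIN from the walker only signals a
 * table resize and the walk continues.
 */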
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
			  void (*iter)(struct flow_offload *flow, void *data),
			  void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return err;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

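/*
 * The timeout comparison uses signed jiffies arithmetic so it remains
 * correct when the jiffies counter wraps around.
 */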
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
	return flow->flags & FLOW_OFFLOAD_DYING;
}

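/*
 * Periodic garbage collection: walk the table and evict flows that have
 * expired or were marked dying, releasing their conntrack entry. The
 * work requeues itself every second (HZ) on the power-efficient
 * workqueue, even when the walk could not be started.
 */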
void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		goto schedule;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		if (nf_flow_has_expired(flow) ||
		    nf_flow_is_dying(flow)) {
			flow_offload_del(flow_table, flow);
			nf_flow_release_ct(flow);
		}
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
schedule:
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);

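/*
 * rhashtable glue: only the tuple bytes up to the 'dir' member are
 * hashed and compared, so the direction and the cached route never
 * influence the key.
 */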
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);

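/*
 * Transport-layer NAT helpers: fix up the TCP/UDP checksum when a port
 * is rewritten. A UDP checksum of zero means "no checksum" on IPv4 and
 * must not be produced by the replacement, hence the CSUM_MANGLED_0
 * fixup.
 */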
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

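/*
 * Source NAT on the transport ports: rewrite the source port in the
 * original direction and the destination port in the reply direction,
 * using the port recorded in the opposite tuple.
 */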
int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

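/*
 * Destination NAT counterpart: rewrite the destination port in the
 * original direction and the source port in the reply direction.
 */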
int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");