ACPI: thinkpad-acpi: add development version tag
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / xfrm / xfrm_policy.c
blob9fc4c315f6cd9702a223b5758d3f61e26c89808b
1 /*
2 * xfrm_policy.c
4 * Changes:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * IPv6 support
9 * Kazunori MIYAZAWA @USAGI
10 * YOSHIFUJI Hideaki
11 * Split up af-specific portion
12 * Derek Atkins <derek@ihtfp.com> Add the post_input processor
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
35 #include "xfrm_hash.h"
37 int sysctl_xfrm_larval_drop __read_mostly;
39 #ifdef CONFIG_XFRM_STATISTICS
40 DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41 EXPORT_SYMBOL(xfrm_statistics);
42 #endif
44 DEFINE_MUTEX(xfrm_cfg_mutex);
45 EXPORT_SYMBOL(xfrm_cfg_mutex);
47 static DEFINE_RWLOCK(xfrm_policy_lock);
49 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
50 EXPORT_SYMBOL(xfrm_policy_count);
52 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
53 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
55 static struct kmem_cache *xfrm_dst_cache __read_mostly;
57 static struct work_struct xfrm_policy_gc_work;
58 static HLIST_HEAD(xfrm_policy_gc_list);
59 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
61 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
62 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
63 static void xfrm_init_pmtu(struct dst_entry *dst);
65 static inline int
66 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
68 return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
69 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
70 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
71 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
72 (fl->proto == sel->proto || !sel->proto) &&
73 (fl->oif == sel->ifindex || !sel->ifindex);
76 static inline int
77 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
79 return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
80 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
81 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
82 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
83 (fl->proto == sel->proto || !sel->proto) &&
84 (fl->oif == sel->ifindex || !sel->ifindex);
87 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
88 unsigned short family)
90 switch (family) {
91 case AF_INET:
92 return __xfrm4_selector_match(sel, fl);
93 case AF_INET6:
94 return __xfrm6_selector_match(sel, fl);
96 return 0;
99 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
100 int family)
102 xfrm_address_t *saddr = &x->props.saddr;
103 xfrm_address_t *daddr = &x->id.daddr;
104 struct xfrm_policy_afinfo *afinfo;
105 struct dst_entry *dst;
107 if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
108 saddr = x->coaddr;
109 if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
110 daddr = x->coaddr;
112 afinfo = xfrm_policy_get_afinfo(family);
113 if (unlikely(afinfo == NULL))
114 return ERR_PTR(-EAFNOSUPPORT);
116 dst = afinfo->dst_lookup(tos, saddr, daddr);
117 xfrm_policy_put_afinfo(afinfo);
118 return dst;
121 static inline unsigned long make_jiffies(long secs)
123 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
124 return MAX_SCHEDULE_TIMEOUT-1;
125 else
126 return secs*HZ;
129 static void xfrm_policy_timer(unsigned long data)
131 struct xfrm_policy *xp = (struct xfrm_policy*)data;
132 unsigned long now = get_seconds();
133 long next = LONG_MAX;
134 int warn = 0;
135 int dir;
137 read_lock(&xp->lock);
139 if (xp->dead)
140 goto out;
142 dir = xfrm_policy_id2dir(xp->index);
144 if (xp->lft.hard_add_expires_seconds) {
145 long tmo = xp->lft.hard_add_expires_seconds +
146 xp->curlft.add_time - now;
147 if (tmo <= 0)
148 goto expired;
149 if (tmo < next)
150 next = tmo;
152 if (xp->lft.hard_use_expires_seconds) {
153 long tmo = xp->lft.hard_use_expires_seconds +
154 (xp->curlft.use_time ? : xp->curlft.add_time) - now;
155 if (tmo <= 0)
156 goto expired;
157 if (tmo < next)
158 next = tmo;
160 if (xp->lft.soft_add_expires_seconds) {
161 long tmo = xp->lft.soft_add_expires_seconds +
162 xp->curlft.add_time - now;
163 if (tmo <= 0) {
164 warn = 1;
165 tmo = XFRM_KM_TIMEOUT;
167 if (tmo < next)
168 next = tmo;
170 if (xp->lft.soft_use_expires_seconds) {
171 long tmo = xp->lft.soft_use_expires_seconds +
172 (xp->curlft.use_time ? : xp->curlft.add_time) - now;
173 if (tmo <= 0) {
174 warn = 1;
175 tmo = XFRM_KM_TIMEOUT;
177 if (tmo < next)
178 next = tmo;
181 if (warn)
182 km_policy_expired(xp, dir, 0, 0);
183 if (next != LONG_MAX &&
184 !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
185 xfrm_pol_hold(xp);
187 out:
188 read_unlock(&xp->lock);
189 xfrm_pol_put(xp);
190 return;
192 expired:
193 read_unlock(&xp->lock);
194 if (!xfrm_policy_delete(xp, dir))
195 km_policy_expired(xp, dir, 1, 0);
196 xfrm_pol_put(xp);
200 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
201 * SPD calls.
204 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
206 struct xfrm_policy *policy;
208 policy = kzalloc(sizeof(struct xfrm_policy), gfp);
210 if (policy) {
211 INIT_HLIST_NODE(&policy->bydst);
212 INIT_HLIST_NODE(&policy->byidx);
213 rwlock_init(&policy->lock);
214 atomic_set(&policy->refcnt, 1);
215 setup_timer(&policy->timer, xfrm_policy_timer,
216 (unsigned long)policy);
218 return policy;
220 EXPORT_SYMBOL(xfrm_policy_alloc);
222 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
224 void xfrm_policy_destroy(struct xfrm_policy *policy)
226 BUG_ON(!policy->dead);
228 BUG_ON(policy->bundles);
230 if (del_timer(&policy->timer))
231 BUG();
233 security_xfrm_policy_free(policy);
234 kfree(policy);
236 EXPORT_SYMBOL(xfrm_policy_destroy);
238 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
240 struct dst_entry *dst;
242 while ((dst = policy->bundles) != NULL) {
243 policy->bundles = dst->next;
244 dst_free(dst);
247 if (del_timer(&policy->timer))
248 atomic_dec(&policy->refcnt);
250 if (atomic_read(&policy->refcnt) > 1)
251 flow_cache_flush();
253 xfrm_pol_put(policy);
256 static void xfrm_policy_gc_task(struct work_struct *work)
258 struct xfrm_policy *policy;
259 struct hlist_node *entry, *tmp;
260 struct hlist_head gc_list;
262 spin_lock_bh(&xfrm_policy_gc_lock);
263 gc_list.first = xfrm_policy_gc_list.first;
264 INIT_HLIST_HEAD(&xfrm_policy_gc_list);
265 spin_unlock_bh(&xfrm_policy_gc_lock);
267 hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
268 xfrm_policy_gc_kill(policy);
271 /* Rule must be locked. Release descentant resources, announce
272 * entry dead. The rule must be unlinked from lists to the moment.
275 static void xfrm_policy_kill(struct xfrm_policy *policy)
277 int dead;
279 write_lock_bh(&policy->lock);
280 dead = policy->dead;
281 policy->dead = 1;
282 write_unlock_bh(&policy->lock);
284 if (unlikely(dead)) {
285 WARN_ON(1);
286 return;
289 spin_lock(&xfrm_policy_gc_lock);
290 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
291 spin_unlock(&xfrm_policy_gc_lock);
293 schedule_work(&xfrm_policy_gc_work);
296 struct xfrm_policy_hash {
297 struct hlist_head *table;
298 unsigned int hmask;
301 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
302 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
303 static struct hlist_head *xfrm_policy_byidx __read_mostly;
304 static unsigned int xfrm_idx_hmask __read_mostly;
305 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
307 static inline unsigned int idx_hash(u32 index)
309 return __idx_hash(index, xfrm_idx_hmask);
312 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
314 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
315 unsigned int hash = __sel_hash(sel, family, hmask);
317 return (hash == hmask + 1 ?
318 &xfrm_policy_inexact[dir] :
319 xfrm_policy_bydst[dir].table + hash);
322 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
324 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
325 unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
327 return xfrm_policy_bydst[dir].table + hash;
330 static void xfrm_dst_hash_transfer(struct hlist_head *list,
331 struct hlist_head *ndsttable,
332 unsigned int nhashmask)
334 struct hlist_node *entry, *tmp, *entry0 = NULL;
335 struct xfrm_policy *pol;
336 unsigned int h0 = 0;
338 redo:
339 hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
340 unsigned int h;
342 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
343 pol->family, nhashmask);
344 if (!entry0) {
345 hlist_del(entry);
346 hlist_add_head(&pol->bydst, ndsttable+h);
347 h0 = h;
348 } else {
349 if (h != h0)
350 continue;
351 hlist_del(entry);
352 hlist_add_after(entry0, &pol->bydst);
354 entry0 = entry;
356 if (!hlist_empty(list)) {
357 entry0 = NULL;
358 goto redo;
362 static void xfrm_idx_hash_transfer(struct hlist_head *list,
363 struct hlist_head *nidxtable,
364 unsigned int nhashmask)
366 struct hlist_node *entry, *tmp;
367 struct xfrm_policy *pol;
369 hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
370 unsigned int h;
372 h = __idx_hash(pol->index, nhashmask);
373 hlist_add_head(&pol->byidx, nidxtable+h);
/*
 * Mask for a table with twice as many buckets as one using @old_hmask:
 * (old_hmask + 1) is the old bucket count, doubled, minus one.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int old_buckets = old_hmask + 1;

	return 2 * old_buckets - 1;
}
382 static void xfrm_bydst_resize(int dir)
384 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
385 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
386 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
387 struct hlist_head *odst = xfrm_policy_bydst[dir].table;
388 struct hlist_head *ndst = xfrm_hash_alloc(nsize);
389 int i;
391 if (!ndst)
392 return;
394 write_lock_bh(&xfrm_policy_lock);
396 for (i = hmask; i >= 0; i--)
397 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
399 xfrm_policy_bydst[dir].table = ndst;
400 xfrm_policy_bydst[dir].hmask = nhashmask;
402 write_unlock_bh(&xfrm_policy_lock);
404 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
407 static void xfrm_byidx_resize(int total)
409 unsigned int hmask = xfrm_idx_hmask;
410 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
411 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
412 struct hlist_head *oidx = xfrm_policy_byidx;
413 struct hlist_head *nidx = xfrm_hash_alloc(nsize);
414 int i;
416 if (!nidx)
417 return;
419 write_lock_bh(&xfrm_policy_lock);
421 for (i = hmask; i >= 0; i--)
422 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
424 xfrm_policy_byidx = nidx;
425 xfrm_idx_hmask = nhashmask;
427 write_unlock_bh(&xfrm_policy_lock);
429 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
432 static inline int xfrm_bydst_should_resize(int dir, int *total)
434 unsigned int cnt = xfrm_policy_count[dir];
435 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
437 if (total)
438 *total += cnt;
440 if ((hmask + 1) < xfrm_policy_hashmax &&
441 cnt > hmask)
442 return 1;
444 return 0;
447 static inline int xfrm_byidx_should_resize(int total)
449 unsigned int hmask = xfrm_idx_hmask;
451 if ((hmask + 1) < xfrm_policy_hashmax &&
452 total > hmask)
453 return 1;
455 return 0;
458 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
460 read_lock_bh(&xfrm_policy_lock);
461 si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
462 si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
463 si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
464 si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
465 si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
466 si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
467 si->spdhcnt = xfrm_idx_hmask;
468 si->spdhmcnt = xfrm_policy_hashmax;
469 read_unlock_bh(&xfrm_policy_lock);
471 EXPORT_SYMBOL(xfrm_spd_getinfo);
473 static DEFINE_MUTEX(hash_resize_mutex);
474 static void xfrm_hash_resize(struct work_struct *__unused)
476 int dir, total;
478 mutex_lock(&hash_resize_mutex);
480 total = 0;
481 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
482 if (xfrm_bydst_should_resize(dir, &total))
483 xfrm_bydst_resize(dir);
485 if (xfrm_byidx_should_resize(total))
486 xfrm_byidx_resize(total);
488 mutex_unlock(&hash_resize_mutex);
491 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
493 /* Generate new index... KAME seems to generate them ordered by cost
494 * of an absolute inpredictability of ordering of rules. This will not pass. */
495 static u32 xfrm_gen_index(u8 type, int dir)
497 static u32 idx_generator;
499 for (;;) {
500 struct hlist_node *entry;
501 struct hlist_head *list;
502 struct xfrm_policy *p;
503 u32 idx;
504 int found;
506 idx = (idx_generator | dir);
507 idx_generator += 8;
508 if (idx == 0)
509 idx = 8;
510 list = xfrm_policy_byidx + idx_hash(idx);
511 found = 0;
512 hlist_for_each_entry(p, entry, list, byidx) {
513 if (p->index == idx) {
514 found = 1;
515 break;
518 if (!found)
519 return idx;
523 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
525 u32 *p1 = (u32 *) s1;
526 u32 *p2 = (u32 *) s2;
527 int len = sizeof(struct xfrm_selector) / sizeof(u32);
528 int i;
530 for (i = 0; i < len; i++) {
531 if (p1[i] != p2[i])
532 return 1;
535 return 0;
538 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
540 struct xfrm_policy *pol;
541 struct xfrm_policy *delpol;
542 struct hlist_head *chain;
543 struct hlist_node *entry, *newpos;
544 struct dst_entry *gc_list;
546 write_lock_bh(&xfrm_policy_lock);
547 chain = policy_hash_bysel(&policy->selector, policy->family, dir);
548 delpol = NULL;
549 newpos = NULL;
550 hlist_for_each_entry(pol, entry, chain, bydst) {
551 if (pol->type == policy->type &&
552 !selector_cmp(&pol->selector, &policy->selector) &&
553 xfrm_sec_ctx_match(pol->security, policy->security) &&
554 !WARN_ON(delpol)) {
555 if (excl) {
556 write_unlock_bh(&xfrm_policy_lock);
557 return -EEXIST;
559 delpol = pol;
560 if (policy->priority > pol->priority)
561 continue;
562 } else if (policy->priority >= pol->priority) {
563 newpos = &pol->bydst;
564 continue;
566 if (delpol)
567 break;
569 if (newpos)
570 hlist_add_after(newpos, &policy->bydst);
571 else
572 hlist_add_head(&policy->bydst, chain);
573 xfrm_pol_hold(policy);
574 xfrm_policy_count[dir]++;
575 atomic_inc(&flow_cache_genid);
576 if (delpol) {
577 hlist_del(&delpol->bydst);
578 hlist_del(&delpol->byidx);
579 xfrm_policy_count[dir]--;
581 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
582 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
583 policy->curlft.add_time = get_seconds();
584 policy->curlft.use_time = 0;
585 if (!mod_timer(&policy->timer, jiffies + HZ))
586 xfrm_pol_hold(policy);
587 write_unlock_bh(&xfrm_policy_lock);
589 if (delpol)
590 xfrm_policy_kill(delpol);
591 else if (xfrm_bydst_should_resize(dir, NULL))
592 schedule_work(&xfrm_hash_work);
594 read_lock_bh(&xfrm_policy_lock);
595 gc_list = NULL;
596 entry = &policy->bydst;
597 hlist_for_each_entry_continue(policy, entry, bydst) {
598 struct dst_entry *dst;
600 write_lock(&policy->lock);
601 dst = policy->bundles;
602 if (dst) {
603 struct dst_entry *tail = dst;
604 while (tail->next)
605 tail = tail->next;
606 tail->next = gc_list;
607 gc_list = dst;
609 policy->bundles = NULL;
611 write_unlock(&policy->lock);
613 read_unlock_bh(&xfrm_policy_lock);
615 while (gc_list) {
616 struct dst_entry *dst = gc_list;
618 gc_list = dst->next;
619 dst_free(dst);
622 return 0;
624 EXPORT_SYMBOL(xfrm_policy_insert);
626 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
627 struct xfrm_selector *sel,
628 struct xfrm_sec_ctx *ctx, int delete,
629 int *err)
631 struct xfrm_policy *pol, *ret;
632 struct hlist_head *chain;
633 struct hlist_node *entry;
635 *err = 0;
636 write_lock_bh(&xfrm_policy_lock);
637 chain = policy_hash_bysel(sel, sel->family, dir);
638 ret = NULL;
639 hlist_for_each_entry(pol, entry, chain, bydst) {
640 if (pol->type == type &&
641 !selector_cmp(sel, &pol->selector) &&
642 xfrm_sec_ctx_match(ctx, pol->security)) {
643 xfrm_pol_hold(pol);
644 if (delete) {
645 *err = security_xfrm_policy_delete(pol);
646 if (*err) {
647 write_unlock_bh(&xfrm_policy_lock);
648 return pol;
650 hlist_del(&pol->bydst);
651 hlist_del(&pol->byidx);
652 xfrm_policy_count[dir]--;
654 ret = pol;
655 break;
658 write_unlock_bh(&xfrm_policy_lock);
660 if (ret && delete) {
661 atomic_inc(&flow_cache_genid);
662 xfrm_policy_kill(ret);
664 return ret;
666 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
668 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
669 int *err)
671 struct xfrm_policy *pol, *ret;
672 struct hlist_head *chain;
673 struct hlist_node *entry;
675 *err = -ENOENT;
676 if (xfrm_policy_id2dir(id) != dir)
677 return NULL;
679 *err = 0;
680 write_lock_bh(&xfrm_policy_lock);
681 chain = xfrm_policy_byidx + idx_hash(id);
682 ret = NULL;
683 hlist_for_each_entry(pol, entry, chain, byidx) {
684 if (pol->type == type && pol->index == id) {
685 xfrm_pol_hold(pol);
686 if (delete) {
687 *err = security_xfrm_policy_delete(pol);
688 if (*err) {
689 write_unlock_bh(&xfrm_policy_lock);
690 return pol;
692 hlist_del(&pol->bydst);
693 hlist_del(&pol->byidx);
694 xfrm_policy_count[dir]--;
696 ret = pol;
697 break;
700 write_unlock_bh(&xfrm_policy_lock);
702 if (ret && delete) {
703 atomic_inc(&flow_cache_genid);
704 xfrm_policy_kill(ret);
706 return ret;
708 EXPORT_SYMBOL(xfrm_policy_byid);
710 #ifdef CONFIG_SECURITY_NETWORK_XFRM
711 static inline int
712 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
714 int dir, err = 0;
716 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
717 struct xfrm_policy *pol;
718 struct hlist_node *entry;
719 int i;
721 hlist_for_each_entry(pol, entry,
722 &xfrm_policy_inexact[dir], bydst) {
723 if (pol->type != type)
724 continue;
725 err = security_xfrm_policy_delete(pol);
726 if (err) {
727 xfrm_audit_policy_delete(pol, 0,
728 audit_info->loginuid,
729 audit_info->secid);
730 return err;
733 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
734 hlist_for_each_entry(pol, entry,
735 xfrm_policy_bydst[dir].table + i,
736 bydst) {
737 if (pol->type != type)
738 continue;
739 err = security_xfrm_policy_delete(pol);
740 if (err) {
741 xfrm_audit_policy_delete(pol, 0,
742 audit_info->loginuid,
743 audit_info->secid);
744 return err;
749 return err;
751 #else
752 static inline int
753 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
755 return 0;
757 #endif
759 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
761 int dir, err = 0;
763 write_lock_bh(&xfrm_policy_lock);
765 err = xfrm_policy_flush_secctx_check(type, audit_info);
766 if (err)
767 goto out;
769 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
770 struct xfrm_policy *pol;
771 struct hlist_node *entry;
772 int i, killed;
774 killed = 0;
775 again1:
776 hlist_for_each_entry(pol, entry,
777 &xfrm_policy_inexact[dir], bydst) {
778 if (pol->type != type)
779 continue;
780 hlist_del(&pol->bydst);
781 hlist_del(&pol->byidx);
782 write_unlock_bh(&xfrm_policy_lock);
784 xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
785 audit_info->secid);
787 xfrm_policy_kill(pol);
788 killed++;
790 write_lock_bh(&xfrm_policy_lock);
791 goto again1;
794 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
795 again2:
796 hlist_for_each_entry(pol, entry,
797 xfrm_policy_bydst[dir].table + i,
798 bydst) {
799 if (pol->type != type)
800 continue;
801 hlist_del(&pol->bydst);
802 hlist_del(&pol->byidx);
803 write_unlock_bh(&xfrm_policy_lock);
805 xfrm_audit_policy_delete(pol, 1,
806 audit_info->loginuid,
807 audit_info->secid);
808 xfrm_policy_kill(pol);
809 killed++;
811 write_lock_bh(&xfrm_policy_lock);
812 goto again2;
816 xfrm_policy_count[dir] -= killed;
818 atomic_inc(&flow_cache_genid);
819 out:
820 write_unlock_bh(&xfrm_policy_lock);
821 return err;
823 EXPORT_SYMBOL(xfrm_policy_flush);
825 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
826 void *data)
828 struct xfrm_policy *pol, *last = NULL;
829 struct hlist_node *entry;
830 int dir, last_dir = 0, count, error;
832 read_lock_bh(&xfrm_policy_lock);
833 count = 0;
835 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
836 struct hlist_head *table = xfrm_policy_bydst[dir].table;
837 int i;
839 hlist_for_each_entry(pol, entry,
840 &xfrm_policy_inexact[dir], bydst) {
841 if (pol->type != type)
842 continue;
843 if (last) {
844 error = func(last, last_dir % XFRM_POLICY_MAX,
845 count, data);
846 if (error)
847 goto out;
849 last = pol;
850 last_dir = dir;
851 count++;
853 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
854 hlist_for_each_entry(pol, entry, table + i, bydst) {
855 if (pol->type != type)
856 continue;
857 if (last) {
858 error = func(last, last_dir % XFRM_POLICY_MAX,
859 count, data);
860 if (error)
861 goto out;
863 last = pol;
864 last_dir = dir;
865 count++;
869 if (count == 0) {
870 error = -ENOENT;
871 goto out;
873 error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
874 out:
875 read_unlock_bh(&xfrm_policy_lock);
876 return error;
878 EXPORT_SYMBOL(xfrm_policy_walk);
881 * Find policy to apply to this flow.
883 * Returns 0 if policy found, else an -errno.
885 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
886 u8 type, u16 family, int dir)
888 struct xfrm_selector *sel = &pol->selector;
889 int match, ret = -ESRCH;
891 if (pol->family != family ||
892 pol->type != type)
893 return ret;
895 match = xfrm_selector_match(sel, fl, family);
896 if (match)
897 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
899 return ret;
902 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
903 u16 family, u8 dir)
905 int err;
906 struct xfrm_policy *pol, *ret;
907 xfrm_address_t *daddr, *saddr;
908 struct hlist_node *entry;
909 struct hlist_head *chain;
910 u32 priority = ~0U;
912 daddr = xfrm_flowi_daddr(fl, family);
913 saddr = xfrm_flowi_saddr(fl, family);
914 if (unlikely(!daddr || !saddr))
915 return NULL;
917 read_lock_bh(&xfrm_policy_lock);
918 chain = policy_hash_direct(daddr, saddr, family, dir);
919 ret = NULL;
920 hlist_for_each_entry(pol, entry, chain, bydst) {
921 err = xfrm_policy_match(pol, fl, type, family, dir);
922 if (err) {
923 if (err == -ESRCH)
924 continue;
925 else {
926 ret = ERR_PTR(err);
927 goto fail;
929 } else {
930 ret = pol;
931 priority = ret->priority;
932 break;
935 chain = &xfrm_policy_inexact[dir];
936 hlist_for_each_entry(pol, entry, chain, bydst) {
937 err = xfrm_policy_match(pol, fl, type, family, dir);
938 if (err) {
939 if (err == -ESRCH)
940 continue;
941 else {
942 ret = ERR_PTR(err);
943 goto fail;
945 } else if (pol->priority < priority) {
946 ret = pol;
947 break;
950 if (ret)
951 xfrm_pol_hold(ret);
952 fail:
953 read_unlock_bh(&xfrm_policy_lock);
955 return ret;
958 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
959 void **objp, atomic_t **obj_refp)
961 struct xfrm_policy *pol;
962 int err = 0;
964 #ifdef CONFIG_XFRM_SUB_POLICY
965 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
966 if (IS_ERR(pol)) {
967 err = PTR_ERR(pol);
968 pol = NULL;
970 if (pol || err)
971 goto end;
972 #endif
973 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
974 if (IS_ERR(pol)) {
975 err = PTR_ERR(pol);
976 pol = NULL;
978 #ifdef CONFIG_XFRM_SUB_POLICY
979 end:
980 #endif
981 if ((*objp = (void *) pol) != NULL)
982 *obj_refp = &pol->refcnt;
983 return err;
986 static inline int policy_to_flow_dir(int dir)
988 if (XFRM_POLICY_IN == FLOW_DIR_IN &&
989 XFRM_POLICY_OUT == FLOW_DIR_OUT &&
990 XFRM_POLICY_FWD == FLOW_DIR_FWD)
991 return dir;
992 switch (dir) {
993 default:
994 case XFRM_POLICY_IN:
995 return FLOW_DIR_IN;
996 case XFRM_POLICY_OUT:
997 return FLOW_DIR_OUT;
998 case XFRM_POLICY_FWD:
999 return FLOW_DIR_FWD;
1003 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
1005 struct xfrm_policy *pol;
1007 read_lock_bh(&xfrm_policy_lock);
1008 if ((pol = sk->sk_policy[dir]) != NULL) {
1009 int match = xfrm_selector_match(&pol->selector, fl,
1010 sk->sk_family);
1011 int err = 0;
1013 if (match) {
1014 err = security_xfrm_policy_lookup(pol, fl->secid,
1015 policy_to_flow_dir(dir));
1016 if (!err)
1017 xfrm_pol_hold(pol);
1018 else if (err == -ESRCH)
1019 pol = NULL;
1020 else
1021 pol = ERR_PTR(err);
1022 } else
1023 pol = NULL;
1025 read_unlock_bh(&xfrm_policy_lock);
1026 return pol;
1029 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1031 struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1032 pol->family, dir);
1034 hlist_add_head(&pol->bydst, chain);
1035 hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1036 xfrm_policy_count[dir]++;
1037 xfrm_pol_hold(pol);
1039 if (xfrm_bydst_should_resize(dir, NULL))
1040 schedule_work(&xfrm_hash_work);
1043 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1044 int dir)
1046 if (hlist_unhashed(&pol->bydst))
1047 return NULL;
1049 hlist_del(&pol->bydst);
1050 hlist_del(&pol->byidx);
1051 xfrm_policy_count[dir]--;
1053 return pol;
1056 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1058 write_lock_bh(&xfrm_policy_lock);
1059 pol = __xfrm_policy_unlink(pol, dir);
1060 write_unlock_bh(&xfrm_policy_lock);
1061 if (pol) {
1062 if (dir < XFRM_POLICY_MAX)
1063 atomic_inc(&flow_cache_genid);
1064 xfrm_policy_kill(pol);
1065 return 0;
1067 return -ENOENT;
1069 EXPORT_SYMBOL(xfrm_policy_delete);
1071 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1073 struct xfrm_policy *old_pol;
1075 #ifdef CONFIG_XFRM_SUB_POLICY
1076 if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1077 return -EINVAL;
1078 #endif
1080 write_lock_bh(&xfrm_policy_lock);
1081 old_pol = sk->sk_policy[dir];
1082 sk->sk_policy[dir] = pol;
1083 if (pol) {
1084 pol->curlft.add_time = get_seconds();
1085 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1086 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1088 if (old_pol)
1089 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1090 write_unlock_bh(&xfrm_policy_lock);
1092 if (old_pol) {
1093 xfrm_policy_kill(old_pol);
1095 return 0;
1098 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1100 struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1102 if (newp) {
1103 newp->selector = old->selector;
1104 if (security_xfrm_policy_clone(old, newp)) {
1105 kfree(newp);
1106 return NULL; /* ENOMEM */
1108 newp->lft = old->lft;
1109 newp->curlft = old->curlft;
1110 newp->action = old->action;
1111 newp->flags = old->flags;
1112 newp->xfrm_nr = old->xfrm_nr;
1113 newp->index = old->index;
1114 newp->type = old->type;
1115 memcpy(newp->xfrm_vec, old->xfrm_vec,
1116 newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1117 write_lock_bh(&xfrm_policy_lock);
1118 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1119 write_unlock_bh(&xfrm_policy_lock);
1120 xfrm_pol_put(newp);
1122 return newp;
1125 int __xfrm_sk_clone_policy(struct sock *sk)
1127 struct xfrm_policy *p0 = sk->sk_policy[0],
1128 *p1 = sk->sk_policy[1];
1130 sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1131 if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1132 return -ENOMEM;
1133 if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1134 return -ENOMEM;
1135 return 0;
1138 static int
1139 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1140 unsigned short family)
1142 int err;
1143 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1145 if (unlikely(afinfo == NULL))
1146 return -EINVAL;
1147 err = afinfo->get_saddr(local, remote);
1148 xfrm_policy_put_afinfo(afinfo);
1149 return err;
1152 /* Resolve list of templates for the flow, given policy. */
1154 static int
1155 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1156 struct xfrm_state **xfrm,
1157 unsigned short family)
1159 int nx;
1160 int i, error;
1161 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1162 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1163 xfrm_address_t tmp;
1165 for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1166 struct xfrm_state *x;
1167 xfrm_address_t *remote = daddr;
1168 xfrm_address_t *local = saddr;
1169 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1171 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1172 tmpl->mode == XFRM_MODE_BEET) {
1173 remote = &tmpl->id.daddr;
1174 local = &tmpl->saddr;
1175 family = tmpl->encap_family;
1176 if (xfrm_addr_any(local, family)) {
1177 error = xfrm_get_saddr(&tmp, remote, family);
1178 if (error)
1179 goto fail;
1180 local = &tmp;
1184 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1186 if (x && x->km.state == XFRM_STATE_VALID) {
1187 xfrm[nx++] = x;
1188 daddr = remote;
1189 saddr = local;
1190 continue;
1192 if (x) {
1193 error = (x->km.state == XFRM_STATE_ERROR ?
1194 -EINVAL : -EAGAIN);
1195 xfrm_state_put(x);
1198 if (!tmpl->optional)
1199 goto fail;
1201 return nx;
1203 fail:
1204 for (nx--; nx>=0; nx--)
1205 xfrm_state_put(xfrm[nx]);
1206 return error;
1209 static int
1210 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1211 struct xfrm_state **xfrm,
1212 unsigned short family)
1214 struct xfrm_state *tp[XFRM_MAX_DEPTH];
1215 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1216 int cnx = 0;
1217 int error;
1218 int ret;
1219 int i;
1221 for (i = 0; i < npols; i++) {
1222 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1223 error = -ENOBUFS;
1224 goto fail;
1227 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1228 if (ret < 0) {
1229 error = ret;
1230 goto fail;
1231 } else
1232 cnx += ret;
1235 /* found states are sorted for outbound processing */
1236 if (npols > 1)
1237 xfrm_state_sort(xfrm, tpp, cnx, family);
1239 return cnx;
1241 fail:
1242 for (cnx--; cnx>=0; cnx--)
1243 xfrm_state_put(tpp[cnx]);
1244 return error;
1248 /* Check that the bundle accepts the flow and its components are
1249 * still valid.
1252 static struct dst_entry *
1253 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1255 struct dst_entry *x;
1256 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1257 if (unlikely(afinfo == NULL))
1258 return ERR_PTR(-EINVAL);
1259 x = afinfo->find_bundle(fl, policy);
1260 xfrm_policy_put_afinfo(afinfo);
1261 return x;
1264 static inline int xfrm_get_tos(struct flowi *fl, int family)
1266 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1267 int tos;
1269 if (!afinfo)
1270 return -EINVAL;
1272 tos = afinfo->get_tos(fl);
1274 xfrm_policy_put_afinfo(afinfo);
1276 return tos;
1279 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1281 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1282 struct xfrm_dst *xdst;
1284 if (!afinfo)
1285 return ERR_PTR(-EINVAL);
1287 xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1289 xfrm_policy_put_afinfo(afinfo);
1291 return xdst;
1294 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1295 int nfheader_len)
1297 struct xfrm_policy_afinfo *afinfo =
1298 xfrm_policy_get_afinfo(dst->ops->family);
1299 int err;
1301 if (!afinfo)
1302 return -EINVAL;
1304 err = afinfo->init_path(path, dst, nfheader_len);
1306 xfrm_policy_put_afinfo(afinfo);
1308 return err;
1311 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1313 struct xfrm_policy_afinfo *afinfo =
1314 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1315 int err;
1317 if (!afinfo)
1318 return -EINVAL;
1320 err = afinfo->fill_dst(xdst, dev);
1322 xfrm_policy_put_afinfo(afinfo);
1324 return err;
1327 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1328 * all the metrics... Shortly, bundle a bundle.
1331 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1332 struct xfrm_state **xfrm, int nx,
1333 struct flowi *fl,
1334 struct dst_entry *dst)
1336 unsigned long now = jiffies;
1337 struct net_device *dev;
1338 struct dst_entry *dst_prev = NULL;
1339 struct dst_entry *dst0 = NULL;
1340 int i = 0;
1341 int err;
1342 int header_len = 0;
1343 int nfheader_len = 0;
1344 int trailer_len = 0;
1345 int tos;
1346 int family = policy->selector.family;
1348 tos = xfrm_get_tos(fl, family);
1349 err = tos;
1350 if (tos < 0)
1351 goto put_states;
1353 dst_hold(dst);
1355 for (; i < nx; i++) {
1356 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1357 struct dst_entry *dst1 = &xdst->u.dst;
1359 err = PTR_ERR(xdst);
1360 if (IS_ERR(xdst)) {
1361 dst_release(dst);
1362 goto put_states;
1365 if (!dst_prev)
1366 dst0 = dst1;
1367 else {
1368 dst_prev->child = dst_clone(dst1);
1369 dst1->flags |= DST_NOHASH;
1372 xdst->route = dst;
1373 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1375 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1376 family = xfrm[i]->props.family;
1377 dst = xfrm_dst_lookup(xfrm[i], tos, family);
1378 err = PTR_ERR(dst);
1379 if (IS_ERR(dst))
1380 goto put_states;
1381 } else
1382 dst_hold(dst);
1384 dst1->xfrm = xfrm[i];
1385 xdst->genid = xfrm[i]->genid;
1387 dst1->obsolete = -1;
1388 dst1->flags |= DST_HOST;
1389 dst1->lastuse = now;
1391 dst1->input = dst_discard;
1392 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1394 dst1->next = dst_prev;
1395 dst_prev = dst1;
1397 header_len += xfrm[i]->props.header_len;
1398 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1399 nfheader_len += xfrm[i]->props.header_len;
1400 trailer_len += xfrm[i]->props.trailer_len;
1403 dst_prev->child = dst;
1404 dst0->path = dst;
1406 err = -ENODEV;
1407 dev = dst->dev;
1408 if (!dev)
1409 goto free_dst;
1411 /* Copy neighbout for reachability confirmation */
1412 dst0->neighbour = neigh_clone(dst->neighbour);
1414 xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1415 xfrm_init_pmtu(dst_prev);
1417 for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1418 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1420 err = xfrm_fill_dst(xdst, dev);
1421 if (err)
1422 goto free_dst;
1424 dst_prev->header_len = header_len;
1425 dst_prev->trailer_len = trailer_len;
1426 header_len -= xdst->u.dst.xfrm->props.header_len;
1427 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1430 out:
1431 return dst0;
1433 put_states:
1434 for (; i < nx; i++)
1435 xfrm_state_put(xfrm[i]);
1436 free_dst:
1437 if (dst0)
1438 dst_free(dst0);
1439 dst0 = ERR_PTR(err);
1440 goto out;
1443 static int inline
1444 xfrm_dst_alloc_copy(void **target, void *src, int size)
1446 if (!*target) {
1447 *target = kmalloc(size, GFP_ATOMIC);
1448 if (!*target)
1449 return -ENOMEM;
1451 memcpy(*target, src, size);
1452 return 0;
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of selector @sel in the
 * ->partner slot of the xfrm_dst behind @dst.  Without it this is a
 * no-op that returns 0.
 */
static int inline
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
	void **slot = (void **)&(bundle->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of flow @fl in the ->origin
 * slot of the xfrm_dst behind @dst.  Without it this is a no-op that
 * returns 0.
 */
static int inline
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
	void **slot = (void **)&(bundle->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
1478 static int stale_bundle(struct dst_entry *dst);
1480 /* Main function: finds/creates a bundle for given flow.
1482 * At the moment we eat a raw IP route. Mostly to speed up lookups
1483 * on interfaces with disabled IPsec.
1485 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1486 struct sock *sk, int flags)
1488 struct xfrm_policy *policy;
1489 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1490 int npols;
1491 int pol_dead;
1492 int xfrm_nr;
1493 int pi;
1494 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1495 struct dst_entry *dst, *dst_orig = *dst_p;
1496 int nx = 0;
1497 int err;
1498 u32 genid;
1499 u16 family;
1500 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1502 restart:
1503 genid = atomic_read(&flow_cache_genid);
1504 policy = NULL;
1505 for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1506 pols[pi] = NULL;
1507 npols = 0;
1508 pol_dead = 0;
1509 xfrm_nr = 0;
1511 if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1512 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1513 err = PTR_ERR(policy);
1514 if (IS_ERR(policy)) {
1515 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1516 goto dropdst;
1520 if (!policy) {
1521 /* To accelerate a bit... */
1522 if ((dst_orig->flags & DST_NOXFRM) ||
1523 !xfrm_policy_count[XFRM_POLICY_OUT])
1524 goto nopol;
1526 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1527 dir, xfrm_policy_lookup);
1528 err = PTR_ERR(policy);
1529 if (IS_ERR(policy)) {
1530 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1531 goto dropdst;
1535 if (!policy)
1536 goto nopol;
1538 family = dst_orig->ops->family;
1539 pols[0] = policy;
1540 npols ++;
1541 xfrm_nr += pols[0]->xfrm_nr;
1543 err = -ENOENT;
1544 if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1545 goto error;
1547 policy->curlft.use_time = get_seconds();
1549 switch (policy->action) {
1550 default:
1551 case XFRM_POLICY_BLOCK:
1552 /* Prohibit the flow */
1553 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1554 err = -EPERM;
1555 goto error;
1557 case XFRM_POLICY_ALLOW:
1558 #ifndef CONFIG_XFRM_SUB_POLICY
1559 if (policy->xfrm_nr == 0) {
1560 /* Flow passes not transformed. */
1561 xfrm_pol_put(policy);
1562 return 0;
1564 #endif
1566 /* Try to find matching bundle.
1568 * LATER: help from flow cache. It is optional, this
1569 * is required only for output policy.
1571 dst = xfrm_find_bundle(fl, policy, family);
1572 if (IS_ERR(dst)) {
1573 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1574 err = PTR_ERR(dst);
1575 goto error;
1578 if (dst)
1579 break;
1581 #ifdef CONFIG_XFRM_SUB_POLICY
1582 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1583 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1584 fl, family,
1585 XFRM_POLICY_OUT);
1586 if (pols[1]) {
1587 if (IS_ERR(pols[1])) {
1588 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1589 err = PTR_ERR(pols[1]);
1590 goto error;
1592 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1593 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1594 err = -EPERM;
1595 goto error;
1597 npols ++;
1598 xfrm_nr += pols[1]->xfrm_nr;
1603 * Because neither flowi nor bundle information knows about
1604 * transformation template size. On more than one policy usage
1605 * we can realize whether all of them is bypass or not after
1606 * they are searched. See above not-transformed bypass
1607 * is surrounded by non-sub policy configuration, too.
1609 if (xfrm_nr == 0) {
1610 /* Flow passes not transformed. */
1611 xfrm_pols_put(pols, npols);
1612 return 0;
1615 #endif
1616 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1618 if (unlikely(nx<0)) {
1619 err = nx;
1620 if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1621 /* EREMOTE tells the caller to generate
1622 * a one-shot blackhole route.
1624 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1625 xfrm_pol_put(policy);
1626 return -EREMOTE;
1628 if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1629 DECLARE_WAITQUEUE(wait, current);
1631 add_wait_queue(&km_waitq, &wait);
1632 set_current_state(TASK_INTERRUPTIBLE);
1633 schedule();
1634 set_current_state(TASK_RUNNING);
1635 remove_wait_queue(&km_waitq, &wait);
1637 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1639 if (nx == -EAGAIN && signal_pending(current)) {
1640 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1641 err = -ERESTART;
1642 goto error;
1644 if (nx == -EAGAIN ||
1645 genid != atomic_read(&flow_cache_genid)) {
1646 xfrm_pols_put(pols, npols);
1647 goto restart;
1649 err = nx;
1651 if (err < 0) {
1652 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1653 goto error;
1656 if (nx == 0) {
1657 /* Flow passes not transformed. */
1658 xfrm_pols_put(pols, npols);
1659 return 0;
1662 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1663 err = PTR_ERR(dst);
1664 if (IS_ERR(dst)) {
1665 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1666 goto error;
1669 for (pi = 0; pi < npols; pi++) {
1670 read_lock_bh(&pols[pi]->lock);
1671 pol_dead |= pols[pi]->dead;
1672 read_unlock_bh(&pols[pi]->lock);
1675 write_lock_bh(&policy->lock);
1676 if (unlikely(pol_dead || stale_bundle(dst))) {
1677 /* Wow! While we worked on resolving, this
1678 * policy has gone. Retry. It is not paranoia,
1679 * we just cannot enlist new bundle to dead object.
1680 * We can't enlist stable bundles either.
1682 write_unlock_bh(&policy->lock);
1683 if (dst)
1684 dst_free(dst);
1686 if (pol_dead)
1687 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1688 else
1689 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1690 err = -EHOSTUNREACH;
1691 goto error;
1694 if (npols > 1)
1695 err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1696 else
1697 err = xfrm_dst_update_origin(dst, fl);
1698 if (unlikely(err)) {
1699 write_unlock_bh(&policy->lock);
1700 if (dst)
1701 dst_free(dst);
1702 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1703 goto error;
1706 dst->next = policy->bundles;
1707 policy->bundles = dst;
1708 dst_hold(dst);
1709 write_unlock_bh(&policy->lock);
1711 *dst_p = dst;
1712 dst_release(dst_orig);
1713 xfrm_pols_put(pols, npols);
1714 return 0;
1716 error:
1717 xfrm_pols_put(pols, npols);
1718 dropdst:
1719 dst_release(dst_orig);
1720 *dst_p = NULL;
1721 return err;
1723 nopol:
1724 err = -ENOENT;
1725 if (flags & XFRM_LOOKUP_ICMP)
1726 goto dropdst;
1727 return 0;
1729 EXPORT_SYMBOL(__xfrm_lookup);
1731 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1732 struct sock *sk, int flags)
1734 int err = __xfrm_lookup(dst_p, fl, sk, flags);
1736 if (err == -EREMOTE) {
1737 dst_release(*dst_p);
1738 *dst_p = NULL;
1739 err = -EAGAIN;
1742 return err;
1744 EXPORT_SYMBOL(xfrm_lookup);
1746 static inline int
1747 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1749 struct xfrm_state *x;
1751 if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1752 return 0;
1753 x = skb->sp->xvec[idx];
1754 if (!x->type->reject)
1755 return 0;
1756 return x->type->reject(x, skb, fl);
1759 /* When skb is transformed back to its "native" form, we have to
1760 * check policy restrictions. At the moment we make this in maximally
1761 * stupid way. Shame on me. :-) Of course, connected sockets must
1762 * have policy cached at them.
1765 static inline int
1766 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1767 unsigned short family)
1769 if (xfrm_state_kern(x))
1770 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1771 return x->id.proto == tmpl->id.proto &&
1772 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1773 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1774 x->props.mode == tmpl->mode &&
1775 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1776 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1777 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1778 xfrm_state_addr_cmp(tmpl, x, family));
1782 * 0 or more than 0 is returned when validation is succeeded (either bypass
1783 * because of optional transport mode, or next index of the mathced secpath
1784 * state with the template.
1785 * -1 is returned when no matching template is found.
1786 * Otherwise "-2 - errored_index" is returned.
1788 static inline int
1789 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1790 unsigned short family)
1792 int idx = start;
1794 if (tmpl->optional) {
1795 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1796 return start;
1797 } else
1798 start = -1;
1799 for (; idx < sp->len; idx++) {
1800 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1801 return ++idx;
1802 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1803 if (start == -1)
1804 start = -2-idx;
1805 break;
1808 return start;
1811 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1812 unsigned int family, int reverse)
1814 struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1815 int err;
1817 if (unlikely(afinfo == NULL))
1818 return -EAFNOSUPPORT;
1820 afinfo->decode_session(skb, fl, reverse);
1821 err = security_xfrm_decode_session(skb, &fl->secid);
1822 xfrm_policy_put_afinfo(afinfo);
1823 return err;
1825 EXPORT_SYMBOL(__xfrm_decode_session);
1827 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1829 for (; k < sp->len; k++) {
1830 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1831 *idxp = k;
1832 return 1;
1836 return 0;
1839 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1840 unsigned short family)
1842 struct xfrm_policy *pol;
1843 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1844 int npols = 0;
1845 int xfrm_nr;
1846 int pi;
1847 int reverse;
1848 struct flowi fl;
1849 u8 fl_dir;
1850 int xerr_idx = -1;
1852 reverse = dir & ~XFRM_POLICY_MASK;
1853 dir &= XFRM_POLICY_MASK;
1854 fl_dir = policy_to_flow_dir(dir);
1856 if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1857 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1858 return 0;
1861 nf_nat_decode_session(skb, &fl, family);
1863 /* First, check used SA against their selectors. */
1864 if (skb->sp) {
1865 int i;
1867 for (i=skb->sp->len-1; i>=0; i--) {
1868 struct xfrm_state *x = skb->sp->xvec[i];
1869 if (!xfrm_selector_match(&x->sel, &fl, family)) {
1870 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1871 return 0;
1876 pol = NULL;
1877 if (sk && sk->sk_policy[dir]) {
1878 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1879 if (IS_ERR(pol)) {
1880 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1881 return 0;
1885 if (!pol)
1886 pol = flow_cache_lookup(&fl, family, fl_dir,
1887 xfrm_policy_lookup);
1889 if (IS_ERR(pol)) {
1890 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1891 return 0;
1894 if (!pol) {
1895 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1896 xfrm_secpath_reject(xerr_idx, skb, &fl);
1897 XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1898 return 0;
1900 return 1;
1903 pol->curlft.use_time = get_seconds();
1905 pols[0] = pol;
1906 npols ++;
1907 #ifdef CONFIG_XFRM_SUB_POLICY
1908 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1909 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1910 &fl, family,
1911 XFRM_POLICY_IN);
1912 if (pols[1]) {
1913 if (IS_ERR(pols[1])) {
1914 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1915 return 0;
1917 pols[1]->curlft.use_time = get_seconds();
1918 npols ++;
1921 #endif
1923 if (pol->action == XFRM_POLICY_ALLOW) {
1924 struct sec_path *sp;
1925 static struct sec_path dummy;
1926 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1927 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1928 struct xfrm_tmpl **tpp = tp;
1929 int ti = 0;
1930 int i, k;
1932 if ((sp = skb->sp) == NULL)
1933 sp = &dummy;
1935 for (pi = 0; pi < npols; pi++) {
1936 if (pols[pi] != pol &&
1937 pols[pi]->action != XFRM_POLICY_ALLOW) {
1938 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1939 goto reject;
1941 if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1942 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1943 goto reject_error;
1945 for (i = 0; i < pols[pi]->xfrm_nr; i++)
1946 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1948 xfrm_nr = ti;
1949 if (npols > 1) {
1950 xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1951 tpp = stp;
1954 /* For each tunnel xfrm, find the first matching tmpl.
1955 * For each tmpl before that, find corresponding xfrm.
1956 * Order is _important_. Later we will implement
1957 * some barriers, but at the moment barriers
1958 * are implied between each two transformations.
1960 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1961 k = xfrm_policy_ok(tpp[i], sp, k, family);
1962 if (k < 0) {
1963 if (k < -1)
1964 /* "-2 - errored_index" returned */
1965 xerr_idx = -(2+k);
1966 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1967 goto reject;
1971 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1972 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1973 goto reject;
1976 xfrm_pols_put(pols, npols);
1977 return 1;
1979 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1981 reject:
1982 xfrm_secpath_reject(xerr_idx, skb, &fl);
1983 reject_error:
1984 xfrm_pols_put(pols, npols);
1985 return 0;
1987 EXPORT_SYMBOL(__xfrm_policy_check);
1989 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1991 struct flowi fl;
1993 if (xfrm_decode_session(skb, &fl, family) < 0) {
1994 /* XXX: we should have something like FWDHDRERROR here. */
1995 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1996 return 0;
1999 return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
2001 EXPORT_SYMBOL(__xfrm_route_forward);
2003 /* Optimize later using cookies and generation ids. */
2005 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2007 /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2008 * to "-1" to force all XFRM destinations to get validated by
2009 * dst_ops->check on every use. We do this because when a
2010 * normal route referenced by an XFRM dst is obsoleted we do
2011 * not go looking around for all parent referencing XFRM dsts
2012 * so that we can invalidate them. It is just too much work.
2013 * Instead we make the checks here on every use. For example:
2015 * XFRM dst A --> IPv4 dst X
2017 * X is the "xdst->route" of A (X is also the "dst->path" of A
2018 * in this example). If X is marked obsolete, "A" will not
2019 * notice. That's what we are validating here via the
2020 * stale_bundle() check.
2022 * When a policy's bundle is pruned, we dst_free() the XFRM
2023 * dst which causes it's ->obsolete field to be set to a
2024 * positive non-zero integer. If an XFRM dst has been pruned
2025 * like this, we want to force a new route lookup.
2027 if (dst->obsolete < 0 && !stale_bundle(dst))
2028 return dst;
2030 return NULL;
2033 static int stale_bundle(struct dst_entry *dst)
2035 return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2038 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2040 while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2041 dst->dev = dev->nd_net->loopback_dev;
2042 dev_hold(dst->dev);
2043 dev_put(dev);
2046 EXPORT_SYMBOL(xfrm_dst_ifdown);
/*
 * link_failure handler installed into the dst_ops of xfrm families;
 * intentionally does nothing.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
}
2054 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2056 if (dst) {
2057 if (dst->obsolete) {
2058 dst_release(dst);
2059 dst = NULL;
2062 return dst;
2065 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2067 struct dst_entry *dst, **dstp;
2069 write_lock(&pol->lock);
2070 dstp = &pol->bundles;
2071 while ((dst=*dstp) != NULL) {
2072 if (func(dst)) {
2073 *dstp = dst->next;
2074 dst->next = *gc_list_p;
2075 *gc_list_p = dst;
2076 } else {
2077 dstp = &dst->next;
2080 write_unlock(&pol->lock);
2083 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2085 struct dst_entry *gc_list = NULL;
2086 int dir;
2088 read_lock_bh(&xfrm_policy_lock);
2089 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2090 struct xfrm_policy *pol;
2091 struct hlist_node *entry;
2092 struct hlist_head *table;
2093 int i;
2095 hlist_for_each_entry(pol, entry,
2096 &xfrm_policy_inexact[dir], bydst)
2097 prune_one_bundle(pol, func, &gc_list);
2099 table = xfrm_policy_bydst[dir].table;
2100 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2101 hlist_for_each_entry(pol, entry, table + i, bydst)
2102 prune_one_bundle(pol, func, &gc_list);
2105 read_unlock_bh(&xfrm_policy_lock);
2107 while (gc_list) {
2108 struct dst_entry *dst = gc_list;
2109 gc_list = dst->next;
2110 dst_free(dst);
2114 static int unused_bundle(struct dst_entry *dst)
2116 return !atomic_read(&dst->__refcnt);
2119 static void __xfrm_garbage_collect(void)
2121 xfrm_prune_bundles(unused_bundle);
2124 static int xfrm_flush_bundles(void)
2126 xfrm_prune_bundles(stale_bundle);
2127 return 0;
2130 static void xfrm_init_pmtu(struct dst_entry *dst)
2132 do {
2133 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2134 u32 pmtu, route_mtu_cached;
2136 pmtu = dst_mtu(dst->child);
2137 xdst->child_mtu_cached = pmtu;
2139 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2141 route_mtu_cached = dst_mtu(xdst->route);
2142 xdst->route_mtu_cached = route_mtu_cached;
2144 if (pmtu > route_mtu_cached)
2145 pmtu = route_mtu_cached;
2147 dst->metrics[RTAX_MTU-1] = pmtu;
2148 } while ((dst = dst->next));
2151 /* Check that the bundle accepts the flow and its components are
2152 * still valid.
2155 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2156 struct flowi *fl, int family, int strict)
2158 struct dst_entry *dst = &first->u.dst;
2159 struct xfrm_dst *last;
2160 u32 mtu;
2162 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2163 (dst->dev && !netif_running(dst->dev)))
2164 return 0;
2165 #ifdef CONFIG_XFRM_SUB_POLICY
2166 if (fl) {
2167 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2168 return 0;
2169 if (first->partner &&
2170 !xfrm_selector_match(first->partner, fl, family))
2171 return 0;
2173 #endif
2175 last = NULL;
2177 do {
2178 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2180 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2181 return 0;
2182 if (fl && pol &&
2183 !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2184 return 0;
2185 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2186 return 0;
2187 if (xdst->genid != dst->xfrm->genid)
2188 return 0;
2190 if (strict && fl &&
2191 !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2192 !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2193 return 0;
2195 mtu = dst_mtu(dst->child);
2196 if (xdst->child_mtu_cached != mtu) {
2197 last = xdst;
2198 xdst->child_mtu_cached = mtu;
2201 if (!dst_check(xdst->route, xdst->route_cookie))
2202 return 0;
2203 mtu = dst_mtu(xdst->route);
2204 if (xdst->route_mtu_cached != mtu) {
2205 last = xdst;
2206 xdst->route_mtu_cached = mtu;
2209 dst = dst->child;
2210 } while (dst->xfrm);
2212 if (likely(!last))
2213 return 1;
2215 mtu = last->child_mtu_cached;
2216 for (;;) {
2217 dst = &last->u.dst;
2219 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2220 if (mtu > last->route_mtu_cached)
2221 mtu = last->route_mtu_cached;
2222 dst->metrics[RTAX_MTU-1] = mtu;
2224 if (last == first)
2225 break;
2227 last = (struct xfrm_dst *)last->u.dst.next;
2228 last->child_mtu_cached = mtu;
2231 return 1;
2234 EXPORT_SYMBOL(xfrm_bundle_ok);
2236 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2238 int err = 0;
2239 if (unlikely(afinfo == NULL))
2240 return -EINVAL;
2241 if (unlikely(afinfo->family >= NPROTO))
2242 return -EAFNOSUPPORT;
2243 write_lock_bh(&xfrm_policy_afinfo_lock);
2244 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2245 err = -ENOBUFS;
2246 else {
2247 struct dst_ops *dst_ops = afinfo->dst_ops;
2248 if (likely(dst_ops->kmem_cachep == NULL))
2249 dst_ops->kmem_cachep = xfrm_dst_cache;
2250 if (likely(dst_ops->check == NULL))
2251 dst_ops->check = xfrm_dst_check;
2252 if (likely(dst_ops->negative_advice == NULL))
2253 dst_ops->negative_advice = xfrm_negative_advice;
2254 if (likely(dst_ops->link_failure == NULL))
2255 dst_ops->link_failure = xfrm_link_failure;
2256 if (likely(afinfo->garbage_collect == NULL))
2257 afinfo->garbage_collect = __xfrm_garbage_collect;
2258 xfrm_policy_afinfo[afinfo->family] = afinfo;
2260 write_unlock_bh(&xfrm_policy_afinfo_lock);
2261 return err;
2263 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2265 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2267 int err = 0;
2268 if (unlikely(afinfo == NULL))
2269 return -EINVAL;
2270 if (unlikely(afinfo->family >= NPROTO))
2271 return -EAFNOSUPPORT;
2272 write_lock_bh(&xfrm_policy_afinfo_lock);
2273 if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2274 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2275 err = -EINVAL;
2276 else {
2277 struct dst_ops *dst_ops = afinfo->dst_ops;
2278 xfrm_policy_afinfo[afinfo->family] = NULL;
2279 dst_ops->kmem_cachep = NULL;
2280 dst_ops->check = NULL;
2281 dst_ops->negative_advice = NULL;
2282 dst_ops->link_failure = NULL;
2283 afinfo->garbage_collect = NULL;
2286 write_unlock_bh(&xfrm_policy_afinfo_lock);
2287 return err;
2289 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2291 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2293 struct xfrm_policy_afinfo *afinfo;
2294 if (unlikely(family >= NPROTO))
2295 return NULL;
2296 read_lock(&xfrm_policy_afinfo_lock);
2297 afinfo = xfrm_policy_afinfo[family];
2298 if (unlikely(!afinfo))
2299 read_unlock(&xfrm_policy_afinfo_lock);
2300 return afinfo;
2303 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2305 read_unlock(&xfrm_policy_afinfo_lock);
2308 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2310 struct net_device *dev = ptr;
2312 if (dev->nd_net != &init_net)
2313 return NOTIFY_DONE;
2315 switch (event) {
2316 case NETDEV_DOWN:
2317 xfrm_flush_bundles();
2319 return NOTIFY_DONE;
2322 static struct notifier_block xfrm_dev_notifier = {
2323 xfrm_dev_event,
2324 NULL,
2328 #ifdef CONFIG_XFRM_STATISTICS
2329 static int __init xfrm_statistics_init(void)
2331 if (snmp_mib_init((void **)xfrm_statistics,
2332 sizeof(struct linux_xfrm_mib)) < 0)
2333 return -ENOMEM;
2334 return 0;
2336 #endif
2338 static void __init xfrm_policy_init(void)
2340 unsigned int hmask, sz;
2341 int dir;
2343 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2344 sizeof(struct xfrm_dst),
2345 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2346 NULL);
2348 hmask = 8 - 1;
2349 sz = (hmask+1) * sizeof(struct hlist_head);
2351 xfrm_policy_byidx = xfrm_hash_alloc(sz);
2352 xfrm_idx_hmask = hmask;
2353 if (!xfrm_policy_byidx)
2354 panic("XFRM: failed to allocate byidx hash\n");
2356 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2357 struct xfrm_policy_hash *htab;
2359 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2361 htab = &xfrm_policy_bydst[dir];
2362 htab->table = xfrm_hash_alloc(sz);
2363 htab->hmask = hmask;
2364 if (!htab->table)
2365 panic("XFRM: failed to allocate bydst hash\n");
2368 INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2369 register_netdevice_notifier(&xfrm_dev_notifier);
2372 void __init xfrm_init(void)
2374 #ifdef CONFIG_XFRM_STATISTICS
2375 xfrm_statistics_init();
2376 #endif
2377 xfrm_state_init();
2378 xfrm_policy_init();
2379 xfrm_input_init();
2380 #ifdef CONFIG_XFRM_STATISTICS
2381 xfrm_proc_init();
2382 #endif
2385 #ifdef CONFIG_AUDITSYSCALL
2386 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2387 struct audit_buffer *audit_buf)
2389 struct xfrm_sec_ctx *ctx = xp->security;
2390 struct xfrm_selector *sel = &xp->selector;
2392 if (ctx)
2393 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2394 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2396 switch(sel->family) {
2397 case AF_INET:
2398 audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2399 NIPQUAD(sel->saddr.a4));
2400 if (sel->prefixlen_s != 32)
2401 audit_log_format(audit_buf, " src_prefixlen=%d",
2402 sel->prefixlen_s);
2403 audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2404 NIPQUAD(sel->daddr.a4));
2405 if (sel->prefixlen_d != 32)
2406 audit_log_format(audit_buf, " dst_prefixlen=%d",
2407 sel->prefixlen_d);
2408 break;
2409 case AF_INET6:
2410 audit_log_format(audit_buf, " src=" NIP6_FMT,
2411 NIP6(*(struct in6_addr *)sel->saddr.a6));
2412 if (sel->prefixlen_s != 128)
2413 audit_log_format(audit_buf, " src_prefixlen=%d",
2414 sel->prefixlen_s);
2415 audit_log_format(audit_buf, " dst=" NIP6_FMT,
2416 NIP6(*(struct in6_addr *)sel->daddr.a6));
2417 if (sel->prefixlen_d != 128)
2418 audit_log_format(audit_buf, " dst_prefixlen=%d",
2419 sel->prefixlen_d);
2420 break;
2424 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2425 u32 auid, u32 secid)
2427 struct audit_buffer *audit_buf;
2429 audit_buf = xfrm_audit_start("SPD-add");
2430 if (audit_buf == NULL)
2431 return;
2432 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2433 audit_log_format(audit_buf, " res=%u", result);
2434 xfrm_audit_common_policyinfo(xp, audit_buf);
2435 audit_log_end(audit_buf);
2437 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2439 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2440 u32 auid, u32 secid)
2442 struct audit_buffer *audit_buf;
2444 audit_buf = xfrm_audit_start("SPD-delete");
2445 if (audit_buf == NULL)
2446 return;
2447 xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2448 audit_log_format(audit_buf, " res=%u", result);
2449 xfrm_audit_common_policyinfo(xp, audit_buf);
2450 audit_log_end(audit_buf);
2452 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2453 #endif
2455 #ifdef CONFIG_XFRM_MIGRATE
2456 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2457 struct xfrm_selector *sel_tgt)
2459 if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2460 if (sel_tgt->family == sel_cmp->family &&
2461 xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2462 sel_cmp->family) == 0 &&
2463 xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2464 sel_cmp->family) == 0 &&
2465 sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2466 sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2467 return 1;
2469 } else {
2470 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2471 return 1;
2474 return 0;
2477 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2478 u8 dir, u8 type)
2480 struct xfrm_policy *pol, *ret = NULL;
2481 struct hlist_node *entry;
2482 struct hlist_head *chain;
2483 u32 priority = ~0U;
2485 read_lock_bh(&xfrm_policy_lock);
2486 chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2487 hlist_for_each_entry(pol, entry, chain, bydst) {
2488 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2489 pol->type == type) {
2490 ret = pol;
2491 priority = ret->priority;
2492 break;
2495 chain = &xfrm_policy_inexact[dir];
2496 hlist_for_each_entry(pol, entry, chain, bydst) {
2497 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2498 pol->type == type &&
2499 pol->priority < priority) {
2500 ret = pol;
2501 break;
2505 if (ret)
2506 xfrm_pol_hold(ret);
2508 read_unlock_bh(&xfrm_policy_lock);
2510 return ret;
2513 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2515 int match = 0;
2517 if (t->mode == m->mode && t->id.proto == m->proto &&
2518 (m->reqid == 0 || t->reqid == m->reqid)) {
2519 switch (t->mode) {
2520 case XFRM_MODE_TUNNEL:
2521 case XFRM_MODE_BEET:
2522 if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2523 m->old_family) == 0 &&
2524 xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2525 m->old_family) == 0) {
2526 match = 1;
2528 break;
2529 case XFRM_MODE_TRANSPORT:
2530 /* in case of transport mode, template does not store
2531 any IP addresses, hence we just compare mode and
2532 protocol */
2533 match = 1;
2534 break;
2535 default:
2536 break;
2539 return match;
2542 /* update endpoint address(es) of template(s) */
2543 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2544 struct xfrm_migrate *m, int num_migrate)
2546 struct xfrm_migrate *mp;
2547 struct dst_entry *dst;
2548 int i, j, n = 0;
2550 write_lock_bh(&pol->lock);
2551 if (unlikely(pol->dead)) {
2552 /* target policy has been deleted */
2553 write_unlock_bh(&pol->lock);
2554 return -ENOENT;
2557 for (i = 0; i < pol->xfrm_nr; i++) {
2558 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2559 if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2560 continue;
2561 n++;
2562 if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2563 pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2564 continue;
2565 /* update endpoints */
2566 memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2567 sizeof(pol->xfrm_vec[i].id.daddr));
2568 memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2569 sizeof(pol->xfrm_vec[i].saddr));
2570 pol->xfrm_vec[i].encap_family = mp->new_family;
2571 /* flush bundles */
2572 while ((dst = pol->bundles) != NULL) {
2573 pol->bundles = dst->next;
2574 dst_free(dst);
2579 write_unlock_bh(&pol->lock);
2581 if (!n)
2582 return -ENODATA;
2584 return 0;
2587 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2589 int i, j;
2591 if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2592 return -EINVAL;
2594 for (i = 0; i < num_migrate; i++) {
2595 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2596 m[i].old_family) == 0) &&
2597 (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2598 m[i].old_family) == 0))
2599 return -EINVAL;
2600 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2601 xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2602 return -EINVAL;
2604 /* check if there is any duplicated entry */
2605 for (j = i + 1; j < num_migrate; j++) {
2606 if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2607 sizeof(m[i].old_daddr)) &&
2608 !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2609 sizeof(m[i].old_saddr)) &&
2610 m[i].proto == m[j].proto &&
2611 m[i].mode == m[j].mode &&
2612 m[i].reqid == m[j].reqid &&
2613 m[i].old_family == m[j].old_family)
2614 return -EINVAL;
2618 return 0;
2621 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2622 struct xfrm_migrate *m, int num_migrate)
2624 int i, err, nx_cur = 0, nx_new = 0;
2625 struct xfrm_policy *pol = NULL;
2626 struct xfrm_state *x, *xc;
2627 struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2628 struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2629 struct xfrm_migrate *mp;
2631 if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2632 goto out;
2634 /* Stage 1 - find policy */
2635 if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2636 err = -ENOENT;
2637 goto out;
2640 /* Stage 2 - find and update state(s) */
2641 for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2642 if ((x = xfrm_migrate_state_find(mp))) {
2643 x_cur[nx_cur] = x;
2644 nx_cur++;
2645 if ((xc = xfrm_state_migrate(x, mp))) {
2646 x_new[nx_new] = xc;
2647 nx_new++;
2648 } else {
2649 err = -ENODATA;
2650 goto restore_state;
2655 /* Stage 3 - update policy */
2656 if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2657 goto restore_state;
2659 /* Stage 4 - delete old state(s) */
2660 if (nx_cur) {
2661 xfrm_states_put(x_cur, nx_cur);
2662 xfrm_states_delete(x_cur, nx_cur);
2665 /* Stage 5 - announce */
2666 km_migrate(sel, dir, type, m, num_migrate);
2668 xfrm_pol_put(pol);
2670 return 0;
2671 out:
2672 return err;
2674 restore_state:
2675 if (pol)
2676 xfrm_pol_put(pol);
2677 if (nx_cur)
2678 xfrm_states_put(x_cur, nx_cur);
2679 if (nx_new)
2680 xfrm_states_delete(x_new, nx_new);
2682 return err;
2684 EXPORT_SYMBOL(xfrm_migrate);
2685 #endif