[MLSXFRM]: Flow based matching of xfrm policy and state
[linux-2.6.22.y-op.git] / net / xfrm / xfrm_state.c
blobbe02bd981d12f51aa43def59a7aad9b34def3306
1 /*
2 * xfrm_state.c
4 * Changes:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * IPv6 support
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
23 struct sock *xfrm_nl;
24 EXPORT_SYMBOL(xfrm_nl);
26 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
27 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
29 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
32 /* Each xfrm_state may be linked to two tables:
34 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
35 2. Hash table by daddr to find what SAs exist for given
36 destination/tunnel endpoint. (output)
39 static DEFINE_SPINLOCK(xfrm_state_lock);
41 /* Hash table to find appropriate SA towards given target (endpoint
42 * of tunnel or destination of transport mode) allowed by selector.
44 * Main use is finding SA after policy selected tunnel or transport mode.
45 * Also, it can be used by ah/esp icmp error handler to find offending SA.
47 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
48 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
50 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
51 EXPORT_SYMBOL(km_waitq);
53 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
54 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
56 static struct work_struct xfrm_state_gc_work;
57 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
58 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
60 static int xfrm_state_gc_flush_bundles;
62 int __xfrm_state_delete(struct xfrm_state *x);
64 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
65 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
67 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
68 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
70 static void xfrm_state_gc_destroy(struct xfrm_state *x)
72 if (del_timer(&x->timer))
73 BUG();
74 if (del_timer(&x->rtimer))
75 BUG();
76 kfree(x->aalg);
77 kfree(x->ealg);
78 kfree(x->calg);
79 kfree(x->encap);
80 if (x->mode)
81 xfrm_put_mode(x->mode);
82 if (x->type) {
83 x->type->destructor(x);
84 xfrm_put_type(x->type);
86 security_xfrm_state_free(x);
87 kfree(x);
90 static void xfrm_state_gc_task(void *data)
92 struct xfrm_state *x;
93 struct list_head *entry, *tmp;
94 struct list_head gc_list = LIST_HEAD_INIT(gc_list);
96 if (xfrm_state_gc_flush_bundles) {
97 xfrm_state_gc_flush_bundles = 0;
98 xfrm_flush_bundles();
101 spin_lock_bh(&xfrm_state_gc_lock);
102 list_splice_init(&xfrm_state_gc_list, &gc_list);
103 spin_unlock_bh(&xfrm_state_gc_lock);
105 list_for_each_safe(entry, tmp, &gc_list) {
106 x = list_entry(entry, struct xfrm_state, bydst);
107 xfrm_state_gc_destroy(x);
109 wake_up(&km_waitq);
112 static inline unsigned long make_jiffies(long secs)
114 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
115 return MAX_SCHEDULE_TIMEOUT-1;
116 else
117 return secs*HZ;
120 static void xfrm_timer_handler(unsigned long data)
122 struct xfrm_state *x = (struct xfrm_state*)data;
123 unsigned long now = (unsigned long)xtime.tv_sec;
124 long next = LONG_MAX;
125 int warn = 0;
127 spin_lock(&x->lock);
128 if (x->km.state == XFRM_STATE_DEAD)
129 goto out;
130 if (x->km.state == XFRM_STATE_EXPIRED)
131 goto expired;
132 if (x->lft.hard_add_expires_seconds) {
133 long tmo = x->lft.hard_add_expires_seconds +
134 x->curlft.add_time - now;
135 if (tmo <= 0)
136 goto expired;
137 if (tmo < next)
138 next = tmo;
140 if (x->lft.hard_use_expires_seconds) {
141 long tmo = x->lft.hard_use_expires_seconds +
142 (x->curlft.use_time ? : now) - now;
143 if (tmo <= 0)
144 goto expired;
145 if (tmo < next)
146 next = tmo;
148 if (x->km.dying)
149 goto resched;
150 if (x->lft.soft_add_expires_seconds) {
151 long tmo = x->lft.soft_add_expires_seconds +
152 x->curlft.add_time - now;
153 if (tmo <= 0)
154 warn = 1;
155 else if (tmo < next)
156 next = tmo;
158 if (x->lft.soft_use_expires_seconds) {
159 long tmo = x->lft.soft_use_expires_seconds +
160 (x->curlft.use_time ? : now) - now;
161 if (tmo <= 0)
162 warn = 1;
163 else if (tmo < next)
164 next = tmo;
167 x->km.dying = warn;
168 if (warn)
169 km_state_expired(x, 0, 0);
170 resched:
171 if (next != LONG_MAX &&
172 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
173 xfrm_state_hold(x);
174 goto out;
176 expired:
177 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
178 x->km.state = XFRM_STATE_EXPIRED;
179 wake_up(&km_waitq);
180 next = 2;
181 goto resched;
183 if (!__xfrm_state_delete(x) && x->id.spi)
184 km_state_expired(x, 1, 0);
186 out:
187 spin_unlock(&x->lock);
188 xfrm_state_put(x);
191 static void xfrm_replay_timer_handler(unsigned long data);
193 struct xfrm_state *xfrm_state_alloc(void)
195 struct xfrm_state *x;
197 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
199 if (x) {
200 atomic_set(&x->refcnt, 1);
201 atomic_set(&x->tunnel_users, 0);
202 INIT_LIST_HEAD(&x->bydst);
203 INIT_LIST_HEAD(&x->byspi);
204 init_timer(&x->timer);
205 x->timer.function = xfrm_timer_handler;
206 x->timer.data = (unsigned long)x;
207 init_timer(&x->rtimer);
208 x->rtimer.function = xfrm_replay_timer_handler;
209 x->rtimer.data = (unsigned long)x;
210 x->curlft.add_time = (unsigned long)xtime.tv_sec;
211 x->lft.soft_byte_limit = XFRM_INF;
212 x->lft.soft_packet_limit = XFRM_INF;
213 x->lft.hard_byte_limit = XFRM_INF;
214 x->lft.hard_packet_limit = XFRM_INF;
215 x->replay_maxage = 0;
216 x->replay_maxdiff = 0;
217 spin_lock_init(&x->lock);
219 return x;
221 EXPORT_SYMBOL(xfrm_state_alloc);
223 void __xfrm_state_destroy(struct xfrm_state *x)
225 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
227 spin_lock_bh(&xfrm_state_gc_lock);
228 list_add(&x->bydst, &xfrm_state_gc_list);
229 spin_unlock_bh(&xfrm_state_gc_lock);
230 schedule_work(&xfrm_state_gc_work);
232 EXPORT_SYMBOL(__xfrm_state_destroy);
234 int __xfrm_state_delete(struct xfrm_state *x)
236 int err = -ESRCH;
238 if (x->km.state != XFRM_STATE_DEAD) {
239 x->km.state = XFRM_STATE_DEAD;
240 spin_lock(&xfrm_state_lock);
241 list_del(&x->bydst);
242 __xfrm_state_put(x);
243 if (x->id.spi) {
244 list_del(&x->byspi);
245 __xfrm_state_put(x);
247 spin_unlock(&xfrm_state_lock);
248 if (del_timer(&x->timer))
249 __xfrm_state_put(x);
250 if (del_timer(&x->rtimer))
251 __xfrm_state_put(x);
253 /* The number two in this test is the reference
254 * mentioned in the comment below plus the reference
255 * our caller holds. A larger value means that
256 * there are DSTs attached to this xfrm_state.
258 if (atomic_read(&x->refcnt) > 2) {
259 xfrm_state_gc_flush_bundles = 1;
260 schedule_work(&xfrm_state_gc_work);
263 /* All xfrm_state objects are created by xfrm_state_alloc.
264 * The xfrm_state_alloc call gives a reference, and that
265 * is what we are dropping here.
267 __xfrm_state_put(x);
268 err = 0;
271 return err;
273 EXPORT_SYMBOL(__xfrm_state_delete);
275 int xfrm_state_delete(struct xfrm_state *x)
277 int err;
279 spin_lock_bh(&x->lock);
280 err = __xfrm_state_delete(x);
281 spin_unlock_bh(&x->lock);
283 return err;
285 EXPORT_SYMBOL(xfrm_state_delete);
287 void xfrm_state_flush(u8 proto)
289 int i;
290 struct xfrm_state *x;
292 spin_lock_bh(&xfrm_state_lock);
293 for (i = 0; i < XFRM_DST_HSIZE; i++) {
294 restart:
295 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
296 if (!xfrm_state_kern(x) &&
297 (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
298 xfrm_state_hold(x);
299 spin_unlock_bh(&xfrm_state_lock);
301 xfrm_state_delete(x);
302 xfrm_state_put(x);
304 spin_lock_bh(&xfrm_state_lock);
305 goto restart;
309 spin_unlock_bh(&xfrm_state_lock);
310 wake_up(&km_waitq);
312 EXPORT_SYMBOL(xfrm_state_flush);
314 static int
315 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
316 struct xfrm_tmpl *tmpl,
317 xfrm_address_t *daddr, xfrm_address_t *saddr,
318 unsigned short family)
320 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
321 if (!afinfo)
322 return -1;
323 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
324 xfrm_state_put_afinfo(afinfo);
325 return 0;
328 struct xfrm_state *
329 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
330 struct flowi *fl, struct xfrm_tmpl *tmpl,
331 struct xfrm_policy *pol, int *err,
332 unsigned short family)
334 unsigned h = xfrm_dst_hash(daddr, family);
335 struct xfrm_state *x, *x0;
336 int acquire_in_progress = 0;
337 int error = 0;
338 struct xfrm_state *best = NULL;
339 struct xfrm_state_afinfo *afinfo;
341 afinfo = xfrm_state_get_afinfo(family);
342 if (afinfo == NULL) {
343 *err = -EAFNOSUPPORT;
344 return NULL;
347 spin_lock_bh(&xfrm_state_lock);
348 list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
349 if (x->props.family == family &&
350 x->props.reqid == tmpl->reqid &&
351 xfrm_state_addr_check(x, daddr, saddr, family) &&
352 tmpl->mode == x->props.mode &&
353 tmpl->id.proto == x->id.proto &&
354 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
355 /* Resolution logic:
356 1. There is a valid state with matching selector.
357 Done.
358 2. Valid state with inappropriate selector. Skip.
360 Entering area of "sysdeps".
362 3. If state is not valid, selector is temporary,
363 it selects only session which triggered
364 previous resolution. Key manager will do
365 something to install a state with proper
366 selector.
368 if (x->km.state == XFRM_STATE_VALID) {
369 if (!xfrm_selector_match(&x->sel, fl, family) ||
370 !security_xfrm_state_pol_flow_match(x, pol, fl))
371 continue;
372 if (!best ||
373 best->km.dying > x->km.dying ||
374 (best->km.dying == x->km.dying &&
375 best->curlft.add_time < x->curlft.add_time))
376 best = x;
377 } else if (x->km.state == XFRM_STATE_ACQ) {
378 acquire_in_progress = 1;
379 } else if (x->km.state == XFRM_STATE_ERROR ||
380 x->km.state == XFRM_STATE_EXPIRED) {
381 if (xfrm_selector_match(&x->sel, fl, family) &&
382 security_xfrm_state_pol_flow_match(x, pol, fl))
383 error = -ESRCH;
388 x = best;
389 if (!x && !error && !acquire_in_progress) {
390 if (tmpl->id.spi &&
391 (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
392 tmpl->id.proto)) != NULL) {
393 xfrm_state_put(x0);
394 error = -EEXIST;
395 goto out;
397 x = xfrm_state_alloc();
398 if (x == NULL) {
399 error = -ENOMEM;
400 goto out;
402 /* Initialize temporary selector matching only
403 * to current session. */
404 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
406 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
407 if (error) {
408 x->km.state = XFRM_STATE_DEAD;
409 xfrm_state_put(x);
410 x = NULL;
411 goto out;
414 if (km_query(x, tmpl, pol) == 0) {
415 x->km.state = XFRM_STATE_ACQ;
416 list_add_tail(&x->bydst, xfrm_state_bydst+h);
417 xfrm_state_hold(x);
418 if (x->id.spi) {
419 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
420 list_add(&x->byspi, xfrm_state_byspi+h);
421 xfrm_state_hold(x);
423 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
424 xfrm_state_hold(x);
425 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
426 add_timer(&x->timer);
427 } else {
428 x->km.state = XFRM_STATE_DEAD;
429 xfrm_state_put(x);
430 x = NULL;
431 error = -ESRCH;
434 out:
435 if (x)
436 xfrm_state_hold(x);
437 else
438 *err = acquire_in_progress ? -EAGAIN : error;
439 spin_unlock_bh(&xfrm_state_lock);
440 xfrm_state_put_afinfo(afinfo);
441 return x;
444 static void __xfrm_state_insert(struct xfrm_state *x)
446 unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
448 list_add(&x->bydst, xfrm_state_bydst+h);
449 xfrm_state_hold(x);
451 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
453 list_add(&x->byspi, xfrm_state_byspi+h);
454 xfrm_state_hold(x);
456 if (!mod_timer(&x->timer, jiffies + HZ))
457 xfrm_state_hold(x);
459 if (x->replay_maxage &&
460 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
461 xfrm_state_hold(x);
463 wake_up(&km_waitq);
466 void xfrm_state_insert(struct xfrm_state *x)
468 spin_lock_bh(&xfrm_state_lock);
469 __xfrm_state_insert(x);
470 spin_unlock_bh(&xfrm_state_lock);
472 xfrm_flush_all_bundles();
474 EXPORT_SYMBOL(xfrm_state_insert);
476 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
478 int xfrm_state_add(struct xfrm_state *x)
480 struct xfrm_state_afinfo *afinfo;
481 struct xfrm_state *x1;
482 int family;
483 int err;
485 family = x->props.family;
486 afinfo = xfrm_state_get_afinfo(family);
487 if (unlikely(afinfo == NULL))
488 return -EAFNOSUPPORT;
490 spin_lock_bh(&xfrm_state_lock);
492 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
493 if (x1) {
494 xfrm_state_put(x1);
495 x1 = NULL;
496 err = -EEXIST;
497 goto out;
500 if (x->km.seq) {
501 x1 = __xfrm_find_acq_byseq(x->km.seq);
502 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
503 xfrm_state_put(x1);
504 x1 = NULL;
508 if (!x1)
509 x1 = afinfo->find_acq(
510 x->props.mode, x->props.reqid, x->id.proto,
511 &x->id.daddr, &x->props.saddr, 0);
513 __xfrm_state_insert(x);
514 err = 0;
516 out:
517 spin_unlock_bh(&xfrm_state_lock);
518 xfrm_state_put_afinfo(afinfo);
520 if (!err)
521 xfrm_flush_all_bundles();
523 if (x1) {
524 xfrm_state_delete(x1);
525 xfrm_state_put(x1);
528 return err;
530 EXPORT_SYMBOL(xfrm_state_add);
532 int xfrm_state_update(struct xfrm_state *x)
534 struct xfrm_state_afinfo *afinfo;
535 struct xfrm_state *x1;
536 int err;
538 afinfo = xfrm_state_get_afinfo(x->props.family);
539 if (unlikely(afinfo == NULL))
540 return -EAFNOSUPPORT;
542 spin_lock_bh(&xfrm_state_lock);
543 x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
545 err = -ESRCH;
546 if (!x1)
547 goto out;
549 if (xfrm_state_kern(x1)) {
550 xfrm_state_put(x1);
551 err = -EEXIST;
552 goto out;
555 if (x1->km.state == XFRM_STATE_ACQ) {
556 __xfrm_state_insert(x);
557 x = NULL;
559 err = 0;
561 out:
562 spin_unlock_bh(&xfrm_state_lock);
563 xfrm_state_put_afinfo(afinfo);
565 if (err)
566 return err;
568 if (!x) {
569 xfrm_state_delete(x1);
570 xfrm_state_put(x1);
571 return 0;
574 err = -EINVAL;
575 spin_lock_bh(&x1->lock);
576 if (likely(x1->km.state == XFRM_STATE_VALID)) {
577 if (x->encap && x1->encap)
578 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
579 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
580 x1->km.dying = 0;
582 if (!mod_timer(&x1->timer, jiffies + HZ))
583 xfrm_state_hold(x1);
584 if (x1->curlft.use_time)
585 xfrm_state_check_expire(x1);
587 err = 0;
589 spin_unlock_bh(&x1->lock);
591 xfrm_state_put(x1);
593 return err;
595 EXPORT_SYMBOL(xfrm_state_update);
597 int xfrm_state_check_expire(struct xfrm_state *x)
599 if (!x->curlft.use_time)
600 x->curlft.use_time = (unsigned long)xtime.tv_sec;
602 if (x->km.state != XFRM_STATE_VALID)
603 return -EINVAL;
605 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
606 x->curlft.packets >= x->lft.hard_packet_limit) {
607 x->km.state = XFRM_STATE_EXPIRED;
608 if (!mod_timer(&x->timer, jiffies))
609 xfrm_state_hold(x);
610 return -EINVAL;
613 if (!x->km.dying &&
614 (x->curlft.bytes >= x->lft.soft_byte_limit ||
615 x->curlft.packets >= x->lft.soft_packet_limit)) {
616 x->km.dying = 1;
617 km_state_expired(x, 0, 0);
619 return 0;
621 EXPORT_SYMBOL(xfrm_state_check_expire);
623 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
625 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
626 - skb_headroom(skb);
628 if (nhead > 0)
629 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
631 /* Check tail too... */
632 return 0;
/* Output-path check: verify lifetime limits, then make room for the
 * transform header. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
646 struct xfrm_state *
647 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
648 unsigned short family)
650 struct xfrm_state *x;
651 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
652 if (!afinfo)
653 return NULL;
655 spin_lock_bh(&xfrm_state_lock);
656 x = afinfo->state_lookup(daddr, spi, proto);
657 spin_unlock_bh(&xfrm_state_lock);
658 xfrm_state_put_afinfo(afinfo);
659 return x;
661 EXPORT_SYMBOL(xfrm_state_lookup);
663 struct xfrm_state *
664 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
665 xfrm_address_t *daddr, xfrm_address_t *saddr,
666 int create, unsigned short family)
668 struct xfrm_state *x;
669 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
670 if (!afinfo)
671 return NULL;
673 spin_lock_bh(&xfrm_state_lock);
674 x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
675 spin_unlock_bh(&xfrm_state_lock);
676 xfrm_state_put_afinfo(afinfo);
677 return x;
679 EXPORT_SYMBOL(xfrm_find_acq);
681 /* Silly enough, but I'm lazy to build resolution list */
683 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
685 int i;
686 struct xfrm_state *x;
688 for (i = 0; i < XFRM_DST_HSIZE; i++) {
689 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
690 if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
691 xfrm_state_hold(x);
692 return x;
696 return NULL;
699 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
701 struct xfrm_state *x;
703 spin_lock_bh(&xfrm_state_lock);
704 x = __xfrm_find_acq_byseq(seq);
705 spin_unlock_bh(&xfrm_state_lock);
706 return x;
708 EXPORT_SYMBOL(xfrm_find_acq_byseq);
710 u32 xfrm_get_acqseq(void)
712 u32 res;
713 static u32 acqseq;
714 static DEFINE_SPINLOCK(acqseq_lock);
716 spin_lock_bh(&acqseq_lock);
717 res = (++acqseq ? : ++acqseq);
718 spin_unlock_bh(&acqseq_lock);
719 return res;
721 EXPORT_SYMBOL(xfrm_get_acqseq);
723 void
724 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
726 u32 h;
727 struct xfrm_state *x0;
729 if (x->id.spi)
730 return;
732 if (minspi == maxspi) {
733 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
734 if (x0) {
735 xfrm_state_put(x0);
736 return;
738 x->id.spi = minspi;
739 } else {
740 u32 spi = 0;
741 minspi = ntohl(minspi);
742 maxspi = ntohl(maxspi);
743 for (h=0; h<maxspi-minspi+1; h++) {
744 spi = minspi + net_random()%(maxspi-minspi+1);
745 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
746 if (x0 == NULL) {
747 x->id.spi = htonl(spi);
748 break;
750 xfrm_state_put(x0);
753 if (x->id.spi) {
754 spin_lock_bh(&xfrm_state_lock);
755 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
756 list_add(&x->byspi, xfrm_state_byspi+h);
757 xfrm_state_hold(x);
758 spin_unlock_bh(&xfrm_state_lock);
759 wake_up(&km_waitq);
762 EXPORT_SYMBOL(xfrm_alloc_spi);
764 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
765 void *data)
767 int i;
768 struct xfrm_state *x;
769 int count = 0;
770 int err = 0;
772 spin_lock_bh(&xfrm_state_lock);
773 for (i = 0; i < XFRM_DST_HSIZE; i++) {
774 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
775 if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
776 count++;
779 if (count == 0) {
780 err = -ENOENT;
781 goto out;
784 for (i = 0; i < XFRM_DST_HSIZE; i++) {
785 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
786 if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
787 continue;
788 err = func(x, --count, data);
789 if (err)
790 goto out;
793 out:
794 spin_unlock_bh(&xfrm_state_lock);
795 return err;
797 EXPORT_SYMBOL(xfrm_state_walk);
800 void xfrm_replay_notify(struct xfrm_state *x, int event)
802 struct km_event c;
803 /* we send notify messages in case
804 * 1. we updated on of the sequence numbers, and the seqno difference
805 * is at least x->replay_maxdiff, in this case we also update the
806 * timeout of our timer function
807 * 2. if x->replay_maxage has elapsed since last update,
808 * and there were changes
810 * The state structure must be locked!
813 switch (event) {
814 case XFRM_REPLAY_UPDATE:
815 if (x->replay_maxdiff &&
816 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
817 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
818 if (x->xflags & XFRM_TIME_DEFER)
819 event = XFRM_REPLAY_TIMEOUT;
820 else
821 return;
824 break;
826 case XFRM_REPLAY_TIMEOUT:
827 if ((x->replay.seq == x->preplay.seq) &&
828 (x->replay.bitmap == x->preplay.bitmap) &&
829 (x->replay.oseq == x->preplay.oseq)) {
830 x->xflags |= XFRM_TIME_DEFER;
831 return;
834 break;
837 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
838 c.event = XFRM_MSG_NEWAE;
839 c.data.aevent = event;
840 km_state_notify(x, &c);
842 if (x->replay_maxage &&
843 !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
844 xfrm_state_hold(x);
845 x->xflags &= ~XFRM_TIME_DEFER;
848 EXPORT_SYMBOL(xfrm_replay_notify);
850 static void xfrm_replay_timer_handler(unsigned long data)
852 struct xfrm_state *x = (struct xfrm_state*)data;
854 spin_lock(&x->lock);
856 if (x->km.state == XFRM_STATE_VALID) {
857 if (xfrm_aevent_is_on())
858 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
859 else
860 x->xflags |= XFRM_TIME_DEFER;
863 spin_unlock(&x->lock);
864 xfrm_state_put(x);
867 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
869 u32 diff;
871 seq = ntohl(seq);
873 if (unlikely(seq == 0))
874 return -EINVAL;
876 if (likely(seq > x->replay.seq))
877 return 0;
879 diff = x->replay.seq - seq;
880 if (diff >= x->props.replay_window) {
881 x->stats.replay_window++;
882 return -EINVAL;
885 if (x->replay.bitmap & (1U << diff)) {
886 x->stats.replay++;
887 return -EINVAL;
889 return 0;
891 EXPORT_SYMBOL(xfrm_replay_check);
893 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
895 u32 diff;
897 seq = ntohl(seq);
899 if (seq > x->replay.seq) {
900 diff = seq - x->replay.seq;
901 if (diff < x->props.replay_window)
902 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
903 else
904 x->replay.bitmap = 1;
905 x->replay.seq = seq;
906 } else {
907 diff = x->replay.seq - seq;
908 x->replay.bitmap |= (1U << diff);
911 if (xfrm_aevent_is_on())
912 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
914 EXPORT_SYMBOL(xfrm_replay_advance);
916 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
917 static DEFINE_RWLOCK(xfrm_km_lock);
919 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
921 struct xfrm_mgr *km;
923 read_lock(&xfrm_km_lock);
924 list_for_each_entry(km, &xfrm_km_list, list)
925 if (km->notify_policy)
926 km->notify_policy(xp, dir, c);
927 read_unlock(&xfrm_km_lock);
930 void km_state_notify(struct xfrm_state *x, struct km_event *c)
932 struct xfrm_mgr *km;
933 read_lock(&xfrm_km_lock);
934 list_for_each_entry(km, &xfrm_km_list, list)
935 if (km->notify)
936 km->notify(x, c);
937 read_unlock(&xfrm_km_lock);
940 EXPORT_SYMBOL(km_policy_notify);
941 EXPORT_SYMBOL(km_state_notify);
943 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
945 struct km_event c;
947 c.data.hard = hard;
948 c.pid = pid;
949 c.event = XFRM_MSG_EXPIRE;
950 km_state_notify(x, &c);
952 if (hard)
953 wake_up(&km_waitq);
956 EXPORT_SYMBOL(km_state_expired);
958 * We send to all registered managers regardless of failure
959 * We are happy with one success
961 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
963 int err = -EINVAL, acqret;
964 struct xfrm_mgr *km;
966 read_lock(&xfrm_km_lock);
967 list_for_each_entry(km, &xfrm_km_list, list) {
968 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
969 if (!acqret)
970 err = acqret;
972 read_unlock(&xfrm_km_lock);
973 return err;
975 EXPORT_SYMBOL(km_query);
977 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
979 int err = -EINVAL;
980 struct xfrm_mgr *km;
982 read_lock(&xfrm_km_lock);
983 list_for_each_entry(km, &xfrm_km_list, list) {
984 if (km->new_mapping)
985 err = km->new_mapping(x, ipaddr, sport);
986 if (!err)
987 break;
989 read_unlock(&xfrm_km_lock);
990 return err;
992 EXPORT_SYMBOL(km_new_mapping);
994 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
996 struct km_event c;
998 c.data.hard = hard;
999 c.pid = pid;
1000 c.event = XFRM_MSG_POLEXPIRE;
1001 km_policy_notify(pol, dir, &c);
1003 if (hard)
1004 wake_up(&km_waitq);
1006 EXPORT_SYMBOL(km_policy_expired);
1008 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1010 int err;
1011 u8 *data;
1012 struct xfrm_mgr *km;
1013 struct xfrm_policy *pol = NULL;
1015 if (optlen <= 0 || optlen > PAGE_SIZE)
1016 return -EMSGSIZE;
1018 data = kmalloc(optlen, GFP_KERNEL);
1019 if (!data)
1020 return -ENOMEM;
1022 err = -EFAULT;
1023 if (copy_from_user(data, optval, optlen))
1024 goto out;
1026 err = -EINVAL;
1027 read_lock(&xfrm_km_lock);
1028 list_for_each_entry(km, &xfrm_km_list, list) {
1029 pol = km->compile_policy(sk->sk_family, optname, data,
1030 optlen, &err);
1031 if (err >= 0)
1032 break;
1034 read_unlock(&xfrm_km_lock);
1036 if (err >= 0) {
1037 xfrm_sk_policy_insert(sk, err, pol);
1038 xfrm_pol_put(pol);
1039 err = 0;
1042 out:
1043 kfree(data);
1044 return err;
1046 EXPORT_SYMBOL(xfrm_user_policy);
1048 int xfrm_register_km(struct xfrm_mgr *km)
1050 write_lock_bh(&xfrm_km_lock);
1051 list_add_tail(&km->list, &xfrm_km_list);
1052 write_unlock_bh(&xfrm_km_lock);
1053 return 0;
1055 EXPORT_SYMBOL(xfrm_register_km);
1057 int xfrm_unregister_km(struct xfrm_mgr *km)
1059 write_lock_bh(&xfrm_km_lock);
1060 list_del(&km->list);
1061 write_unlock_bh(&xfrm_km_lock);
1062 return 0;
1064 EXPORT_SYMBOL(xfrm_unregister_km);
1066 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1068 int err = 0;
1069 if (unlikely(afinfo == NULL))
1070 return -EINVAL;
1071 if (unlikely(afinfo->family >= NPROTO))
1072 return -EAFNOSUPPORT;
1073 write_lock_bh(&xfrm_state_afinfo_lock);
1074 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1075 err = -ENOBUFS;
1076 else {
1077 afinfo->state_bydst = xfrm_state_bydst;
1078 afinfo->state_byspi = xfrm_state_byspi;
1079 xfrm_state_afinfo[afinfo->family] = afinfo;
1081 write_unlock_bh(&xfrm_state_afinfo_lock);
1082 return err;
1084 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1086 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1088 int err = 0;
1089 if (unlikely(afinfo == NULL))
1090 return -EINVAL;
1091 if (unlikely(afinfo->family >= NPROTO))
1092 return -EAFNOSUPPORT;
1093 write_lock_bh(&xfrm_state_afinfo_lock);
1094 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1095 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1096 err = -EINVAL;
1097 else {
1098 xfrm_state_afinfo[afinfo->family] = NULL;
1099 afinfo->state_byspi = NULL;
1100 afinfo->state_bydst = NULL;
1103 write_unlock_bh(&xfrm_state_afinfo_lock);
1104 return err;
1106 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1108 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1110 struct xfrm_state_afinfo *afinfo;
1111 if (unlikely(family >= NPROTO))
1112 return NULL;
1113 read_lock(&xfrm_state_afinfo_lock);
1114 afinfo = xfrm_state_afinfo[family];
1115 if (unlikely(!afinfo))
1116 read_unlock(&xfrm_state_afinfo_lock);
1117 return afinfo;
1120 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1122 read_unlock(&xfrm_state_afinfo_lock);
1125 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1126 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1128 if (x->tunnel) {
1129 struct xfrm_state *t = x->tunnel;
1131 if (atomic_read(&t->tunnel_users) == 2)
1132 xfrm_state_delete(t);
1133 atomic_dec(&t->tunnel_users);
1134 xfrm_state_put(t);
1135 x->tunnel = NULL;
1138 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1141 * This function is NOT optimal. For example, with ESP it will give an
1142 * MTU that's usually two bytes short of being optimal. However, it will
1143 * usually give an answer that's a multiple of 4 provided the input is
1144 * also a multiple of 4.
1146 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1148 int res = mtu;
1150 res -= x->props.header_len;
1152 for (;;) {
1153 int m = res;
1155 if (m < 68)
1156 return 68;
1158 spin_lock_bh(&x->lock);
1159 if (x->km.state == XFRM_STATE_VALID &&
1160 x->type && x->type->get_max_size)
1161 m = x->type->get_max_size(x, m);
1162 else
1163 m += x->props.header_len;
1164 spin_unlock_bh(&x->lock);
1166 if (m <= mtu)
1167 break;
1168 res -= (m - mtu);
1171 return res;
1174 int xfrm_init_state(struct xfrm_state *x)
1176 struct xfrm_state_afinfo *afinfo;
1177 int family = x->props.family;
1178 int err;
1180 err = -EAFNOSUPPORT;
1181 afinfo = xfrm_state_get_afinfo(family);
1182 if (!afinfo)
1183 goto error;
1185 err = 0;
1186 if (afinfo->init_flags)
1187 err = afinfo->init_flags(x);
1189 xfrm_state_put_afinfo(afinfo);
1191 if (err)
1192 goto error;
1194 err = -EPROTONOSUPPORT;
1195 x->type = xfrm_get_type(x->id.proto, family);
1196 if (x->type == NULL)
1197 goto error;
1199 err = x->type->init_state(x);
1200 if (err)
1201 goto error;
1203 x->mode = xfrm_get_mode(x->props.mode, family);
1204 if (x->mode == NULL)
1205 goto error;
1207 x->km.state = XFRM_STATE_VALID;
1209 error:
1210 return err;
1213 EXPORT_SYMBOL(xfrm_init_state);
1215 void __init xfrm_state_init(void)
1217 int i;
1219 for (i=0; i<XFRM_DST_HSIZE; i++) {
1220 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1221 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1223 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);