[XFRM]: Pull xfrm_state_bydst hash table knowledge out of afinfo.
[linux-2.6/linux-loongson.git] / net / xfrm / xfrm_state.c
blob80f5f9dc2b9e17bafe98219c2daa8ae106243584
1 /*
2 * xfrm_state.c
4 * Changes:
5 * Mitsuru KANDA @USAGI
6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * IPv6 support
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
23 struct sock *xfrm_nl;
24 EXPORT_SYMBOL(xfrm_nl);
26 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
27 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
29 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */
39 static DEFINE_SPINLOCK(xfrm_state_lock);
/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
47 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
48 static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
49 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
51 static __inline__
52 unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
54 switch (family) {
55 case AF_INET:
56 return __xfrm4_dst_hash(addr);
57 case AF_INET6:
58 return __xfrm6_dst_hash(addr);
60 return 0;
63 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
64 EXPORT_SYMBOL(km_waitq);
66 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
67 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
69 static struct work_struct xfrm_state_gc_work;
70 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
71 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
73 static int xfrm_state_gc_flush_bundles;
75 int __xfrm_state_delete(struct xfrm_state *x);
77 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
78 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
80 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
81 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
/* Final teardown of a dead state, run from the GC workqueue: both
 * timers must already be stopped (BUG otherwise), then per-state
 * allocations, the mode/type module references and the security
 * context are released before the state itself is freed. */
83 static void xfrm_state_gc_destroy(struct xfrm_state *x)
85 if (del_timer(&x->timer))
86 BUG();
87 if (del_timer(&x->rtimer))
88 BUG();
/* kfree(NULL) is a no-op, so optional members need no checks. */
89 kfree(x->aalg);
90 kfree(x->ealg);
91 kfree(x->calg);
92 kfree(x->encap);
93 kfree(x->coaddr);
94 if (x->mode)
95 xfrm_put_mode(x->mode);
96 if (x->type) {
/* Type-specific teardown before dropping the type module ref. */
97 x->type->destructor(x);
98 xfrm_put_type(x->type);
100 security_xfrm_state_free(x);
101 kfree(x);
/* GC workqueue handler: optionally flushes stale dst bundles first,
 * then splices the pending-destroy list out under the GC lock and
 * destroys each state outside it.  Wakes km_waitq so waiters can
 * observe the population change. */
104 static void xfrm_state_gc_task(void *data)
106 struct xfrm_state *x;
107 struct list_head *entry, *tmp;
108 struct list_head gc_list = LIST_HEAD_INIT(gc_list);
110 if (xfrm_state_gc_flush_bundles) {
111 xfrm_state_gc_flush_bundles = 0;
112 xfrm_flush_bundles();
/* Grab the whole pending list in O(1); destruction happens unlocked. */
115 spin_lock_bh(&xfrm_state_gc_lock);
116 list_splice_init(&xfrm_state_gc_list, &gc_list);
117 spin_unlock_bh(&xfrm_state_gc_lock);
119 list_for_each_safe(entry, tmp, &gc_list) {
/* Dead states are queued via their (now unused) bydst link. */
120 x = list_entry(entry, struct xfrm_state, bydst);
121 xfrm_state_gc_destroy(x);
123 wake_up(&km_waitq);
126 static inline unsigned long make_jiffies(long secs)
128 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
129 return MAX_SCHEDULE_TIMEOUT-1;
130 else
131 return secs*HZ;
/* Per-state lifetime timer.  Computes the nearest hard/soft expiry
 * in seconds, fires soft-expiry notifications (km.dying), and either
 * reschedules itself or deletes the state on hard expiry.  Larval
 * ACQ states without an SPI are only marked EXPIRED so a waiting
 * key manager can notice.  Drops the timer's state reference on the
 * way out. */
134 static void xfrm_timer_handler(unsigned long data)
136 struct xfrm_state *x = (struct xfrm_state*)data;
137 unsigned long now = (unsigned long)xtime.tv_sec;
138 long next = LONG_MAX;
139 int warn = 0;
141 spin_lock(&x->lock);
142 if (x->km.state == XFRM_STATE_DEAD)
143 goto out;
144 if (x->km.state == XFRM_STATE_EXPIRED)
145 goto expired;
/* Hard limits: expire immediately when past due, else track the
 * soonest deadline in 'next'. */
146 if (x->lft.hard_add_expires_seconds) {
147 long tmo = x->lft.hard_add_expires_seconds +
148 x->curlft.add_time - now;
149 if (tmo <= 0)
150 goto expired;
151 if (tmo < next)
152 next = tmo;
154 if (x->lft.hard_use_expires_seconds) {
155 long tmo = x->lft.hard_use_expires_seconds +
156 (x->curlft.use_time ? : now) - now;
157 if (tmo <= 0)
158 goto expired;
159 if (tmo < next)
160 next = tmo;
/* Soft expiry was already signalled earlier; don't repeat it. */
162 if (x->km.dying)
163 goto resched;
164 if (x->lft.soft_add_expires_seconds) {
165 long tmo = x->lft.soft_add_expires_seconds +
166 x->curlft.add_time - now;
167 if (tmo <= 0)
168 warn = 1;
169 else if (tmo < next)
170 next = tmo;
172 if (x->lft.soft_use_expires_seconds) {
173 long tmo = x->lft.soft_use_expires_seconds +
174 (x->curlft.use_time ? : now) - now;
175 if (tmo <= 0)
176 warn = 1;
177 else if (tmo < next)
178 next = tmo;
181 x->km.dying = warn;
182 if (warn)
183 km_state_expired(x, 0, 0);
184 resched:
/* mod_timer() returning 0 means the timer was idle, so re-arming
 * it needs a fresh reference. */
185 if (next != LONG_MAX &&
186 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
187 xfrm_state_hold(x);
188 goto out;
190 expired:
191 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
192 x->km.state = XFRM_STATE_EXPIRED;
193 wake_up(&km_waitq);
194 next = 2;
195 goto resched;
197 if (!__xfrm_state_delete(x) && x->id.spi)
198 km_state_expired(x, 1, 0);
200 out:
201 spin_unlock(&x->lock);
202 xfrm_state_put(x);
205 static void xfrm_replay_timer_handler(unsigned long data);
/* Allocate and initialise a new xfrm_state with one reference,
 * unlinked hash nodes, both timers set up (not armed), infinite
 * byte/packet lifetime limits and add_time stamped now.  Returns
 * NULL on allocation failure (GFP_ATOMIC). */
207 struct xfrm_state *xfrm_state_alloc(void)
209 struct xfrm_state *x;
211 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
213 if (x) {
214 atomic_set(&x->refcnt, 1);
215 atomic_set(&x->tunnel_users, 0);
216 INIT_LIST_HEAD(&x->bydst);
217 INIT_LIST_HEAD(&x->bysrc);
218 INIT_LIST_HEAD(&x->byspi);
219 init_timer(&x->timer);
220 x->timer.function = xfrm_timer_handler;
221 x->timer.data = (unsigned long)x;
222 init_timer(&x->rtimer);
223 x->rtimer.function = xfrm_replay_timer_handler;
224 x->rtimer.data = (unsigned long)x;
225 x->curlft.add_time = (unsigned long)xtime.tv_sec;
/* Time-based limits stay 0 (off) from kzalloc; count-based
 * limits default to "never". */
226 x->lft.soft_byte_limit = XFRM_INF;
227 x->lft.soft_packet_limit = XFRM_INF;
228 x->lft.hard_byte_limit = XFRM_INF;
229 x->lft.hard_packet_limit = XFRM_INF;
230 x->replay_maxage = 0;
231 x->replay_maxdiff = 0;
232 spin_lock_init(&x->lock);
234 return x;
236 EXPORT_SYMBOL(xfrm_state_alloc);
/* Last reference dropped: queue the (already DEAD) state for the GC
 * workqueue, reusing its bydst node as the queue link.  Actual
 * freeing happens in xfrm_state_gc_task(). */
238 void __xfrm_state_destroy(struct xfrm_state *x)
240 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
242 spin_lock_bh(&xfrm_state_gc_lock);
243 list_add(&x->bydst, &xfrm_state_gc_list);
244 spin_unlock_bh(&xfrm_state_gc_lock);
245 schedule_work(&xfrm_state_gc_work);
247 EXPORT_SYMBOL(__xfrm_state_destroy);
/* Core deletion: mark @x DEAD, unlink it from all hash chains
 * (dropping one reference per chain), cancel both timers (dropping
 * their references), and finally drop the allocation reference.
 * Caller holds x->lock.  Returns 0 if we performed the delete,
 * -ESRCH if the state was already dead. */
249 int __xfrm_state_delete(struct xfrm_state *x)
251 int err = -ESRCH;
253 if (x->km.state != XFRM_STATE_DEAD) {
254 x->km.state = XFRM_STATE_DEAD;
255 spin_lock(&xfrm_state_lock);
256 list_del(&x->bydst);
257 __xfrm_state_put(x);
258 list_del(&x->bysrc);
259 __xfrm_state_put(x);
260 if (x->id.spi) {
261 list_del(&x->byspi);
262 __xfrm_state_put(x);
264 spin_unlock(&xfrm_state_lock);
265 if (del_timer(&x->timer))
266 __xfrm_state_put(x);
267 if (del_timer(&x->rtimer))
268 __xfrm_state_put(x);
270 /* The number two in this test is the reference
271 * mentioned in the comment below plus the reference
272 * our caller holds. A larger value means that
273 * there are DSTs attached to this xfrm_state.
 */
275 if (atomic_read(&x->refcnt) > 2) {
276 xfrm_state_gc_flush_bundles = 1;
277 schedule_work(&xfrm_state_gc_work);
280 /* All xfrm_state objects are created by xfrm_state_alloc.
281 * The xfrm_state_alloc call gives a reference, and that
282 * is what we are dropping here.
 */
284 __xfrm_state_put(x);
285 err = 0;
288 return err;
292 int xfrm_state_delete(struct xfrm_state *x)
294 int err;
296 spin_lock_bh(&x->lock);
297 err = __xfrm_state_delete(x);
298 spin_unlock_bh(&x->lock);
300 return err;
302 EXPORT_SYMBOL(xfrm_state_delete);
/* Delete every non-kernel-owned state whose protocol matches
 * @proto.  Because xfrm_state_delete() needs the table lock
 * dropped, each hit releases the lock, deletes, re-takes it and
 * restarts the current chain from scratch. */
304 void xfrm_state_flush(u8 proto)
306 int i;
307 struct xfrm_state *x;
309 spin_lock_bh(&xfrm_state_lock);
310 for (i = 0; i < XFRM_DST_HSIZE; i++) {
311 restart:
312 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
313 if (!xfrm_state_kern(x) &&
314 xfrm_id_proto_match(x->id.proto, proto)) {
/* Hold @x so it survives the unlocked delete below. */
315 xfrm_state_hold(x);
316 spin_unlock_bh(&xfrm_state_lock);
318 xfrm_state_delete(x);
319 xfrm_state_put(x);
/* Chain may have changed while unlocked; rescan it. */
321 spin_lock_bh(&xfrm_state_lock);
322 goto restart;
326 spin_unlock_bh(&xfrm_state_lock);
327 wake_up(&km_waitq);
331 static int
332 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
333 struct xfrm_tmpl *tmpl,
334 xfrm_address_t *daddr, xfrm_address_t *saddr,
335 unsigned short family)
337 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
338 if (!afinfo)
339 return -1;
340 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
341 xfrm_state_put_afinfo(afinfo);
342 return 0;
/* Resolve template @tmpl for flow @fl: scan the bydst chain for the
 * best VALID matching SA (preferring non-dying, then newest); if
 * none exists and no acquire is already pending, create a larval
 * ACQ state, fire km_query() towards the key managers and insert it
 * with an XFRM_ACQ_EXPIRES timeout.  Returns a held state or NULL
 * with *err set (-EAGAIN while an acquire is in flight). */
345 struct xfrm_state *
346 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
347 struct flowi *fl, struct xfrm_tmpl *tmpl,
348 struct xfrm_policy *pol, int *err,
349 unsigned short family)
351 unsigned h = xfrm_dst_hash(daddr, family);
352 struct xfrm_state *x, *x0;
353 int acquire_in_progress = 0;
354 int error = 0;
355 struct xfrm_state *best = NULL;
356 struct xfrm_state_afinfo *afinfo;
358 afinfo = xfrm_state_get_afinfo(family);
359 if (afinfo == NULL) {
360 *err = -EAFNOSUPPORT;
361 return NULL;
364 spin_lock_bh(&xfrm_state_lock);
365 list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
366 if (x->props.family == family &&
367 x->props.reqid == tmpl->reqid &&
368 !(x->props.flags & XFRM_STATE_WILDRECV) &&
369 xfrm_state_addr_check(x, daddr, saddr, family) &&
370 tmpl->mode == x->props.mode &&
371 tmpl->id.proto == x->id.proto &&
372 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
373 /* Resolution logic:
374 1. There is a valid state with matching selector.
375 Done.
376 2. Valid state with inappropriate selector. Skip.
378 Entering area of "sysdeps".
380 3. If state is not valid, selector is temporary,
381 it selects only session which triggered
382 previous resolution. Key manager will do
383 something to install a state with proper
384 selector.
 */
386 if (x->km.state == XFRM_STATE_VALID) {
387 if (!xfrm_selector_match(&x->sel, fl, family) ||
388 !security_xfrm_state_pol_flow_match(x, pol, fl))
389 continue;
/* Prefer states that are not dying; among equals, the newest. */
390 if (!best ||
391 best->km.dying > x->km.dying ||
392 (best->km.dying == x->km.dying &&
393 best->curlft.add_time < x->curlft.add_time))
394 best = x;
395 } else if (x->km.state == XFRM_STATE_ACQ) {
396 acquire_in_progress = 1;
397 } else if (x->km.state == XFRM_STATE_ERROR ||
398 x->km.state == XFRM_STATE_EXPIRED) {
399 if (xfrm_selector_match(&x->sel, fl, family) &&
400 security_xfrm_state_pol_flow_match(x, pol, fl))
401 error = -ESRCH;
406 x = best;
407 if (!x && !error && !acquire_in_progress) {
/* A state with the template's exact SPI already exists but did
 * not match above: refuse to create a conflicting larval one. */
408 if (tmpl->id.spi &&
409 (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
410 tmpl->id.proto)) != NULL) {
411 xfrm_state_put(x0);
412 error = -EEXIST;
413 goto out;
415 x = xfrm_state_alloc();
416 if (x == NULL) {
417 error = -ENOMEM;
418 goto out;
420 /* Initialize temporary selector matching only
421 * to current session. */
422 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
424 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
425 if (error) {
426 x->km.state = XFRM_STATE_DEAD;
427 xfrm_state_put(x);
428 x = NULL;
429 goto out;
/* Key manager accepted the acquire: keep a larval state around
 * until it installs the real SA or the ACQ timeout fires. */
432 if (km_query(x, tmpl, pol) == 0) {
433 x->km.state = XFRM_STATE_ACQ;
434 list_add_tail(&x->bydst, xfrm_state_bydst+h);
435 xfrm_state_hold(x);
436 list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
437 xfrm_state_hold(x);
438 if (x->id.spi) {
439 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
440 list_add(&x->byspi, xfrm_state_byspi+h);
441 xfrm_state_hold(x);
443 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
444 xfrm_state_hold(x);
445 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
446 add_timer(&x->timer);
447 } else {
448 x->km.state = XFRM_STATE_DEAD;
449 xfrm_state_put(x);
450 x = NULL;
451 error = -ESRCH;
454 out:
455 if (x)
456 xfrm_state_hold(x);
457 else
458 *err = acquire_in_progress ? -EAGAIN : error;
459 spin_unlock_bh(&xfrm_state_lock);
460 xfrm_state_put_afinfo(afinfo);
461 return x;
/* Link @x into the bydst, bysrc and (for SPI-carrying protocols)
 * byspi hash chains, taking one reference per chain, arm the
 * lifetime timer and optionally the replay-notify timer, and wake
 * km_waitq.  Caller holds xfrm_state_lock. */
464 static void __xfrm_state_insert(struct xfrm_state *x)
466 unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
468 list_add(&x->bydst, xfrm_state_bydst+h);
469 xfrm_state_hold(x);
471 h = xfrm_src_hash(&x->props.saddr, x->props.family);
473 list_add(&x->bysrc, xfrm_state_bysrc+h);
474 xfrm_state_hold(x);
/* NOTE(review): linking into byspi is keyed on the protocol here,
 * while __xfrm_state_delete() unlinks byspi only when id.spi != 0 —
 * presumably SPI-protocols always have a nonzero SPI at this point;
 * confirm, otherwise a zero-SPI state would leak a chain link. */
476 if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
477 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
478 x->props.family);
480 list_add(&x->byspi, xfrm_state_byspi+h);
481 xfrm_state_hold(x);
/* Arm the lifetime timer; a 0 return means it was idle, so the
 * timer needs its own reference. */
484 if (!mod_timer(&x->timer, jiffies + HZ))
485 xfrm_state_hold(x);
487 if (x->replay_maxage &&
488 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
489 xfrm_state_hold(x);
491 wake_up(&km_waitq);
494 void xfrm_state_insert(struct xfrm_state *x)
496 spin_lock_bh(&xfrm_state_lock);
497 __xfrm_state_insert(x);
498 spin_unlock_bh(&xfrm_state_lock);
500 xfrm_flush_all_bundles();
502 EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
/* Find a larval (ACQ, SPI-less) state matching the given
 * family/mode/reqid and address pair, returning it held.  If none
 * exists and @create is set, allocate a fresh larval state with an
 * exact-host selector, arm its ACQ-expiry timer and insert it into
 * the bydst/bysrc chains.  Returns NULL when not found and
 * !@create, or on allocation failure. */
505 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
507 unsigned int h = xfrm_dst_hash(daddr, family);
508 struct xfrm_state *x;
510 list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
511 if (x->props.reqid != reqid ||
512 x->props.mode != mode ||
513 x->props.family != family ||
514 x->km.state != XFRM_STATE_ACQ ||
515 x->id.spi != 0)
516 continue;
/* Per-family exact address comparison. */
518 switch (family) {
519 case AF_INET:
520 if (x->id.daddr.a4 != daddr->a4 ||
521 x->props.saddr.a4 != saddr->a4)
522 continue;
523 break;
524 case AF_INET6:
525 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
526 (struct in6_addr *)daddr) ||
527 !ipv6_addr_equal((struct in6_addr *)
528 x->props.saddr.a6,
529 (struct in6_addr *)saddr))
530 continue;
531 break;
534 xfrm_state_hold(x);
535 return x;
538 if (!create)
539 return NULL;
541 x = xfrm_state_alloc();
542 if (likely(x)) {
/* Selector pinned to exactly this host pair (/32 resp. /128). */
543 switch (family) {
544 case AF_INET:
545 x->sel.daddr.a4 = daddr->a4;
546 x->sel.saddr.a4 = saddr->a4;
547 x->sel.prefixlen_d = 32;
548 x->sel.prefixlen_s = 32;
549 x->props.saddr.a4 = saddr->a4;
550 x->id.daddr.a4 = daddr->a4;
551 break;
553 case AF_INET6:
554 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
555 (struct in6_addr *)daddr);
556 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
557 (struct in6_addr *)saddr);
558 x->sel.prefixlen_d = 128;
559 x->sel.prefixlen_s = 128;
560 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
561 (struct in6_addr *)saddr);
562 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
563 (struct in6_addr *)daddr);
564 break;
567 x->km.state = XFRM_STATE_ACQ;
568 x->id.proto = proto;
569 x->props.family = family;
570 x->props.mode = mode;
571 x->props.reqid = reqid;
572 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
/* One reference for the armed timer, one per hash chain. */
573 xfrm_state_hold(x);
574 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
575 add_timer(&x->timer);
576 xfrm_state_hold(x);
577 list_add_tail(&x->bydst, xfrm_state_bydst+h);
578 h = xfrm_src_hash(saddr, family);
579 xfrm_state_hold(x);
580 list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
581 wake_up(&km_waitq);
584 return x;
587 static inline struct xfrm_state *
588 __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x,
589 int use_spi)
591 if (use_spi)
592 return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
593 else
594 return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto);
597 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
/* Add a fully-specified state @x to the database.  Fails with
 * -EEXIST if an equivalent state is already installed; otherwise
 * inserts @x and, for SPI-carrying protocols, deletes the larval
 * ACQ state (matched by acquire sequence number or by address
 * tuple) that @x resolves. */
599 int xfrm_state_add(struct xfrm_state *x)
601 struct xfrm_state_afinfo *afinfo;
602 struct xfrm_state *x1;
603 int family;
604 int err;
605 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
607 family = x->props.family;
608 afinfo = xfrm_state_get_afinfo(family);
609 if (unlikely(afinfo == NULL))
610 return -EAFNOSUPPORT;
612 spin_lock_bh(&xfrm_state_lock);
614 x1 = __xfrm_state_locate(afinfo, x, use_spi);
615 if (x1) {
616 xfrm_state_put(x1);
617 x1 = NULL;
618 err = -EEXIST;
619 goto out;
/* Prefer the larval state tied to our acquire sequence number,
 * but only if it targets the same destination. */
622 if (use_spi && x->km.seq) {
623 x1 = __xfrm_find_acq_byseq(x->km.seq);
624 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
625 xfrm_state_put(x1);
626 x1 = NULL;
630 if (use_spi && !x1)
631 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
632 x->id.proto,
633 &x->id.daddr, &x->props.saddr, 0);
635 __xfrm_state_insert(x);
636 err = 0;
638 out:
639 spin_unlock_bh(&xfrm_state_lock);
640 xfrm_state_put_afinfo(afinfo);
642 if (!err)
643 xfrm_flush_all_bundles();
/* The resolved larval state is deleted outside the table lock. */
645 if (x1) {
646 xfrm_state_delete(x1);
647 xfrm_state_put(x1);
650 return err;
652 EXPORT_SYMBOL(xfrm_state_add);
/* Update an existing state with the parameters of @x.  If the
 * installed state is larval (ACQ), @x simply replaces it; otherwise
 * the existing VALID state x1 absorbs @x's encap, care-of address,
 * selector and lifetime settings and gets its timer re-armed.
 * Returns -ESRCH when no matching state exists, -EEXIST for
 * kernel-owned states, -EINVAL if x1 is no longer valid. */
654 int xfrm_state_update(struct xfrm_state *x)
656 struct xfrm_state_afinfo *afinfo;
657 struct xfrm_state *x1;
658 int err;
659 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
661 afinfo = xfrm_state_get_afinfo(x->props.family);
662 if (unlikely(afinfo == NULL))
663 return -EAFNOSUPPORT;
665 spin_lock_bh(&xfrm_state_lock);
666 x1 = __xfrm_state_locate(afinfo, x, use_spi);
668 err = -ESRCH;
669 if (!x1)
670 goto out;
672 if (xfrm_state_kern(x1)) {
673 xfrm_state_put(x1);
674 err = -EEXIST;
675 goto out;
/* Larval match: install @x and remember (x == NULL) to delete x1. */
678 if (x1->km.state == XFRM_STATE_ACQ) {
679 __xfrm_state_insert(x);
680 x = NULL;
682 err = 0;
684 out:
685 spin_unlock_bh(&xfrm_state_lock);
686 xfrm_state_put_afinfo(afinfo);
688 if (err)
689 return err;
691 if (!x) {
692 xfrm_state_delete(x1);
693 xfrm_state_put(x1);
694 return 0;
/* In-place update of the existing VALID state under its own lock. */
697 err = -EINVAL;
698 spin_lock_bh(&x1->lock);
699 if (likely(x1->km.state == XFRM_STATE_VALID)) {
700 if (x->encap && x1->encap)
701 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
702 if (x->coaddr && x1->coaddr) {
703 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
705 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
706 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
707 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
708 x1->km.dying = 0;
710 if (!mod_timer(&x1->timer, jiffies + HZ))
711 xfrm_state_hold(x1);
712 if (x1->curlft.use_time)
713 xfrm_state_check_expire(x1);
715 err = 0;
717 spin_unlock_bh(&x1->lock);
719 xfrm_state_put(x1);
721 return err;
723 EXPORT_SYMBOL(xfrm_state_update);
/* Byte/packet lifetime accounting check.  Stamps first use time,
 * expires the state (via an immediate timer run) when a hard limit
 * is reached, and fires a one-shot soft-expiry notification when a
 * soft limit is crossed.  Returns 0 if the state is usable,
 * -EINVAL otherwise. */
725 int xfrm_state_check_expire(struct xfrm_state *x)
727 if (!x->curlft.use_time)
728 x->curlft.use_time = (unsigned long)xtime.tv_sec;
730 if (x->km.state != XFRM_STATE_VALID)
731 return -EINVAL;
733 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
734 x->curlft.packets >= x->lft.hard_packet_limit) {
735 x->km.state = XFRM_STATE_EXPIRED;
/* Let the timer handler perform the actual delete/notify. */
736 if (!mod_timer(&x->timer, jiffies))
737 xfrm_state_hold(x);
738 return -EINVAL;
741 if (!x->km.dying &&
742 (x->curlft.bytes >= x->lft.soft_byte_limit ||
743 x->curlft.packets >= x->lft.soft_packet_limit)) {
744 x->km.dying = 1;
745 km_state_expired(x, 0, 0);
747 return 0;
749 EXPORT_SYMBOL(xfrm_state_check_expire);
751 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
753 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
754 - skb_headroom(skb);
756 if (nhead > 0)
757 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
759 /* Check tail too... */
760 return 0;
/* Validate @x for output: refresh lifetime accounting and, if the
 * state is still usable, make sure @skb has room for its headers. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
774 struct xfrm_state *
775 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
776 unsigned short family)
778 struct xfrm_state *x;
779 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
780 if (!afinfo)
781 return NULL;
783 spin_lock_bh(&xfrm_state_lock);
784 x = afinfo->state_lookup(daddr, spi, proto);
785 spin_unlock_bh(&xfrm_state_lock);
786 xfrm_state_put_afinfo(afinfo);
787 return x;
789 EXPORT_SYMBOL(xfrm_state_lookup);
791 struct xfrm_state *
792 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
793 u8 proto, unsigned short family)
795 struct xfrm_state *x;
796 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
797 if (!afinfo)
798 return NULL;
800 spin_lock_bh(&xfrm_state_lock);
801 x = afinfo->state_lookup_byaddr(daddr, saddr, proto);
802 spin_unlock_bh(&xfrm_state_lock);
803 xfrm_state_put_afinfo(afinfo);
804 return x;
806 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
808 struct xfrm_state *
809 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
810 xfrm_address_t *daddr, xfrm_address_t *saddr,
811 int create, unsigned short family)
813 struct xfrm_state *x;
815 spin_lock_bh(&xfrm_state_lock);
816 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
817 spin_unlock_bh(&xfrm_state_lock);
819 return x;
821 EXPORT_SYMBOL(xfrm_find_acq);
823 #ifdef CONFIG_XFRM_SUB_POLICY
/* Sort @n templates from @src into @dst via the family's optional
 * tmpl_sort hook; without a hook, err stays 0 and @dst is left
 * untouched.  Runs under xfrm_state_lock. */
825 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
826 unsigned short family)
828 int err = 0;
829 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
830 if (!afinfo)
831 return -EAFNOSUPPORT;
833 spin_lock_bh(&xfrm_state_lock);
834 if (afinfo->tmpl_sort)
835 err = afinfo->tmpl_sort(dst, src, n);
836 spin_unlock_bh(&xfrm_state_lock);
837 xfrm_state_put_afinfo(afinfo);
838 return err;
840 EXPORT_SYMBOL(xfrm_tmpl_sort);
/* Same as above, for an array of states via the state_sort hook. */
843 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
844 unsigned short family)
846 int err = 0;
847 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
848 if (!afinfo)
849 return -EAFNOSUPPORT;
851 spin_lock_bh(&xfrm_state_lock);
852 if (afinfo->state_sort)
853 err = afinfo->state_sort(dst, src, n);
854 spin_unlock_bh(&xfrm_state_lock);
855 xfrm_state_put_afinfo(afinfo);
856 return err;
858 EXPORT_SYMBOL(xfrm_state_sort);
859 #endif
861 /* Silly enough, but I'm lazy to build resolution list */
863 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
865 int i;
866 struct xfrm_state *x;
868 for (i = 0; i < XFRM_DST_HSIZE; i++) {
869 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
870 if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
871 xfrm_state_hold(x);
872 return x;
876 return NULL;
879 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
881 struct xfrm_state *x;
883 spin_lock_bh(&xfrm_state_lock);
884 x = __xfrm_find_acq_byseq(seq);
885 spin_unlock_bh(&xfrm_state_lock);
886 return x;
888 EXPORT_SYMBOL(xfrm_find_acq_byseq);
890 u32 xfrm_get_acqseq(void)
892 u32 res;
893 static u32 acqseq;
894 static DEFINE_SPINLOCK(acqseq_lock);
896 spin_lock_bh(&acqseq_lock);
897 res = (++acqseq ? : ++acqseq);
898 spin_unlock_bh(&acqseq_lock);
899 return res;
901 EXPORT_SYMBOL(xfrm_get_acqseq);
/* Assign an SPI to @x from [minspi, maxspi] (network byte order).
 * With a single-value range the SPI is taken verbatim if free;
 * otherwise up to range-size random probes look for an unused
 * value.  On success the state is linked into the byspi chain and
 * km_waitq is woken.  A state that already has an SPI is left
 * alone; on total failure x->id.spi simply stays 0. */
903 void
904 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
906 u32 h;
907 struct xfrm_state *x0;
909 if (x->id.spi)
910 return;
912 if (minspi == maxspi) {
913 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
914 if (x0) {
915 xfrm_state_put(x0);
916 return;
918 x->id.spi = minspi;
919 } else {
920 u32 spi = 0;
/* Work in host byte order for the arithmetic below. */
921 minspi = ntohl(minspi);
922 maxspi = ntohl(maxspi);
923 for (h=0; h<maxspi-minspi+1; h++) {
924 spi = minspi + net_random()%(maxspi-minspi+1);
925 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
926 if (x0 == NULL) {
927 x->id.spi = htonl(spi);
928 break;
930 xfrm_state_put(x0);
933 if (x->id.spi) {
934 spin_lock_bh(&xfrm_state_lock);
935 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
936 list_add(&x->byspi, xfrm_state_byspi+h);
/* Reference for the byspi chain link. */
937 xfrm_state_hold(x);
938 spin_unlock_bh(&xfrm_state_lock);
939 wake_up(&km_waitq);
942 EXPORT_SYMBOL(xfrm_alloc_spi);
/* Invoke @func on every state whose protocol matches @proto, with a
 * countdown (last call gets 0) as the second argument.  The whole
 * walk — including the pre-count pass — runs under xfrm_state_lock,
 * so @func must not sleep or re-enter the state lock.  Returns
 * -ENOENT when nothing matches, otherwise the first nonzero value
 * returned by @func (0 on a complete walk). */
944 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
945 void *data)
947 int i;
948 struct xfrm_state *x;
949 int count = 0;
950 int err = 0;
952 spin_lock_bh(&xfrm_state_lock);
/* First pass: count matches so @func can see how many remain. */
953 for (i = 0; i < XFRM_DST_HSIZE; i++) {
954 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
955 if (xfrm_id_proto_match(x->id.proto, proto))
956 count++;
959 if (count == 0) {
960 err = -ENOENT;
961 goto out;
964 for (i = 0; i < XFRM_DST_HSIZE; i++) {
965 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
966 if (!xfrm_id_proto_match(x->id.proto, proto))
967 continue;
968 err = func(x, --count, data);
969 if (err)
970 goto out;
973 out:
974 spin_unlock_bh(&xfrm_state_lock);
975 return err;
977 EXPORT_SYMBOL(xfrm_state_walk);
/* Emit (or defer) an aevent describing the current replay counters,
 * snapshotting them into preplay and re-arming the replay timer. */
980 void xfrm_replay_notify(struct xfrm_state *x, int event)
982 struct km_event c;
983 /* we send notify messages in case
984 * 1. we updated one of the sequence numbers, and the seqno difference
985 * is at least x->replay_maxdiff, in this case we also update the
986 * timeout of our timer function
987 * 2. if x->replay_maxage has elapsed since last update,
988 * and there were changes
990 * The state structure must be locked!
 */
993 switch (event) {
994 case XFRM_REPLAY_UPDATE:
/* Change too small to report now; either promote it to the
 * deferred timeout event or wait for more movement. */
995 if (x->replay_maxdiff &&
996 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
997 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
998 if (x->xflags & XFRM_TIME_DEFER)
999 event = XFRM_REPLAY_TIMEOUT;
1000 else
1001 return;
1004 break;
1006 case XFRM_REPLAY_TIMEOUT:
/* Nothing changed since the last snapshot: defer instead. */
1007 if ((x->replay.seq == x->preplay.seq) &&
1008 (x->replay.bitmap == x->preplay.bitmap) &&
1009 (x->replay.oseq == x->preplay.oseq)) {
1010 x->xflags |= XFRM_TIME_DEFER;
1011 return;
1014 break;
1017 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1018 c.event = XFRM_MSG_NEWAE;
1019 c.data.aevent = event;
1020 km_state_notify(x, &c);
1022 if (x->replay_maxage &&
1023 !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1024 xfrm_state_hold(x);
1025 x->xflags &= ~XFRM_TIME_DEFER;
1028 EXPORT_SYMBOL(xfrm_replay_notify);
1030 static void xfrm_replay_timer_handler(unsigned long data)
1032 struct xfrm_state *x = (struct xfrm_state*)data;
1034 spin_lock(&x->lock);
1036 if (x->km.state == XFRM_STATE_VALID) {
1037 if (xfrm_aevent_is_on())
1038 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1039 else
1040 x->xflags |= XFRM_TIME_DEFER;
1043 spin_unlock(&x->lock);
1044 xfrm_state_put(x);
1047 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1049 u32 diff;
1051 seq = ntohl(seq);
1053 if (unlikely(seq == 0))
1054 return -EINVAL;
1056 if (likely(seq > x->replay.seq))
1057 return 0;
1059 diff = x->replay.seq - seq;
1060 if (diff >= x->props.replay_window) {
1061 x->stats.replay_window++;
1062 return -EINVAL;
1065 if (x->replay.bitmap & (1U << diff)) {
1066 x->stats.replay++;
1067 return -EINVAL;
1069 return 0;
1071 EXPORT_SYMBOL(xfrm_replay_check);
1073 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1075 u32 diff;
1077 seq = ntohl(seq);
1079 if (seq > x->replay.seq) {
1080 diff = seq - x->replay.seq;
1081 if (diff < x->props.replay_window)
1082 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1083 else
1084 x->replay.bitmap = 1;
1085 x->replay.seq = seq;
1086 } else {
1087 diff = x->replay.seq - seq;
1088 x->replay.bitmap |= (1U << diff);
1091 if (xfrm_aevent_is_on())
1092 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1094 EXPORT_SYMBOL(xfrm_replay_advance);
1096 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1097 static DEFINE_RWLOCK(xfrm_km_lock);
1099 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1101 struct xfrm_mgr *km;
1103 read_lock(&xfrm_km_lock);
1104 list_for_each_entry(km, &xfrm_km_list, list)
1105 if (km->notify_policy)
1106 km->notify_policy(xp, dir, c);
1107 read_unlock(&xfrm_km_lock);
1110 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1112 struct xfrm_mgr *km;
1113 read_lock(&xfrm_km_lock);
1114 list_for_each_entry(km, &xfrm_km_list, list)
1115 if (km->notify)
1116 km->notify(x, c);
1117 read_unlock(&xfrm_km_lock);
1120 EXPORT_SYMBOL(km_policy_notify);
1121 EXPORT_SYMBOL(km_state_notify);
1123 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1125 struct km_event c;
1127 c.data.hard = hard;
1128 c.pid = pid;
1129 c.event = XFRM_MSG_EXPIRE;
1130 km_state_notify(x, &c);
1132 if (hard)
1133 wake_up(&km_waitq);
1136 EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure.
 * We are happy with one success.
 */
1141 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1143 int err = -EINVAL, acqret;
1144 struct xfrm_mgr *km;
1146 read_lock(&xfrm_km_lock);
1147 list_for_each_entry(km, &xfrm_km_list, list) {
1148 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1149 if (!acqret)
1150 err = acqret;
1152 read_unlock(&xfrm_km_lock);
1153 return err;
1155 EXPORT_SYMBOL(km_query);
1157 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1159 int err = -EINVAL;
1160 struct xfrm_mgr *km;
1162 read_lock(&xfrm_km_lock);
1163 list_for_each_entry(km, &xfrm_km_list, list) {
1164 if (km->new_mapping)
1165 err = km->new_mapping(x, ipaddr, sport);
1166 if (!err)
1167 break;
1169 read_unlock(&xfrm_km_lock);
1170 return err;
1172 EXPORT_SYMBOL(km_new_mapping);
1174 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1176 struct km_event c;
1178 c.data.hard = hard;
1179 c.pid = pid;
1180 c.event = XFRM_MSG_POLEXPIRE;
1181 km_policy_notify(pol, dir, &c);
1183 if (hard)
1184 wake_up(&km_waitq);
1186 EXPORT_SYMBOL(km_policy_expired);
1188 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1190 int err = -EINVAL;
1191 int ret;
1192 struct xfrm_mgr *km;
1194 read_lock(&xfrm_km_lock);
1195 list_for_each_entry(km, &xfrm_km_list, list) {
1196 if (km->report) {
1197 ret = km->report(proto, sel, addr);
1198 if (!ret)
1199 err = ret;
1202 read_unlock(&xfrm_km_lock);
1203 return err;
1205 EXPORT_SYMBOL(km_report);
/* setsockopt() path for per-socket IPsec policy: copy the blob from
 * userspace, let each key manager try to compile it into an
 * xfrm_policy (the first non-negative result wins and encodes the
 * direction), then attach the policy to @sk.  Returns 0 on success
 * or a negative errno. */
1207 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1209 int err;
1210 u8 *data;
1211 struct xfrm_mgr *km;
1212 struct xfrm_policy *pol = NULL;
1214 if (optlen <= 0 || optlen > PAGE_SIZE)
1215 return -EMSGSIZE;
1217 data = kmalloc(optlen, GFP_KERNEL);
1218 if (!data)
1219 return -ENOMEM;
1221 err = -EFAULT;
1222 if (copy_from_user(data, optval, optlen))
1223 goto out;
1225 err = -EINVAL;
1226 read_lock(&xfrm_km_lock);
1227 list_for_each_entry(km, &xfrm_km_list, list) {
/* On success err holds the policy direction (>= 0). */
1228 pol = km->compile_policy(sk, optname, data,
1229 optlen, &err);
1230 if (err >= 0)
1231 break;
1233 read_unlock(&xfrm_km_lock);
1235 if (err >= 0) {
1236 xfrm_sk_policy_insert(sk, err, pol);
1237 xfrm_pol_put(pol);
1238 err = 0;
1241 out:
1242 kfree(data);
1243 return err;
1245 EXPORT_SYMBOL(xfrm_user_policy);
1247 int xfrm_register_km(struct xfrm_mgr *km)
1249 write_lock_bh(&xfrm_km_lock);
1250 list_add_tail(&km->list, &xfrm_km_list);
1251 write_unlock_bh(&xfrm_km_lock);
1252 return 0;
1254 EXPORT_SYMBOL(xfrm_register_km);
1256 int xfrm_unregister_km(struct xfrm_mgr *km)
1258 write_lock_bh(&xfrm_km_lock);
1259 list_del(&km->list);
1260 write_unlock_bh(&xfrm_km_lock);
1261 return 0;
1263 EXPORT_SYMBOL(xfrm_unregister_km);
/* Register per-family state operations.  Hands the afinfo pointers
 * to the shared bysrc/byspi hash tables (bydst knowledge now lives
 * entirely in this file) and publishes it in the family slot.
 * Fails with -ENOBUFS if the family is already registered. */
1265 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1267 int err = 0;
1268 if (unlikely(afinfo == NULL))
1269 return -EINVAL;
1270 if (unlikely(afinfo->family >= NPROTO))
1271 return -EAFNOSUPPORT;
1272 write_lock_bh(&xfrm_state_afinfo_lock);
1273 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1274 err = -ENOBUFS;
1275 else {
1276 afinfo->state_bysrc = xfrm_state_bysrc;
1277 afinfo->state_byspi = xfrm_state_byspi;
1278 xfrm_state_afinfo[afinfo->family] = afinfo;
1280 write_unlock_bh(&xfrm_state_afinfo_lock);
1281 return err;
1283 EXPORT_SYMBOL(xfrm_state_register_afinfo);
/* Unregister per-family state operations: clear the family slot and
 * the afinfo's table pointers, but only if @afinfo is the one that
 * is actually registered (-EINVAL otherwise). */
1285 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1287 int err = 0;
1288 if (unlikely(afinfo == NULL))
1289 return -EINVAL;
1290 if (unlikely(afinfo->family >= NPROTO))
1291 return -EAFNOSUPPORT;
1292 write_lock_bh(&xfrm_state_afinfo_lock);
1293 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1294 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1295 err = -EINVAL;
1296 else {
1297 xfrm_state_afinfo[afinfo->family] = NULL;
1298 afinfo->state_byspi = NULL;
1299 afinfo->state_bysrc = NULL;
1302 write_unlock_bh(&xfrm_state_afinfo_lock);
1303 return err;
1305 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1307 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1309 struct xfrm_state_afinfo *afinfo;
1310 if (unlikely(family >= NPROTO))
1311 return NULL;
1312 read_lock(&xfrm_state_afinfo_lock);
1313 afinfo = xfrm_state_afinfo[family];
1314 if (unlikely(!afinfo))
1315 read_unlock(&xfrm_state_afinfo_lock);
1316 return afinfo;
1319 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1321 read_unlock(&xfrm_state_afinfo_lock);
1324 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1325 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1327 if (x->tunnel) {
1328 struct xfrm_state *t = x->tunnel;
1330 if (atomic_read(&t->tunnel_users) == 2)
1331 xfrm_state_delete(t);
1332 atomic_dec(&t->tunnel_users);
1333 xfrm_state_put(t);
1334 x->tunnel = NULL;
1337 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
/*
 * This function is NOT optimal. For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal. However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
/* Compute the largest payload size such that the transformed packet
 * fits in @mtu, iterating because the type's get_max_size() is not
 * invertible.  Never returns less than 68 (minimum IPv4 MTU). */
1345 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1347 int res = mtu;
1349 res -= x->props.header_len;
1351 for (;;) {
1352 int m = res;
1354 if (m < 68)
1355 return 68;
1357 spin_lock_bh(&x->lock);
1358 if (x->km.state == XFRM_STATE_VALID &&
1359 x->type && x->type->get_max_size)
1360 m = x->type->get_max_size(x, m);
1361 else
1362 m += x->props.header_len;
1363 spin_unlock_bh(&x->lock);
1365 if (m <= mtu)
1366 break;
/* Shrink by the overshoot and try again. */
1367 res -= (m - mtu);
1370 return res;
/* Finish constructing state @x: apply per-family init_flags, bind
 * the protocol type (running its init_state) and the mode, then
 * mark the state VALID.  Returns 0 on success or a negative errno;
 * on failure the state is left not-VALID for the caller to free. */
1373 int xfrm_init_state(struct xfrm_state *x)
1375 struct xfrm_state_afinfo *afinfo;
1376 int family = x->props.family;
1377 int err;
1379 err = -EAFNOSUPPORT;
1380 afinfo = xfrm_state_get_afinfo(family);
1381 if (!afinfo)
1382 goto error;
1384 err = 0;
1385 if (afinfo->init_flags)
1386 err = afinfo->init_flags(x);
1388 xfrm_state_put_afinfo(afinfo);
1390 if (err)
1391 goto error;
1393 err = -EPROTONOSUPPORT;
/* Takes a module reference on the type; released at destroy. */
1394 x->type = xfrm_get_type(x->id.proto, family);
1395 if (x->type == NULL)
1396 goto error;
1398 err = x->type->init_state(x);
1399 if (err)
1400 goto error;
1402 x->mode = xfrm_get_mode(x->props.mode, family);
1403 if (x->mode == NULL)
1404 goto error;
1406 x->km.state = XFRM_STATE_VALID;
1408 error:
1409 return err;
1412 EXPORT_SYMBOL(xfrm_init_state);
1414 void __init xfrm_state_init(void)
1416 int i;
1418 for (i=0; i<XFRM_DST_HSIZE; i++) {
1419 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1420 INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
1421 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1423 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);