 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 */
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <asm/uaccess.h>

#include "xfrm_hash.h"
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static DEFINE_SPINLOCK(xfrm_state_lock);
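/*
 * Illustrative sketch (not part of the original flow): how the two
 * tables divide the work.  The exact call sites live in the AH/ESP
 * input paths and in the policy output path, respectively.
 *
 *	// Input: the packet carries an SPI, so resolve via the byspi table.
 *	x = xfrm_state_lookup(&daddr, esph->spi, IPPROTO_ESP, AF_INET);
 *
 *	// Output: only flow/template data is known, so walk the bydst
 *	// chain for (daddr, reqid) and let the selector decide.
 *	x = xfrm_state_find(&daddr, &saddr, fl, tmpl, pol, &err, AF_INET);
 */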
/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;
static unsigned int xfrm_state_genid;
static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 u32 reqid,
					 unsigned short family)
{
	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
}
static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
					 xfrm_address_t *saddr,
					 unsigned short family)
{
	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
}
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
}
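/*
 * The __xfrm_*_hash helpers live in xfrm_hash.h.  As a rough sketch of
 * the idea (the exact per-family address mixing is in that header):
 *
 *	h = family ^ reqid;
 *	h ^= per-family fold of daddr/saddr bits;
 *	return (h ^ (h >> 16)) & hmask;
 *
 * Every helper masks with the caller-supplied hmask, which is why the
 * resize path below can rehash simply by passing the new mask.
 */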
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
static unsigned long xfrm_hash_new_size(void)
{
	return ((xfrm_state_hmask + 1) << 1) *
		sizeof(struct hlist_head);
}

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(void *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	nsize = xfrm_hash_new_size();
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}
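/*
 * Example of the clamping above (illustrative numbers): with HZ=1000 a
 * one-day timeout gives 86400*HZ == 86,400,000 jiffies, well below
 * MAX_SCHEDULE_TIMEOUT, so secs*HZ is returned as-is; a "never" style
 * lifetime of LONG_MAX seconds would overflow the multiplication, so
 * it is capped at MAX_SCHEDULE_TIMEOUT-1 instead.
 */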
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state *)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1, 0);

out:
	spin_unlock(&x->lock);
}
static void xfrm_replay_timer_handler(unsigned long data);
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
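/*
 * Typical lifecycle sketch (illustrative; the real callers are the key
 * managers in xfrm_user.c and af_key.c):
 *
 *	struct xfrm_state *x = xfrm_state_alloc();
 *	if (!x)
 *		return -ENOMEM;
 *	// ... fill in x->id, x->props, x->sel, algorithms ...
 *	err = xfrm_init_state(x);	// resolves x->type / x->mode
 *	if (!err)
 *		err = xfrm_state_add(x);	// links into the hash tables
 *	if (err) {
 *		x->km.state = XFRM_STATE_DEAD;
 *		__xfrm_state_put(x);	// drops the alloc reference
 *	}
 */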
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
void xfrm_state_flush(u8 proto)
{
	int i;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
		if (x->props.family != family ||
		    x->id.spi       != spi ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(daddr, saddr, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6) ||
			    !ipv6_addr_equal((struct in6_addr *)saddr,
					     (struct in6_addr *)
					     x->props.saddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
	if (use_spi)
		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
					   x->id.proto, family);
	else
		return __xfrm_state_lookup_byaddr(&x->id.daddr,
						  &x->props.saddr,
						  x->id.proto, family);
}
static void xfrm_hash_grow_check(int have_hash_collision)
{
	if (have_hash_collision &&
	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
	    xfrm_state_num > xfrm_state_hmask)
		schedule_work(&xfrm_hash_work);
}
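/*
 * Worked example of the grow heuristic: the initial table has 8 buckets
 * (hmask == 7).  Once more than 7 states are resident and an insert
 * chains behind an existing entry, xfrm_hash_work doubles the table to
 * 16 buckets, and so on up to xfrm_state_hashmax; the
 * have_hash_collision hint is simply "the new entry was not alone in
 * its bucket".
 */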
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	xfrm_hash_grow_check(x->bydst.next != NULL);
}
/* xfrm_state_lock is held */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	struct hlist_node *entry;
	unsigned int h;

	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family	== family &&
		    x->props.reqid	== reqid &&
		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
			x->genid = xfrm_state_genid;
	}
}
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		}

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(daddr, saddr, family);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);

		xfrm_state_num++;

		xfrm_hash_grow_check(x->bydst.next != NULL);
	}

	return x;
}
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		mod_timer(&x->timer, jiffies);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);
	if (err < 0)
		goto err;
	err = xfrm_state_check_space(x, skb);
err:
	return err;
}
EXPORT_SYMBOL(xfrm_state_check);
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup(daddr, spi, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
			 u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
#ifdef CONFIG_XFRM_SUB_POLICY
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
	       unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->tmpl_sort)
		err = afinfo->tmpl_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);
int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->state_sort)
		err = afinfo->state_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
#endif
/* Silly enough, but I'm lazy to build resolution list */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static DEFINE_SPINLOCK(acqseq_lock);

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);
void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		u32 low = ntohl(minspi);
		u32 high = ntohl(maxspi);
		for (h=0; h<high-low+1; h++) {
			spi = low + net_random()%(high-low+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state *)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
}
int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
	u32 diff;
	u32 seq = ntohl(net_seq);

	if (unlikely(seq == 0))
		return -EINVAL;

	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
	u32 diff;
	u32 seq = ntohl(net_seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
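/*
 * Worked example with replay_window == 32: after xfrm_replay_advance()
 * has seen seq 50, replay.seq == 50 and bit 0 of replay.bitmap stands
 * for 50 itself.  An incoming seq 45 gives diff == 5; bit 5 clear means
 * "late but new", so it passes xfrm_replay_check() and advance() then
 * sets bit 5.  A replayed 45 now hits the bit and bumps x->stats.replay;
 * seq 10 gives diff == 40 >= 32 and is rejected as outside the window.
 */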
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(xp, dir, c);
	read_unlock(&xfrm_km_lock);
}
void km_state_notify(struct xfrm_state *x, struct km_event *c)
{
	struct xfrm_mgr *km;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify)
			km->notify(x, c);
	read_unlock(&xfrm_km_lock);
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_EXPIRE;
	km_state_notify(x, &c);

	if (hard)
		wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure
 * We are happy with one success
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL, acqret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!acqret)
			err = acqret;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_query);
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_POLEXPIRE;
	km_policy_notify(pol, dir, &c);

	if (hard)
		wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);
int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
	int err = -EINVAL;
	int ret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->report) {
			ret = km->report(proto, sel, addr);
			if (!ret)
				err = ret;
		}
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_report);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
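/*
 * Minimal registration sketch (illustrative; the my_km* names are
 * hypothetical, the field set is per struct xfrm_mgr in net/xfrm.h, and
 * af_key.c / xfrm_user.c are the in-tree users).  Callbacks left NULL
 * are skipped by the km_* fan-out helpers above, except ->acquire,
 * which km_query() calls unconditionally:
 *
 *	static struct xfrm_mgr my_km = {
 *		.id		= "my_km",
 *		.notify		= my_km_notify,
 *		.acquire	= my_km_acquire,
 *		.compile_policy	= my_km_compile_policy,
 *	};
 *
 *	xfrm_register_km(&my_km);
 *	...
 *	xfrm_unregister_km(&my_km);
 */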
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else
		xfrm_state_afinfo[afinfo->family] = afinfo;
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			xfrm_state_afinfo[afinfo->family] = NULL;
	}
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		res -= (m - mtu);
	}

	return res;
}
EXPORT_SYMBOL(xfrm_state_mtu);
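/*
 * Worked example (illustrative figures): for an ESP state whose
 * get_max_size() rounds payloads up to the cipher block size, mtu=1500
 * and header_len=32 start the loop at res=1468; if get_max_size()
 * then reports m=1504 (> mtu), res shrinks by 4 to 1464 and the next
 * pass fits, so 1464 is returned: a multiple of 4, but possibly a
 * couple of bytes below the true optimum, as the comment above notes.
 */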
int xfrm_init_state(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	int family = x->props.family;
	int err;

	err = -EAFNOSUPPORT;
	afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		goto error;

	err = 0;
	if (afinfo->init_flags)
		err = afinfo->init_flags(x);

	xfrm_state_put_afinfo(afinfo);

	if (err)
		goto error;

	err = -EPROTONOSUPPORT;
	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL)
		goto error;

	err = x->type->init_state(x);
	if (err)
		goto error;

	x->mode = xfrm_get_mode(x->props.mode, family);
	if (x->mode == NULL)
		goto error;

	x->km.state = XFRM_STATE_VALID;

error:
	return err;
}

EXPORT_SYMBOL(xfrm_init_state);
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_hash_alloc(sz);
	xfrm_state_byspi = xfrm_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}