xref: /openbmc/linux/net/xfrm/xfrm_state.c (revision 776cfebb)
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *	Mitsuru KANDA @USAGI
6  * 	Kazunori MIYAZAWA @USAGI
7  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  * 		IPv6 support
9  * 	YOSHIFUJI Hideaki @USAGI
10  * 		Split up af-specific functions
11  *	Derek Atkins <derek@ihtfp.com>
12  *		Add UDP Encapsulation
13  *
14  */
15 
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22 
23 /* Each xfrm_state may be linked to two tables:
24 
25    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
26    2. Hash table by daddr to find what SAs exist for given
27       destination/tunnel endpoint. (output)
28  */
29 
30 static DEFINE_SPINLOCK(xfrm_state_lock);
31 
32 /* Hash table to find appropriate SA towards given target (endpoint
33  * of tunnel or destination of transport mode) allowed by selector.
34  *
35  * Main use is finding SA after policy selected tunnel or transport mode.
36  * Also, it can be used by ah/esp icmp error handler to find offending SA.
37  */
38 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
39 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
40 
41 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
42 EXPORT_SYMBOL(km_waitq);
43 
44 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
45 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
46 
47 static struct work_struct xfrm_state_gc_work;
48 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
49 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 
51 static int xfrm_state_gc_flush_bundles;
52 
53 static void __xfrm_state_delete(struct xfrm_state *x);
54 
55 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
57 
58 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
59 static void km_state_expired(struct xfrm_state *x, int hard);
60 
61 static void xfrm_state_gc_destroy(struct xfrm_state *x)
62 {
63 	if (del_timer(&x->timer))
64 		BUG();
65 	if (x->aalg)
66 		kfree(x->aalg);
67 	if (x->ealg)
68 		kfree(x->ealg);
69 	if (x->calg)
70 		kfree(x->calg);
71 	if (x->encap)
72 		kfree(x->encap);
73 	if (x->type) {
74 		x->type->destructor(x);
75 		xfrm_put_type(x->type);
76 	}
77 	kfree(x);
78 }
79 
/* Work queue handler: destroy every state queued on xfrm_state_gc_list.
 * If __xfrm_state_delete() noticed DSTs still referencing a deleted
 * state it set xfrm_state_gc_flush_bundles, so stale bundles are
 * flushed first to drop those references.
 */
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	if (xfrm_state_gc_flush_bundles) {
		xfrm_state_gc_flush_bundles = 0;
		xfrm_flush_bundles();
	}

	/* Detach the whole pending list under the lock, then do the
	 * actual destruction without holding it. */
	spin_lock_bh(&xfrm_state_gc_lock);
	list_splice_init(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		/* Dead states reuse their bydst linkage for the GC list
		 * (see __xfrm_state_destroy()). */
		x = list_entry(entry, struct xfrm_state, bydst);
		xfrm_state_gc_destroy(x);
	}
	/* Wake key-manager waiters; resources may have become available. */
	wake_up(&km_waitq);
}
101 
102 static inline unsigned long make_jiffies(long secs)
103 {
104 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
105 		return MAX_SCHEDULE_TIMEOUT-1;
106 	else
107 	        return secs*HZ;
108 }
109 
/* Per-state lifetime timer.  Computes the nearest hard/soft time-based
 * expiry, notifies key managers on soft expiry, deletes the state on
 * hard expiry, and re-arms itself for the next deadline.  The timer
 * owns one reference on x, dropped at the end unless mod_timer()
 * re-armed a pending timer (in which case a fresh hold is taken).
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the nearest deadline */
	int warn = 0;		/* set when a soft limit has passed */

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* "?:"" falls back to now when the state was never used,
		 * making the remaining time exactly the configured limit. */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already signalled once; don't warn again. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_state_expired(x, 0);
resched:
	/* mod_timer() returning 0 means the timer was inactive, so the
	 * newly armed timer needs its own reference. */
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	/* Larval (ACQ) states without an SPI just flip to EXPIRED and
	 * poll again shortly; a key manager may still resolve them. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (x->id.spi != 0)
		km_state_expired(x, 1);
	__xfrm_state_delete(x);

out:
	spin_unlock(&x->lock);
	/* Drop the reference held by this timer invocation. */
	xfrm_state_put(x);
}
180 
/* Allocate and zero-initialize a new xfrm_state.
 * Returns the state with refcnt 1, lifetime limits set to "infinite"
 * and its (not yet armed) timer wired to xfrm_timer_handler, or NULL
 * on allocation failure.  GFP_ATOMIC because callers may hold locks
 * or run in softirq context.
 */
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		memset(x, 0, sizeof(struct xfrm_state));
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->bydst);
		INIT_LIST_HEAD(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		/* XFRM_INF == no limit; real limits come from userspace. */
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
206 
/* Called when the last reference to a DEAD state is dropped: queue it
 * for deferred destruction on the GC work queue.  The bydst list head
 * is free for reuse here because __xfrm_state_delete() already
 * unhashed the state.
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	list_add(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
217 
/* Mark a state DEAD and unhash it, dropping the references held by the
 * hash tables and the pending timer.  Caller must hold x->lock and its
 * own reference on x.
 */
static void __xfrm_state_delete(struct xfrm_state *x)
{
	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		/* Drop the bydst hash reference... */
		list_del(&x->bydst);
		atomic_dec(&x->refcnt);
		/* ...and the byspi one, present only for mature SAs. */
		if (x->id.spi) {
			list_del(&x->byspi);
			atomic_dec(&x->refcnt);
		}
		spin_unlock(&xfrm_state_lock);
		/* del_timer() != 0 means the timer was pending and its
		 * handler will never run, so drop its reference too. */
		if (del_timer(&x->timer))
			atomic_dec(&x->refcnt);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2) {
			xfrm_state_gc_flush_bundles = 1;
			schedule_work(&xfrm_state_gc_work);
		}

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		atomic_dec(&x->refcnt);
	}
}
250 
/* Public wrapper for __xfrm_state_delete(): takes the per-state lock
 * (BH-safe, as deletion can race with softirq timer/input paths). */
void xfrm_state_delete(struct xfrm_state *x)
{
	spin_lock_bh(&x->lock);
	__xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);
}
EXPORT_SYMBOL(xfrm_state_delete);
258 
/* Delete all non-kernel-owned states of the given protocol
 * (IPSEC_PROTO_ANY matches everything).  xfrm_state_delete() must run
 * without xfrm_state_lock, so each deletion drops the lock and restarts
 * the scan of the current hash chain, which may have changed meanwhile.
 */
void xfrm_state_flush(u8 proto)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
				/* Hold x so it survives the unlocked window. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
285 
/* Fill in a temporary selector on a larval (ACQ) state via the
 * per-family afinfo hook.  Returns 0 on success, -1 if the address
 * family has no registered afinfo.
 */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
299 
/* Find (or begin acquiring) the SA matching a policy template for the
 * flow fl.  On success returns a held state; otherwise returns NULL and
 * stores the reason in *err (-EAGAIN while a key-manager acquire is in
 * flight, -ESRCH/-EEXIST/-ENOMEM/-EAFNOSUPPORT on failure).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned h = xfrm_dst_hash(daddr, family);
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	struct xfrm_state_afinfo *afinfo;

	afinfo = xfrm_state_get_afinfo(family);
	if (afinfo == NULL) {
		*err = -EAFNOSUPPORT;
		return NULL;
	}

	spin_lock_bh(&xfrm_state_lock);
	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
		/* Template match: family, reqid, addresses, mode, proto,
		 * and SPI when the template pins one. */
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family))
					continue;
				/* Prefer states that are not dying; among
				 * equals, prefer the most recently added. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* A state with this exact SPI already exists but did not
		 * match the template above: refuse to create another. */
		if (tmpl->id.spi &&
		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
		                               tmpl->id.proto)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		if (km_query(x, tmpl, pol) == 0) {
			/* Key manager accepted the acquire: hash the larval
			 * state and give it XFRM_ACQ_EXPIRES to resolve. */
			x->km.state = XFRM_STATE_ACQ;
			list_add_tail(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				list_add(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			/* Reference for the armed timer. */
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);	/* reference returned to caller */
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
405 
/* Hash a state into both the bydst and byspi tables, each addition
 * taking its own reference, and start its lifetime timer.  Caller must
 * hold xfrm_state_lock.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	list_add(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);

	list_add(&x->byspi, xfrm_state_byspi+h);
	xfrm_state_hold(x);

	/* Arm the lifetime timer; a fresh timer needs its own hold. */
	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	/* New SA available: wake anyone blocked in the key managers. */
	wake_up(&km_waitq);
}
423 
/* Public wrapper for __xfrm_state_insert(): takes xfrm_state_lock. */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
431 
432 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
433 
/* Insert a fully-specified SA, failing with -EEXIST if one with the
 * same (daddr, spi, proto) already exists.  Any larval ACQ state this
 * SA resolves (matched by km.seq or by find_acq) is deleted after the
 * locks are released.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int family;
	int err;

	family = x->props.family;
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);

	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Prefer the ACQ state this add answers (same km sequence number),
	 * but only if it targets the same destination. */
	if (x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* Otherwise look for a matching ACQ state (create==0: lookup only). */
	if (!x1)
		x1 = afinfo->find_acq(
			x->props.mode, x->props.reqid, x->id.proto,
			&x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	/* Retire the superseded larval state outside the state lock. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
484 
/* Update an existing SA.  If the matching state is larval (ACQ), x
 * replaces it entirely; if it is mature and VALID, only the lifetime,
 * encapsulation and dying flag are refreshed from x.  Returns -ESRCH if
 * no matching state exists, -EEXIST for kernel-owned states, -EINVAL if
 * the found state is no longer VALID.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int err;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	/* Larval state: install x in its place; x == NULL below signals
	 * that the old state must be deleted instead of updated. */
	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	/* Mature state: copy the refreshed parameters under x1's lock. */
	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
549 
/* Check byte/packet lifetime limits on use of a state.  Stamps the
 * first-use time, signals the key manager on hard (and once on soft)
 * limit crossings, and returns -EINVAL when the state must no longer
 * be used, 0 otherwise.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		km_state_expired(x, 1);
		/* Give the key manager XFRM_ACQ_EXPIRES seconds before the
		 * timer finally deletes the expired state. */
		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	/* km.dying guards against repeated soft-expiry notifications. */
	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit))
		km_state_expired(x, 0);
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
573 
/* Ensure skb has enough headroom for this transform's header plus the
 * output device's link-layer reserve, expanding the head if needed.
 */
static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
585 
/* Validate a state for output on skb: first the lifetime limits, then
 * the headroom needed for the transform's headers. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;
	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
596 
/* Look up an SA by (daddr, spi, proto) via the per-family afinfo hook.
 * Returns a held state or NULL (unknown family or no match).
 */
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->state_lookup(daddr, spi, proto);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
613 
/* Find a larval (ACQ) state matching the given parameters; if create is
 * nonzero the afinfo hook may allocate one.  Returns a held state or
 * NULL (unknown family, or no match and !create).
 */
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return NULL;

	spin_lock_bh(&xfrm_state_lock);
	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
631 
632 /* Silly enough, but I'm lazy to build resolution list */
633 
/* Linear scan of all hash chains for the ACQ state with the given key
 * manager sequence number.  Caller must hold xfrm_state_lock.  Returns
 * a held state or NULL.
 */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;
	struct xfrm_state *x;

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
649 
/* Locked wrapper around __xfrm_find_acq_byseq(). */
struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
660 
661 u32 xfrm_get_acqseq(void)
662 {
663 	u32 res;
664 	static u32 acqseq;
665 	static DEFINE_SPINLOCK(acqseq_lock);
666 
667 	spin_lock_bh(&acqseq_lock);
668 	res = (++acqseq ? : ++acqseq);
669 	spin_unlock_bh(&acqseq_lock);
670 	return res;
671 }
672 EXPORT_SYMBOL(xfrm_get_acqseq);
673 
/* Assign an SPI to a state that does not yet have one and, on success,
 * hash it into the byspi table.  With minspi == maxspi that exact SPI
 * is requested; otherwise up to (maxspi-minspi+1) random probes are
 * made within [minspi, maxspi].  Silently leaves x->id.spi == 0 if no
 * free SPI is found.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	u32 h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI already taken. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Range bounds arrive in network order; probe in host order. */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* SPI assigned: make the state findable by SPI lookup. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		list_add(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
714 
/* Invoke func on every state of the given protocol (IPSEC_PROTO_ANY
 * matches all).  The second argument to func counts down to 0 on the
 * last matching state.  Both passes run under xfrm_state_lock, so func
 * must not sleep or re-enter the state lock.  Returns -ENOENT if no
 * state matched, else the first nonzero value returned by func (or 0).
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* First pass: count matches so func can be told how many remain. */
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* Second pass: deliver the callbacks. */
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
749 
/* Anti-replay check for an incoming sequence number (network order).
 * Returns 0 if seq is acceptable, -EINVAL if it is zero, falls outside
 * the replay window, or was already seen.  Statistics are bumped on
 * window misses and replays.
 */
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	/* Sequence number 0 is never valid on the wire. */
	if (unlikely(seq == 0))
		return -EINVAL;

	/* Ahead of the highest seen sequence: always acceptable. */
	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		/* Too old: outside the sliding window. */
		x->stats.replay_window++;
		return -EINVAL;
	}

	/* Bit set means this sequence number was already accepted. */
	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
775 
776 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
777 {
778 	u32 diff;
779 
780 	seq = ntohl(seq);
781 
782 	if (seq > x->replay.seq) {
783 		diff = seq - x->replay.seq;
784 		if (diff < x->props.replay_window)
785 			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
786 		else
787 			x->replay.bitmap = 1;
788 		x->replay.seq = seq;
789 	} else {
790 		diff = x->replay.seq - seq;
791 		x->replay.bitmap |= (1U << diff);
792 	}
793 }
794 EXPORT_SYMBOL(xfrm_replay_advance);
795 
796 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
797 static DEFINE_RWLOCK(xfrm_km_lock);
798 
/* Notify all registered key managers that a state expired.  hard != 0
 * marks a hard expiry (state becomes EXPIRED and waiters are woken);
 * a soft expiry only sets the dying flag.
 */
static void km_state_expired(struct xfrm_state *x, int hard)
{
	struct xfrm_mgr *km;

	if (hard)
		x->km.state = XFRM_STATE_EXPIRED;
	else
		x->km.dying = 1;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		km->notify(x, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}
816 
/* Ask the registered key managers to acquire keys for larval state x.
 * Stops at the first manager that accepts (returns 0); otherwise
 * returns the last error (-EINVAL if no managers are registered).
 */
static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
831 
/* Report a NAT-T address/port mapping change to the key managers.
 * The new_mapping hook is optional; the first manager that handles it
 * successfully ends the loop.  Returns 0 on success, else the last
 * error (-EINVAL if nobody handled it).
 */
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);
848 
/* Notify all key managers (that implement notify_policy) of a policy
 * expiry; wakes waiters on a hard expiry.
 */
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(pol, dir, hard);
	read_unlock(&xfrm_km_lock);

	if (hard)
		wake_up(&km_waitq);
}
862 
/* Handle a per-socket policy setsockopt: copy the userspace blob, let
 * the first key manager that understands it compile it into an
 * xfrm_policy, and attach that policy to the socket.  compile_policy
 * returns the policy direction in err on success.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	/* Bound the copy; a policy blob larger than a page is bogus. */
	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk->sk_family, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* err holds the direction returned by compile_policy. */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
902 
/* Register a key manager (e.g. af_key, netlink) for state/policy
 * notifications.  Always succeeds.
 */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
911 
/* Unregister a previously registered key manager. */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
920 
/* Register the per-address-family state operations.  Exactly one
 * afinfo per family is allowed; -ENOBUFS if the slot is taken.  The
 * shared hash tables are handed to the afinfo for its lookups.
 */
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		afinfo->state_bydst = xfrm_state_bydst;
		afinfo->state_byspi = xfrm_state_byspi;
		xfrm_state_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
940 
/* Unregister per-address-family state operations.  Fails with -EINVAL
 * if a different afinfo is registered for that family.
 */
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			xfrm_state_afinfo[afinfo->family] = NULL;
			afinfo->state_byspi = NULL;
			afinfo->state_bydst = NULL;
		}
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
962 
/* Look up the afinfo for a family and return it with its own read lock
 * held (released by xfrm_state_put_afinfo()).  The per-afinfo lock is
 * taken before the table lock is dropped so the entry cannot be
 * unregistered while in use.  Returns NULL for unknown families.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);
	read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
975 
/* Release the read lock taken by xfrm_state_get_afinfo(); tolerates
 * NULL so callers can pass through a failed lookup. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	if (unlikely(afinfo == NULL))
		return;
	read_unlock(&afinfo->lock);
}
982 
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach x from its inner tunnel state, deleting the tunnel state when
 * x holds the last real user reference (tunnel_users == 2 accounts for
 * the tunnel's own user plus x).
 */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
997 
/* Compute the largest payload MTU such that, after this transform adds
 * its headers (and padding, via the type's get_max_size), the packet
 * still fits in mtu.  Iterates to a fixed point because get_max_size
 * may round up (e.g. cipher block alignment).  Never returns less than
 * 68, the minimum IPv4 MTU.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		/* Overshot: shrink the candidate payload by the excess. */
		res -= (m - mtu);
	}

	return res;
}

EXPORT_SYMBOL(xfrm_state_mtu);
1027 
/* Boot-time initialization: empty both hash tables and set up the
 * deferred-destruction work item. */
void __init xfrm_state_init(void)
{
	int i;

	for (i=0; i<XFRM_DST_HSIZE; i++) {
		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
	}
	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
1038 
1039