xref: /openbmc/linux/net/xfrm/xfrm_state.c (revision 87c2ce3b)
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *	Mitsuru KANDA @USAGI
6  * 	Kazunori MIYAZAWA @USAGI
7  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  * 		IPv6 support
9  * 	YOSHIFUJI Hideaki @USAGI
10  * 		Split up af-specific functions
11  *	Derek Atkins <derek@ihtfp.com>
12  *		Add UDP Encapsulation
13  *
14  */
15 
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22 
23 /* Each xfrm_state may be linked to two tables:
24 
25    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
26    2. Hash table by daddr to find what SAs exist for given
27       destination/tunnel endpoint. (output)
28  */
29 
/* Protects both hash tables below and all per-state list linkage. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
/* Hash table keyed by (daddr, spi, proto), for SA lookup on input. */
static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];

/* Key managers and flush/GC waiters sleep here; woken on SA
 * insert/expire/GC events. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Guards the per-family afinfo registration table below. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: DEAD states are queued on xfrm_state_gc_list
 * and freed in process context by xfrm_state_gc_work. */
static struct work_struct xfrm_state_gc_work;
static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* When set, the next GC run also flushes cached dst bundles (set when
 * a deleted state still has DSTs holding references to it). */
static int xfrm_state_gc_flush_bundles;

static int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static void km_state_expired(struct xfrm_state *x, int hard);
61 static void xfrm_state_gc_destroy(struct xfrm_state *x)
62 {
63 	if (del_timer(&x->timer))
64 		BUG();
65 	kfree(x->aalg);
66 	kfree(x->ealg);
67 	kfree(x->calg);
68 	kfree(x->encap);
69 	if (x->type) {
70 		x->type->destructor(x);
71 		xfrm_put_type(x->type);
72 	}
73 	security_xfrm_state_free(x);
74 	kfree(x);
75 }
76 
/* Workqueue callback: destroy all states queued for garbage
 * collection.  Runs in process context via schedule_work(). */
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct list_head *entry, *tmp;
	struct list_head gc_list = LIST_HEAD_INIT(gc_list);

	if (xfrm_state_gc_flush_bundles) {
		xfrm_state_gc_flush_bundles = 0;
		/* Drop cached bundles that may still reference dying states. */
		xfrm_flush_bundles();
	}

	/* Steal the entire pending list under the lock, then destroy
	 * entries without holding it. */
	spin_lock_bh(&xfrm_state_gc_lock);
	list_splice_init(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	list_for_each_safe(entry, tmp, &gc_list) {
		/* Dead states are chained through their (no longer
		 * hashed) bydst link — see __xfrm_state_destroy(). */
		x = list_entry(entry, struct xfrm_state, bydst);
		xfrm_state_gc_destroy(x);
	}
	/* Let waiters (e.g. flush callers) observe the freed states. */
	wake_up(&km_waitq);
}
98 
99 static inline unsigned long make_jiffies(long secs)
100 {
101 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
102 		return MAX_SCHEDULE_TIMEOUT-1;
103 	else
104 	        return secs*HZ;
105 }
106 
/* Per-state lifetime timer.  Computes the time of the nearest pending
 * soft/hard expiry, emits a soft-expire notification at most once
 * (km.dying latches it), and deletes the state on hard expiry.  The
 * armed timer owns one reference on the state; it is dropped at the
 * end unless the timer is successfully re-armed. */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the earliest pending event */
	int warn = 0;		/* set when a soft limit has been crossed */

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time == 0 means "never used"; count from now. */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already signalled; only hard limits matter now. */
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0);	/* soft expire: hard == 0 */
resched:
	/* mod_timer() returning 0 means the timer was not pending, so
	 * the newly armed timer needs its own reference. */
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	/* A larval (ACQ) state without an SPI: mark it expired so key
	 * manager waiters notice, and poll again shortly. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	/* __xfrm_state_delete() returns 0 only on the first delete, so
	 * the hard-expire notification fires exactly once. */
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1);

out:
	spin_unlock(&x->lock);
	xfrm_state_put(x);	/* reference held by this timer run */
}
177 
178 struct xfrm_state *xfrm_state_alloc(void)
179 {
180 	struct xfrm_state *x;
181 
182 	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
183 
184 	if (x) {
185 		memset(x, 0, sizeof(struct xfrm_state));
186 		atomic_set(&x->refcnt, 1);
187 		atomic_set(&x->tunnel_users, 0);
188 		INIT_LIST_HEAD(&x->bydst);
189 		INIT_LIST_HEAD(&x->byspi);
190 		init_timer(&x->timer);
191 		x->timer.function = xfrm_timer_handler;
192 		x->timer.data	  = (unsigned long)x;
193 		x->curlft.add_time = (unsigned long)xtime.tv_sec;
194 		x->lft.soft_byte_limit = XFRM_INF;
195 		x->lft.soft_packet_limit = XFRM_INF;
196 		x->lft.hard_byte_limit = XFRM_INF;
197 		x->lft.hard_packet_limit = XFRM_INF;
198 		spin_lock_init(&x->lock);
199 	}
200 	return x;
201 }
202 EXPORT_SYMBOL(xfrm_state_alloc);
203 
/* Called on final put of a DEAD state: queue it for deferred
 * destruction in process context (destructors may sleep, callers may
 * be in softirq context). */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	/* The bydst link is free once unhashed; reuse it for the GC list. */
	list_add(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
214 
/* Mark @x DEAD and unhash it, dropping the reference owned by each
 * hash chain and by a pending timer.  Returns 0 on the first delete,
 * -ESRCH if the state was already dead.  Caller holds x->lock. */
static int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		list_del(&x->bydst);
		atomic_dec(&x->refcnt);		/* bydst chain's reference */
		if (x->id.spi) {
			list_del(&x->byspi);
			atomic_dec(&x->refcnt);	/* byspi chain's reference */
		}
		spin_unlock(&xfrm_state_lock);
		if (del_timer(&x->timer))
			atomic_dec(&x->refcnt);	/* pending timer's reference */

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2) {
			xfrm_state_gc_flush_bundles = 1;
			schedule_work(&xfrm_state_gc_work);
		}

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		atomic_dec(&x->refcnt);
		err = 0;
	}

	return err;
}
252 
253 int xfrm_state_delete(struct xfrm_state *x)
254 {
255 	int err;
256 
257 	spin_lock_bh(&x->lock);
258 	err = __xfrm_state_delete(x);
259 	spin_unlock_bh(&x->lock);
260 
261 	return err;
262 }
263 EXPORT_SYMBOL(xfrm_state_delete);
264 
/* Delete every non-kernel-owned state matching @proto (or all states
 * when proto == IPSEC_PROTO_ANY).  xfrm_state_delete() takes x->lock
 * and may sleep waiters, so the table lock is dropped around each
 * delete and the hash chain walk restarts afterwards. */
void xfrm_state_flush(u8 proto)
{
	int i;
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
restart:
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
				/* Hold across the unlock so x cannot be freed. */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				/* Chain may have changed while unlocked. */
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
291 
292 static int
293 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
294 		  struct xfrm_tmpl *tmpl,
295 		  xfrm_address_t *daddr, xfrm_address_t *saddr,
296 		  unsigned short family)
297 {
298 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
299 	if (!afinfo)
300 		return -1;
301 	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
302 	xfrm_state_put_afinfo(afinfo);
303 	return 0;
304 }
305 
/* Resolve the SA to use for output flow @fl under template @tmpl and
 * policy @pol.  Prefers the "best" VALID state (least dying, then
 * most recently added); if none exists and no acquire is already in
 * progress, creates a larval ACQ state and asks the key managers to
 * negotiate a real one.  Returns a referenced state, or NULL with
 * *err set (-EAGAIN when an acquire is pending). */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned h = xfrm_dst_hash(daddr, family);
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	struct xfrm_state_afinfo *afinfo;

	afinfo = xfrm_state_get_afinfo(family);
	if (afinfo == NULL) {
		*err = -EAFNOSUPPORT;
		return NULL;
	}

	spin_lock_bh(&xfrm_state_lock);
	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !xfrm_sec_ctx_match(pol->security, x->security))
					continue;
				/* Prefer states that are not dying; break
				 * ties by the most recent add_time. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
 				if (xfrm_selector_match(&x->sel, fl, family) &&
				    xfrm_sec_ctx_match(pol->security, x->security))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* An explicit-SPI template must not collide with an
		 * existing SA that failed the match above. */
		if (tmpl->id.spi &&
		    (x0 = afinfo->state_lookup(daddr, tmpl->id.spi,
		                               tmpl->id.proto)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		if (km_query(x, tmpl, pol) == 0) {
			/* A key manager accepted the query: hash the
			 * larval state; each list insert and the armed
			 * timer takes its own reference. */
			x->km.state = XFRM_STATE_ACQ;
			list_add_tail(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				list_add(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			/* No key manager listening; discard the larva. */
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);	/* caller's reference */
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return x;
}
413 
/* Link @x into both hash tables and arm its timer; each list insert
 * and the armed timer takes its own reference on the state.  Caller
 * holds xfrm_state_lock. */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);

	list_add(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);

	list_add(&x->byspi, xfrm_state_byspi+h);
	xfrm_state_hold(x);

	/* Kick the lifetime timer; mod_timer() == 0 means it was not
	 * already pending, so take the timer's reference. */
	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	/* Notify waiters (key managers, flush) about the new SA. */
	wake_up(&km_waitq);
}
431 
432 void xfrm_state_insert(struct xfrm_state *x)
433 {
434 	spin_lock_bh(&xfrm_state_lock);
435 	__xfrm_state_insert(x);
436 	spin_unlock_bh(&xfrm_state_lock);
437 
438 	xfrm_flush_all_bundles();
439 }
440 EXPORT_SYMBOL(xfrm_state_insert);
441 
/* Defined below; xfrm_state_add() needs it to match larval states by seq. */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
443 
/* Insert a fully-specified SA.  Fails with -EEXIST when an SA with
 * the same (daddr, spi, proto) already exists.  Any matching larval
 * (ACQ) state — found by km.seq or by find_acq — is looked up under
 * the lock and deleted after the insert, outside it. */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int family;
	int err;

	family = x->props.family;
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);

	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		/* A seq match with a different destination is not ours. */
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* create == 0: only look up an existing larval state. */
	if (!x1)
		x1 = afinfo->find_acq(
			x->props.mode, x->props.reqid, x->id.proto,
			&x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (!err)
		xfrm_flush_all_bundles();

	/* Replace the larval state the new SA supersedes. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
497 
/* Update an existing SA in place from @x.  If the found SA is larval
 * (ACQ), @x is inserted instead and the larva is deleted.  Otherwise
 * only encap, lifetimes and the dying flag of the existing SA are
 * refreshed.  Returns -ESRCH when no matching SA exists, -EEXIST for
 * kernel-owned SAs, -EINVAL if the SA is no longer VALID. */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	struct xfrm_state *x1;
	int err;

	afinfo = xfrm_state_get_afinfo(x->props.family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larva: the new SA replaces it; x = NULL signals the
		 * replace path below. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);

	if (err)
		return err;

	if (!x) {
		/* Inserted above; retire the larval state. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the lifetime timer against the new limits. */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
562 
/* Account a use of @x against its byte/packet limits.  Marks first
 * use, transitions to EXPIRED on a hard limit (firing the timer
 * immediately), and signals soft expiry once via km.dying.  Returns
 * 0 if the state is still usable, -EINVAL otherwise. */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		/* Fire the timer now to run the expiry path; take the
		 * timer's reference if it was not already pending. */
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0);	/* soft expire, at most once */
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
588 
589 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
590 {
591 	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
592 		- skb_headroom(skb);
593 
594 	if (nhead > 0)
595 		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
596 
597 	/* Check tail too... */
598 	return 0;
599 }
600 
/* Validate @x for output use: account usage and lifetimes via
 * xfrm_state_check_expire(), then make sure @skb has the headroom
 * this transform needs.  Returns 0 or a negative errno.
 *
 * Cleanup: the original jumped to a label named "err" (shadowing the
 * variable of the same name) that did nothing but return — replaced
 * with a plain early return; behavior is unchanged. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_state_check_expire(x);
	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
611 
612 struct xfrm_state *
613 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
614 		  unsigned short family)
615 {
616 	struct xfrm_state *x;
617 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
618 	if (!afinfo)
619 		return NULL;
620 
621 	spin_lock_bh(&xfrm_state_lock);
622 	x = afinfo->state_lookup(daddr, spi, proto);
623 	spin_unlock_bh(&xfrm_state_lock);
624 	xfrm_state_put_afinfo(afinfo);
625 	return x;
626 }
627 EXPORT_SYMBOL(xfrm_state_lookup);
628 
629 struct xfrm_state *
630 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
631 	      xfrm_address_t *daddr, xfrm_address_t *saddr,
632 	      int create, unsigned short family)
633 {
634 	struct xfrm_state *x;
635 	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
636 	if (!afinfo)
637 		return NULL;
638 
639 	spin_lock_bh(&xfrm_state_lock);
640 	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
641 	spin_unlock_bh(&xfrm_state_lock);
642 	xfrm_state_put_afinfo(afinfo);
643 	return x;
644 }
645 EXPORT_SYMBOL(xfrm_find_acq);
646 
647 /* Silly enough, but I'm lazy to build resolution list */
648 
649 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
650 {
651 	int i;
652 	struct xfrm_state *x;
653 
654 	for (i = 0; i < XFRM_DST_HSIZE; i++) {
655 		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
656 			if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
657 				xfrm_state_hold(x);
658 				return x;
659 			}
660 		}
661 	}
662 	return NULL;
663 }
664 
665 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
666 {
667 	struct xfrm_state *x;
668 
669 	spin_lock_bh(&xfrm_state_lock);
670 	x = __xfrm_find_acq_byseq(seq);
671 	spin_unlock_bh(&xfrm_state_lock);
672 	return x;
673 }
674 EXPORT_SYMBOL(xfrm_find_acq_byseq);
675 
676 u32 xfrm_get_acqseq(void)
677 {
678 	u32 res;
679 	static u32 acqseq;
680 	static DEFINE_SPINLOCK(acqseq_lock);
681 
682 	spin_lock_bh(&acqseq_lock);
683 	res = (++acqseq ? : ++acqseq);
684 	spin_unlock_bh(&acqseq_lock);
685 	return res;
686 }
687 EXPORT_SYMBOL(xfrm_get_acqseq);
688 
/* Assign an SPI to @x from [minspi, maxspi] (network byte order) and
 * hash it into the byspi table.  A fixed SPI (minspi == maxspi) is
 * used only if free; otherwise random SPIs in the range are probed,
 * at most range-size times.  Silently leaves x->id.spi == 0 when no
 * free SPI was found. */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	u32 h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;		/* already has an SPI */

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI is taken; give up. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		/* Random probing: up to range-size attempts, so small
		 * ranges may miss a free SPI even if one exists. */
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* Hash into the byspi table; the chain takes its own ref. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		list_add(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
729 
/* Invoke @func for every state matching @proto, entirely under the
 * table lock (so @func must not sleep or take xfrm_state_lock).  The
 * second argument passed to @func counts down to 0 on the last
 * matching state.  Returns -ENOENT when nothing matches, otherwise
 * the first non-zero value @func returns (0 on full traversal). */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* First pass: count matches so the callback can detect the
	 * last entry (count reaches 0).  The lock is held throughout,
	 * so the count stays accurate for the second pass. */
	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i < XFRM_DST_HSIZE; i++) {
		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
764 
765 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
766 {
767 	u32 diff;
768 
769 	seq = ntohl(seq);
770 
771 	if (unlikely(seq == 0))
772 		return -EINVAL;
773 
774 	if (likely(seq > x->replay.seq))
775 		return 0;
776 
777 	diff = x->replay.seq - seq;
778 	if (diff >= x->props.replay_window) {
779 		x->stats.replay_window++;
780 		return -EINVAL;
781 	}
782 
783 	if (x->replay.bitmap & (1U << diff)) {
784 		x->stats.replay++;
785 		return -EINVAL;
786 	}
787 	return 0;
788 }
789 EXPORT_SYMBOL(xfrm_replay_check);
790 
791 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
792 {
793 	u32 diff;
794 
795 	seq = ntohl(seq);
796 
797 	if (seq > x->replay.seq) {
798 		diff = seq - x->replay.seq;
799 		if (diff < x->props.replay_window)
800 			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
801 		else
802 			x->replay.bitmap = 1;
803 		x->replay.seq = seq;
804 	} else {
805 		diff = x->replay.seq - seq;
806 		x->replay.bitmap |= (1U << diff);
807 	}
808 }
809 EXPORT_SYMBOL(xfrm_replay_advance);
810 
/* Registered key managers and the rwlock guarding the list. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
813 
814 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
815 {
816 	struct xfrm_mgr *km;
817 
818 	read_lock(&xfrm_km_lock);
819 	list_for_each_entry(km, &xfrm_km_list, list)
820 		if (km->notify_policy)
821 			km->notify_policy(xp, dir, c);
822 	read_unlock(&xfrm_km_lock);
823 }
824 
825 void km_state_notify(struct xfrm_state *x, struct km_event *c)
826 {
827 	struct xfrm_mgr *km;
828 	read_lock(&xfrm_km_lock);
829 	list_for_each_entry(km, &xfrm_km_list, list)
830 		if (km->notify)
831 			km->notify(x, c);
832 	read_unlock(&xfrm_km_lock);
833 }
834 
835 EXPORT_SYMBOL(km_policy_notify);
836 EXPORT_SYMBOL(km_state_notify);
837 
838 static void km_state_expired(struct xfrm_state *x, int hard)
839 {
840 	struct km_event c;
841 
842 	c.data.hard = hard;
843 	c.event = XFRM_MSG_EXPIRE;
844 	km_state_notify(x, &c);
845 
846 	if (hard)
847 		wake_up(&km_waitq);
848 }
849 
850 /*
851  * We send to all registered managers regardless of failure
852  * We are happy with one success
853 */
854 static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
855 {
856 	int err = -EINVAL, acqret;
857 	struct xfrm_mgr *km;
858 
859 	read_lock(&xfrm_km_lock);
860 	list_for_each_entry(km, &xfrm_km_list, list) {
861 		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
862 		if (!acqret)
863 			err = acqret;
864 	}
865 	read_unlock(&xfrm_km_lock);
866 	return err;
867 }
868 
869 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
870 {
871 	int err = -EINVAL;
872 	struct xfrm_mgr *km;
873 
874 	read_lock(&xfrm_km_lock);
875 	list_for_each_entry(km, &xfrm_km_list, list) {
876 		if (km->new_mapping)
877 			err = km->new_mapping(x, ipaddr, sport);
878 		if (!err)
879 			break;
880 	}
881 	read_unlock(&xfrm_km_lock);
882 	return err;
883 }
884 EXPORT_SYMBOL(km_new_mapping);
885 
886 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
887 {
888 	struct km_event c;
889 
890 	c.data.hard = hard;
891 	c.event = XFRM_MSG_POLEXPIRE;
892 	km_policy_notify(pol, dir, &c);
893 
894 	if (hard)
895 		wake_up(&km_waitq);
896 }
897 
/* Handle a per-socket policy setsockopt: copy the opaque policy blob
 * from userspace and let each key manager try to compile it.  On
 * success the compiled policy is attached to @sk.  Note that a
 * non-negative err from compile_policy is the policy direction, not
 * an error code. */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	/* Bound the copy to one page of untrusted input. */
	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk->sk_family, optname, data,
					 optlen, &err);
		/* err >= 0 means this manager understood the blob. */
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* err carries the policy direction here. */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
937 
938 int xfrm_register_km(struct xfrm_mgr *km)
939 {
940 	write_lock_bh(&xfrm_km_lock);
941 	list_add_tail(&km->list, &xfrm_km_list);
942 	write_unlock_bh(&xfrm_km_lock);
943 	return 0;
944 }
945 EXPORT_SYMBOL(xfrm_register_km);
946 
947 int xfrm_unregister_km(struct xfrm_mgr *km)
948 {
949 	write_lock_bh(&xfrm_km_lock);
950 	list_del(&km->list);
951 	write_unlock_bh(&xfrm_km_lock);
952 	return 0;
953 }
954 EXPORT_SYMBOL(xfrm_unregister_km);
955 
/* Register per-family state operations.  The afinfo is handed
 * pointers to the shared hash tables.  Returns -ENOBUFS when the
 * family slot is already taken. */
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else {
		/* Share the global hash tables with the family code. */
		afinfo->state_bydst = xfrm_state_bydst;
		afinfo->state_byspi = xfrm_state_byspi;
		xfrm_state_afinfo[afinfo->family] = afinfo;
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
975 
/* Unregister per-family state operations.  Fails with -EINVAL when
 * the registered afinfo for the family is not @afinfo itself. */
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else {
			/* Drop the table pointers handed out at register. */
			xfrm_state_afinfo[afinfo->family] = NULL;
			afinfo->state_byspi = NULL;
			afinfo->state_bydst = NULL;
		}
	}
	write_unlock(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
997 
/* Look up the afinfo for @family and return it with its own read
 * lock held (taken before the table lock is dropped, so the afinfo
 * cannot be unregistered while in use).  Release the returned afinfo
 * with xfrm_state_put_afinfo().  Returns NULL if none registered. */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (likely(afinfo != NULL))
		read_lock(&afinfo->lock);	/* pin against unregister */
	read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1010 
1011 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1012 {
1013 	if (unlikely(afinfo == NULL))
1014 		return;
1015 	read_unlock(&afinfo->lock);
1016 }
1017 
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach @x from its inner tunnel state, deleting the tunnel state
 * when we appear to be its last user (tunnel_users == 2 —
 * NOTE(review): presumably creator + this user; confirm against the
 * xfrm tunnel refcounting convention). */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1032 
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
/* Compute the largest payload size whose transformed packet fits in
 * @mtu, iterating because padding/trailer overhead depends on the
 * payload size itself.  Never returns less than 68 (minimum IPv4 MTU
 * per RFC 791). */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		/* Ask the transform how big payload m becomes on the wire. */
		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		/* Overshot: shrink by the excess and try again. */
		res -= (m - mtu);
	}

	return res;
}

EXPORT_SYMBOL(xfrm_state_mtu);
1068 
1069 int xfrm_init_state(struct xfrm_state *x)
1070 {
1071 	struct xfrm_state_afinfo *afinfo;
1072 	int family = x->props.family;
1073 	int err;
1074 
1075 	err = -EAFNOSUPPORT;
1076 	afinfo = xfrm_state_get_afinfo(family);
1077 	if (!afinfo)
1078 		goto error;
1079 
1080 	err = 0;
1081 	if (afinfo->init_flags)
1082 		err = afinfo->init_flags(x);
1083 
1084 	xfrm_state_put_afinfo(afinfo);
1085 
1086 	if (err)
1087 		goto error;
1088 
1089 	err = -EPROTONOSUPPORT;
1090 	x->type = xfrm_get_type(x->id.proto, family);
1091 	if (x->type == NULL)
1092 		goto error;
1093 
1094 	err = x->type->init_state(x);
1095 	if (err)
1096 		goto error;
1097 
1098 	x->km.state = XFRM_STATE_VALID;
1099 
1100 error:
1101 	return err;
1102 }
1103 
1104 EXPORT_SYMBOL(xfrm_init_state);
1105 
1106 void __init xfrm_state_init(void)
1107 {
1108 	int i;
1109 
1110 	for (i=0; i<XFRM_DST_HSIZE; i++) {
1111 		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1112 		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1113 	}
1114 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1115 }
1116 
1117