/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <linux/audit.h>

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly;

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family);
static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo);

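/*
 * A flow matches a selector when the addresses match under the
 * selector's prefix lengths, the ports match under the port masks,
 * and the protocol and output interface either match or are
 * wildcarded (zero) in the selector.
 */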
static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return 0;
}

int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
	struct xfrm_type **typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	if (likely(typemap[type->proto] == NULL))
		typemap[type->proto] = type;
	else
		err = -EEXIST;
	xfrm_policy_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_register_type);

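/*
 * Typical usage (a sketch modeled on the in-tree IPv4 ESP
 * implementation; field list abbreviated):
 *
 *	static struct xfrm_type esp_type = {
 *		.description	= "ESP4",
 *		.owner		= THIS_MODULE,
 *		.proto		= IPPROTO_ESP,
 *		...
 *	};
 *
 *	xfrm_register_type(&esp_type, AF_INET);
 */
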
int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
	struct xfrm_type **typemap;
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;
	typemap = afinfo->type_map;

	if (unlikely(typemap[type->proto] != type))
		err = -ENOENT;
	else
		typemap[type->proto] = NULL;
	xfrm_policy_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);

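/*
 * Look up the type handler registered for @proto, trying once to load
 * the module providing it (request_module("xfrm-type-<family>-<proto>"))
 * if it is not yet registered.  The reference taken on type->owner is
 * dropped with xfrm_put_type().
 */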
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_type **typemap;
	struct xfrm_type *type;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;
	typemap = afinfo->type_map;

	type = typemap[proto];
	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;
	if (!type && !modload_attempted) {
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-type-%d-%d",
			       (int) family, (int) proto);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return type;
}

int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
		    unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	if (likely(afinfo->dst_lookup != NULL))
		err = afinfo->dst_lookup(dst, fl);
	else
		err = -EINVAL;
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_dst_lookup);

void xfrm_put_type(struct xfrm_type *type)
{
	module_put(type->owner);
}

int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_mode **modemap;
	int err;

	if (unlikely(mode->encap >= XFRM_MODE_MAX))
		return -EINVAL;

	afinfo = xfrm_policy_lock_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	err = -EEXIST;
	modemap = afinfo->mode_map;
	if (likely(modemap[mode->encap] == NULL)) {
		modemap[mode->encap] = mode;
		err = 0;
	}

	xfrm_policy_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_register_mode);

int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_mode **modemap;
	int err;

	if (unlikely(mode->encap >= XFRM_MODE_MAX))
		return -EINVAL;

	afinfo = xfrm_policy_lock_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	err = -ENOENT;
	modemap = afinfo->mode_map;
	if (likely(modemap[mode->encap] == mode)) {
		modemap[mode->encap] = NULL;
		err = 0;
	}

	xfrm_policy_unlock_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_unregister_mode);

struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct xfrm_mode *mode;
	int modload_attempted = 0;

	if (unlikely(encap >= XFRM_MODE_MAX))
		return NULL;

retry:
	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;

	mode = afinfo->mode_map[encap];
	if (unlikely(mode && !try_module_get(mode->owner)))
		mode = NULL;
	if (!mode && !modload_attempted) {
		xfrm_policy_put_afinfo(afinfo);
		request_module("xfrm-mode-%d-%d", family, encap);
		modload_attempted = 1;
		goto retry;
	}

	xfrm_policy_put_afinfo(afinfo);
	return mode;
}

void xfrm_put_mode(struct xfrm_mode *mode)
{
	module_put(mode->owner);
}

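/* Convert a timeout in seconds to jiffies, clamped below MAX_SCHEDULE_TIMEOUT. */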
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

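/*
 * Per-policy timer: enforce the four lifetime limits (hard/soft by
 * add/use time).  A hard expiry deletes the policy and notifies key
 * managers with hard == 1; a soft expiry only warns (hard == 0) and
 * re-arms the timer after XFRM_KM_TIMEOUT seconds.
 */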
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used here; it is supposed to be used by pfkeyv2
 * SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		init_timer(&policy->timer);
		policy->timer.data = (unsigned long)policy;
		policy->timer.function = xfrm_policy_timer;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

/* Destroy xfrm_policy: descendant resources must already have been released. */

void __xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy);
	kfree(policy);
}
EXPORT_SYMBOL(__xfrm_policy_destroy);

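/*
 * Final teardown of a dead policy: free its cached bundles, drop the
 * timer's reference if the timer was still pending, flush the flow
 * cache if anyone else still holds a reference, then put the last
 * reference.
 */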
static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}

/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must already be unlinked from lists.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}

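/*
 * __sel_hash() signals an unhashable selector (in this tree, one whose
 * prefixes are not full host addresses) by returning hmask + 1; such
 * policies live on the per-direction inexact chain, which lookups
 * always scan in full.
 */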
static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&xfrm_policy_inexact[dir] :
		xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return xfrm_policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		hlist_add_head(&pol->bydst, ndsttable+h);
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

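/*
 * Double a hash table (xfrm_new_hash_mask() grows hmask + 1 to the
 * next power of two) and rehash every chain under the policy write
 * lock; the old table is freed afterwards.
 */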
static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(int dir, int *total)
{
	unsigned int cnt = xfrm_policy_count[dir];
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate new index... KAME seems to generate them ordered by cost
 * at the price of completely unpredictable rule ordering. This will not pass. */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

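/*
 * Insert a policy into its by-selector chain, which is kept sorted by
 * priority.  An existing policy with the same type, selector and
 * security context is replaced (or, with excl set, makes the insert
 * fail with -EEXIST).  Cached bundles of policies further down the
 * chain are flushed, since the new entry may shadow them.
 */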
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		xfrm_policy_count[dir]--;
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

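/*
 * Before flushing, ask the security module for permission to delete
 * every policy of the given type; the first refusal is audited
 * (AUDIT_MAC_IPSEC_DELSPD with result 0) and aborts the flush before
 * anything has been unlinked.
 */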
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol);
			if (err) {
				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSPD, 0,
					       pol, NULL);
				return err;
			}
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(pol);
				if (err) {
					xfrm_audit_log(audit_info->loginuid,
						       audit_info->secid,
						       AUDIT_MAC_IPSEC_DELSPD,
						       0, pol, NULL);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_log(audit_info->loginuid, audit_info->secid,
				       AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL);

			xfrm_policy_kill(pol);
			killed++;

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSPD, 1,
					       pol, NULL);

				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol, *last = NULL;
	struct hlist_node *entry;
	int dir, last_dir = 0, count, error;

	read_lock_bh(&xfrm_policy_lock);
	count = 0;

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		struct hlist_head *table = xfrm_policy_bydst[dir].table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			if (last) {
				error = func(last, last_dir % XFRM_POLICY_MAX,
					     count, data);
				if (error)
					goto out;
			}
			last = pol;
			last_dir = dir;
			count++;
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst) {
				if (pol->type != type)
					continue;
				if (last) {
					error = func(last, last_dir % XFRM_POLICY_MAX,
						     count, data);
					if (error)
						goto out;
				}
				last = pol;
				last_dir = dir;
				count++;
			}
		}
	}
	if (count == 0) {
		error = -ENOENT;
		goto out;
	}
	error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	struct xfrm_selector *sel = &pol->selector;
	int match, ret = -ESRCH;

	if (pol->family != family ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol, fl->secid, dir);

	return ret;
}

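/*
 * Flow-based lookup: probe the direct (fully hashed) chain for this
 * daddr/saddr pair first, then the inexact chain, where a candidate
 * only wins if its priority is strictly better than the best direct
 * match.
 */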
static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol, fl->secid,
					policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	xfrm_policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old, newp)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}

static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx>=0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx>=0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

/* Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
		   struct flowi *fl, struct dst_entry **dst_p,
		   unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

static inline int
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
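/*
 * Resolution order: a socket policy, if present, wins; otherwise the
 * flow cache is consulted via xfrm_policy_lookup().  When template
 * resolution returns -EAGAIN, we either hand back -EREMOTE so the
 * caller installs a one-shot blackhole route (sysctl_xfrm_larval_drop)
 * or, if flags allow waiting, sleep on km_waitq and restart once the
 * flow cache generation changes.
 */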
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	if (sk && sk->sk_policy[1]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		if (IS_ERR(policy))
			return PTR_ERR(policy);
	}

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			return 0;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		if (IS_ERR(policy))
			return PTR_ERR(policy);
	}

	if (!policy)
		return 0;

	family = dst_orig->ops->family;
	policy->curlft.use_time = get_seconds();
	pols[0] = policy;
	npols ++;
	xfrm_nr += pols[0]->xfrm_nr;

	switch (policy->action) {
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					err = -EPERM;
					goto error;
				}
				npols ++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Neither flowi nor bundle information knows about the
		 * transformation template size, so with more than one
		 * policy in use we can only tell whether all of them are
		 * bypass after they have been searched. Note that the
		 * not-transformed bypass above is likewise wrapped in the
		 * non-sub-policy configuration.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx<0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && flags) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0)
				goto error;
		}
		if (nx == 0) {
			/* Flow passes not transformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = dst_orig;
		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);

		if (unlikely(err)) {
			int i;
			for (i=0; i<nx; i++)
				xfrm_state_put(xfrm[i]);
			goto error;
		}

		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* While we worked on resolving, this policy
			 * has gone away. Retry. It is not paranoia, we
			 * just cannot enlist a new bundle to a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);

			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);
			goto error;
		}

		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	*dst_p = NULL;
	return err;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	int err = __xfrm_lookup(dst_p, fl, sk, flags);

	if (err == -EREMOTE) {
		dst_release(*dst_p);
		*dst_p = NULL;
		err = -EAGAIN;
	}

	return err;
}
EXPORT_SYMBOL(xfrm_lookup);

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
	struct xfrm_state *x;
	int err;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	xfrm_state_hold(x);
	err = x->type->reject(x, skb, fl);
	xfrm_state_put(x);
	return err;
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have their policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		((tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either bypass
 * because of optional transport mode, or the next index of the matched
 * secpath state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}

int
xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl);
	err = security_xfrm_decode_session(skb, &fl->secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_decode_session);

static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	struct flowi fl;
	u8 fl_dir = policy_to_flow_dir(dir);
	int xerr_idx = -1;

	if (xfrm_decode_session(skb, &fl, family) < 0)
		return 0;
	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i=skb->sp->len-1; i>=0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family))
				return 0;
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol))
			return 0;
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol))
		return 0;

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1]))
				return 0;
			pols[1]->curlft.use_time = get_seconds();
			npols ++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW)
				goto reject;
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
				goto reject_error;
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx))
			goto reject;

		xfrm_pols_put(pols, npols);
		return 1;
	}

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (xfrm_decode_session(skb, &fl, family) < 0)
		return 0;

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to "-1" to force all XFRM destinations to get validated by
	 * dst_ops->check on every use.  We do this because when a
	 * normal route referenced by an XFRM dst is obsoleted we do
	 * not go looking around for all parent referencing XFRM dsts
	 * so that we can invalidate them.  It is just too much work.
	 * Instead we make the checks here on every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to a
	 * positive non-zero integer.  If an XFRM dst has been pruned
	 * like this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = &loopback_dev;
		dev_hold(&loopback_dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before it reaches the point of failure. */
	return;
}

static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
{
	if (dst) {
		if (dst->obsolete) {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
	struct dst_entry *dst, **dstp;

	write_lock(&pol->lock);
	dstp = &pol->bundles;
	while ((dst=*dstp) != NULL) {
		if (func(dst)) {
			*dstp = dst->next;
			dst->next = *gc_list_p;
			*gc_list_p = dst;
		} else {
			dstp = &dst->next;
		}
	}
	write_unlock(&pol->lock);
}

static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
	struct dst_entry *gc_list = NULL;
	int dir;

	read_lock_bh(&xfrm_policy_lock);
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		struct hlist_head *table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst)
			prune_one_bundle(pol, func, &gc_list);

		table = xfrm_policy_bydst[dir].table;
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst)
				prune_one_bundle(pol, func, &gc_list);
		}
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;
		gc_list = dst->next;
		dst_free(dst);
	}
}

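/*
 * Bundle pruning predicates: unused_bundle() selects bundles nobody
 * holds a reference to any more (plain garbage collection), while
 * stale_bundle() selects bundles whose component routes or states are
 * no longer valid.
 */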
2030 static int unused_bundle(struct dst_entry *dst)
2031 {
2032 	return !atomic_read(&dst->__refcnt);
2033 }
2034 
2035 static void __xfrm_garbage_collect(void)
2036 {
2037 	xfrm_prune_bundles(unused_bundle);
2038 }
2039 
2040 static int xfrm_flush_bundles(void)
2041 {
2042 	xfrm_prune_bundles(stale_bundle);
2043 	return 0;
2044 }
2045 
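/* Walk a bundle along its ->next chain, caching the child and route
 * MTUs for each segment and setting each dst's MTU metric to the
 * smaller of the xfrm-adjusted child MTU and the route MTU.
 */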
2046 void xfrm_init_pmtu(struct dst_entry *dst)
2047 {
2048 	do {
2049 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2050 		u32 pmtu, route_mtu_cached;
2051 
2052 		pmtu = dst_mtu(dst->child);
2053 		xdst->child_mtu_cached = pmtu;
2054 
2055 		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2056 
2057 		route_mtu_cached = dst_mtu(xdst->route);
2058 		xdst->route_mtu_cached = route_mtu_cached;
2059 
2060 		if (pmtu > route_mtu_cached)
2061 			pmtu = route_mtu_cached;
2062 
2063 		dst->metrics[RTAX_MTU-1] = pmtu;
2064 	} while ((dst = dst->next));
2065 }
2066 
2067 EXPORT_SYMBOL(xfrm_init_pmtu);
2068 
2069 /* Check that the bundle accepts the flow and its components are
2070  * still valid.
2071  */
2072 
2073 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2074 		struct flowi *fl, int family, int strict)
2075 {
2076 	struct dst_entry *dst = &first->u.dst;
2077 	struct xfrm_dst *last;
2078 	u32 mtu;
2079 
2080 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2081 	    (dst->dev && !netif_running(dst->dev)))
2082 		return 0;
2083 #ifdef CONFIG_XFRM_SUB_POLICY
2084 	if (fl) {
2085 		if (first->origin && !flow_cache_uli_match(first->origin, fl))
2086 			return 0;
2087 		if (first->partner &&
2088 		    !xfrm_selector_match(first->partner, fl, family))
2089 			return 0;
2090 	}
2091 #endif
2092 
2093 	last = NULL;
2094 
2095 	do {
2096 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2097 
2098 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2099 			return 0;
2100 		if (fl && pol &&
2101 		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2102 			return 0;
2103 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2104 			return 0;
2105 		if (xdst->genid != dst->xfrm->genid)
2106 			return 0;
2107 
2108 		if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
2109 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2110 			return 0;
2111 
2112 		mtu = dst_mtu(dst->child);
2113 		if (xdst->child_mtu_cached != mtu) {
2114 			last = xdst;
2115 			xdst->child_mtu_cached = mtu;
2116 		}
2117 
2118 		if (!dst_check(xdst->route, xdst->route_cookie))
2119 			return 0;
2120 		mtu = dst_mtu(xdst->route);
2121 		if (xdst->route_mtu_cached != mtu) {
2122 			last = xdst;
2123 			xdst->route_mtu_cached = mtu;
2124 		}
2125 
2126 		dst = dst->child;
2127 	} while (dst->xfrm);
2128 
2129 	if (likely(!last))
2130 		return 1;
2131 
2132 	mtu = last->child_mtu_cached;
2133 	for (;;) {
2134 		dst = &last->u.dst;
2135 
2136 		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2137 		if (mtu > last->route_mtu_cached)
2138 			mtu = last->route_mtu_cached;
2139 		dst->metrics[RTAX_MTU-1] = mtu;
2140 
2141 		if (last == first)
2142 			break;
2143 
2144 		last = last->u.next;
2145 		last->child_mtu_cached = mtu;
2146 	}
2147 
2148 	return 1;
2149 }
2150 
2151 EXPORT_SYMBOL(xfrm_bundle_ok);
2152 
2153 #ifdef CONFIG_AUDITSYSCALL
2154 /* Audit addition and deletion of SAs and IPsec policies */
2155 
2156 void xfrm_audit_log(uid_t auid, u32 sid, int type, int result,
2157 		    struct xfrm_policy *xp, struct xfrm_state *x)
2158 {
2160 	char *secctx;
2161 	u32 secctx_len;
2162 	struct xfrm_sec_ctx *sctx = NULL;
2163 	struct audit_buffer *audit_buf;
2164 	int family;
2165 	extern int audit_enabled;
2166 
2167 	if (audit_enabled == 0)
2168 		return;
2169 
2170 	BUG_ON((type == AUDIT_MAC_IPSEC_ADDSA ||
2171 		type == AUDIT_MAC_IPSEC_DELSA) && !x);
2172 	BUG_ON((type == AUDIT_MAC_IPSEC_ADDSPD ||
2173 		type == AUDIT_MAC_IPSEC_DELSPD) && !xp);
2174 
2175 	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
2176 	if (audit_buf == NULL)
2177 		return;
2178 
2179 	switch (type) {
2180 	case AUDIT_MAC_IPSEC_ADDSA:
2181 		audit_log_format(audit_buf, "SAD add: auid=%u", auid);
2182 		break;
2183 	case AUDIT_MAC_IPSEC_DELSA:
2184 		audit_log_format(audit_buf, "SAD delete: auid=%u", auid);
2185 		break;
2186 	case AUDIT_MAC_IPSEC_ADDSPD:
2187 		audit_log_format(audit_buf, "SPD add: auid=%u", auid);
2188 		break;
2189 	case AUDIT_MAC_IPSEC_DELSPD:
2190 		audit_log_format(audit_buf, "SPD delete: auid=%u", auid);
2191 		break;
2192 	default:
		/* unknown type: finish the record so audit_buf is not leaked */
		audit_log_end(audit_buf);
2193 		return;
2194 	}
2195 
2196 	if (sid != 0 &&
2197 		security_secid_to_secctx(sid, &secctx, &secctx_len) == 0)
2198 		audit_log_format(audit_buf, " subj=%s", secctx);
2199 	else
2200 		audit_log_task_context(audit_buf);
2201 
2202 	if (xp) {
2203 		family = xp->selector.family;
2204 		if (xp->security)
2205 			sctx = xp->security;
2206 	} else {
2207 		family = x->props.family;
2208 		if (x->security)
2209 			sctx = x->security;
2210 	}
2211 
2212 	if (sctx)
2213 		audit_log_format(audit_buf,
2214 				" sec_alg=%u sec_doi=%u sec_obj=%s",
2215 				sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str);
2216 
2217 	switch (family) {
2218 	case AF_INET:
2219 		{
2220 			struct in_addr saddr, daddr;
2221 			if (xp) {
2222 				saddr.s_addr = xp->selector.saddr.a4;
2223 				daddr.s_addr = xp->selector.daddr.a4;
2224 			} else {
2225 				saddr.s_addr = x->props.saddr.a4;
2226 				daddr.s_addr = x->id.daddr.a4;
2227 			}
2228 			audit_log_format(audit_buf,
2229 					 " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
2230 					 NIPQUAD(saddr), NIPQUAD(daddr));
2231 		}
2232 			break;
2233 	case AF_INET6:
2234 		{
2235 			struct in6_addr saddr6, daddr6;
2236 			if (xp) {
2237 				memcpy(&saddr6, xp->selector.saddr.a6,
2238 					sizeof(struct in6_addr));
2239 				memcpy(&daddr6, xp->selector.daddr.a6,
2240 					sizeof(struct in6_addr));
2241 			} else {
2242 				memcpy(&saddr6, x->props.saddr.a6,
2243 					sizeof(struct in6_addr));
2244 				memcpy(&daddr6, x->id.daddr.a6,
2245 					sizeof(struct in6_addr));
2246 			}
2247 			audit_log_format(audit_buf,
2248 					 " src=" NIP6_FMT " dst=" NIP6_FMT,
2249 					 NIP6(saddr6), NIP6(daddr6));
2250 		}
2251 		break;
2252 	}
2253 
2254 	if (x)
2255 		audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s",
2256 				(unsigned long)ntohl(x->id.spi),
2257 				(unsigned long)ntohl(x->id.spi),
2258 				x->id.proto == IPPROTO_AH ? "AH" :
2259 				(x->id.proto == IPPROTO_ESP ?
2260 				"ESP" : "IPCOMP"));
2261 
2262 	audit_log_format(audit_buf, " res=%u", result);
2263 	audit_log_end(audit_buf);
2264 }
2265 
2266 EXPORT_SYMBOL(xfrm_audit_log);
2267 #endif /* CONFIG_AUDITSYSCALL */
2268 
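/* Register the per-family policy ops and fill in any dst_ops callbacks
 * the caller left NULL with the generic XFRM implementations above.
 */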
2269 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2270 {
2271 	int err = 0;
2272 	if (unlikely(afinfo == NULL))
2273 		return -EINVAL;
2274 	if (unlikely(afinfo->family >= NPROTO))
2275 		return -EAFNOSUPPORT;
2276 	write_lock_bh(&xfrm_policy_afinfo_lock);
2277 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2278 		err = -ENOBUFS;
2279 	else {
2280 		struct dst_ops *dst_ops = afinfo->dst_ops;
2281 		if (likely(dst_ops->kmem_cachep == NULL))
2282 			dst_ops->kmem_cachep = xfrm_dst_cache;
2283 		if (likely(dst_ops->check == NULL))
2284 			dst_ops->check = xfrm_dst_check;
2285 		if (likely(dst_ops->negative_advice == NULL))
2286 			dst_ops->negative_advice = xfrm_negative_advice;
2287 		if (likely(dst_ops->link_failure == NULL))
2288 			dst_ops->link_failure = xfrm_link_failure;
2289 		if (likely(afinfo->garbage_collect == NULL))
2290 			afinfo->garbage_collect = __xfrm_garbage_collect;
2291 		xfrm_policy_afinfo[afinfo->family] = afinfo;
2292 	}
2293 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2294 	return err;
2295 }
2296 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2297 
2298 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2299 {
2300 	int err = 0;
2301 	if (unlikely(afinfo == NULL))
2302 		return -EINVAL;
2303 	if (unlikely(afinfo->family >= NPROTO))
2304 		return -EAFNOSUPPORT;
2305 	write_lock_bh(&xfrm_policy_afinfo_lock);
2306 	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2307 		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2308 			err = -EINVAL;
2309 		else {
2310 			struct dst_ops *dst_ops = afinfo->dst_ops;
2311 			xfrm_policy_afinfo[afinfo->family] = NULL;
2312 			dst_ops->kmem_cachep = NULL;
2313 			dst_ops->check = NULL;
2314 			dst_ops->negative_advice = NULL;
2315 			dst_ops->link_failure = NULL;
2316 			afinfo->garbage_collect = NULL;
2317 		}
2318 	}
2319 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2320 	return err;
2321 }
2322 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2323 
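/* On success xfrm_policy_get_afinfo() returns with
 * xfrm_policy_afinfo_lock read-held; the lock is only dropped again by
 * xfrm_policy_put_afinfo().  The _lock/_unlock pair below works the
 * same way with the write lock.
 */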
2324 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2325 {
2326 	struct xfrm_policy_afinfo *afinfo;
2327 	if (unlikely(family >= NPROTO))
2328 		return NULL;
2329 	read_lock(&xfrm_policy_afinfo_lock);
2330 	afinfo = xfrm_policy_afinfo[family];
2331 	if (unlikely(!afinfo))
2332 		read_unlock(&xfrm_policy_afinfo_lock);
2333 	return afinfo;
2334 }
2335 
2336 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2337 {
2338 	read_unlock(&xfrm_policy_afinfo_lock);
2339 }
2340 
2341 static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family)
2342 {
2343 	struct xfrm_policy_afinfo *afinfo;
2344 	if (unlikely(family >= NPROTO))
2345 		return NULL;
2346 	write_lock_bh(&xfrm_policy_afinfo_lock);
2347 	afinfo = xfrm_policy_afinfo[family];
2348 	if (unlikely(!afinfo))
2349 		write_unlock_bh(&xfrm_policy_afinfo_lock);
2350 	return afinfo;
2351 }
2352 
2353 static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo)
2354 {
2355 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2356 }
2357 
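/* Bundles that reference a downed device would keep it pinned, so
 * flush stale bundles whenever a device goes down.
 */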
2358 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2359 {
2360 	switch (event) {
2361 	case NETDEV_DOWN:
2362 		xfrm_flush_bundles();
		break;
2363 	}
2364 	return NOTIFY_DONE;
2365 }
2366 
2367 static struct notifier_block xfrm_dev_notifier = {
2368 	.notifier_call	= xfrm_dev_event,
2369 };
2372 
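/* Allocate the xfrm_dst cache and the byidx and per-direction bydst
 * policy hash tables (eight buckets each to start with), then set up
 * the policy GC work and the netdevice notifier.
 */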
2373 static void __init xfrm_policy_init(void)
2374 {
2375 	unsigned int hmask, sz;
2376 	int dir;
2377 
2378 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2379 					   sizeof(struct xfrm_dst),
2380 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2381 					   NULL, NULL);
2382 
2383 	hmask = 8 - 1;
2384 	sz = (hmask+1) * sizeof(struct hlist_head);
2385 
2386 	xfrm_policy_byidx = xfrm_hash_alloc(sz);
2387 	xfrm_idx_hmask = hmask;
2388 	if (!xfrm_policy_byidx)
2389 		panic("XFRM: failed to allocate byidx hash\n");
2390 
2391 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2392 		struct xfrm_policy_hash *htab;
2393 
2394 		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2395 
2396 		htab = &xfrm_policy_bydst[dir];
2397 		htab->table = xfrm_hash_alloc(sz);
2398 		htab->hmask = hmask;
2399 		if (!htab->table)
2400 			panic("XFRM: failed to allocate bydst hash\n");
2401 	}
2402 
2403 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2404 	register_netdevice_notifier(&xfrm_dev_notifier);
2405 }
2406 
2407 void __init xfrm_init(void)
2408 {
2409 	xfrm_state_init();
2410 	xfrm_policy_init();
2411 	xfrm_input_init();
2412 }
2413 
2414 #ifdef CONFIG_XFRM_MIGRATE
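/* A target selector matches either byte-for-byte, or, when the
 * comparison selector carries the IPSEC_ULPROTO_ANY wildcard, on
 * family, addresses and prefix lengths alone.
 */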
2415 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2416 				       struct xfrm_selector *sel_tgt)
2417 {
2418 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2419 		if (sel_tgt->family == sel_cmp->family &&
2420 		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2421 				  sel_cmp->family) == 0 &&
2422 		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2423 				  sel_cmp->family) == 0 &&
2424 		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2425 		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2426 			return 1;
2427 		}
2428 	} else {
2429 		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2430 			return 1;
2431 		}
2432 	}
2433 	return 0;
2434 }
2435 
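/* Find the policy a migrate request applies to: prefer a match in the
 * bydst hash, falling back to an inexact entry only if it has a
 * strictly better (numerically lower) priority.  The returned policy
 * is held.
 */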
2436 static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
2437 						     u8 dir, u8 type)
2438 {
2439 	struct xfrm_policy *pol, *ret = NULL;
2440 	struct hlist_node *entry;
2441 	struct hlist_head *chain;
2442 	u32 priority = ~0U;
2443 
2444 	read_lock_bh(&xfrm_policy_lock);
2445 	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2446 	hlist_for_each_entry(pol, entry, chain, bydst) {
2447 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2448 		    pol->type == type) {
2449 			ret = pol;
2450 			priority = ret->priority;
2451 			break;
2452 		}
2453 	}
2454 	chain = &xfrm_policy_inexact[dir];
2455 	hlist_for_each_entry(pol, entry, chain, bydst) {
2456 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2457 		    pol->type == type &&
2458 		    pol->priority < priority) {
2459 			ret = pol;
2460 			break;
2461 		}
2462 	}
2463 
2464 	if (ret)
2465 		xfrm_pol_hold(ret);
2466 
2467 	read_unlock_bh(&xfrm_policy_lock);
2468 
2469 	return ret;
2470 }
2471 
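/* A template matches a migrate entry when mode, protocol and (if the
 * entry specifies one) reqid agree; tunnel and BEET modes additionally
 * require the old endpoint addresses to match.
 */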
2472 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2473 {
2474 	int match = 0;
2475 
2476 	if (t->mode == m->mode && t->id.proto == m->proto &&
2477 	    (m->reqid == 0 || t->reqid == m->reqid)) {
2478 		switch (t->mode) {
2479 		case XFRM_MODE_TUNNEL:
2480 		case XFRM_MODE_BEET:
2481 			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2482 					  m->old_family) == 0 &&
2483 			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2484 					  m->old_family) == 0) {
2485 				match = 1;
2486 			}
2487 			break;
2488 		case XFRM_MODE_TRANSPORT:
2489 			/* In transport mode the template does not store
2490 			 * any IP addresses, so comparing mode and
2491 			 * protocol (done above) is sufficient. */
2492 			match = 1;
2493 			break;
2494 		default:
2495 			break;
2496 		}
2497 	}
2498 	return match;
2499 }
2500 
2501 /* update endpoint address(es) of template(s) */
2502 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2503 			       struct xfrm_migrate *m, int num_migrate)
2504 {
2505 	struct xfrm_migrate *mp;
2506 	struct dst_entry *dst;
2507 	int i, j, n = 0;
2508 
2509 	write_lock_bh(&pol->lock);
2510 	if (unlikely(pol->dead)) {
2511 		/* target policy has been deleted */
2512 		write_unlock_bh(&pol->lock);
2513 		return -ENOENT;
2514 	}
2515 
2516 	for (i = 0; i < pol->xfrm_nr; i++) {
2517 		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2518 			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2519 				continue;
2520 			n++;
2521 			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL)
2522 				continue;
2523 			/* update endpoints */
2524 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2525 			       sizeof(pol->xfrm_vec[i].id.daddr));
2526 			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2527 			       sizeof(pol->xfrm_vec[i].saddr));
2528 			pol->xfrm_vec[i].encap_family = mp->new_family;
2529 			/* flush bundles */
2530 			while ((dst = pol->bundles) != NULL) {
2531 				pol->bundles = dst->next;
2532 				dst_free(dst);
2533 			}
2534 		}
2535 	}
2536 
2537 	write_unlock_bh(&pol->lock);
2538 
2539 	if (!n)
2540 		return -ENODATA;
2541 
2542 	return 0;
2543 }
2544 
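/* Sanity-check a migrate request: between 1 and XFRM_MAX_DEPTH
 * entries, each one actually changing an endpoint, no unspecified new
 * addresses, and no duplicated old tuples.
 */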
2545 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2546 {
2547 	int i, j;
2548 
2549 	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2550 		return -EINVAL;
2551 
2552 	for (i = 0; i < num_migrate; i++) {
2553 		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2554 				   m[i].old_family) == 0) &&
2555 		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2556 				   m[i].old_family) == 0))
2557 			return -EINVAL;
2558 		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2559 		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2560 			return -EINVAL;
2561 
2562 		/* check if there is any duplicated entry */
2563 		for (j = i + 1; j < num_migrate; j++) {
2564 			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2565 				    sizeof(m[i].old_daddr)) &&
2566 			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2567 				    sizeof(m[i].old_saddr)) &&
2568 			    m[i].proto == m[j].proto &&
2569 			    m[i].mode == m[j].mode &&
2570 			    m[i].reqid == m[j].reqid &&
2571 			    m[i].old_family == m[j].old_family)
2572 				return -EINVAL;
2573 		}
2574 	}
2575 
2576 	return 0;
2577 }
2578 
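/* Migrate in five stages: validate the request, find the matching
 * policy, clone each matching state to its new endpoints, rewrite the
 * policy templates, then delete the old states and announce the
 * migration through km_migrate().
 */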
2579 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2580 		 struct xfrm_migrate *m, int num_migrate)
2581 {
2582 	int i, err, nx_cur = 0, nx_new = 0;
2583 	struct xfrm_policy *pol = NULL;
2584 	struct xfrm_state *x, *xc;
2585 	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2586 	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2587 	struct xfrm_migrate *mp;
2588 
2589 	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2590 		goto out;
2591 
2592 	/* Stage 1 - find policy */
2593 	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2594 		err = -ENOENT;
2595 		goto out;
2596 	}
2597 
2598 	/* Stage 2 - find and update state(s) */
2599 	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2600 		if ((x = xfrm_migrate_state_find(mp))) {
2601 			x_cur[nx_cur] = x;
2602 			nx_cur++;
2603 			if ((xc = xfrm_state_migrate(x, mp))) {
2604 				x_new[nx_new] = xc;
2605 				nx_new++;
2606 			} else {
2607 				err = -ENODATA;
2608 				goto restore_state;
2609 			}
2610 		}
2611 	}
2612 
2613 	/* Stage 3 - update policy */
2614 	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2615 		goto restore_state;
2616 
2617 	/* Stage 4 - delete old state(s) */
2618 	if (nx_cur) {
2619 		xfrm_states_put(x_cur, nx_cur);
2620 		xfrm_states_delete(x_cur, nx_cur);
2621 	}
2622 
2623 	/* Stage 5 - announce */
2624 	km_migrate(sel, dir, type, m, num_migrate);
2625 
2626 	xfrm_pol_put(pol);
2627 
2628 	return 0;
2629 out:
2630 	return err;
2631 
2632 restore_state:
2633 	if (pol)
2634 		xfrm_pol_put(pol);
2635 	if (nx_cur)
2636 		xfrm_states_put(x_cur, nx_cur);
2637 	if (nx_new)
2638 		xfrm_states_delete(x_new, nx_new);
2639 
2640 	return err;
2641 }
2642 EXPORT_SYMBOL(xfrm_migrate);
2643 #endif
2644