/* xref: /openbmc/linux/net/xfrm/xfrm_policy.c (revision 643d1f7f) */
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif

#include "xfrm_hash.h"

int sysctl_xfrm_larval_drop __read_mostly;

#ifdef CONFIG_XFRM_STATISTICS
DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
EXPORT_SYMBOL(xfrm_statistics);
#endif

DEFINE_MUTEX(xfrm_cfg_mutex);
EXPORT_SYMBOL(xfrm_cfg_mutex);

static DEFINE_RWLOCK(xfrm_policy_lock);

unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
EXPORT_SYMBOL(xfrm_policy_count);

static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];

static struct kmem_cache *xfrm_dst_cache __read_mostly;

static struct work_struct xfrm_policy_gc_work;
static HLIST_HEAD(xfrm_policy_gc_list);
static DEFINE_SPINLOCK(xfrm_policy_gc_lock);

static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);

static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
		(fl->proto == sel->proto || !sel->proto) &&
		(fl->oif == sel->ifindex || !sel->ifindex);
}

int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_selector_match(sel, fl);
	case AF_INET6:
		return __xfrm6_selector_match(sel, fl);
	}
	return 0;
}
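
/*
 * Note (editorial): a zeroed selector field acts as a wildcard above:
 * sel->proto == 0 matches any protocol, sel->ifindex == 0 matches any
 * interface, and a port mask of 0 makes the port comparison always true.
 */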

static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						int family)
{
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
		saddr = x->coaddr;
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
		daddr = x->coaddr;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(tos, saddr, daddr);
	xfrm_policy_put_afinfo(afinfo);
	return dst;
}

static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}

static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;
	int warn = 0;
	int dir;

	read_lock(&xp->lock);

	if (xp->dead)
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}


/* Allocate xfrm_policy. Not used directly here; it is meant to be used
 * by pfkeyv2 SPD calls.
 */

struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
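
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a key-manager style caller pairs xfrm_policy_alloc() with
 * xfrm_policy_insert() below. The field values are hypothetical, and the
 * unwinding of the allocation reference on error is omitted for brevity.
 */
#if 0
static int example_add_output_policy(void)
{
	struct xfrm_policy *pol = xfrm_policy_alloc(GFP_KERNEL);

	if (!pol)
		return -ENOMEM;

	pol->family = AF_INET;			/* hypothetical selector */
	pol->action = XFRM_POLICY_ALLOW;
	pol->type = XFRM_POLICY_TYPE_MAIN;

	/* Fails with -EEXIST when excl is set and an equivalent
	 * policy is already installed. */
	return xfrm_policy_insert(XFRM_POLICY_OUT, pol, 1);
}
#endif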

/* Destroy xfrm_policy: descendant resources must be released by this point. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	BUG_ON(!policy->dead);

	BUG_ON(policy->bundles);

	if (del_timer(&policy->timer))
		BUG();

	security_xfrm_policy_free(policy);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);

static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
{
	struct dst_entry *dst;

	while ((dst = policy->bundles) != NULL) {
		policy->bundles = dst->next;
		dst_free(dst);
	}

	if (del_timer(&policy->timer))
		atomic_dec(&policy->refcnt);

	if (atomic_read(&policy->refcnt) > 1)
		flow_cache_flush();

	xfrm_pol_put(policy);
}

static void xfrm_policy_gc_task(struct work_struct *work)
{
	struct xfrm_policy *policy;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_policy_gc_lock);
	gc_list.first = xfrm_policy_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
	spin_unlock_bh(&xfrm_policy_gc_lock);

	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
		xfrm_policy_gc_kill(policy);
}

/* Rule must be locked. Release descendant resources, announce the entry
 * dead. The rule must already be unlinked from all lists by this point.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	int dead;

	write_lock_bh(&policy->lock);
	dead = policy->dead;
	policy->dead = 1;
	write_unlock_bh(&policy->lock);

	if (unlikely(dead)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&xfrm_policy_gc_lock);
	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
	spin_unlock(&xfrm_policy_gc_lock);

	schedule_work(&xfrm_policy_gc_work);
}

struct xfrm_policy_hash {
	struct hlist_head	*table;
	unsigned int		hmask;
};

static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
static struct hlist_head *xfrm_policy_byidx __read_mostly;
static unsigned int xfrm_idx_hmask __read_mostly;
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

static inline unsigned int idx_hash(u32 index)
{
	return __idx_hash(index, xfrm_idx_hmask);
}

static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&xfrm_policy_inexact[dir] :
		xfrm_policy_bydst[dir].table + hash);
}

static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int hash = __addr_hash(daddr, saddr, family, hmask);

	return xfrm_policy_bydst[dir].table + hash;
}

static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		hlist_add_head(&pol->bydst, ndsttable+h);
	}
}

static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}

static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return ((old_hmask + 1) << 1) - 1;
}

static void xfrm_bydst_resize(int dir)
{
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = xfrm_policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	xfrm_policy_bydst[dir].table = ndst;
	xfrm_policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

static void xfrm_byidx_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = xfrm_policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	xfrm_policy_byidx = nidx;
	xfrm_idx_hmask = nhashmask;

	write_unlock_bh(&xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}

static inline int xfrm_bydst_should_resize(int dir, int *total)
{
	unsigned int cnt = xfrm_policy_count[dir];
	unsigned int hmask = xfrm_policy_bydst[dir].hmask;

	if (total)
		*total += cnt;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    cnt > hmask)
		return 1;

	return 0;
}

static inline int xfrm_byidx_should_resize(int total)
{
	unsigned int hmask = xfrm_idx_hmask;

	if ((hmask + 1) < xfrm_policy_hashmax &&
	    total > hmask)
		return 1;

	return 0;
}

void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
{
	read_lock_bh(&xfrm_policy_lock);
	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = xfrm_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);

static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(struct work_struct *__unused)
{
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(dir, &total))
			xfrm_bydst_resize(dir);
	}
	if (xfrm_byidx_should_resize(total))
		xfrm_byidx_resize(total);

	mutex_unlock(&hash_resize_mutex);
}

static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);

/* Generate a new index... KAME seems to generate indices ordered by cost,
 * at the price of completely unpredictable ordering of the rules.
 * That approach will not work here. */
static u32 xfrm_gen_index(u8 type, int dir)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_node *entry;
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		idx = (idx_generator | dir);
		idx_generator += 8;
		if (idx == 0)
			idx = 8;
		list = xfrm_policy_byidx + idx_hash(idx);
		found = 0;
		hlist_for_each_entry(p, entry, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}
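
/*
 * Note (editorial): indices advance in steps of 8, so the direction is
 * encoded in the low bits of every index; xfrm_policy_id2dir(), used by
 * xfrm_policy_timer() above, recovers the direction from an index.
 */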

static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
{
	u32 *p1 = (u32 *) s1;
	u32 *p2 = (u32 *) s2;
	int len = sizeof(struct xfrm_selector) / sizeof(u32);
	int i;

	for (i = 0; i < len; i++) {
		if (p1[i] != p2[i])
			return 1;
	}

	return 0;
}

int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *entry, *newpos;
	struct dst_entry *gc_list;

	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(&policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	xfrm_policy_count[dir]++;
	atomic_inc(&flow_cache_genid);
	if (delpol) {
		hlist_del(&delpol->bydst);
		hlist_del(&delpol->byidx);
		xfrm_policy_count[dir]--;
	}
	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	write_unlock_bh(&xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);

	read_lock_bh(&xfrm_policy_lock);
	gc_list = NULL;
	entry = &policy->bydst;
	hlist_for_each_entry_continue(policy, entry, bydst) {
		struct dst_entry *dst;

		write_lock(&policy->lock);
		dst = policy->bundles;
		if (dst) {
			struct dst_entry *tail = dst;
			while (tail->next)
				tail = tail->next;
			tail->next = gc_list;
			gc_list = dst;

			policy->bundles = NULL;
		}
		write_unlock(&policy->lock);
	}
	read_unlock_bh(&xfrm_policy_lock);

	while (gc_list) {
		struct dst_entry *dst = gc_list;

		gc_list = dst->next;
		dst_free(dst);
	}

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);

struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
					  struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_bysel(sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		if (pol->type == type &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
				     int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;
	struct hlist_node *entry;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&xfrm_policy_lock);
	chain = xfrm_policy_byidx + idx_hash(id);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, byidx) {
		if (pol->type == type && pol->index == id) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(pol);
				if (*err) {
					write_unlock_bh(&xfrm_policy_lock);
					return pol;
				}
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				xfrm_policy_count[dir]--;
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&xfrm_policy_lock);

	if (ret && delete) {
		atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(ret);
	}
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
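
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * deleting a policy by its index. Note the lookup takes a reference that
 * the caller must drop even when delete succeeds.
 */
#if 0
static int example_delete_by_id(u32 index, int dir)
{
	int err;
	struct xfrm_policy *pol;

	pol = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, index, 1, &err);
	if (!pol)
		return err ? err : -ENOENT;
	xfrm_pol_put(pol);	/* drop the reference taken by the lookup */
	return err;
}
#endif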

#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(pol);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
static inline int
xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif

int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	write_lock_bh(&xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		struct hlist_node *entry;
		int i, killed;

		killed = 0;
	again1:
		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			hlist_del(&pol->bydst);
			hlist_del(&pol->byidx);
			write_unlock_bh(&xfrm_policy_lock);

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->secid);

			xfrm_policy_kill(pol);
			killed++;

			write_lock_bh(&xfrm_policy_lock);
			goto again1;
		}

		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol, entry,
					     xfrm_policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				hlist_del(&pol->bydst);
				hlist_del(&pol->byidx);
				write_unlock_bh(&xfrm_policy_lock);

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->secid);
				xfrm_policy_kill(pol);
				killed++;

				write_lock_bh(&xfrm_policy_lock);
				goto again2;
			}
		}

		xfrm_policy_count[dir] -= killed;
	}
	atomic_inc(&flow_cache_genid);
out:
	write_unlock_bh(&xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);

int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol, *last = NULL;
	struct hlist_node *entry;
	int dir, last_dir = 0, count, error;

	read_lock_bh(&xfrm_policy_lock);
	count = 0;

	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
		struct hlist_head *table = xfrm_policy_bydst[dir].table;
		int i;

		hlist_for_each_entry(pol, entry,
				     &xfrm_policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			if (last) {
				error = func(last, last_dir % XFRM_POLICY_MAX,
					     count, data);
				if (error)
					goto out;
			}
			last = pol;
			last_dir = dir;
			count++;
		}
		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol, entry, table + i, bydst) {
				if (pol->type != type)
					continue;
				if (last) {
					error = func(last, last_dir % XFRM_POLICY_MAX,
						     count, data);
					if (error)
						goto out;
				}
				last = pol;
				last_dir = dir;
				count++;
			}
		}
	}
	if (count == 0) {
		error = -ENOENT;
		goto out;
	}
	error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
out:
	read_unlock_bh(&xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
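
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a walker callback matching the signature xfrm_policy_walk() expects.
 * Note that the final policy is visited with a count argument of 0, which
 * callers can use to detect the end of the walk.
 */
#if 0
static int example_count_walker(struct xfrm_policy *pol, int dir, int count,
				void *data)
{
	unsigned int *seen = data;	/* hypothetical accumulator */

	(*seen)++;
	return 0;			/* non-zero would abort the walk */
}

/* usage: xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, example_count_walker, &seen); */
#endif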

/*
 * Find policy to apply to this flow.
 *
 * Returns 0 if policy found, else an -errno.
 */
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
			     u8 type, u16 family, int dir)
{
	struct xfrm_selector *sel = &pol->selector;
	int match, ret = -ESRCH;

	if (pol->family != family ||
	    pol->type != type)
		return ret;

	match = xfrm_selector_match(sel, fl, family);
	if (match)
		ret = security_xfrm_policy_lookup(pol, fl->secid, dir);

	return ret;
}

static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	xfrm_address_t *daddr, *saddr;
	struct hlist_node *entry;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&xfrm_policy_lock);
	chain = policy_hash_direct(daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &xfrm_policy_inexact[dir];
	hlist_for_each_entry(pol, entry, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&xfrm_policy_lock);

	return ret;
}

static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
	int err = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
	if (pol || err)
		goto end;
#endif
	pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
	if (IS_ERR(pol)) {
		err = PTR_ERR(pol);
		pol = NULL;
	}
#ifdef CONFIG_XFRM_SUB_POLICY
end:
#endif
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	}
}

static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
	struct xfrm_policy *pol;

	read_lock_bh(&xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		int match = xfrm_selector_match(&pol->selector, fl,
						sk->sk_family);
		int err = 0;

		if (match) {
			err = security_xfrm_policy_lookup(pol, fl->secid,
					policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
}

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct hlist_head *chain = policy_hash_bysel(&pol->selector,
						     pol->family, dir);

	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
	xfrm_policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(dir, NULL))
		schedule_work(&xfrm_hash_work);
}

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del(&pol->bydst);
	hlist_del(&pol->byidx);
	xfrm_policy_count[dir]--;

	return pol;
}

int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{
	write_lock_bh(&xfrm_policy_lock);
	pol = __xfrm_policy_unlink(pol, dir);
	write_unlock_bh(&xfrm_policy_lock);
	if (pol) {
		if (dir < XFRM_POLICY_MAX)
			atomic_inc(&flow_cache_genid);
		xfrm_policy_kill(pol);
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(xfrm_policy_delete);

int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol)
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	write_unlock_bh(&xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}

static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old, newp)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}

int __xfrm_sk_clone_policy(struct sock *sk)
{
	struct xfrm_policy *p0 = sk->sk_policy[0],
			   *p1 = sk->sk_policy[1];

	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
		return -ENOMEM;
	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
		return -ENOMEM;
	return 0;
}
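
/*
 * Note (editorial): __xfrm_sk_clone_policy() runs when a socket is cloned
 * (e.g. a child socket created by accept()), so per-socket policies are
 * duplicated rather than shared between parent and child.
 */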

static int
xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
	       unsigned short family)
{
	int err;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	err = afinfo->get_saddr(local, remote);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}

/* Resolve list of templates for the flow, given policy. */

static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
		      struct xfrm_state **xfrm,
		      unsigned short family)
{
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			family = tmpl->encap_family;
			if (xfrm_addr_any(local, family)) {
				error = xfrm_get_saddr(&tmp, remote, family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}

static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
		  struct xfrm_state **xfrm,
		  unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}

/* Check that the bundle accepts the flow and its components are
 * still valid.
 */

static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
	struct dst_entry *x;
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EINVAL);
	x = afinfo->find_bundle(fl, policy);
	xfrm_policy_put_afinfo(afinfo);
	return x;
}

static inline int xfrm_get_tos(struct flowi *fl, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int tos;

	if (!afinfo)
		return -EINVAL;

	tos = afinfo->get_tos(fl);

	xfrm_policy_put_afinfo(afinfo);

	return tos;
}

static inline struct xfrm_dst *xfrm_alloc_dst(int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}

static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
				 int nfheader_len)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(dst->ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->init_path(path, dst, nfheader_len);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
{
	struct xfrm_policy_afinfo *afinfo =
		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
	int err;

	if (!afinfo)
		return -EINVAL;

	err = afinfo->fill_dst(xdst, dev);

	xfrm_policy_put_afinfo(afinfo);

	return err;
}

/* Allocate a chain of dst_entry's, attach the known xfrm's and calculate
 * all the metrics. In short, bundle a bundle.
 */

static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
					    struct xfrm_state **xfrm, int nx,
					    struct flowi *fl,
					    struct dst_entry *dst)
{
	unsigned long now = jiffies;
	struct net_device *dev;
	struct dst_entry *dst_prev = NULL;
	struct dst_entry *dst0 = NULL;
	int i = 0;
	int err;
	int header_len = 0;
	int nfheader_len = 0;
	int trailer_len = 0;
	int tos;
	int family = policy->selector.family;

	tos = xfrm_get_tos(fl, family);
	err = tos;
	if (tos < 0)
		goto put_states;

	dst_hold(dst);

	for (; i < nx; i++) {
		struct xfrm_dst *xdst = xfrm_alloc_dst(family);
		struct dst_entry *dst1 = &xdst->u.dst;

		err = PTR_ERR(xdst);
		if (IS_ERR(xdst)) {
			dst_release(dst);
			goto put_states;
		}

		if (!dst_prev)
			dst0 = dst1;
		else {
			dst_prev->child = dst_clone(dst1);
			dst1->flags |= DST_NOHASH;
		}

		xdst->route = dst;
		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));

		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
			family = xfrm[i]->props.family;
			dst = xfrm_dst_lookup(xfrm[i], tos, family);
			err = PTR_ERR(dst);
			if (IS_ERR(dst))
				goto put_states;
		} else
			dst_hold(dst);

		dst1->xfrm = xfrm[i];
		xdst->genid = xfrm[i]->genid;

		dst1->obsolete = -1;
		dst1->flags |= DST_HOST;
		dst1->lastuse = now;

		dst1->input = dst_discard;
		dst1->output = xfrm[i]->outer_mode->afinfo->output;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
			nfheader_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;
	}

	dst_prev->child = dst;
	dst0->path = dst;

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	/* Copy the neighbour for reachability confirmation */
	dst0->neighbour = neigh_clone(dst->neighbour);

	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
	xfrm_init_pmtu(dst_prev);

	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;

		err = xfrm_fill_dst(xdst, dev);
		if (err)
			goto free_dst;

		dst_prev->header_len = header_len;
		dst_prev->trailer_len = trailer_len;
		header_len -= xdst->u.dst.xfrm->props.header_len;
		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
	}

out:
	return dst0;

put_states:
	for (; i < nx; i++)
		xfrm_state_put(xfrm[i]);
free_dst:
	if (dst0)
		dst_free(dst0);
	dst0 = ERR_PTR(err);
	goto out;
}
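
/*
 * Illustrative note (editorial addition, not part of the original file):
 * for nx == 2 the chain built above looks roughly like this, with ->child
 * linking the transform stack and each xdst->route pinning the route used
 * at that layer:
 *
 *	dst0 (xfrm[0]) --child--> dst1 (xfrm[1]) --child--> final route
 *
 * dst0->path points at the final route, so the plain path stays reachable
 * from the head of the bundle.
 */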

static inline int
xfrm_dst_alloc_copy(void **target, void *src, int size)
{
	if (!*target) {
		*target = kmalloc(size, GFP_ATOMIC);
		if (!*target)
			return -ENOMEM;
	}
	memcpy(*target, src, size);
	return 0;
}

static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}

static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}

static int stale_bundle(struct dst_entry *dst);

/* Main function: finds/creates a bundle for a given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		  struct sock *sk, int flags)
{
	struct xfrm_policy *policy;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols;
	int pol_dead;
	int xfrm_nr;
	int pi;
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst, *dst_orig = *dst_p;
	int nx = 0;
	int err;
	u32 genid;
	u16 family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);

restart:
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
		pols[pi] = NULL;
	npols = 0;
	pol_dead = 0;
	xfrm_nr = 0;

	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy) {
		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !xfrm_policy_count[XFRM_POLICY_OUT])
			goto nopol;

		policy = flow_cache_lookup(fl, dst_orig->ops->family,
					   dir, xfrm_policy_lookup);
		err = PTR_ERR(policy);
		if (IS_ERR(policy)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
			goto dropdst;
		}
	}

	if (!policy)
		goto nopol;

	family = dst_orig->ops->family;
	pols[0] = policy;
	npols++;
	xfrm_nr += pols[0]->xfrm_nr;

	err = -ENOENT;
	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
		goto error;

	policy->curlft.use_time = get_seconds();

	switch (policy->action) {
	default:
	case XFRM_POLICY_BLOCK:
		/* Prohibit the flow */
		XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;

	case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
		if (policy->xfrm_nr == 0) {
			/* Flow passes untransformed. */
			xfrm_pol_put(policy);
			return 0;
		}
#endif

		/* Try to find a matching bundle.
		 *
		 * LATER: help from flow cache. It is optional, this
		 * is required only for output policy.
		 */
		dst = xfrm_find_bundle(fl, policy, family);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = PTR_ERR(dst);
			goto error;
		}

		if (dst)
			break;

#ifdef CONFIG_XFRM_SUB_POLICY
		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
			pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
							    fl, family,
							    XFRM_POLICY_OUT);
			if (pols[1]) {
				if (IS_ERR(pols[1])) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
					err = PTR_ERR(pols[1]);
					goto error;
				}
				if (pols[1]->action == XFRM_POLICY_BLOCK) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
					err = -EPERM;
					goto error;
				}
				npols++;
				xfrm_nr += pols[1]->xfrm_nr;
			}
		}

		/*
		 * Neither the flowi nor the bundle carries the transformation
		 * template count, so when more than one policy is in use we
		 * can only tell whether all of them are bypass after every
		 * one of them has been looked up. Note that the
		 * not-transformed bypass above is likewise guarded by the
		 * non-sub-policy configuration.
		 */
		if (xfrm_nr == 0) {
			/* Flow passes untransformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

#endif
		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

		if (unlikely(nx < 0)) {
			err = nx;
			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
				/* EREMOTE tells the caller to generate
				 * a one-shot blackhole route.
				 */
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				xfrm_pol_put(policy);
				return -EREMOTE;
			}
			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
				DECLARE_WAITQUEUE(wait, current);

				add_wait_queue(&km_waitq, &wait);
				set_current_state(TASK_INTERRUPTIBLE);
				schedule();
				set_current_state(TASK_RUNNING);
				remove_wait_queue(&km_waitq, &wait);

				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);

				if (nx == -EAGAIN && signal_pending(current)) {
					XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
					err = -ERESTART;
					goto error;
				}
				if (nx == -EAGAIN ||
				    genid != atomic_read(&flow_cache_genid)) {
					xfrm_pols_put(pols, npols);
					goto restart;
				}
				err = nx;
			}
			if (err < 0) {
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
				goto error;
			}
		}
		if (nx == 0) {
			/* Flow passes untransformed. */
			xfrm_pols_put(pols, npols);
			return 0;
		}

		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
		err = PTR_ERR(dst);
		if (IS_ERR(dst)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
			goto error;
		}

		for (pi = 0; pi < npols; pi++) {
			read_lock_bh(&pols[pi]->lock);
			pol_dead |= pols[pi]->dead;
			read_unlock_bh(&pols[pi]->lock);
		}

		write_lock_bh(&policy->lock);
		if (unlikely(pol_dead || stale_bundle(dst))) {
			/* Wow! While we worked on resolving, this
			 * policy has gone. Retry. It is not paranoia,
			 * we just cannot enlist a new bundle to a dead
			 * object. We can't enlist stale bundles either.
			 */
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);

			if (pol_dead)
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
			else
				XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			err = -EHOSTUNREACH;
			goto error;
		}

		if (npols > 1)
			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
		else
			err = xfrm_dst_update_origin(dst, fl);
		if (unlikely(err)) {
			write_unlock_bh(&policy->lock);
			if (dst)
				dst_free(dst);
			XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
			goto error;
		}

		dst->next = policy->bundles;
		policy->bundles = dst;
		dst_hold(dst);
		write_unlock_bh(&policy->lock);
	}
	*dst_p = dst;
	dst_release(dst_orig);
	xfrm_pols_put(pols, npols);
	return 0;

error:
	xfrm_pols_put(pols, npols);
dropdst:
	dst_release(dst_orig);
	*dst_p = NULL;
	return err;

nopol:
	err = -ENOENT;
	if (flags & XFRM_LOOKUP_ICMP)
		goto dropdst;
	return 0;
}
EXPORT_SYMBOL(__xfrm_lookup);

int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		struct sock *sk, int flags)
{
	int err = __xfrm_lookup(dst_p, fl, sk, flags);

	if (err == -EREMOTE) {
		dst_release(*dst_p);
		*dst_p = NULL;
		err = -EAGAIN;
	}

	return err;
}
EXPORT_SYMBOL(xfrm_lookup);
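
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * output-path callers hand xfrm_lookup() an already routed dst and a
 * decoded flow; on success *dst_p is replaced by the bundle (or left
 * untouched when no transformation applies).
 */
#if 0
static int example_output_lookup(struct sock *sk, struct flowi *fl,
				 struct dst_entry **dstp)
{
	/* *dstp already holds the plain route for fl here. */
	return xfrm_lookup(dstp, fl, sk, 0);
}
#endif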

static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
{
	struct xfrm_state *x;

	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
		return 0;
	x = skb->sp->xvec[idx];
	if (!x->type->reject)
		return 0;
	return x->type->reject(x, skb, fl);
}

/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have policy cached at them.
 */

static inline int
xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
	      unsigned short family)
{
	if (xfrm_state_kern(x))
		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
	return	x->id.proto == tmpl->id.proto &&
		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
		x->props.mode == tmpl->mode &&
		((tmpl->aalgos & (1<<x->props.aalgo)) ||
		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
		!(x->props.mode != XFRM_MODE_TRANSPORT &&
		  xfrm_state_addr_cmp(tmpl, x, family));
}

/*
 * 0 or more than 0 is returned when validation succeeds (either a bypass
 * because of an optional transport mode, or the next index of the matched
 * secpath state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}

int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
			  unsigned int family, int reverse)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	int err;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	afinfo->decode_session(skb, fl, reverse);
	err = security_xfrm_decode_session(skb, &fl->secid);
	xfrm_policy_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(__xfrm_decode_session);

static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
{
	for (; k < sp->len; k++) {
		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
			*idxp = k;
			return 1;
		}
	}

	return 0;
}

int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check the used SAs against their selectors. */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len - 1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol)
		pol = flow_cache_lookup(&fl, family, fl_dir,
					xfrm_policy_lookup);

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find the corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr - 1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
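
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * receive-path code typically reaches this check through per-family inline
 * wrappers, which boil down to a call like the one below.
 */
#if 0
static int example_input_check(struct sock *sk, struct sk_buff *skb)
{
	/* Returns 1 when the packet is permitted, 0 when it must be
	 * dropped (statistics are accounted inside the check). */
	return __xfrm_policy_check(sk, XFRM_POLICY_IN, skb, AF_INET);
}
#endif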

int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
{
	struct flowi fl;

	if (xfrm_decode_session(skb, &fl, family) < 0) {
		/* XXX: we should have something like FWDHDRERROR here. */
		XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
}
EXPORT_SYMBOL(__xfrm_route_forward);

/* Optimize later using cookies and generation ids. */

static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
{
	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
	 * to "-1" to force all XFRM destinations to get validated by
	 * dst_ops->check on every use.  We do this because when a
	 * normal route referenced by an XFRM dst is obsoleted we do
	 * not go looking around for all parent referencing XFRM dsts
	 * so that we can invalidate them.  It is just too much work.
	 * Instead we make the checks here on every use.  For example:
	 *
	 *	XFRM dst A --> IPv4 dst X
	 *
	 * X is the "xdst->route" of A (X is also the "dst->path" of A
	 * in this example).  If X is marked obsolete, "A" will not
	 * notice.  That's what we are validating here via the
	 * stale_bundle() check.
	 *
	 * When a policy's bundle is pruned, we dst_free() the XFRM
	 * dst which causes its ->obsolete field to be set to a
	 * positive non-zero integer.  If an XFRM dst has been pruned
	 * like this, we want to force a new route lookup.
	 */
	if (dst->obsolete < 0 && !stale_bundle(dst))
		return dst;

	return NULL;
}

static int stale_bundle(struct dst_entry *dst)
{
	return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}

void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
		dst->dev = dev->nd_net->loopback_dev;
		dev_hold(dst->dev);
		dev_put(dev);
	}
}
EXPORT_SYMBOL(xfrm_dst_ifdown);

2032 static void xfrm_link_failure(struct sk_buff *skb)
2033 {
	/* Impossible: such a dst must be popped before it reaches the
	 * point of failure.
	 */
2036 }
2037 
2038 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2039 {
2040 	if (dst) {
2041 		if (dst->obsolete) {
2042 			dst_release(dst);
2043 			dst = NULL;
2044 		}
2045 	}
2046 	return dst;
2047 }
2048 
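/*
 * Unlink every bundle dst of @pol for which @func returns true and
 * chain it onto *gc_list_p; the caller frees the collected dsts once
 * all the locks have been dropped.
 */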
static void prune_one_bundle(struct xfrm_policy *pol,
			     int (*func)(struct dst_entry *),
			     struct dst_entry **gc_list_p)
2050 {
2051 	struct dst_entry *dst, **dstp;
2052 
2053 	write_lock(&pol->lock);
2054 	dstp = &pol->bundles;
	while ((dst = *dstp) != NULL) {
2056 		if (func(dst)) {
2057 			*dstp = dst->next;
2058 			dst->next = *gc_list_p;
2059 			*gc_list_p = dst;
2060 		} else {
2061 			dstp = &dst->next;
2062 		}
2063 	}
2064 	write_unlock(&pol->lock);
2065 }
2066 
2067 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2068 {
2069 	struct dst_entry *gc_list = NULL;
2070 	int dir;
2071 
2072 	read_lock_bh(&xfrm_policy_lock);
2073 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2074 		struct xfrm_policy *pol;
2075 		struct hlist_node *entry;
2076 		struct hlist_head *table;
2077 		int i;
2078 
2079 		hlist_for_each_entry(pol, entry,
2080 				     &xfrm_policy_inexact[dir], bydst)
2081 			prune_one_bundle(pol, func, &gc_list);
2082 
2083 		table = xfrm_policy_bydst[dir].table;
2084 		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2085 			hlist_for_each_entry(pol, entry, table + i, bydst)
2086 				prune_one_bundle(pol, func, &gc_list);
2087 		}
2088 	}
2089 	read_unlock_bh(&xfrm_policy_lock);
2090 
2091 	while (gc_list) {
2092 		struct dst_entry *dst = gc_list;
2093 		gc_list = dst->next;
2094 		dst_free(dst);
2095 	}
2096 }
2097 
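/*
 * The two pruning predicates: unused_bundle() reaps bundles that no
 * one references any more (garbage collection), while stale_bundle()
 * reaps bundles whose routes or states have gone bad, e.g. after
 * NETDEV_DOWN (see xfrm_dev_event() below).
 */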
2098 static int unused_bundle(struct dst_entry *dst)
2099 {
2100 	return !atomic_read(&dst->__refcnt);
2101 }
2102 
2103 static void __xfrm_garbage_collect(void)
2104 {
2105 	xfrm_prune_bundles(unused_bundle);
2106 }
2107 
2108 static int xfrm_flush_bundles(void)
2109 {
2110 	xfrm_prune_bundles(stale_bundle);
2111 	return 0;
2112 }
2113 
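/*
 * Seed the cached MTU values of a freshly built bundle: each level
 * advertises its child's MTU reduced by the xfrm state's overhead,
 * clamped to the MTU of the route that level is stacked on.
 */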
2114 static void xfrm_init_pmtu(struct dst_entry *dst)
2115 {
2116 	do {
2117 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2118 		u32 pmtu, route_mtu_cached;
2119 
2120 		pmtu = dst_mtu(dst->child);
2121 		xdst->child_mtu_cached = pmtu;
2122 
2123 		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2124 
2125 		route_mtu_cached = dst_mtu(xdst->route);
2126 		xdst->route_mtu_cached = route_mtu_cached;
2127 
2128 		if (pmtu > route_mtu_cached)
2129 			pmtu = route_mtu_cached;
2130 
2131 		dst->metrics[RTAX_MTU-1] = pmtu;
2132 	} while ((dst = dst->next));
2133 }
2134 
/* Check that the bundle accepts the flow and that its components are
 * still valid.  Returns 1 if the bundle may keep being used (cached
 * MTUs are refreshed along the way), 0 if the caller must fall back
 * to a fresh bundle lookup.
 */
2138 
2139 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2140 		struct flowi *fl, int family, int strict)
2141 {
2142 	struct dst_entry *dst = &first->u.dst;
2143 	struct xfrm_dst *last;
2144 	u32 mtu;
2145 
2146 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2147 	    (dst->dev && !netif_running(dst->dev)))
2148 		return 0;
2149 #ifdef CONFIG_XFRM_SUB_POLICY
2150 	if (fl) {
2151 		if (first->origin && !flow_cache_uli_match(first->origin, fl))
2152 			return 0;
2153 		if (first->partner &&
2154 		    !xfrm_selector_match(first->partner, fl, family))
2155 			return 0;
2156 	}
2157 #endif
2158 
2159 	last = NULL;
2160 
2161 	do {
2162 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2163 
2164 		if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2165 			return 0;
2166 		if (fl && pol &&
2167 		    !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2168 			return 0;
2169 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2170 			return 0;
2171 		if (xdst->genid != dst->xfrm->genid)
2172 			return 0;
2173 
2174 		if (strict && fl &&
2175 		    !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2176 		    !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2177 			return 0;
2178 
2179 		mtu = dst_mtu(dst->child);
2180 		if (xdst->child_mtu_cached != mtu) {
2181 			last = xdst;
2182 			xdst->child_mtu_cached = mtu;
2183 		}
2184 
2185 		if (!dst_check(xdst->route, xdst->route_cookie))
2186 			return 0;
2187 		mtu = dst_mtu(xdst->route);
2188 		if (xdst->route_mtu_cached != mtu) {
2189 			last = xdst;
2190 			xdst->route_mtu_cached = mtu;
2191 		}
2192 
2193 		dst = dst->child;
2194 	} while (dst->xfrm);
2195 
2196 	if (likely(!last))
2197 		return 1;
2198 
2199 	mtu = last->child_mtu_cached;
2200 	for (;;) {
2201 		dst = &last->u.dst;
2202 
2203 		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2204 		if (mtu > last->route_mtu_cached)
2205 			mtu = last->route_mtu_cached;
2206 		dst->metrics[RTAX_MTU-1] = mtu;
2207 
2208 		if (last == first)
2209 			break;
2210 
2211 		last = (struct xfrm_dst *)last->u.dst.next;
2212 		last->child_mtu_cached = mtu;
2213 	}
2214 
2215 	return 1;
2216 }
EXPORT_SYMBOL(xfrm_bundle_ok);
2219 
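/*
 * Hook an address family into the policy layer.  The family's
 * dst_ops callbacks are filled in with the xfrm defaults above only
 * where the caller left them NULL, so an afinfo may pre-set any of
 * them before registering.
 */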
2220 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2221 {
2222 	int err = 0;
2223 	if (unlikely(afinfo == NULL))
2224 		return -EINVAL;
2225 	if (unlikely(afinfo->family >= NPROTO))
2226 		return -EAFNOSUPPORT;
2227 	write_lock_bh(&xfrm_policy_afinfo_lock);
2228 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2229 		err = -ENOBUFS;
2230 	else {
2231 		struct dst_ops *dst_ops = afinfo->dst_ops;
2232 		if (likely(dst_ops->kmem_cachep == NULL))
2233 			dst_ops->kmem_cachep = xfrm_dst_cache;
2234 		if (likely(dst_ops->check == NULL))
2235 			dst_ops->check = xfrm_dst_check;
2236 		if (likely(dst_ops->negative_advice == NULL))
2237 			dst_ops->negative_advice = xfrm_negative_advice;
2238 		if (likely(dst_ops->link_failure == NULL))
2239 			dst_ops->link_failure = xfrm_link_failure;
2240 		if (likely(afinfo->garbage_collect == NULL))
2241 			afinfo->garbage_collect = __xfrm_garbage_collect;
2242 		xfrm_policy_afinfo[afinfo->family] = afinfo;
2243 	}
2244 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2245 	return err;
2246 }
2247 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
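/*
 * Registration sketch (illustrative only; loosely modelled on the
 * IPv4 glue, not a verbatim copy of xfrm4_policy.c):
 *
 *	static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 *		.family	 = AF_INET,
 *		.dst_ops = &xfrm4_dst_ops,
 *	};
 *
 *	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 */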
2248 
2249 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2250 {
2251 	int err = 0;
2252 	if (unlikely(afinfo == NULL))
2253 		return -EINVAL;
2254 	if (unlikely(afinfo->family >= NPROTO))
2255 		return -EAFNOSUPPORT;
2256 	write_lock_bh(&xfrm_policy_afinfo_lock);
2257 	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2258 		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2259 			err = -EINVAL;
2260 		else {
2261 			struct dst_ops *dst_ops = afinfo->dst_ops;
2262 			xfrm_policy_afinfo[afinfo->family] = NULL;
2263 			dst_ops->kmem_cachep = NULL;
2264 			dst_ops->check = NULL;
2265 			dst_ops->negative_advice = NULL;
2266 			dst_ops->link_failure = NULL;
2267 			afinfo->garbage_collect = NULL;
2268 		}
2269 	}
2270 	write_unlock_bh(&xfrm_policy_afinfo_lock);
2271 	return err;
2272 }
2273 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2274 
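/*
 * Locking convention: a successful xfrm_policy_get_afinfo() returns
 * with xfrm_policy_afinfo_lock still read-held; the matching
 * xfrm_policy_put_afinfo() drops it.  Only a NULL return leaves the
 * lock released.
 */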
2275 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2276 {
2277 	struct xfrm_policy_afinfo *afinfo;
2278 	if (unlikely(family >= NPROTO))
2279 		return NULL;
2280 	read_lock(&xfrm_policy_afinfo_lock);
2281 	afinfo = xfrm_policy_afinfo[family];
2282 	if (unlikely(!afinfo))
2283 		read_unlock(&xfrm_policy_afinfo_lock);
2284 	return afinfo;
2285 }
2286 
2287 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2288 {
2289 	read_unlock(&xfrm_policy_afinfo_lock);
2290 }
2291 
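/*
 * Netdevice notifier: when an interface goes down, bundles routed
 * through it will fail the stale_bundle() checks, so sweep them all
 * here instead of tracking per-device bundle lists.
 */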
2292 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2293 {
2294 	struct net_device *dev = ptr;
2295 
2296 	if (dev->nd_net != &init_net)
2297 		return NOTIFY_DONE;
2298 
2299 	switch (event) {
2300 	case NETDEV_DOWN:
	case NETDEV_DOWN:
		xfrm_flush_bundles();
		break;
2303 	return NOTIFY_DONE;
2304 }
2305 
static struct notifier_block xfrm_dev_notifier = {
	.notifier_call = xfrm_dev_event,
};
2311 
2312 #ifdef CONFIG_XFRM_STATISTICS
2313 static int __init xfrm_statistics_init(void)
2314 {
2315 	if (snmp_mib_init((void **)xfrm_statistics,
2316 			  sizeof(struct linux_xfrm_mib)) < 0)
2317 		return -ENOMEM;
2318 	return 0;
2319 }
2320 #endif
2321 
2322 static void __init xfrm_policy_init(void)
2323 {
2324 	unsigned int hmask, sz;
2325 	int dir;
2326 
2327 	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2328 					   sizeof(struct xfrm_dst),
2329 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2330 					   NULL);
2331 
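	/* Start small: 8 buckets per table; the hashes are resized as
	 * policies are added.
	 */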
2332 	hmask = 8 - 1;
2333 	sz = (hmask+1) * sizeof(struct hlist_head);
2334 
2335 	xfrm_policy_byidx = xfrm_hash_alloc(sz);
2336 	xfrm_idx_hmask = hmask;
2337 	if (!xfrm_policy_byidx)
2338 		panic("XFRM: failed to allocate byidx hash\n");
2339 
2340 	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2341 		struct xfrm_policy_hash *htab;
2342 
2343 		INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2344 
2345 		htab = &xfrm_policy_bydst[dir];
2346 		htab->table = xfrm_hash_alloc(sz);
2347 		htab->hmask = hmask;
2348 		if (!htab->table)
2349 			panic("XFRM: failed to allocate bydst hash\n");
2350 	}
2351 
2352 	INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2353 	register_netdevice_notifier(&xfrm_dev_notifier);
2354 }
2355 
2356 void __init xfrm_init(void)
2357 {
2358 #ifdef CONFIG_XFRM_STATISTICS
2359 	xfrm_statistics_init();
2360 #endif
2361 	xfrm_state_init();
2362 	xfrm_policy_init();
2363 	xfrm_input_init();
2364 #ifdef CONFIG_XFRM_STATISTICS
2365 	xfrm_proc_init();
2366 #endif
2367 }
2368 
2369 #ifdef CONFIG_AUDITSYSCALL
2370 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2371 					 struct audit_buffer *audit_buf)
2372 {
2373 	struct xfrm_sec_ctx *ctx = xp->security;
2374 	struct xfrm_selector *sel = &xp->selector;
2375 
2376 	if (ctx)
2377 		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2378 				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2379 
	switch (sel->family) {
2381 	case AF_INET:
2382 		audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2383 				 NIPQUAD(sel->saddr.a4));
2384 		if (sel->prefixlen_s != 32)
2385 			audit_log_format(audit_buf, " src_prefixlen=%d",
2386 					 sel->prefixlen_s);
2387 		audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2388 				 NIPQUAD(sel->daddr.a4));
2389 		if (sel->prefixlen_d != 32)
2390 			audit_log_format(audit_buf, " dst_prefixlen=%d",
2391 					 sel->prefixlen_d);
2392 		break;
2393 	case AF_INET6:
2394 		audit_log_format(audit_buf, " src=" NIP6_FMT,
2395 				 NIP6(*(struct in6_addr *)sel->saddr.a6));
2396 		if (sel->prefixlen_s != 128)
2397 			audit_log_format(audit_buf, " src_prefixlen=%d",
2398 					 sel->prefixlen_s);
2399 		audit_log_format(audit_buf, " dst=" NIP6_FMT,
2400 				 NIP6(*(struct in6_addr *)sel->daddr.a6));
2401 		if (sel->prefixlen_d != 128)
2402 			audit_log_format(audit_buf, " dst_prefixlen=%d",
2403 					 sel->prefixlen_d);
2404 		break;
2405 	}
2406 }
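/*
 * Illustrative fragment of the record built above (values are made
 * up):
 *
 *	" sec_alg=1 sec_doi=0 sec_obj=system_u:object_r:ipsec_spd_t:s0"
 *	" src=10.0.0.0 src_prefixlen=8 dst=192.168.1.1"
 */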
2407 
2408 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2409 			   u32 auid, u32 secid)
2410 {
2411 	struct audit_buffer *audit_buf;
2412 
2413 	audit_buf = xfrm_audit_start("SPD-add");
2414 	if (audit_buf == NULL)
2415 		return;
2416 	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2417 	audit_log_format(audit_buf, " res=%u", result);
2418 	xfrm_audit_common_policyinfo(xp, audit_buf);
2419 	audit_log_end(audit_buf);
2420 }
2421 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2422 
2423 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2424 			      u32 auid, u32 secid)
2425 {
2426 	struct audit_buffer *audit_buf;
2427 
2428 	audit_buf = xfrm_audit_start("SPD-delete");
2429 	if (audit_buf == NULL)
2430 		return;
2431 	xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2432 	audit_log_format(audit_buf, " res=%u", result);
2433 	xfrm_audit_common_policyinfo(xp, audit_buf);
2434 	audit_log_end(audit_buf);
2435 }
2436 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2437 #endif
2438 
2439 #ifdef CONFIG_XFRM_MIGRATE
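/*
 * With proto == IPSEC_ULPROTO_ANY the migrate selector matches on
 * family, addresses and prefix lengths only; any other proto demands
 * a byte-identical selector.
 */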
2440 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2441 				       struct xfrm_selector *sel_tgt)
2442 {
2443 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2444 		if (sel_tgt->family == sel_cmp->family &&
2445 		    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2446 				  sel_cmp->family) == 0 &&
2447 		    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2448 				  sel_cmp->family) == 0 &&
2449 		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2450 		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2451 			return 1;
2452 		}
2453 	} else {
2454 		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2455 			return 1;
2456 		}
2457 	}
2458 	return 0;
2459 }
2460 
static struct xfrm_policy *xfrm_migrate_policy_find(struct xfrm_selector *sel,
						    u8 dir, u8 type)
2463 {
2464 	struct xfrm_policy *pol, *ret = NULL;
2465 	struct hlist_node *entry;
2466 	struct hlist_head *chain;
2467 	u32 priority = ~0U;
2468 
2469 	read_lock_bh(&xfrm_policy_lock);
2470 	chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2471 	hlist_for_each_entry(pol, entry, chain, bydst) {
2472 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2473 		    pol->type == type) {
2474 			ret = pol;
2475 			priority = ret->priority;
2476 			break;
2477 		}
2478 	}
2479 	chain = &xfrm_policy_inexact[dir];
2480 	hlist_for_each_entry(pol, entry, chain, bydst) {
2481 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2482 		    pol->type == type &&
2483 		    pol->priority < priority) {
2484 			ret = pol;
2485 			break;
2486 		}
2487 	}
2488 
2489 	if (ret)
2490 		xfrm_pol_hold(ret);
2491 
2492 	read_unlock_bh(&xfrm_policy_lock);
2493 
2494 	return ret;
2495 }
2496 
2497 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2498 {
2499 	int match = 0;
2500 
2501 	if (t->mode == m->mode && t->id.proto == m->proto &&
2502 	    (m->reqid == 0 || t->reqid == m->reqid)) {
2503 		switch (t->mode) {
2504 		case XFRM_MODE_TUNNEL:
2505 		case XFRM_MODE_BEET:
2506 			if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2507 					  m->old_family) == 0 &&
2508 			    xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2509 					  m->old_family) == 0) {
2510 				match = 1;
2511 			}
2512 			break;
2513 		case XFRM_MODE_TRANSPORT:
			/* In transport mode the template stores no IP
			 * addresses, so matching on mode and protocol is
			 * sufficient.
			 */
2517 			match = 1;
2518 			break;
2519 		default:
2520 			break;
2521 		}
2522 	}
2523 	return match;
2524 }
2525 
2526 /* update endpoint address(es) of template(s) */
2527 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2528 			       struct xfrm_migrate *m, int num_migrate)
2529 {
2530 	struct xfrm_migrate *mp;
2531 	struct dst_entry *dst;
2532 	int i, j, n = 0;
2533 
2534 	write_lock_bh(&pol->lock);
2535 	if (unlikely(pol->dead)) {
2536 		/* target policy has been deleted */
2537 		write_unlock_bh(&pol->lock);
2538 		return -ENOENT;
2539 	}
2540 
2541 	for (i = 0; i < pol->xfrm_nr; i++) {
2542 		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2543 			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2544 				continue;
2545 			n++;
2546 			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2547 			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2548 				continue;
2549 			/* update endpoints */
2550 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2551 			       sizeof(pol->xfrm_vec[i].id.daddr));
2552 			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2553 			       sizeof(pol->xfrm_vec[i].saddr));
2554 			pol->xfrm_vec[i].encap_family = mp->new_family;
2555 			/* flush bundles */
2556 			while ((dst = pol->bundles) != NULL) {
2557 				pol->bundles = dst->next;
2558 				dst_free(dst);
2559 			}
2560 		}
2561 	}
2562 
2563 	write_unlock_bh(&pol->lock);
2564 
2565 	if (!n)
2566 		return -ENODATA;
2567 
2568 	return 0;
2569 }
2570 
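/*
 * Sanity-check a migrate request: 1..XFRM_MAX_DEPTH entries, each
 * entry must change at least one of its addresses, new addresses may
 * not be wildcards, and no two entries may describe the same old
 * (daddr, saddr, proto, mode, reqid, family) tuple.
 */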
2571 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2572 {
2573 	int i, j;
2574 
2575 	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2576 		return -EINVAL;
2577 
2578 	for (i = 0; i < num_migrate; i++) {
2579 		if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2580 				   m[i].old_family) == 0) &&
2581 		    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2582 				   m[i].old_family) == 0))
2583 			return -EINVAL;
2584 		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2585 		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2586 			return -EINVAL;
2587 
2588 		/* check if there is any duplicated entry */
2589 		for (j = i + 1; j < num_migrate; j++) {
2590 			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2591 				    sizeof(m[i].old_daddr)) &&
2592 			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2593 				    sizeof(m[i].old_saddr)) &&
2594 			    m[i].proto == m[j].proto &&
2595 			    m[i].mode == m[j].mode &&
2596 			    m[i].reqid == m[j].reqid &&
2597 			    m[i].old_family == m[j].old_family)
2598 				return -EINVAL;
2599 		}
2600 	}
2601 
2602 	return 0;
2603 }
2604 
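/*
 * Migration entry point (driven by e.g. a key manager's MIGRATE
 * message): repoint a policy and its states at new endpoints in the
 * five stages annotated below, unwinding any cloned states if a
 * later stage fails.
 */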
2605 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2606 		 struct xfrm_migrate *m, int num_migrate)
2607 {
2608 	int i, err, nx_cur = 0, nx_new = 0;
2609 	struct xfrm_policy *pol = NULL;
2610 	struct xfrm_state *x, *xc;
2611 	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2612 	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2613 	struct xfrm_migrate *mp;
2614 
2615 	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2616 		goto out;
2617 
2618 	/* Stage 1 - find policy */
2619 	if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2620 		err = -ENOENT;
2621 		goto out;
2622 	}
2623 
2624 	/* Stage 2 - find and update state(s) */
2625 	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2626 		if ((x = xfrm_migrate_state_find(mp))) {
2627 			x_cur[nx_cur] = x;
2628 			nx_cur++;
2629 			if ((xc = xfrm_state_migrate(x, mp))) {
2630 				x_new[nx_new] = xc;
2631 				nx_new++;
2632 			} else {
2633 				err = -ENODATA;
2634 				goto restore_state;
2635 			}
2636 		}
2637 	}
2638 
2639 	/* Stage 3 - update policy */
2640 	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2641 		goto restore_state;
2642 
2643 	/* Stage 4 - delete old state(s) */
2644 	if (nx_cur) {
2645 		xfrm_states_put(x_cur, nx_cur);
2646 		xfrm_states_delete(x_cur, nx_cur);
2647 	}
2648 
2649 	/* Stage 5 - announce */
2650 	km_migrate(sel, dir, type, m, num_migrate);
2651 
2652 	xfrm_pol_put(pol);
2653 
2654 	return 0;
2655 out:
2656 	return err;
2657 
2658 restore_state:
2659 	if (pol)
2660 		xfrm_pol_put(pol);
2661 	if (nx_cur)
2662 		xfrm_states_put(x_cur, nx_cur);
2663 	if (nx_new)
2664 		xfrm_states_delete(x_new, nx_new);
2665 
2666 	return err;
2667 }
2668 EXPORT_SYMBOL(xfrm_migrate);
2669 #endif
2670