xref: /openbmc/linux/net/xfrm/xfrm_policy.c (revision df3305156f989339529b3d6744b898d498fb1f7b)
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *	Mitsuru KANDA @USAGI
6  * 	Kazunori MIYAZAWA @USAGI
7  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  * 		IPv6 support
9  * 	Kazunori MIYAZAWA @USAGI
10  * 	YOSHIFUJI Hideaki
11  * 		Split up af-specific portion
12  *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
13  *
14  */
15 
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/flow.h>
30 #include <net/xfrm.h>
31 #include <net/ip.h>
32 #ifdef CONFIG_XFRM_STATISTICS
33 #include <net/snmp.h>
34 #endif
35 
36 #include "xfrm_hash.h"
37 
/*
 * Hold-queue tuning. XFRM_QUEUE_TMO_MIN (a tenth of a second, in jiffies) is
 * the timeout xfrm_policy_requeue() starts a queue with. The users of the
 * other two constants are outside this part of the file; by name they are the
 * upper timeout bound and the maximum queue length.
 */
#define XFRM_QUEUE_TMO_MIN ((unsigned int)(HZ / 10))
#define XFRM_QUEUE_TMO_MAX ((unsigned int)(60 * HZ))
#define XFRM_MAX_QUEUE_LEN	100
41 
/*
 * Pairs an original dst entry with a flags byte.
 * NOTE(review): the users of this struct are not in this part of the file;
 * confirm the meaning of @flags against them.
 */
42 struct xfrm_flo {
43 	struct dst_entry *dst_orig;
44 	u8 flags;
45 };
46 
/*
 * NOTE(review): presumably serialises writers of xfrm_policy_afinfo[]; the
 * code taking this lock is outside this part of the file.
 */
47 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
/*
 * Per-address-family policy operations, indexed by family number. Readers go
 * through xfrm_policy_get_afinfo(), which dereferences an entry under
 * rcu_read_lock().
 */
48 static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
49 						__read_mostly;
50 
/* NOTE(review): by name, the slab cache for xfrm dst entries; users not shown here. */
51 static struct kmem_cache *xfrm_dst_cache __read_mostly;
52 
/* Forward declarations for helpers defined later in the file. */
53 static void xfrm_init_pmtu(struct dst_entry *dst);
54 static int stale_bundle(struct dst_entry *dst);
55 static int xfrm_bundle_ok(struct xfrm_dst *xdst);
/* Handler installed on each policy's polq.hold_timer by xfrm_policy_alloc(). */
56 static void xfrm_policy_queue_process(unsigned long arg);
57 
/* Link/unlink helpers; callers in this file invoke them under xfrm_policy_lock. */
58 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
59 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
60 						int dir);
61 
62 static inline bool
63 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64 {
65 	const struct flowi4 *fl4 = &fl->u.ip4;
66 
67 	return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
68 		addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69 		!((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
70 		!((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
71 		(fl4->flowi4_proto == sel->proto || !sel->proto) &&
72 		(fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73 }
74 
75 static inline bool
76 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77 {
78 	const struct flowi6 *fl6 = &fl->u.ip6;
79 
80 	return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
81 		addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
82 		!((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
83 		!((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
84 		(fl6->flowi6_proto == sel->proto || !sel->proto) &&
85 		(fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86 }
87 
88 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
89 			 unsigned short family)
90 {
91 	switch (family) {
92 	case AF_INET:
93 		return __xfrm4_selector_match(sel, fl);
94 	case AF_INET6:
95 		return __xfrm6_selector_match(sel, fl);
96 	}
97 	return false;
98 }
99 
100 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
101 {
102 	struct xfrm_policy_afinfo *afinfo;
103 
104 	if (unlikely(family >= NPROTO))
105 		return NULL;
106 	rcu_read_lock();
107 	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
108 	if (unlikely(!afinfo))
109 		rcu_read_unlock();
110 	return afinfo;
111 }
112 
/*
 * Counterpart of a successful xfrm_policy_get_afinfo(): drops the RCU read
 * lock that lookup left held. @afinfo itself is not touched.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}
117 
118 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
119 						  const xfrm_address_t *saddr,
120 						  const xfrm_address_t *daddr,
121 						  int family)
122 {
123 	struct xfrm_policy_afinfo *afinfo;
124 	struct dst_entry *dst;
125 
126 	afinfo = xfrm_policy_get_afinfo(family);
127 	if (unlikely(afinfo == NULL))
128 		return ERR_PTR(-EAFNOSUPPORT);
129 
130 	dst = afinfo->dst_lookup(net, tos, saddr, daddr);
131 
132 	xfrm_policy_put_afinfo(afinfo);
133 
134 	return dst;
135 }
136 
137 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
138 						xfrm_address_t *prev_saddr,
139 						xfrm_address_t *prev_daddr,
140 						int family)
141 {
142 	struct net *net = xs_net(x);
143 	xfrm_address_t *saddr = &x->props.saddr;
144 	xfrm_address_t *daddr = &x->id.daddr;
145 	struct dst_entry *dst;
146 
147 	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
148 		saddr = x->coaddr;
149 		daddr = prev_daddr;
150 	}
151 	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
152 		saddr = prev_saddr;
153 		daddr = x->coaddr;
154 	}
155 
156 	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
157 
158 	if (!IS_ERR(dst)) {
159 		if (prev_saddr != saddr)
160 			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
161 		if (prev_daddr != daddr)
162 			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
163 	}
164 
165 	return dst;
166 }
167 
168 static inline unsigned long make_jiffies(long secs)
169 {
170 	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
171 		return MAX_SCHEDULE_TIMEOUT-1;
172 	else
173 		return secs*HZ;
174 }
175 
176 static void xfrm_policy_timer(unsigned long data)
177 {
178 	struct xfrm_policy *xp = (struct xfrm_policy *)data;
179 	unsigned long now = get_seconds();
180 	long next = LONG_MAX;
181 	int warn = 0;
182 	int dir;
183 
184 	read_lock(&xp->lock);
185 
186 	if (unlikely(xp->walk.dead))
187 		goto out;
188 
189 	dir = xfrm_policy_id2dir(xp->index);
190 
191 	if (xp->lft.hard_add_expires_seconds) {
192 		long tmo = xp->lft.hard_add_expires_seconds +
193 			xp->curlft.add_time - now;
194 		if (tmo <= 0)
195 			goto expired;
196 		if (tmo < next)
197 			next = tmo;
198 	}
199 	if (xp->lft.hard_use_expires_seconds) {
200 		long tmo = xp->lft.hard_use_expires_seconds +
201 			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
202 		if (tmo <= 0)
203 			goto expired;
204 		if (tmo < next)
205 			next = tmo;
206 	}
207 	if (xp->lft.soft_add_expires_seconds) {
208 		long tmo = xp->lft.soft_add_expires_seconds +
209 			xp->curlft.add_time - now;
210 		if (tmo <= 0) {
211 			warn = 1;
212 			tmo = XFRM_KM_TIMEOUT;
213 		}
214 		if (tmo < next)
215 			next = tmo;
216 	}
217 	if (xp->lft.soft_use_expires_seconds) {
218 		long tmo = xp->lft.soft_use_expires_seconds +
219 			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
220 		if (tmo <= 0) {
221 			warn = 1;
222 			tmo = XFRM_KM_TIMEOUT;
223 		}
224 		if (tmo < next)
225 			next = tmo;
226 	}
227 
228 	if (warn)
229 		km_policy_expired(xp, dir, 0, 0);
230 	if (next != LONG_MAX &&
231 	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
232 		xfrm_pol_hold(xp);
233 
234 out:
235 	read_unlock(&xp->lock);
236 	xfrm_pol_put(xp);
237 	return;
238 
239 expired:
240 	read_unlock(&xp->lock);
241 	if (!xfrm_policy_delete(xp, dir))
242 		km_policy_expired(xp, dir, 1, 0);
243 	xfrm_pol_put(xp);
244 }
245 
246 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
247 {
248 	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
249 
250 	if (unlikely(pol->walk.dead))
251 		flo = NULL;
252 	else
253 		xfrm_pol_hold(pol);
254 
255 	return flo;
256 }
257 
258 static int xfrm_policy_flo_check(struct flow_cache_object *flo)
259 {
260 	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
261 
262 	return !pol->walk.dead;
263 }
264 
265 static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
266 {
267 	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
268 }
269 
270 static const struct flow_cache_ops xfrm_policy_fc_ops = {
271 	.get = xfrm_policy_flo_get,
272 	.check = xfrm_policy_flo_check,
273 	.delete = xfrm_policy_flo_delete,
274 };
275 
276 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
277  * SPD calls.
278  */
279 
280 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
281 {
282 	struct xfrm_policy *policy;
283 
284 	policy = kzalloc(sizeof(struct xfrm_policy), gfp);
285 
286 	if (policy) {
287 		write_pnet(&policy->xp_net, net);
288 		INIT_LIST_HEAD(&policy->walk.all);
289 		INIT_HLIST_NODE(&policy->bydst);
290 		INIT_HLIST_NODE(&policy->byidx);
291 		rwlock_init(&policy->lock);
292 		atomic_set(&policy->refcnt, 1);
293 		skb_queue_head_init(&policy->polq.hold_queue);
294 		setup_timer(&policy->timer, xfrm_policy_timer,
295 				(unsigned long)policy);
296 		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
297 			    (unsigned long)policy);
298 		policy->flo.ops = &xfrm_policy_fc_ops;
299 	}
300 	return policy;
301 }
302 EXPORT_SYMBOL(xfrm_policy_alloc);
303 
304 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
305 
306 void xfrm_policy_destroy(struct xfrm_policy *policy)
307 {
308 	BUG_ON(!policy->walk.dead);
309 
310 	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
311 		BUG();
312 
313 	security_xfrm_policy_free(policy->security);
314 	kfree(policy);
315 }
316 EXPORT_SYMBOL(xfrm_policy_destroy);
317 
318 static void xfrm_queue_purge(struct sk_buff_head *list)
319 {
320 	struct sk_buff *skb;
321 
322 	while ((skb = skb_dequeue(list)) != NULL)
323 		kfree_skb(skb);
324 }
325 
326 /* Rule must be locked. Release descentant resources, announce
327  * entry dead. The rule must be unlinked from lists to the moment.
328  */
329 
330 static void xfrm_policy_kill(struct xfrm_policy *policy)
331 {
332 	policy->walk.dead = 1;
333 
334 	atomic_inc(&policy->genid);
335 
336 	if (del_timer(&policy->polq.hold_timer))
337 		xfrm_pol_put(policy);
338 	xfrm_queue_purge(&policy->polq.hold_queue);
339 
340 	if (del_timer(&policy->timer))
341 		xfrm_pol_put(policy);
342 
343 	xfrm_pol_put(policy);
344 }
345 
/*
 * Bucket-count ceiling for the policy hash tables: the *_should_resize()
 * helpers stop requesting growth once a table has at least this many
 * buckets. Also reported to userspace as spdhmcnt by xfrm_spd_getinfo().
 */
346 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
347 
348 static inline unsigned int idx_hash(struct net *net, u32 index)
349 {
350 	return __idx_hash(index, net->xfrm.policy_idx_hmask);
351 }
352 
353 /* calculate policy hash thresholds */
354 static void __get_hash_thresh(struct net *net,
355 			      unsigned short family, int dir,
356 			      u8 *dbits, u8 *sbits)
357 {
358 	switch (family) {
359 	case AF_INET:
360 		*dbits = net->xfrm.policy_bydst[dir].dbits4;
361 		*sbits = net->xfrm.policy_bydst[dir].sbits4;
362 		break;
363 
364 	case AF_INET6:
365 		*dbits = net->xfrm.policy_bydst[dir].dbits6;
366 		*sbits = net->xfrm.policy_bydst[dir].sbits6;
367 		break;
368 
369 	default:
370 		*dbits = 0;
371 		*sbits = 0;
372 	}
373 }
374 
375 static struct hlist_head *policy_hash_bysel(struct net *net,
376 					    const struct xfrm_selector *sel,
377 					    unsigned short family, int dir)
378 {
379 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
380 	unsigned int hash;
381 	u8 dbits;
382 	u8 sbits;
383 
384 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
385 	hash = __sel_hash(sel, family, hmask, dbits, sbits);
386 
387 	return (hash == hmask + 1 ?
388 		&net->xfrm.policy_inexact[dir] :
389 		net->xfrm.policy_bydst[dir].table + hash);
390 }
391 
392 static struct hlist_head *policy_hash_direct(struct net *net,
393 					     const xfrm_address_t *daddr,
394 					     const xfrm_address_t *saddr,
395 					     unsigned short family, int dir)
396 {
397 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
398 	unsigned int hash;
399 	u8 dbits;
400 	u8 sbits;
401 
402 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
403 	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
404 
405 	return net->xfrm.policy_bydst[dir].table + hash;
406 }
407 
408 static void xfrm_dst_hash_transfer(struct net *net,
409 				   struct hlist_head *list,
410 				   struct hlist_head *ndsttable,
411 				   unsigned int nhashmask,
412 				   int dir)
413 {
414 	struct hlist_node *tmp, *entry0 = NULL;
415 	struct xfrm_policy *pol;
416 	unsigned int h0 = 0;
417 	u8 dbits;
418 	u8 sbits;
419 
420 redo:
421 	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
422 		unsigned int h;
423 
424 		__get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
425 		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
426 				pol->family, nhashmask, dbits, sbits);
427 		if (!entry0) {
428 			hlist_del(&pol->bydst);
429 			hlist_add_head(&pol->bydst, ndsttable+h);
430 			h0 = h;
431 		} else {
432 			if (h != h0)
433 				continue;
434 			hlist_del(&pol->bydst);
435 			hlist_add_behind(&pol->bydst, entry0);
436 		}
437 		entry0 = &pol->bydst;
438 	}
439 	if (!hlist_empty(list)) {
440 		entry0 = NULL;
441 		goto redo;
442 	}
443 }
444 
445 static void xfrm_idx_hash_transfer(struct hlist_head *list,
446 				   struct hlist_head *nidxtable,
447 				   unsigned int nhashmask)
448 {
449 	struct hlist_node *tmp;
450 	struct xfrm_policy *pol;
451 
452 	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
453 		unsigned int h;
454 
455 		h = __idx_hash(pol->index, nhashmask);
456 		hlist_add_head(&pol->byidx, nidxtable+h);
457 	}
458 }
459 
/*
 * Hash mask for a table twice the size of one using @old_hmask:
 * shift the mask left by one and set the new low bit.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return (old_hmask << 1) | 1U;
}
464 
465 static void xfrm_bydst_resize(struct net *net, int dir)
466 {
467 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
468 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
469 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
470 	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
471 	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
472 	int i;
473 
474 	if (!ndst)
475 		return;
476 
477 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
478 
479 	for (i = hmask; i >= 0; i--)
480 		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
481 
482 	net->xfrm.policy_bydst[dir].table = ndst;
483 	net->xfrm.policy_bydst[dir].hmask = nhashmask;
484 
485 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
486 
487 	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
488 }
489 
490 static void xfrm_byidx_resize(struct net *net, int total)
491 {
492 	unsigned int hmask = net->xfrm.policy_idx_hmask;
493 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
494 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
495 	struct hlist_head *oidx = net->xfrm.policy_byidx;
496 	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
497 	int i;
498 
499 	if (!nidx)
500 		return;
501 
502 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
503 
504 	for (i = hmask; i >= 0; i--)
505 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
506 
507 	net->xfrm.policy_byidx = nidx;
508 	net->xfrm.policy_idx_hmask = nhashmask;
509 
510 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
511 
512 	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
513 }
514 
515 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
516 {
517 	unsigned int cnt = net->xfrm.policy_count[dir];
518 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
519 
520 	if (total)
521 		*total += cnt;
522 
523 	if ((hmask + 1) < xfrm_policy_hashmax &&
524 	    cnt > hmask)
525 		return 1;
526 
527 	return 0;
528 }
529 
530 static inline int xfrm_byidx_should_resize(struct net *net, int total)
531 {
532 	unsigned int hmask = net->xfrm.policy_idx_hmask;
533 
534 	if ((hmask + 1) < xfrm_policy_hashmax &&
535 	    total > hmask)
536 		return 1;
537 
538 	return 0;
539 }
540 
541 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
542 {
543 	read_lock_bh(&net->xfrm.xfrm_policy_lock);
544 	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
545 	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
546 	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
547 	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
548 	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
549 	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
550 	si->spdhcnt = net->xfrm.policy_idx_hmask;
551 	si->spdhmcnt = xfrm_policy_hashmax;
552 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
553 }
554 EXPORT_SYMBOL(xfrm_spd_getinfo);
555 
556 static DEFINE_MUTEX(hash_resize_mutex);
557 static void xfrm_hash_resize(struct work_struct *work)
558 {
559 	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
560 	int dir, total;
561 
562 	mutex_lock(&hash_resize_mutex);
563 
564 	total = 0;
565 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
566 		if (xfrm_bydst_should_resize(net, dir, &total))
567 			xfrm_bydst_resize(net, dir);
568 	}
569 	if (xfrm_byidx_should_resize(net, total))
570 		xfrm_byidx_resize(net, total);
571 
572 	mutex_unlock(&hash_resize_mutex);
573 }
574 
575 static void xfrm_hash_rebuild(struct work_struct *work)
576 {
577 	struct net *net = container_of(work, struct net,
578 				       xfrm.policy_hthresh.work);
579 	unsigned int hmask;
580 	struct xfrm_policy *pol;
581 	struct xfrm_policy *policy;
582 	struct hlist_head *chain;
583 	struct hlist_head *odst;
584 	struct hlist_node *newpos;
585 	int i;
586 	int dir;
587 	unsigned seq;
588 	u8 lbits4, rbits4, lbits6, rbits6;
589 
590 	mutex_lock(&hash_resize_mutex);
591 
592 	/* read selector prefixlen thresholds */
593 	do {
594 		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
595 
596 		lbits4 = net->xfrm.policy_hthresh.lbits4;
597 		rbits4 = net->xfrm.policy_hthresh.rbits4;
598 		lbits6 = net->xfrm.policy_hthresh.lbits6;
599 		rbits6 = net->xfrm.policy_hthresh.rbits6;
600 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
601 
602 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
603 
604 	/* reset the bydst and inexact table in all directions */
605 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
606 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
607 		hmask = net->xfrm.policy_bydst[dir].hmask;
608 		odst = net->xfrm.policy_bydst[dir].table;
609 		for (i = hmask; i >= 0; i--)
610 			INIT_HLIST_HEAD(odst + i);
611 		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
612 			/* dir out => dst = remote, src = local */
613 			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
614 			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
615 			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
616 			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
617 		} else {
618 			/* dir in/fwd => dst = local, src = remote */
619 			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
620 			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
621 			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
622 			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
623 		}
624 	}
625 
626 	/* re-insert all policies by order of creation */
627 	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
628 		newpos = NULL;
629 		chain = policy_hash_bysel(net, &policy->selector,
630 					  policy->family,
631 					  xfrm_policy_id2dir(policy->index));
632 		hlist_for_each_entry(pol, chain, bydst) {
633 			if (policy->priority >= pol->priority)
634 				newpos = &pol->bydst;
635 			else
636 				break;
637 		}
638 		if (newpos)
639 			hlist_add_behind(&policy->bydst, newpos);
640 		else
641 			hlist_add_head(&policy->bydst, chain);
642 	}
643 
644 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
645 
646 	mutex_unlock(&hash_resize_mutex);
647 }
648 
649 void xfrm_policy_hash_rebuild(struct net *net)
650 {
651 	schedule_work(&net->xfrm.policy_hthresh.work);
652 }
653 EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
654 
655 /* Generate new index... KAME seems to generate them ordered by cost
656  * of an absolute inpredictability of ordering of rules. This will not pass. */
657 static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
658 {
659 	static u32 idx_generator;
660 
661 	for (;;) {
662 		struct hlist_head *list;
663 		struct xfrm_policy *p;
664 		u32 idx;
665 		int found;
666 
667 		if (!index) {
668 			idx = (idx_generator | dir);
669 			idx_generator += 8;
670 		} else {
671 			idx = index;
672 			index = 0;
673 		}
674 
675 		if (idx == 0)
676 			idx = 8;
677 		list = net->xfrm.policy_byidx + idx_hash(net, idx);
678 		found = 0;
679 		hlist_for_each_entry(p, list, byidx) {
680 			if (p->index == idx) {
681 				found = 1;
682 				break;
683 			}
684 		}
685 		if (!found)
686 			return idx;
687 	}
688 }
689 
690 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
691 {
692 	u32 *p1 = (u32 *) s1;
693 	u32 *p2 = (u32 *) s2;
694 	int len = sizeof(struct xfrm_selector) / sizeof(u32);
695 	int i;
696 
697 	for (i = 0; i < len; i++) {
698 		if (p1[i] != p2[i])
699 			return 1;
700 	}
701 
702 	return 0;
703 }
704 
705 static void xfrm_policy_requeue(struct xfrm_policy *old,
706 				struct xfrm_policy *new)
707 {
708 	struct xfrm_policy_queue *pq = &old->polq;
709 	struct sk_buff_head list;
710 
711 	__skb_queue_head_init(&list);
712 
713 	spin_lock_bh(&pq->hold_queue.lock);
714 	skb_queue_splice_init(&pq->hold_queue, &list);
715 	if (del_timer(&pq->hold_timer))
716 		xfrm_pol_put(old);
717 	spin_unlock_bh(&pq->hold_queue.lock);
718 
719 	if (skb_queue_empty(&list))
720 		return;
721 
722 	pq = &new->polq;
723 
724 	spin_lock_bh(&pq->hold_queue.lock);
725 	skb_queue_splice(&list, &pq->hold_queue);
726 	pq->timeout = XFRM_QUEUE_TMO_MIN;
727 	if (!mod_timer(&pq->hold_timer, jiffies))
728 		xfrm_pol_hold(new);
729 	spin_unlock_bh(&pq->hold_queue.lock);
730 }
731 
732 static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
733 				   struct xfrm_policy *pol)
734 {
735 	u32 mark = policy->mark.v & policy->mark.m;
736 
737 	if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
738 		return true;
739 
740 	if ((mark & pol->mark.m) == pol->mark.v &&
741 	    policy->priority == pol->priority)
742 		return true;
743 
744 	return false;
745 }
746 
747 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
748 {
749 	struct net *net = xp_net(policy);
750 	struct xfrm_policy *pol;
751 	struct xfrm_policy *delpol;
752 	struct hlist_head *chain;
753 	struct hlist_node *newpos;
754 
755 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
756 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
757 	delpol = NULL;
758 	newpos = NULL;
759 	hlist_for_each_entry(pol, chain, bydst) {
760 		if (pol->type == policy->type &&
761 		    !selector_cmp(&pol->selector, &policy->selector) &&
762 		    xfrm_policy_mark_match(policy, pol) &&
763 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
764 		    !WARN_ON(delpol)) {
765 			if (excl) {
766 				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
767 				return -EEXIST;
768 			}
769 			delpol = pol;
770 			if (policy->priority > pol->priority)
771 				continue;
772 		} else if (policy->priority >= pol->priority) {
773 			newpos = &pol->bydst;
774 			continue;
775 		}
776 		if (delpol)
777 			break;
778 	}
779 	if (newpos)
780 		hlist_add_behind(&policy->bydst, newpos);
781 	else
782 		hlist_add_head(&policy->bydst, chain);
783 	__xfrm_policy_link(policy, dir);
784 	atomic_inc(&net->xfrm.flow_cache_genid);
785 
786 	/* After previous checking, family can either be AF_INET or AF_INET6 */
787 	if (policy->family == AF_INET)
788 		rt_genid_bump_ipv4(net);
789 	else
790 		rt_genid_bump_ipv6(net);
791 
792 	if (delpol) {
793 		xfrm_policy_requeue(delpol, policy);
794 		__xfrm_policy_unlink(delpol, dir);
795 	}
796 	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
797 	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
798 	policy->curlft.add_time = get_seconds();
799 	policy->curlft.use_time = 0;
800 	if (!mod_timer(&policy->timer, jiffies + HZ))
801 		xfrm_pol_hold(policy);
802 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
803 
804 	if (delpol)
805 		xfrm_policy_kill(delpol);
806 	else if (xfrm_bydst_should_resize(net, dir, NULL))
807 		schedule_work(&net->xfrm.policy_hash_work);
808 
809 	return 0;
810 }
811 EXPORT_SYMBOL(xfrm_policy_insert);
812 
813 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
814 					  int dir, struct xfrm_selector *sel,
815 					  struct xfrm_sec_ctx *ctx, int delete,
816 					  int *err)
817 {
818 	struct xfrm_policy *pol, *ret;
819 	struct hlist_head *chain;
820 
821 	*err = 0;
822 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
823 	chain = policy_hash_bysel(net, sel, sel->family, dir);
824 	ret = NULL;
825 	hlist_for_each_entry(pol, chain, bydst) {
826 		if (pol->type == type &&
827 		    (mark & pol->mark.m) == pol->mark.v &&
828 		    !selector_cmp(sel, &pol->selector) &&
829 		    xfrm_sec_ctx_match(ctx, pol->security)) {
830 			xfrm_pol_hold(pol);
831 			if (delete) {
832 				*err = security_xfrm_policy_delete(
833 								pol->security);
834 				if (*err) {
835 					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
836 					return pol;
837 				}
838 				__xfrm_policy_unlink(pol, dir);
839 			}
840 			ret = pol;
841 			break;
842 		}
843 	}
844 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
845 
846 	if (ret && delete)
847 		xfrm_policy_kill(ret);
848 	return ret;
849 }
850 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
851 
852 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
853 				     int dir, u32 id, int delete, int *err)
854 {
855 	struct xfrm_policy *pol, *ret;
856 	struct hlist_head *chain;
857 
858 	*err = -ENOENT;
859 	if (xfrm_policy_id2dir(id) != dir)
860 		return NULL;
861 
862 	*err = 0;
863 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
864 	chain = net->xfrm.policy_byidx + idx_hash(net, id);
865 	ret = NULL;
866 	hlist_for_each_entry(pol, chain, byidx) {
867 		if (pol->type == type && pol->index == id &&
868 		    (mark & pol->mark.m) == pol->mark.v) {
869 			xfrm_pol_hold(pol);
870 			if (delete) {
871 				*err = security_xfrm_policy_delete(
872 								pol->security);
873 				if (*err) {
874 					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
875 					return pol;
876 				}
877 				__xfrm_policy_unlink(pol, dir);
878 			}
879 			ret = pol;
880 			break;
881 		}
882 	}
883 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
884 
885 	if (ret && delete)
886 		xfrm_policy_kill(ret);
887 	return ret;
888 }
889 EXPORT_SYMBOL(xfrm_policy_byid);
890 
891 #ifdef CONFIG_SECURITY_NETWORK_XFRM
892 static inline int
893 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
894 {
895 	int dir, err = 0;
896 
897 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
898 		struct xfrm_policy *pol;
899 		int i;
900 
901 		hlist_for_each_entry(pol,
902 				     &net->xfrm.policy_inexact[dir], bydst) {
903 			if (pol->type != type)
904 				continue;
905 			err = security_xfrm_policy_delete(pol->security);
906 			if (err) {
907 				xfrm_audit_policy_delete(pol, 0, task_valid);
908 				return err;
909 			}
910 		}
911 		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
912 			hlist_for_each_entry(pol,
913 					     net->xfrm.policy_bydst[dir].table + i,
914 					     bydst) {
915 				if (pol->type != type)
916 					continue;
917 				err = security_xfrm_policy_delete(
918 								pol->security);
919 				if (err) {
920 					xfrm_audit_policy_delete(pol, 0,
921 								 task_valid);
922 					return err;
923 				}
924 			}
925 		}
926 	}
927 	return err;
928 }
929 #else
930 static inline int
931 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
932 {
933 	return 0;
934 }
935 #endif
936 
937 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
938 {
939 	int dir, err = 0, cnt = 0;
940 
941 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
942 
943 	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
944 	if (err)
945 		goto out;
946 
947 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
948 		struct xfrm_policy *pol;
949 		int i;
950 
951 	again1:
952 		hlist_for_each_entry(pol,
953 				     &net->xfrm.policy_inexact[dir], bydst) {
954 			if (pol->type != type)
955 				continue;
956 			__xfrm_policy_unlink(pol, dir);
957 			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
958 			cnt++;
959 
960 			xfrm_audit_policy_delete(pol, 1, task_valid);
961 
962 			xfrm_policy_kill(pol);
963 
964 			write_lock_bh(&net->xfrm.xfrm_policy_lock);
965 			goto again1;
966 		}
967 
968 		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
969 	again2:
970 			hlist_for_each_entry(pol,
971 					     net->xfrm.policy_bydst[dir].table + i,
972 					     bydst) {
973 				if (pol->type != type)
974 					continue;
975 				__xfrm_policy_unlink(pol, dir);
976 				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
977 				cnt++;
978 
979 				xfrm_audit_policy_delete(pol, 1, task_valid);
980 				xfrm_policy_kill(pol);
981 
982 				write_lock_bh(&net->xfrm.xfrm_policy_lock);
983 				goto again2;
984 			}
985 		}
986 
987 	}
988 	if (!cnt)
989 		err = -ESRCH;
990 out:
991 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
992 	return err;
993 }
994 EXPORT_SYMBOL(xfrm_policy_flush);
995 
996 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
997 		     int (*func)(struct xfrm_policy *, int, int, void*),
998 		     void *data)
999 {
1000 	struct xfrm_policy *pol;
1001 	struct xfrm_policy_walk_entry *x;
1002 	int error = 0;
1003 
1004 	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
1005 	    walk->type != XFRM_POLICY_TYPE_ANY)
1006 		return -EINVAL;
1007 
1008 	if (list_empty(&walk->walk.all) && walk->seq != 0)
1009 		return 0;
1010 
1011 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
1012 	if (list_empty(&walk->walk.all))
1013 		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
1014 	else
1015 		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
1016 	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
1017 		if (x->dead)
1018 			continue;
1019 		pol = container_of(x, struct xfrm_policy, walk);
1020 		if (walk->type != XFRM_POLICY_TYPE_ANY &&
1021 		    walk->type != pol->type)
1022 			continue;
1023 		error = func(pol, xfrm_policy_id2dir(pol->index),
1024 			     walk->seq, data);
1025 		if (error) {
1026 			list_move_tail(&walk->walk.all, &x->all);
1027 			goto out;
1028 		}
1029 		walk->seq++;
1030 	}
1031 	if (walk->seq == 0) {
1032 		error = -ENOENT;
1033 		goto out;
1034 	}
1035 	list_del_init(&walk->walk.all);
1036 out:
1037 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1038 	return error;
1039 }
1040 EXPORT_SYMBOL(xfrm_policy_walk);
1041 
1042 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
1043 {
1044 	INIT_LIST_HEAD(&walk->walk.all);
1045 	walk->walk.dead = 1;
1046 	walk->type = type;
1047 	walk->seq = 0;
1048 }
1049 EXPORT_SYMBOL(xfrm_policy_walk_init);
1050 
1051 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
1052 {
1053 	if (list_empty(&walk->walk.all))
1054 		return;
1055 
1056 	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
1057 	list_del(&walk->walk.all);
1058 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1059 }
1060 EXPORT_SYMBOL(xfrm_policy_walk_done);
1061 
1062 /*
1063  * Find policy to apply to this flow.
1064  *
1065  * Returns 0 if policy found, else an -errno.
1066  */
1067 static int xfrm_policy_match(const struct xfrm_policy *pol,
1068 			     const struct flowi *fl,
1069 			     u8 type, u16 family, int dir)
1070 {
1071 	const struct xfrm_selector *sel = &pol->selector;
1072 	int ret = -ESRCH;
1073 	bool match;
1074 
1075 	if (pol->family != family ||
1076 	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1077 	    pol->type != type)
1078 		return ret;
1079 
1080 	match = xfrm_selector_match(sel, fl, family);
1081 	if (match)
1082 		ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1083 						  dir);
1084 
1085 	return ret;
1086 }
1087 
1088 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1089 						     const struct flowi *fl,
1090 						     u16 family, u8 dir)
1091 {
1092 	int err;
1093 	struct xfrm_policy *pol, *ret;
1094 	const xfrm_address_t *daddr, *saddr;
1095 	struct hlist_head *chain;
1096 	u32 priority = ~0U;
1097 
1098 	daddr = xfrm_flowi_daddr(fl, family);
1099 	saddr = xfrm_flowi_saddr(fl, family);
1100 	if (unlikely(!daddr || !saddr))
1101 		return NULL;
1102 
1103 	read_lock_bh(&net->xfrm.xfrm_policy_lock);
1104 	chain = policy_hash_direct(net, daddr, saddr, family, dir);
1105 	ret = NULL;
1106 	hlist_for_each_entry(pol, chain, bydst) {
1107 		err = xfrm_policy_match(pol, fl, type, family, dir);
1108 		if (err) {
1109 			if (err == -ESRCH)
1110 				continue;
1111 			else {
1112 				ret = ERR_PTR(err);
1113 				goto fail;
1114 			}
1115 		} else {
1116 			ret = pol;
1117 			priority = ret->priority;
1118 			break;
1119 		}
1120 	}
1121 	chain = &net->xfrm.policy_inexact[dir];
1122 	hlist_for_each_entry(pol, chain, bydst) {
1123 		err = xfrm_policy_match(pol, fl, type, family, dir);
1124 		if (err) {
1125 			if (err == -ESRCH)
1126 				continue;
1127 			else {
1128 				ret = ERR_PTR(err);
1129 				goto fail;
1130 			}
1131 		} else if (pol->priority < priority) {
1132 			ret = pol;
1133 			break;
1134 		}
1135 	}
1136 	if (ret)
1137 		xfrm_pol_hold(ret);
1138 fail:
1139 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1140 
1141 	return ret;
1142 }
1143 
1144 static struct xfrm_policy *
1145 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1146 {
1147 #ifdef CONFIG_XFRM_SUB_POLICY
1148 	struct xfrm_policy *pol;
1149 
1150 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
1151 	if (pol != NULL)
1152 		return pol;
1153 #endif
1154 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1155 }
1156 
1157 static int flow_to_policy_dir(int dir)
1158 {
1159 	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1160 	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1161 	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
1162 		return dir;
1163 
1164 	switch (dir) {
1165 	default:
1166 	case FLOW_DIR_IN:
1167 		return XFRM_POLICY_IN;
1168 	case FLOW_DIR_OUT:
1169 		return XFRM_POLICY_OUT;
1170 	case FLOW_DIR_FWD:
1171 		return XFRM_POLICY_FWD;
1172 	}
1173 }
1174 
1175 static struct flow_cache_object *
1176 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1177 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
1178 {
1179 	struct xfrm_policy *pol;
1180 
1181 	if (old_obj)
1182 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1183 
1184 	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1185 	if (IS_ERR_OR_NULL(pol))
1186 		return ERR_CAST(pol);
1187 
1188 	/* Resolver returns two references:
1189 	 * one for cache and one for caller of flow_cache_lookup() */
1190 	xfrm_pol_hold(pol);
1191 
1192 	return &pol->flo;
1193 }
1194 
1195 static inline int policy_to_flow_dir(int dir)
1196 {
1197 	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1198 	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1199 	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
1200 		return dir;
1201 	switch (dir) {
1202 	default:
1203 	case XFRM_POLICY_IN:
1204 		return FLOW_DIR_IN;
1205 	case XFRM_POLICY_OUT:
1206 		return FLOW_DIR_OUT;
1207 	case XFRM_POLICY_FWD:
1208 		return FLOW_DIR_FWD;
1209 	}
1210 }
1211 
1212 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
1213 						 const struct flowi *fl)
1214 {
1215 	struct xfrm_policy *pol;
1216 	struct net *net = sock_net(sk);
1217 
1218 	read_lock_bh(&net->xfrm.xfrm_policy_lock);
1219 	if ((pol = sk->sk_policy[dir]) != NULL) {
1220 		bool match = xfrm_selector_match(&pol->selector, fl,
1221 						 sk->sk_family);
1222 		int err = 0;
1223 
1224 		if (match) {
1225 			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1226 				pol = NULL;
1227 				goto out;
1228 			}
1229 			err = security_xfrm_policy_lookup(pol->security,
1230 						      fl->flowi_secid,
1231 						      policy_to_flow_dir(dir));
1232 			if (!err)
1233 				xfrm_pol_hold(pol);
1234 			else if (err == -ESRCH)
1235 				pol = NULL;
1236 			else
1237 				pol = ERR_PTR(err);
1238 		} else
1239 			pol = NULL;
1240 	}
1241 out:
1242 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1243 	return pol;
1244 }
1245 
1246 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1247 {
1248 	struct net *net = xp_net(pol);
1249 
1250 	list_add(&pol->walk.all, &net->xfrm.policy_all);
1251 	net->xfrm.policy_count[dir]++;
1252 	xfrm_pol_hold(pol);
1253 }
1254 
1255 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1256 						int dir)
1257 {
1258 	struct net *net = xp_net(pol);
1259 
1260 	if (list_empty(&pol->walk.all))
1261 		return NULL;
1262 
1263 	/* Socket policies are not hashed. */
1264 	if (!hlist_unhashed(&pol->bydst)) {
1265 		hlist_del(&pol->bydst);
1266 		hlist_del(&pol->byidx);
1267 	}
1268 
1269 	list_del_init(&pol->walk.all);
1270 	net->xfrm.policy_count[dir]--;
1271 
1272 	return pol;
1273 }
1274 
1275 static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
1276 {
1277 	__xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
1278 }
1279 
1280 static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
1281 {
1282 	__xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
1283 }
1284 
1285 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1286 {
1287 	struct net *net = xp_net(pol);
1288 
1289 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
1290 	pol = __xfrm_policy_unlink(pol, dir);
1291 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1292 	if (pol) {
1293 		xfrm_policy_kill(pol);
1294 		return 0;
1295 	}
1296 	return -ENOENT;
1297 }
1298 EXPORT_SYMBOL(xfrm_policy_delete);
1299 
1300 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1301 {
1302 	struct net *net = xp_net(pol);
1303 	struct xfrm_policy *old_pol;
1304 
1305 #ifdef CONFIG_XFRM_SUB_POLICY
1306 	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1307 		return -EINVAL;
1308 #endif
1309 
1310 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
1311 	old_pol = sk->sk_policy[dir];
1312 	sk->sk_policy[dir] = pol;
1313 	if (pol) {
1314 		pol->curlft.add_time = get_seconds();
1315 		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
1316 		xfrm_sk_policy_link(pol, dir);
1317 	}
1318 	if (old_pol) {
1319 		if (pol)
1320 			xfrm_policy_requeue(old_pol, pol);
1321 
1322 		/* Unlinking succeeds always. This is the only function
1323 		 * allowed to delete or replace socket policy.
1324 		 */
1325 		xfrm_sk_policy_unlink(old_pol, dir);
1326 	}
1327 	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1328 
1329 	if (old_pol) {
1330 		xfrm_policy_kill(old_pol);
1331 	}
1332 	return 0;
1333 }
1334 
1335 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1336 {
1337 	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1338 	struct net *net = xp_net(old);
1339 
1340 	if (newp) {
1341 		newp->selector = old->selector;
1342 		if (security_xfrm_policy_clone(old->security,
1343 					       &newp->security)) {
1344 			kfree(newp);
1345 			return NULL;  /* ENOMEM */
1346 		}
1347 		newp->lft = old->lft;
1348 		newp->curlft = old->curlft;
1349 		newp->mark = old->mark;
1350 		newp->action = old->action;
1351 		newp->flags = old->flags;
1352 		newp->xfrm_nr = old->xfrm_nr;
1353 		newp->index = old->index;
1354 		newp->type = old->type;
1355 		memcpy(newp->xfrm_vec, old->xfrm_vec,
1356 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1357 		write_lock_bh(&net->xfrm.xfrm_policy_lock);
1358 		xfrm_sk_policy_link(newp, dir);
1359 		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1360 		xfrm_pol_put(newp);
1361 	}
1362 	return newp;
1363 }
1364 
1365 int __xfrm_sk_clone_policy(struct sock *sk)
1366 {
1367 	struct xfrm_policy *p0 = sk->sk_policy[0],
1368 			   *p1 = sk->sk_policy[1];
1369 
1370 	sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1371 	if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1372 		return -ENOMEM;
1373 	if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1374 		return -ENOMEM;
1375 	return 0;
1376 }
1377 
1378 static int
1379 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1380 	       unsigned short family)
1381 {
1382 	int err;
1383 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1384 
1385 	if (unlikely(afinfo == NULL))
1386 		return -EINVAL;
1387 	err = afinfo->get_saddr(net, local, remote);
1388 	xfrm_policy_put_afinfo(afinfo);
1389 	return err;
1390 }
1391 
1392 /* Resolve list of templates for the flow, given policy. */
1393 
1394 static int
1395 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1396 		      struct xfrm_state **xfrm, unsigned short family)
1397 {
1398 	struct net *net = xp_net(policy);
1399 	int nx;
1400 	int i, error;
1401 	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1402 	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1403 	xfrm_address_t tmp;
1404 
1405 	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
1406 		struct xfrm_state *x;
1407 		xfrm_address_t *remote = daddr;
1408 		xfrm_address_t *local  = saddr;
1409 		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1410 
1411 		if (tmpl->mode == XFRM_MODE_TUNNEL ||
1412 		    tmpl->mode == XFRM_MODE_BEET) {
1413 			remote = &tmpl->id.daddr;
1414 			local = &tmpl->saddr;
1415 			if (xfrm_addr_any(local, tmpl->encap_family)) {
1416 				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
1417 				if (error)
1418 					goto fail;
1419 				local = &tmp;
1420 			}
1421 		}
1422 
1423 		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1424 
1425 		if (x && x->km.state == XFRM_STATE_VALID) {
1426 			xfrm[nx++] = x;
1427 			daddr = remote;
1428 			saddr = local;
1429 			continue;
1430 		}
1431 		if (x) {
1432 			error = (x->km.state == XFRM_STATE_ERROR ?
1433 				 -EINVAL : -EAGAIN);
1434 			xfrm_state_put(x);
1435 		} else if (error == -ESRCH) {
1436 			error = -EAGAIN;
1437 		}
1438 
1439 		if (!tmpl->optional)
1440 			goto fail;
1441 	}
1442 	return nx;
1443 
1444 fail:
1445 	for (nx--; nx >= 0; nx--)
1446 		xfrm_state_put(xfrm[nx]);
1447 	return error;
1448 }
1449 
1450 static int
1451 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1452 		  struct xfrm_state **xfrm, unsigned short family)
1453 {
1454 	struct xfrm_state *tp[XFRM_MAX_DEPTH];
1455 	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1456 	int cnx = 0;
1457 	int error;
1458 	int ret;
1459 	int i;
1460 
1461 	for (i = 0; i < npols; i++) {
1462 		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1463 			error = -ENOBUFS;
1464 			goto fail;
1465 		}
1466 
1467 		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1468 		if (ret < 0) {
1469 			error = ret;
1470 			goto fail;
1471 		} else
1472 			cnx += ret;
1473 	}
1474 
1475 	/* found states are sorted for outbound processing */
1476 	if (npols > 1)
1477 		xfrm_state_sort(xfrm, tpp, cnx, family);
1478 
1479 	return cnx;
1480 
1481  fail:
1482 	for (cnx--; cnx >= 0; cnx--)
1483 		xfrm_state_put(tpp[cnx]);
1484 	return error;
1485 
1486 }
1487 
1488 /* Check that the bundle accepts the flow and its components are
1489  * still valid.
1490  */
1491 
1492 static inline int xfrm_get_tos(const struct flowi *fl, int family)
1493 {
1494 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1495 	int tos;
1496 
1497 	if (!afinfo)
1498 		return -EINVAL;
1499 
1500 	tos = afinfo->get_tos(fl);
1501 
1502 	xfrm_policy_put_afinfo(afinfo);
1503 
1504 	return tos;
1505 }
1506 
1507 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1508 {
1509 	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1510 	struct dst_entry *dst = &xdst->u.dst;
1511 
1512 	if (xdst->route == NULL) {
1513 		/* Dummy bundle - if it has xfrms we were not
1514 		 * able to build bundle as template resolution failed.
1515 		 * It means we need to try again resolving. */
1516 		if (xdst->num_xfrms > 0)
1517 			return NULL;
1518 	} else if (dst->flags & DST_XFRM_QUEUE) {
1519 		return NULL;
1520 	} else {
1521 		/* Real bundle */
1522 		if (stale_bundle(dst))
1523 			return NULL;
1524 	}
1525 
1526 	dst_hold(dst);
1527 	return flo;
1528 }
1529 
1530 static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1531 {
1532 	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1533 	struct dst_entry *dst = &xdst->u.dst;
1534 
1535 	if (!xdst->route)
1536 		return 0;
1537 	if (stale_bundle(dst))
1538 		return 0;
1539 
1540 	return 1;
1541 }
1542 
1543 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1544 {
1545 	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1546 	struct dst_entry *dst = &xdst->u.dst;
1547 
1548 	dst_free(dst);
1549 }
1550 
1551 static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1552 	.get = xfrm_bundle_flo_get,
1553 	.check = xfrm_bundle_flo_check,
1554 	.delete = xfrm_bundle_flo_delete,
1555 };
1556 
1557 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1558 {
1559 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1560 	struct dst_ops *dst_ops;
1561 	struct xfrm_dst *xdst;
1562 
1563 	if (!afinfo)
1564 		return ERR_PTR(-EINVAL);
1565 
1566 	switch (family) {
1567 	case AF_INET:
1568 		dst_ops = &net->xfrm.xfrm4_dst_ops;
1569 		break;
1570 #if IS_ENABLED(CONFIG_IPV6)
1571 	case AF_INET6:
1572 		dst_ops = &net->xfrm.xfrm6_dst_ops;
1573 		break;
1574 #endif
1575 	default:
1576 		BUG();
1577 	}
1578 	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1579 
1580 	if (likely(xdst)) {
1581 		struct dst_entry *dst = &xdst->u.dst;
1582 
1583 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1584 		xdst->flo.ops = &xfrm_bundle_fc_ops;
1585 		if (afinfo->init_dst)
1586 			afinfo->init_dst(net, xdst);
1587 	} else
1588 		xdst = ERR_PTR(-ENOBUFS);
1589 
1590 	xfrm_policy_put_afinfo(afinfo);
1591 
1592 	return xdst;
1593 }
1594 
1595 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1596 				 int nfheader_len)
1597 {
1598 	struct xfrm_policy_afinfo *afinfo =
1599 		xfrm_policy_get_afinfo(dst->ops->family);
1600 	int err;
1601 
1602 	if (!afinfo)
1603 		return -EINVAL;
1604 
1605 	err = afinfo->init_path(path, dst, nfheader_len);
1606 
1607 	xfrm_policy_put_afinfo(afinfo);
1608 
1609 	return err;
1610 }
1611 
1612 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1613 				const struct flowi *fl)
1614 {
1615 	struct xfrm_policy_afinfo *afinfo =
1616 		xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1617 	int err;
1618 
1619 	if (!afinfo)
1620 		return -EINVAL;
1621 
1622 	err = afinfo->fill_dst(xdst, dev, fl);
1623 
1624 	xfrm_policy_put_afinfo(afinfo);
1625 
1626 	return err;
1627 }
1628 
1629 
1630 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1631  * all the metrics... Shortly, bundle a bundle.
1632  */
1633 
1634 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1635 					    struct xfrm_state **xfrm, int nx,
1636 					    const struct flowi *fl,
1637 					    struct dst_entry *dst)
1638 {
1639 	struct net *net = xp_net(policy);
1640 	unsigned long now = jiffies;
1641 	struct net_device *dev;
1642 	struct xfrm_mode *inner_mode;
1643 	struct dst_entry *dst_prev = NULL;
1644 	struct dst_entry *dst0 = NULL;
1645 	int i = 0;
1646 	int err;
1647 	int header_len = 0;
1648 	int nfheader_len = 0;
1649 	int trailer_len = 0;
1650 	int tos;
1651 	int family = policy->selector.family;
1652 	xfrm_address_t saddr, daddr;
1653 
1654 	xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1655 
1656 	tos = xfrm_get_tos(fl, family);
1657 	err = tos;
1658 	if (tos < 0)
1659 		goto put_states;
1660 
1661 	dst_hold(dst);
1662 
1663 	for (; i < nx; i++) {
1664 		struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1665 		struct dst_entry *dst1 = &xdst->u.dst;
1666 
1667 		err = PTR_ERR(xdst);
1668 		if (IS_ERR(xdst)) {
1669 			dst_release(dst);
1670 			goto put_states;
1671 		}
1672 
1673 		if (xfrm[i]->sel.family == AF_UNSPEC) {
1674 			inner_mode = xfrm_ip2inner_mode(xfrm[i],
1675 							xfrm_af2proto(family));
1676 			if (!inner_mode) {
1677 				err = -EAFNOSUPPORT;
1678 				dst_release(dst);
1679 				goto put_states;
1680 			}
1681 		} else
1682 			inner_mode = xfrm[i]->inner_mode;
1683 
1684 		if (!dst_prev)
1685 			dst0 = dst1;
1686 		else {
1687 			dst_prev->child = dst_clone(dst1);
1688 			dst1->flags |= DST_NOHASH;
1689 		}
1690 
1691 		xdst->route = dst;
1692 		dst_copy_metrics(dst1, dst);
1693 
1694 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1695 			family = xfrm[i]->props.family;
1696 			dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1697 					      family);
1698 			err = PTR_ERR(dst);
1699 			if (IS_ERR(dst))
1700 				goto put_states;
1701 		} else
1702 			dst_hold(dst);
1703 
1704 		dst1->xfrm = xfrm[i];
1705 		xdst->xfrm_genid = xfrm[i]->genid;
1706 
1707 		dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1708 		dst1->flags |= DST_HOST;
1709 		dst1->lastuse = now;
1710 
1711 		dst1->input = dst_discard;
1712 		dst1->output = inner_mode->afinfo->output;
1713 
1714 		dst1->next = dst_prev;
1715 		dst_prev = dst1;
1716 
1717 		header_len += xfrm[i]->props.header_len;
1718 		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1719 			nfheader_len += xfrm[i]->props.header_len;
1720 		trailer_len += xfrm[i]->props.trailer_len;
1721 	}
1722 
1723 	dst_prev->child = dst;
1724 	dst0->path = dst;
1725 
1726 	err = -ENODEV;
1727 	dev = dst->dev;
1728 	if (!dev)
1729 		goto free_dst;
1730 
1731 	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1732 	xfrm_init_pmtu(dst_prev);
1733 
1734 	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1735 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1736 
1737 		err = xfrm_fill_dst(xdst, dev, fl);
1738 		if (err)
1739 			goto free_dst;
1740 
1741 		dst_prev->header_len = header_len;
1742 		dst_prev->trailer_len = trailer_len;
1743 		header_len -= xdst->u.dst.xfrm->props.header_len;
1744 		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1745 	}
1746 
1747 out:
1748 	return dst0;
1749 
1750 put_states:
1751 	for (; i < nx; i++)
1752 		xfrm_state_put(xfrm[i]);
1753 free_dst:
1754 	if (dst0)
1755 		dst_free(dst0);
1756 	dst0 = ERR_PTR(err);
1757 	goto out;
1758 }
1759 
#ifdef CONFIG_XFRM_SUB_POLICY
/*
 * Copy @size bytes from @src into the buffer at *@target, allocating the
 * buffer atomically first if *@target is still NULL.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
	void *buf = *target;

	if (!buf) {
		buf = kmalloc(size, GFP_ATOMIC);
		if (!buf)
			return -ENOMEM;
		*target = buf;
	}

	memcpy(buf, src, size);
	return 0;
}
#endif
1773 
/*
 * Store a copy of selector @sel in the bundle's ->partner slot.  This is a
 * no-op returning 0 unless CONFIG_XFRM_SUB_POLICY is enabled.
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}
1785 
/*
 * Store a copy of flow @fl in the bundle's ->origin slot.  This is a no-op
 * returning 0 unless CONFIG_XFRM_SUB_POLICY is enabled.
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
1796 
1797 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1798 				struct xfrm_policy **pols,
1799 				int *num_pols, int *num_xfrms)
1800 {
1801 	int i;
1802 
1803 	if (*num_pols == 0 || !pols[0]) {
1804 		*num_pols = 0;
1805 		*num_xfrms = 0;
1806 		return 0;
1807 	}
1808 	if (IS_ERR(pols[0]))
1809 		return PTR_ERR(pols[0]);
1810 
1811 	*num_xfrms = pols[0]->xfrm_nr;
1812 
1813 #ifdef CONFIG_XFRM_SUB_POLICY
1814 	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1815 	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1816 		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1817 						    XFRM_POLICY_TYPE_MAIN,
1818 						    fl, family,
1819 						    XFRM_POLICY_OUT);
1820 		if (pols[1]) {
1821 			if (IS_ERR(pols[1])) {
1822 				xfrm_pols_put(pols, *num_pols);
1823 				return PTR_ERR(pols[1]);
1824 			}
1825 			(*num_pols)++;
1826 			(*num_xfrms) += pols[1]->xfrm_nr;
1827 		}
1828 	}
1829 #endif
1830 	for (i = 0; i < *num_pols; i++) {
1831 		if (pols[i]->action != XFRM_POLICY_ALLOW) {
1832 			*num_xfrms = -1;
1833 			break;
1834 		}
1835 	}
1836 
1837 	return 0;
1838 
1839 }
1840 
1841 static struct xfrm_dst *
1842 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1843 			       const struct flowi *fl, u16 family,
1844 			       struct dst_entry *dst_orig)
1845 {
1846 	struct net *net = xp_net(pols[0]);
1847 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1848 	struct dst_entry *dst;
1849 	struct xfrm_dst *xdst;
1850 	int err;
1851 
1852 	/* Try to instantiate a bundle */
1853 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1854 	if (err <= 0) {
1855 		if (err != 0 && err != -EAGAIN)
1856 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1857 		return ERR_PTR(err);
1858 	}
1859 
1860 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1861 	if (IS_ERR(dst)) {
1862 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1863 		return ERR_CAST(dst);
1864 	}
1865 
1866 	xdst = (struct xfrm_dst *)dst;
1867 	xdst->num_xfrms = err;
1868 	if (num_pols > 1)
1869 		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1870 	else
1871 		err = xfrm_dst_update_origin(dst, fl);
1872 	if (unlikely(err)) {
1873 		dst_free(dst);
1874 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1875 		return ERR_PTR(err);
1876 	}
1877 
1878 	xdst->num_pols = num_pols;
1879 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
1880 	xdst->policy_genid = atomic_read(&pols[0]->genid);
1881 
1882 	return xdst;
1883 }
1884 
1885 static void xfrm_policy_queue_process(unsigned long arg)
1886 {
1887 	struct sk_buff *skb;
1888 	struct sock *sk;
1889 	struct dst_entry *dst;
1890 	struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1891 	struct xfrm_policy_queue *pq = &pol->polq;
1892 	struct flowi fl;
1893 	struct sk_buff_head list;
1894 
1895 	spin_lock(&pq->hold_queue.lock);
1896 	skb = skb_peek(&pq->hold_queue);
1897 	if (!skb) {
1898 		spin_unlock(&pq->hold_queue.lock);
1899 		goto out;
1900 	}
1901 	dst = skb_dst(skb);
1902 	sk = skb->sk;
1903 	xfrm_decode_session(skb, &fl, dst->ops->family);
1904 	spin_unlock(&pq->hold_queue.lock);
1905 
1906 	dst_hold(dst->path);
1907 	dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
1908 			  sk, 0);
1909 	if (IS_ERR(dst))
1910 		goto purge_queue;
1911 
1912 	if (dst->flags & DST_XFRM_QUEUE) {
1913 		dst_release(dst);
1914 
1915 		if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
1916 			goto purge_queue;
1917 
1918 		pq->timeout = pq->timeout << 1;
1919 		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
1920 			xfrm_pol_hold(pol);
1921 	goto out;
1922 	}
1923 
1924 	dst_release(dst);
1925 
1926 	__skb_queue_head_init(&list);
1927 
1928 	spin_lock(&pq->hold_queue.lock);
1929 	pq->timeout = 0;
1930 	skb_queue_splice_init(&pq->hold_queue, &list);
1931 	spin_unlock(&pq->hold_queue.lock);
1932 
1933 	while (!skb_queue_empty(&list)) {
1934 		skb = __skb_dequeue(&list);
1935 
1936 		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1937 		dst_hold(skb_dst(skb)->path);
1938 		dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
1939 				  &fl, skb->sk, 0);
1940 		if (IS_ERR(dst)) {
1941 			kfree_skb(skb);
1942 			continue;
1943 		}
1944 
1945 		nf_reset(skb);
1946 		skb_dst_drop(skb);
1947 		skb_dst_set(skb, dst);
1948 
1949 		dst_output(skb);
1950 	}
1951 
1952 out:
1953 	xfrm_pol_put(pol);
1954 	return;
1955 
1956 purge_queue:
1957 	pq->timeout = 0;
1958 	xfrm_queue_purge(&pq->hold_queue);
1959 	xfrm_pol_put(pol);
1960 }
1961 
1962 static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
1963 {
1964 	unsigned long sched_next;
1965 	struct dst_entry *dst = skb_dst(skb);
1966 	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1967 	struct xfrm_policy *pol = xdst->pols[0];
1968 	struct xfrm_policy_queue *pq = &pol->polq;
1969 
1970 	if (unlikely(skb_fclone_busy(sk, skb))) {
1971 		kfree_skb(skb);
1972 		return 0;
1973 	}
1974 
1975 	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
1976 		kfree_skb(skb);
1977 		return -EAGAIN;
1978 	}
1979 
1980 	skb_dst_force(skb);
1981 
1982 	spin_lock_bh(&pq->hold_queue.lock);
1983 
1984 	if (!pq->timeout)
1985 		pq->timeout = XFRM_QUEUE_TMO_MIN;
1986 
1987 	sched_next = jiffies + pq->timeout;
1988 
1989 	if (del_timer(&pq->hold_timer)) {
1990 		if (time_before(pq->hold_timer.expires, sched_next))
1991 			sched_next = pq->hold_timer.expires;
1992 		xfrm_pol_put(pol);
1993 	}
1994 
1995 	__skb_queue_tail(&pq->hold_queue, skb);
1996 	if (!mod_timer(&pq->hold_timer, sched_next))
1997 		xfrm_pol_hold(pol);
1998 
1999 	spin_unlock_bh(&pq->hold_queue.lock);
2000 
2001 	return 0;
2002 }
2003 
2004 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2005 						 struct xfrm_flo *xflo,
2006 						 const struct flowi *fl,
2007 						 int num_xfrms,
2008 						 u16 family)
2009 {
2010 	int err;
2011 	struct net_device *dev;
2012 	struct dst_entry *dst;
2013 	struct dst_entry *dst1;
2014 	struct xfrm_dst *xdst;
2015 
2016 	xdst = xfrm_alloc_dst(net, family);
2017 	if (IS_ERR(xdst))
2018 		return xdst;
2019 
2020 	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
2021 	    net->xfrm.sysctl_larval_drop ||
2022 	    num_xfrms <= 0)
2023 		return xdst;
2024 
2025 	dst = xflo->dst_orig;
2026 	dst1 = &xdst->u.dst;
2027 	dst_hold(dst);
2028 	xdst->route = dst;
2029 
2030 	dst_copy_metrics(dst1, dst);
2031 
2032 	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
2033 	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
2034 	dst1->lastuse = jiffies;
2035 
2036 	dst1->input = dst_discard;
2037 	dst1->output = xdst_queue_output;
2038 
2039 	dst_hold(dst);
2040 	dst1->child = dst;
2041 	dst1->path = dst;
2042 
2043 	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
2044 
2045 	err = -ENODEV;
2046 	dev = dst->dev;
2047 	if (!dev)
2048 		goto free_dst;
2049 
2050 	err = xfrm_fill_dst(xdst, dev, fl);
2051 	if (err)
2052 		goto free_dst;
2053 
2054 out:
2055 	return xdst;
2056 
2057 free_dst:
2058 	dst_release(dst1);
2059 	xdst = ERR_PTR(err);
2060 	goto out;
2061 }
2062 
2063 static struct flow_cache_object *
2064 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2065 		   struct flow_cache_object *oldflo, void *ctx)
2066 {
2067 	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
2068 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2069 	struct xfrm_dst *xdst, *new_xdst;
2070 	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
2071 
2072 	/* Check if the policies from old bundle are usable */
2073 	xdst = NULL;
2074 	if (oldflo) {
2075 		xdst = container_of(oldflo, struct xfrm_dst, flo);
2076 		num_pols = xdst->num_pols;
2077 		num_xfrms = xdst->num_xfrms;
2078 		pol_dead = 0;
2079 		for (i = 0; i < num_pols; i++) {
2080 			pols[i] = xdst->pols[i];
2081 			pol_dead |= pols[i]->walk.dead;
2082 		}
2083 		if (pol_dead) {
2084 			dst_free(&xdst->u.dst);
2085 			xdst = NULL;
2086 			num_pols = 0;
2087 			num_xfrms = 0;
2088 			oldflo = NULL;
2089 		}
2090 	}
2091 
2092 	/* Resolve policies to use if we couldn't get them from
2093 	 * previous cache entry */
2094 	if (xdst == NULL) {
2095 		num_pols = 1;
2096 		pols[0] = __xfrm_policy_lookup(net, fl, family,
2097 					       flow_to_policy_dir(dir));
2098 		err = xfrm_expand_policies(fl, family, pols,
2099 					   &num_pols, &num_xfrms);
2100 		if (err < 0)
2101 			goto inc_error;
2102 		if (num_pols == 0)
2103 			return NULL;
2104 		if (num_xfrms <= 0)
2105 			goto make_dummy_bundle;
2106 	}
2107 
2108 	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
2109 						  xflo->dst_orig);
2110 	if (IS_ERR(new_xdst)) {
2111 		err = PTR_ERR(new_xdst);
2112 		if (err != -EAGAIN)
2113 			goto error;
2114 		if (oldflo == NULL)
2115 			goto make_dummy_bundle;
2116 		dst_hold(&xdst->u.dst);
2117 		return oldflo;
2118 	} else if (new_xdst == NULL) {
2119 		num_xfrms = 0;
2120 		if (oldflo == NULL)
2121 			goto make_dummy_bundle;
2122 		xdst->num_xfrms = 0;
2123 		dst_hold(&xdst->u.dst);
2124 		return oldflo;
2125 	}
2126 
2127 	/* Kill the previous bundle */
2128 	if (xdst) {
2129 		/* The policies were stolen for newly generated bundle */
2130 		xdst->num_pols = 0;
2131 		dst_free(&xdst->u.dst);
2132 	}
2133 
2134 	/* Flow cache does not have reference, it dst_free()'s,
2135 	 * but we do need to return one reference for original caller */
2136 	dst_hold(&new_xdst->u.dst);
2137 	return &new_xdst->flo;
2138 
2139 make_dummy_bundle:
2140 	/* We found policies, but there's no bundles to instantiate:
2141 	 * either because the policy blocks, has no transformations or
2142 	 * we could not build template (no xfrm_states).*/
2143 	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2144 	if (IS_ERR(xdst)) {
2145 		xfrm_pols_put(pols, num_pols);
2146 		return ERR_CAST(xdst);
2147 	}
2148 	xdst->num_pols = num_pols;
2149 	xdst->num_xfrms = num_xfrms;
2150 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2151 
2152 	dst_hold(&xdst->u.dst);
2153 	return &xdst->flo;
2154 
2155 inc_error:
2156 	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
2157 error:
2158 	if (xdst != NULL)
2159 		dst_free(&xdst->u.dst);
2160 	else
2161 		xfrm_pols_put(pols, num_pols);
2162 	return ERR_PTR(err);
2163 }
2164 
2165 static struct dst_entry *make_blackhole(struct net *net, u16 family,
2166 					struct dst_entry *dst_orig)
2167 {
2168 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2169 	struct dst_entry *ret;
2170 
2171 	if (!afinfo) {
2172 		dst_release(dst_orig);
2173 		return ERR_PTR(-EINVAL);
2174 	} else {
2175 		ret = afinfo->blackhole_route(net, dst_orig);
2176 	}
2177 	xfrm_policy_put_afinfo(afinfo);
2178 
2179 	return ret;
2180 }
2181 
2182 /* Main function: finds/creates a bundle for given flow.
2183  *
2184  * At the moment we eat a raw IP route. Mostly to speed up lookups
2185  * on interfaces with disabled IPsec.
2186  */
/*
 * xfrm_lookup - pick the dst an outgoing flow has to use after applying
 * the XFRM_POLICY_OUT policies.
 * @net:      network namespace whose policy counters, sysctl and flow
 *            cache are consulted
 * @dst_orig: the untransformed route of the flow
 * @fl:       flow description
 * @sk:       optional socket; its sk_policy[XFRM_POLICY_OUT] is tried
 *            before the flow cache
 * @flags:    XFRM_LOOKUP_* flags (only XFRM_LOOKUP_ICMP is tested here,
 *            the rest is handed to the flow cache resolver via xflo)
 *
 * Returns @dst_orig when no policy applies or the flow passes
 * untransformed, the bundle dst when the flow is transformed, or an
 * ERR_PTR().  The "error"/"dropdst" exits release @dst_orig; the
 * larval-drop exit (-EREMOTE) returns without releasing it.
 */
2187 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
2188 			      const struct flowi *fl,
2189 			      struct sock *sk, int flags)
2190 {
2191 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2192 	struct flow_cache_object *flo;
2193 	struct xfrm_dst *xdst;
2194 	struct dst_entry *dst, *route;
2195 	u16 family = dst_orig->ops->family;
2196 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	/* drop_pols counts the policy references this function itself has
	 * to put on exit; it is only set on the socket-policy path.
	 */
2197 	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2198 
2199 	dst = NULL;
2200 	xdst = NULL;
2201 	route = NULL;
2202 
	/* Step 1: per-socket policy, if the socket carries one. */
2203 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2204 		num_pols = 1;
2205 		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
2206 		err = xfrm_expand_policies(fl, family, pols,
2207 					   &num_pols, &num_xfrms);
2208 		if (err < 0)
2209 			goto dropdst;
2210 
2211 		if (num_pols) {
			/* No templates to apply (num_xfrms == 0) or a
			 * blocking result (num_xfrms < 0): skip bundle
			 * creation and let no_transform decide.
			 */
2212 			if (num_xfrms <= 0) {
2213 				drop_pols = num_pols;
2214 				goto no_transform;
2215 			}
2216 
2217 			xdst = xfrm_resolve_and_create_bundle(
2218 					pols, num_pols, fl,
2219 					family, dst_orig);
2220 			if (IS_ERR(xdst)) {
2221 				xfrm_pols_put(pols, num_pols);
2222 				err = PTR_ERR(xdst);
2223 				goto dropdst;
2224 			} else if (xdst == NULL) {
				/* No bundle was produced: treat the flow
				 * as untransformed.
				 */
2225 				num_xfrms = 0;
2226 				drop_pols = num_pols;
2227 				goto no_transform;
2228 			}
2229 
2230 			dst_hold(&xdst->u.dst);
2231 			xdst->u.dst.flags |= DST_NOCACHE;
2232 			route = xdst->route;
2233 		}
2234 	}
2235 
	/* Step 2: no bundle from the socket policy, ask the flow cache. */
2236 	if (xdst == NULL) {
2237 		struct xfrm_flo xflo;
2238 
2239 		xflo.dst_orig = dst_orig;
2240 		xflo.flags = flags;
2241 
2242 		/* To accelerate a bit...  */
2243 		if ((dst_orig->flags & DST_NOXFRM) ||
2244 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
2245 			goto nopol;
2246 
2247 		flo = flow_cache_lookup(net, fl, family, dir,
2248 					xfrm_bundle_lookup, &xflo);
2249 		if (flo == NULL)
2250 			goto nopol;
2251 		if (IS_ERR(flo)) {
2252 			err = PTR_ERR(flo);
2253 			goto dropdst;
2254 		}
2255 		xdst = container_of(flo, struct xfrm_dst, flo);
2256 
		/* Take policy pointers and counts from the cached bundle;
		 * drop_pols stays 0 on this path.
		 */
2257 		num_pols = xdst->num_pols;
2258 		num_xfrms = xdst->num_xfrms;
2259 		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2260 		route = xdst->route;
2261 	}
2262 
2263 	dst = &xdst->u.dst;
2264 	if (route == NULL && num_xfrms > 0) {
2265 		/* The only case when xfrm_bundle_lookup() returns a
2266 		 * bundle with null route, is when the template could
2267 		 * not be resolved. It means policies are there, but
2268 		 * bundle could not be created, since we don't yet
2269 		 * have the xfrm_state's. We need to wait for KM to
2270 		 * negotiate new SA's or bail out with error.*/
2271 		if (net->xfrm.sysctl_larval_drop) {
			/* Note: dst_orig is NOT released on this exit. */
2272 			dst_release(dst);
2273 			xfrm_pols_put(pols, drop_pols);
2274 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2275 
2276 			return ERR_PTR(-EREMOTE);
2277 		}
2278 
2279 		err = -EAGAIN;
2280 
2281 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2282 		goto error;
2283 	}
2284 
2285 no_transform:
2286 	if (num_pols == 0)
2287 		goto nopol;
2288 
	/* ICMP lookups are only honoured by policies flagged for ICMP. */
2289 	if ((flags & XFRM_LOOKUP_ICMP) &&
2290 	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
2291 		err = -ENOENT;
2292 		goto error;
2293 	}
2294 
2295 	for (i = 0; i < num_pols; i++)
2296 		pols[i]->curlft.use_time = get_seconds();
2297 
2298 	if (num_xfrms < 0) {
2299 		/* Prohibit the flow */
2300 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2301 		err = -EPERM;
2302 		goto error;
2303 	} else if (num_xfrms > 0) {
2304 		/* Flow transformed */
2305 		dst_release(dst_orig);
2306 	} else {
2307 		/* Flow passes untransformed */
2308 		dst_release(dst);
2309 		dst = dst_orig;
2310 	}
2311 ok:
2312 	xfrm_pols_put(pols, drop_pols);
	/* Mark the result when its first transform is tunnel mode. */
2313 	if (dst && dst->xfrm &&
2314 	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
2315 		dst->flags |= DST_XFRM_TUNNEL;
2316 	return dst;
2317 
2318 nopol:
	/* Without a policy the original route is used, except for ICMP
	 * lookups which fail with -ENOENT.
	 */
2319 	if (!(flags & XFRM_LOOKUP_ICMP)) {
2320 		dst = dst_orig;
2321 		goto ok;
2322 	}
2323 	err = -ENOENT;
2324 error:
2325 	dst_release(dst);
2326 dropdst:
2327 	dst_release(dst_orig);
2328 	xfrm_pols_put(pols, drop_pols);
2329 	return ERR_PTR(err);
2330 }
2331 EXPORT_SYMBOL(xfrm_lookup);
2332 
2333 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
2334  * Otherwise we may send out blackholed packets.
2335  */
2336 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2337 				    const struct flowi *fl,
2338 				    struct sock *sk, int flags)
2339 {
2340 	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2341 					    flags | XFRM_LOOKUP_QUEUE);
2342 
2343 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2344 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
2345 
2346 	return dst;
2347 }
2348 EXPORT_SYMBOL(xfrm_lookup_route);
2349 
2350 static inline int
2351 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2352 {
2353 	struct xfrm_state *x;
2354 
2355 	if (!skb->sp || idx < 0 || idx >= skb->sp->len)
2356 		return 0;
2357 	x = skb->sp->xvec[idx];
2358 	if (!x->type->reject)
2359 		return 0;
2360 	return x->type->reject(x, skb, fl);
2361 }
2362 
2363 /* When skb is transformed back to its "native" form, we have to
2364  * check policy restrictions. At the moment we make this in maximally
2365  * stupid way. Shame on me. :-) Of course, connected sockets must
2366  * have policy cached at them.
2367  */
2368 
2369 static inline int
2370 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
2371 	      unsigned short family)
2372 {
2373 	if (xfrm_state_kern(x))
2374 		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
2375 	return	x->id.proto == tmpl->id.proto &&
2376 		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
2377 		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
2378 		x->props.mode == tmpl->mode &&
2379 		(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2380 		 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2381 		!(x->props.mode != XFRM_MODE_TRANSPORT &&
2382 		  xfrm_state_addr_cmp(tmpl, x, family));
2383 }
2384 
2385 /*
2386  * 0 or more than 0 is returned when validation is succeeded (either bypass
2387  * because of optional transport mode, or next index of the mathced secpath
2388  * state with the template.
2389  * -1 is returned when no matching template is found.
2390  * Otherwise "-2 - errored_index" is returned.
2391  */
2392 static inline int
2393 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
2394 	       unsigned short family)
2395 {
2396 	int idx = start;
2397 
2398 	if (tmpl->optional) {
2399 		if (tmpl->mode == XFRM_MODE_TRANSPORT)
2400 			return start;
2401 	} else
2402 		start = -1;
2403 	for (; idx < sp->len; idx++) {
2404 		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
2405 			return ++idx;
2406 		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
2407 			if (start == -1)
2408 				start = -2-idx;
2409 			break;
2410 		}
2411 	}
2412 	return start;
2413 }
2414 
2415 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2416 			  unsigned int family, int reverse)
2417 {
2418 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2419 	int err;
2420 
2421 	if (unlikely(afinfo == NULL))
2422 		return -EAFNOSUPPORT;
2423 
2424 	afinfo->decode_session(skb, fl, reverse);
2425 	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2426 	xfrm_policy_put_afinfo(afinfo);
2427 	return err;
2428 }
2429 EXPORT_SYMBOL(__xfrm_decode_session);
2430 
2431 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2432 {
2433 	for (; k < sp->len; k++) {
2434 		if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2435 			*idxp = k;
2436 			return 1;
2437 		}
2438 	}
2439 
2440 	return 0;
2441 }
2442 
2443 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
2444 			unsigned short family)
2445 {
2446 	struct net *net = dev_net(skb->dev);
2447 	struct xfrm_policy *pol;
2448 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2449 	int npols = 0;
2450 	int xfrm_nr;
2451 	int pi;
2452 	int reverse;
2453 	struct flowi fl;
2454 	u8 fl_dir;
2455 	int xerr_idx = -1;
2456 
2457 	reverse = dir & ~XFRM_POLICY_MASK;
2458 	dir &= XFRM_POLICY_MASK;
2459 	fl_dir = policy_to_flow_dir(dir);
2460 
2461 	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
2462 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2463 		return 0;
2464 	}
2465 
2466 	nf_nat_decode_session(skb, &fl, family);
2467 
2468 	/* First, check used SA against their selectors. */
2469 	if (skb->sp) {
2470 		int i;
2471 
2472 		for (i = skb->sp->len-1; i >= 0; i--) {
2473 			struct xfrm_state *x = skb->sp->xvec[i];
2474 			if (!xfrm_selector_match(&x->sel, &fl, family)) {
2475 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
2476 				return 0;
2477 			}
2478 		}
2479 	}
2480 
2481 	pol = NULL;
2482 	if (sk && sk->sk_policy[dir]) {
2483 		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2484 		if (IS_ERR(pol)) {
2485 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2486 			return 0;
2487 		}
2488 	}
2489 
2490 	if (!pol) {
2491 		struct flow_cache_object *flo;
2492 
2493 		flo = flow_cache_lookup(net, &fl, family, fl_dir,
2494 					xfrm_policy_lookup, NULL);
2495 		if (IS_ERR_OR_NULL(flo))
2496 			pol = ERR_CAST(flo);
2497 		else
2498 			pol = container_of(flo, struct xfrm_policy, flo);
2499 	}
2500 
2501 	if (IS_ERR(pol)) {
2502 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2503 		return 0;
2504 	}
2505 
2506 	if (!pol) {
2507 		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2508 			xfrm_secpath_reject(xerr_idx, skb, &fl);
2509 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2510 			return 0;
2511 		}
2512 		return 1;
2513 	}
2514 
2515 	pol->curlft.use_time = get_seconds();
2516 
2517 	pols[0] = pol;
2518 	npols++;
2519 #ifdef CONFIG_XFRM_SUB_POLICY
2520 	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2521 		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2522 						    &fl, family,
2523 						    XFRM_POLICY_IN);
2524 		if (pols[1]) {
2525 			if (IS_ERR(pols[1])) {
2526 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2527 				return 0;
2528 			}
2529 			pols[1]->curlft.use_time = get_seconds();
2530 			npols++;
2531 		}
2532 	}
2533 #endif
2534 
2535 	if (pol->action == XFRM_POLICY_ALLOW) {
2536 		struct sec_path *sp;
2537 		static struct sec_path dummy;
2538 		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2539 		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2540 		struct xfrm_tmpl **tpp = tp;
2541 		int ti = 0;
2542 		int i, k;
2543 
2544 		if ((sp = skb->sp) == NULL)
2545 			sp = &dummy;
2546 
2547 		for (pi = 0; pi < npols; pi++) {
2548 			if (pols[pi] != pol &&
2549 			    pols[pi]->action != XFRM_POLICY_ALLOW) {
2550 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2551 				goto reject;
2552 			}
2553 			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2554 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2555 				goto reject_error;
2556 			}
2557 			for (i = 0; i < pols[pi]->xfrm_nr; i++)
2558 				tpp[ti++] = &pols[pi]->xfrm_vec[i];
2559 		}
2560 		xfrm_nr = ti;
2561 		if (npols > 1) {
2562 			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2563 			tpp = stp;
2564 		}
2565 
2566 		/* For each tunnel xfrm, find the first matching tmpl.
2567 		 * For each tmpl before that, find corresponding xfrm.
2568 		 * Order is _important_. Later we will implement
2569 		 * some barriers, but at the moment barriers
2570 		 * are implied between each two transformations.
2571 		 */
2572 		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2573 			k = xfrm_policy_ok(tpp[i], sp, k, family);
2574 			if (k < 0) {
2575 				if (k < -1)
2576 					/* "-2 - errored_index" returned */
2577 					xerr_idx = -(2+k);
2578 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2579 				goto reject;
2580 			}
2581 		}
2582 
2583 		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2584 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2585 			goto reject;
2586 		}
2587 
2588 		xfrm_pols_put(pols, npols);
2589 		return 1;
2590 	}
2591 	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2592 
2593 reject:
2594 	xfrm_secpath_reject(xerr_idx, skb, &fl);
2595 reject_error:
2596 	xfrm_pols_put(pols, npols);
2597 	return 0;
2598 }
2599 EXPORT_SYMBOL(__xfrm_policy_check);
2600 
2601 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2602 {
2603 	struct net *net = dev_net(skb->dev);
2604 	struct flowi fl;
2605 	struct dst_entry *dst;
2606 	int res = 1;
2607 
2608 	if (xfrm_decode_session(skb, &fl, family) < 0) {
2609 		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2610 		return 0;
2611 	}
2612 
2613 	skb_dst_force(skb);
2614 
2615 	dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2616 	if (IS_ERR(dst)) {
2617 		res = 0;
2618 		dst = NULL;
2619 	}
2620 	skb_dst_set(skb, dst);
2621 	return res;
2622 }
2623 EXPORT_SYMBOL(__xfrm_route_forward);
2624 
2625 /* Optimize later using cookies and generation ids. */
2626 
2627 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2628 {
2629 	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2630 	 * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
2631 	 * get validated by dst_ops->check on every use.  We do this
2632 	 * because when a normal route referenced by an XFRM dst is
2633 	 * obsoleted we do not go looking around for all parent
2634 	 * referencing XFRM dsts so that we can invalidate them.  It
2635 	 * is just too much work.  Instead we make the checks here on
2636 	 * every use.  For example:
2637 	 *
2638 	 *	XFRM dst A --> IPv4 dst X
2639 	 *
2640 	 * X is the "xdst->route" of A (X is also the "dst->path" of A
2641 	 * in this example).  If X is marked obsolete, "A" will not
2642 	 * notice.  That's what we are validating here via the
2643 	 * stale_bundle() check.
2644 	 *
2645 	 * When a policy's bundle is pruned, we dst_free() the XFRM
2646 	 * dst which causes it's ->obsolete field to be set to
2647 	 * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
2648 	 * this, we want to force a new route lookup.
2649 	 */
2650 	if (dst->obsolete < 0 && !stale_bundle(dst))
2651 		return dst;
2652 
2653 	return NULL;
2654 }
2655 
/* Returns 1 when the bundle headed by @dst fails xfrm_bundle_ok(), else 0. */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	if (xfrm_bundle_ok(xdst))
		return 0;
	return 1;
}
2660 
2661 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2662 {
2663 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2664 		dst->dev = dev_net(dev)->loopback_dev;
2665 		dev_hold(dst->dev);
2666 		dev_put(dev);
2667 	}
2668 }
2669 EXPORT_SYMBOL(xfrm_dst_ifdown);
2670 
/* dst_ops->link_failure hook: intentionally does nothing. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
	return;
}
2675 
2676 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2677 {
2678 	if (dst) {
2679 		if (dst->obsolete) {
2680 			dst_release(dst);
2681 			dst = NULL;
2682 		}
2683 	}
2684 	return dst;
2685 }
2686 
/* Flush the flow cache of @net. */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
	return;
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2692 
/* Flush the flow cache of @net through flow_cache_flush_deferred(). */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
	return;
}
2697 
2698 static void xfrm_init_pmtu(struct dst_entry *dst)
2699 {
2700 	do {
2701 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2702 		u32 pmtu, route_mtu_cached;
2703 
2704 		pmtu = dst_mtu(dst->child);
2705 		xdst->child_mtu_cached = pmtu;
2706 
2707 		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2708 
2709 		route_mtu_cached = dst_mtu(xdst->route);
2710 		xdst->route_mtu_cached = route_mtu_cached;
2711 
2712 		if (pmtu > route_mtu_cached)
2713 			pmtu = route_mtu_cached;
2714 
2715 		dst_metric_set(dst, RTAX_MTU, pmtu);
2716 	} while ((dst = dst->next));
2717 }
2718 
2719 /* Check that the bundle accepts the flow and its components are
2720  * still valid.
2721  */
/* Returns 1 when the bundle headed by @first is still usable and 0 when
 * it is stale.  As a side effect the cached child/route MTUs of the
 * bundle entries are refreshed and, when any of them changed, RTAX_MTU
 * is recomputed from the changed entry back up to @first.
 */
2722 
2723 static int xfrm_bundle_ok(struct xfrm_dst *first)
2724 {
2725 	struct dst_entry *dst = &first->u.dst;
2726 	struct xfrm_dst *last;
2727 	u32 mtu;
2728 
	/* The underlying path must still validate and the device, if
	 * any, must be running.
	 */
2729 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2730 	    (dst->dev && !netif_running(dst->dev)))
2731 		return 0;
2732 
	/* Bundles flagged DST_XFRM_QUEUE skip the per-state checks below. */
2733 	if (dst->flags & DST_XFRM_QUEUE)
2734 		return 1;
2735 
	/* "last" ends up as the deepest entry whose cached MTU changed. */
2736 	last = NULL;
2737 
	/* Walk down the ->child chain while entries carry an xfrm state. */
2738 	do {
2739 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2740 
2741 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
2742 			return 0;
		/* State or policy generation changed since bundle creation. */
2743 		if (xdst->xfrm_genid != dst->xfrm->genid)
2744 			return 0;
2745 		if (xdst->num_pols > 0 &&
2746 		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2747 			return 0;
2748 
2749 		mtu = dst_mtu(dst->child);
2750 		if (xdst->child_mtu_cached != mtu) {
2751 			last = xdst;
2752 			xdst->child_mtu_cached = mtu;
2753 		}
2754 
2755 		if (!dst_check(xdst->route, xdst->route_cookie))
2756 			return 0;
2757 		mtu = dst_mtu(xdst->route);
2758 		if (xdst->route_mtu_cached != mtu) {
2759 			last = xdst;
2760 			xdst->route_mtu_cached = mtu;
2761 		}
2762 
2763 		dst = dst->child;
2764 	} while (dst->xfrm);
2765 
	/* Common case: no MTU changed, nothing to recompute. */
2766 	if (likely(!last))
2767 		return 1;
2768 
	/* Recompute RTAX_MTU starting at "last" and moving towards
	 * "first" through u.dst.next, feeding each result into the next
	 * entry's child_mtu_cached.
	 */
2769 	mtu = last->child_mtu_cached;
2770 	for (;;) {
2771 		dst = &last->u.dst;
2772 
2773 		mtu = xfrm_state_mtu(dst->xfrm, mtu);
2774 		if (mtu > last->route_mtu_cached)
2775 			mtu = last->route_mtu_cached;
2776 		dst_metric_set(dst, RTAX_MTU, mtu);
2777 
2778 		if (last == first)
2779 			break;
2780 
2781 		last = (struct xfrm_dst *)last->u.dst.next;
2782 		last->child_mtu_cached = mtu;
2783 	}
2784 
2785 	return 1;
2786 }
2787 
2788 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2789 {
2790 	return dst_metric_advmss(dst->path);
2791 }
2792 
2793 static unsigned int xfrm_mtu(const struct dst_entry *dst)
2794 {
2795 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2796 
2797 	return mtu ? : dst_mtu(dst->path);
2798 }
2799 
2800 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
2801 					   struct sk_buff *skb,
2802 					   const void *daddr)
2803 {
2804 	return dst->path->ops->neigh_lookup(dst, skb, daddr);
2805 }
2806 
/*
 * xfrm_policy_register_afinfo - register the per-address-family policy ops
 * @afinfo: ops to publish in xfrm_policy_afinfo[afinfo->family]
 *
 * Unset dst_ops hooks and the garbage_collect hook are filled with the
 * xfrm defaults before the pointer is published, then the dst_ops are
 * copied into every network namespace.
 *
 * Returns 0 on success, -EINVAL for a NULL @afinfo, -EAFNOSUPPORT for a
 * family >= NPROTO and -ENOBUFS when the family slot is already taken.
 */
2807 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2808 {
2809 	struct net *net;
2810 	int err = 0;
2811 	if (unlikely(afinfo == NULL))
2812 		return -EINVAL;
2813 	if (unlikely(afinfo->family >= NPROTO))
2814 		return -EAFNOSUPPORT;
2815 	spin_lock(&xfrm_policy_afinfo_lock);
2816 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2817 		err = -ENOBUFS;
2818 	else {
		/* Only hooks the caller left NULL get a default. */
2819 		struct dst_ops *dst_ops = afinfo->dst_ops;
2820 		if (likely(dst_ops->kmem_cachep == NULL))
2821 			dst_ops->kmem_cachep = xfrm_dst_cache;
2822 		if (likely(dst_ops->check == NULL))
2823 			dst_ops->check = xfrm_dst_check;
2824 		if (likely(dst_ops->default_advmss == NULL))
2825 			dst_ops->default_advmss = xfrm_default_advmss;
2826 		if (likely(dst_ops->mtu == NULL))
2827 			dst_ops->mtu = xfrm_mtu;
2828 		if (likely(dst_ops->negative_advice == NULL))
2829 			dst_ops->negative_advice = xfrm_negative_advice;
2830 		if (likely(dst_ops->link_failure == NULL))
2831 			dst_ops->link_failure = xfrm_link_failure;
2832 		if (likely(dst_ops->neigh_lookup == NULL))
2833 			dst_ops->neigh_lookup = xfrm_neigh_lookup;
2834 		if (likely(afinfo->garbage_collect == NULL))
2835 			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2836 		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
2837 	}
2838 	spin_unlock(&xfrm_policy_afinfo_lock);
2839 
	/* NOTE(review): this per-namespace copy also runs when err is
	 * -ENOBUFS, i.e. with the dst_ops of an afinfo that was not
	 * registered - confirm this is intended.
	 */
2840 	rtnl_lock();
2841 	for_each_net(net) {
2842 		struct dst_ops *xfrm_dst_ops;
2843 
		/* Families other than AF_INET (and AF_INET6 when IPv6 is
		 * enabled) hit BUG().
		 */
2844 		switch (afinfo->family) {
2845 		case AF_INET:
2846 			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
2847 			break;
2848 #if IS_ENABLED(CONFIG_IPV6)
2849 		case AF_INET6:
2850 			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
2851 			break;
2852 #endif
2853 		default:
2854 			BUG();
2855 		}
2856 		*xfrm_dst_ops = *afinfo->dst_ops;
2857 	}
2858 	rtnl_unlock();
2859 
2860 	return err;
2861 }
2862 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2863 
2864 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2865 {
2866 	int err = 0;
2867 	if (unlikely(afinfo == NULL))
2868 		return -EINVAL;
2869 	if (unlikely(afinfo->family >= NPROTO))
2870 		return -EAFNOSUPPORT;
2871 	spin_lock(&xfrm_policy_afinfo_lock);
2872 	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2873 		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2874 			err = -EINVAL;
2875 		else
2876 			RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
2877 					 NULL);
2878 	}
2879 	spin_unlock(&xfrm_policy_afinfo_lock);
2880 	if (!err) {
2881 		struct dst_ops *dst_ops = afinfo->dst_ops;
2882 
2883 		synchronize_rcu();
2884 
2885 		dst_ops->kmem_cachep = NULL;
2886 		dst_ops->check = NULL;
2887 		dst_ops->negative_advice = NULL;
2888 		dst_ops->link_failure = NULL;
2889 		afinfo->garbage_collect = NULL;
2890 	}
2891 	return err;
2892 }
2893 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2894 
2895 static void __net_init xfrm_dst_ops_init(struct net *net)
2896 {
2897 	struct xfrm_policy_afinfo *afinfo;
2898 
2899 	rcu_read_lock();
2900 	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
2901 	if (afinfo)
2902 		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
2903 #if IS_ENABLED(CONFIG_IPV6)
2904 	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
2905 	if (afinfo)
2906 		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
2907 #endif
2908 	rcu_read_unlock();
2909 }
2910 
2911 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2912 {
2913 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2914 
2915 	switch (event) {
2916 	case NETDEV_DOWN:
2917 		xfrm_garbage_collect(dev_net(dev));
2918 	}
2919 	return NOTIFY_DONE;
2920 }
2921 
2922 static struct notifier_block xfrm_dev_notifier = {
2923 	.notifier_call	= xfrm_dev_event,
2924 };
2925 
2926 #ifdef CONFIG_XFRM_STATISTICS
2927 static int __net_init xfrm_statistics_init(struct net *net)
2928 {
2929 	int rv;
2930 	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
2931 	if (!net->mib.xfrm_statistics)
2932 		return -ENOMEM;
2933 	rv = xfrm_proc_init(net);
2934 	if (rv < 0)
2935 		free_percpu(net->mib.xfrm_statistics);
2936 	return rv;
2937 }
2938 
2939 static void xfrm_statistics_fini(struct net *net)
2940 {
2941 	xfrm_proc_fini(net);
2942 	free_percpu(net->mib.xfrm_statistics);
2943 }
2944 #else
2945 static int __net_init xfrm_statistics_init(struct net *net)
2946 {
2947 	return 0;
2948 }
2949 
2950 static void xfrm_statistics_fini(struct net *net)
2951 {
2952 }
2953 #endif
2954 
/*
 * xfrm_policy_init - set up the per-namespace policy database of @net.
 *
 * Allocates the by-index hash and one by-destination hash per direction
 * (8 buckets each), resets the counters and hash thresholds, and
 * initialises the policy list and work items.  For init_net it also
 * creates the xfrm dst slab cache and registers the netdevice notifier.
 *
 * Returns 0 on success or -ENOMEM when a hash table cannot be allocated;
 * tables allocated so far are freed in that case.
 */
2955 static int __net_init xfrm_policy_init(struct net *net)
2956 {
2957 	unsigned int hmask, sz;
2958 	int dir;
2959 
	/* The dst cache is global, created once for the initial namespace. */
2960 	if (net_eq(net, &init_net))
2961 		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2962 					   sizeof(struct xfrm_dst),
2963 					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2964 					   NULL);
2965 
	/* Initial hash size: 8 buckets (mask 7). */
2966 	hmask = 8 - 1;
2967 	sz = (hmask+1) * sizeof(struct hlist_head);
2968 
2969 	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2970 	if (!net->xfrm.policy_byidx)
2971 		goto out_byidx;
2972 	net->xfrm.policy_idx_hmask = hmask;
2973 
2974 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
2975 		struct xfrm_policy_hash *htab;
2976 
		/* policy_count holds two counters per direction. */
2977 		net->xfrm.policy_count[dir] = 0;
2978 		net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
2979 		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2980 
2981 		htab = &net->xfrm.policy_bydst[dir];
2982 		htab->table = xfrm_hash_alloc(sz);
2983 		if (!htab->table)
2984 			goto out_bydst;
2985 		htab->hmask = hmask;
		/* Default: hash on full-length IPv4/IPv6 prefixes. */
2986 		htab->dbits4 = 32;
2987 		htab->sbits4 = 32;
2988 		htab->dbits6 = 128;
2989 		htab->sbits6 = 128;
2990 	}
2991 	net->xfrm.policy_hthresh.lbits4 = 32;
2992 	net->xfrm.policy_hthresh.rbits4 = 32;
2993 	net->xfrm.policy_hthresh.lbits6 = 128;
2994 	net->xfrm.policy_hthresh.rbits6 = 128;
2995 
2996 	seqlock_init(&net->xfrm.policy_hthresh.lock);
2997 
2998 	INIT_LIST_HEAD(&net->xfrm.policy_all);
2999 	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
3000 	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
3001 	if (net_eq(net, &init_net))
3002 		register_netdevice_notifier(&xfrm_dev_notifier);
3003 	return 0;
3004 
3005 out_bydst:
	/* Free the tables of the directions set up before the failing one. */
3006 	for (dir--; dir >= 0; dir--) {
3007 		struct xfrm_policy_hash *htab;
3008 
3009 		htab = &net->xfrm.policy_bydst[dir];
3010 		xfrm_hash_free(htab->table, sz);
3011 	}
3012 	xfrm_hash_free(net->xfrm.policy_byidx, sz);
3013 out_byidx:
3014 	return -ENOMEM;
3015 }
3016 
/*
 * xfrm_policy_fini - tear down the per-namespace policy database of @net.
 *
 * Waits for the pending hash-resize work, flushes all policies (sub
 * policies first when CONFIG_XFRM_SUB_POLICY is set), warns if anything
 * is left in the lists or hash tables, and frees the hash tables.
 */
3017 static void xfrm_policy_fini(struct net *net)
3018 {
3019 	unsigned int sz;
3020 	int dir;
3021 
3022 	flush_work(&net->xfrm.policy_hash_work);
3023 #ifdef CONFIG_XFRM_SUB_POLICY
3024 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
3025 #endif
3026 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
3027 
	/* After the flush no policy should remain. */
3028 	WARN_ON(!list_empty(&net->xfrm.policy_all));
3029 
3030 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
3031 		struct xfrm_policy_hash *htab;
3032 
3033 		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
3034 
3035 		htab = &net->xfrm.policy_bydst[dir];
		/* Size is derived from the table's current hmask. */
3036 		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
3037 		WARN_ON(!hlist_empty(htab->table));
3038 		xfrm_hash_free(htab->table, sz);
3039 	}
3040 
3041 	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
3042 	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
3043 	xfrm_hash_free(net->xfrm.policy_byidx, sz);
3044 }
3045 
3046 static int __net_init xfrm_net_init(struct net *net)
3047 {
3048 	int rv;
3049 
3050 	rv = xfrm_statistics_init(net);
3051 	if (rv < 0)
3052 		goto out_statistics;
3053 	rv = xfrm_state_init(net);
3054 	if (rv < 0)
3055 		goto out_state;
3056 	rv = xfrm_policy_init(net);
3057 	if (rv < 0)
3058 		goto out_policy;
3059 	xfrm_dst_ops_init(net);
3060 	rv = xfrm_sysctl_init(net);
3061 	if (rv < 0)
3062 		goto out_sysctl;
3063 	rv = flow_cache_init(net);
3064 	if (rv < 0)
3065 		goto out;
3066 
3067 	/* Initialize the per-net locks here */
3068 	spin_lock_init(&net->xfrm.xfrm_state_lock);
3069 	rwlock_init(&net->xfrm.xfrm_policy_lock);
3070 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
3071 
3072 	return 0;
3073 
3074 out:
3075 	xfrm_sysctl_fini(net);
3076 out_sysctl:
3077 	xfrm_policy_fini(net);
3078 out_policy:
3079 	xfrm_state_fini(net);
3080 out_state:
3081 	xfrm_statistics_fini(net);
3082 out_statistics:
3083 	return rv;
3084 }
3085 
3086 static void __net_exit xfrm_net_exit(struct net *net)
3087 {
3088 	flow_cache_fini(net);
3089 	xfrm_sysctl_fini(net);
3090 	xfrm_policy_fini(net);
3091 	xfrm_state_fini(net);
3092 	xfrm_statistics_fini(net);
3093 }
3094 
3095 static struct pernet_operations __net_initdata xfrm_net_ops = {
3096 	.init = xfrm_net_init,
3097 	.exit = xfrm_net_exit,
3098 };
3099 
3100 void __init xfrm_init(void)
3101 {
3102 	register_pernet_subsys(&xfrm_net_ops);
3103 	xfrm_input_init();
3104 }
3105 
3106 #ifdef CONFIG_AUDITSYSCALL
3107 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
3108 					 struct audit_buffer *audit_buf)
3109 {
3110 	struct xfrm_sec_ctx *ctx = xp->security;
3111 	struct xfrm_selector *sel = &xp->selector;
3112 
3113 	if (ctx)
3114 		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
3115 				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
3116 
3117 	switch (sel->family) {
3118 	case AF_INET:
3119 		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
3120 		if (sel->prefixlen_s != 32)
3121 			audit_log_format(audit_buf, " src_prefixlen=%d",
3122 					 sel->prefixlen_s);
3123 		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3124 		if (sel->prefixlen_d != 32)
3125 			audit_log_format(audit_buf, " dst_prefixlen=%d",
3126 					 sel->prefixlen_d);
3127 		break;
3128 	case AF_INET6:
3129 		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3130 		if (sel->prefixlen_s != 128)
3131 			audit_log_format(audit_buf, " src_prefixlen=%d",
3132 					 sel->prefixlen_s);
3133 		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3134 		if (sel->prefixlen_d != 128)
3135 			audit_log_format(audit_buf, " dst_prefixlen=%d",
3136 					 sel->prefixlen_d);
3137 		break;
3138 	}
3139 }
3140 
3141 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
3142 {
3143 	struct audit_buffer *audit_buf;
3144 
3145 	audit_buf = xfrm_audit_start("SPD-add");
3146 	if (audit_buf == NULL)
3147 		return;
3148 	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3149 	audit_log_format(audit_buf, " res=%u", result);
3150 	xfrm_audit_common_policyinfo(xp, audit_buf);
3151 	audit_log_end(audit_buf);
3152 }
3153 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
3154 
3155 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3156 			      bool task_valid)
3157 {
3158 	struct audit_buffer *audit_buf;
3159 
3160 	audit_buf = xfrm_audit_start("SPD-delete");
3161 	if (audit_buf == NULL)
3162 		return;
3163 	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3164 	audit_log_format(audit_buf, " res=%u", result);
3165 	xfrm_audit_common_policyinfo(xp, audit_buf);
3166 	audit_log_end(audit_buf);
3167 }
3168 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
3169 #endif
3170 
3171 #ifdef CONFIG_XFRM_MIGRATE
3172 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
3173 					const struct xfrm_selector *sel_tgt)
3174 {
3175 	if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
3176 		if (sel_tgt->family == sel_cmp->family &&
3177 		    xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
3178 				    sel_cmp->family) &&
3179 		    xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
3180 				    sel_cmp->family) &&
3181 		    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
3182 		    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3183 			return true;
3184 		}
3185 	} else {
3186 		if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3187 			return true;
3188 		}
3189 	}
3190 	return false;
3191 }
3192 
3193 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
3194 						    u8 dir, u8 type, struct net *net)
3195 {
3196 	struct xfrm_policy *pol, *ret = NULL;
3197 	struct hlist_head *chain;
3198 	u32 priority = ~0U;
3199 
3200 	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
3201 	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
3202 	hlist_for_each_entry(pol, chain, bydst) {
3203 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3204 		    pol->type == type) {
3205 			ret = pol;
3206 			priority = ret->priority;
3207 			break;
3208 		}
3209 	}
3210 	chain = &net->xfrm.policy_inexact[dir];
3211 	hlist_for_each_entry(pol, chain, bydst) {
3212 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3213 		    pol->type == type &&
3214 		    pol->priority < priority) {
3215 			ret = pol;
3216 			break;
3217 		}
3218 	}
3219 
3220 	if (ret)
3221 		xfrm_pol_hold(ret);
3222 
3223 	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
3224 
3225 	return ret;
3226 }
3227 
3228 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3229 {
3230 	int match = 0;
3231 
3232 	if (t->mode == m->mode && t->id.proto == m->proto &&
3233 	    (m->reqid == 0 || t->reqid == m->reqid)) {
3234 		switch (t->mode) {
3235 		case XFRM_MODE_TUNNEL:
3236 		case XFRM_MODE_BEET:
3237 			if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
3238 					    m->old_family) &&
3239 			    xfrm_addr_equal(&t->saddr, &m->old_saddr,
3240 					    m->old_family)) {
3241 				match = 1;
3242 			}
3243 			break;
3244 		case XFRM_MODE_TRANSPORT:
3245 			/* in case of transport mode, template does not store
3246 			   any IP addresses, hence we just compare mode and
3247 			   protocol */
3248 			match = 1;
3249 			break;
3250 		default:
3251 			break;
3252 		}
3253 	}
3254 	return match;
3255 }
3256 
3257 /* update endpoint address(es) of template(s) */
3258 static int xfrm_policy_migrate(struct xfrm_policy *pol,
3259 			       struct xfrm_migrate *m, int num_migrate)
3260 {
3261 	struct xfrm_migrate *mp;
3262 	int i, j, n = 0;
3263 
3264 	write_lock_bh(&pol->lock);
3265 	if (unlikely(pol->walk.dead)) {
3266 		/* target policy has been deleted */
3267 		write_unlock_bh(&pol->lock);
3268 		return -ENOENT;
3269 	}
3270 
3271 	for (i = 0; i < pol->xfrm_nr; i++) {
3272 		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
3273 			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
3274 				continue;
3275 			n++;
3276 			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
3277 			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3278 				continue;
3279 			/* update endpoints */
3280 			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
3281 			       sizeof(pol->xfrm_vec[i].id.daddr));
3282 			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
3283 			       sizeof(pol->xfrm_vec[i].saddr));
3284 			pol->xfrm_vec[i].encap_family = mp->new_family;
3285 			/* flush bundles */
3286 			atomic_inc(&pol->genid);
3287 		}
3288 	}
3289 
3290 	write_unlock_bh(&pol->lock);
3291 
3292 	if (!n)
3293 		return -ENODATA;
3294 
3295 	return 0;
3296 }
3297 
3298 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3299 {
3300 	int i, j;
3301 
3302 	if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
3303 		return -EINVAL;
3304 
3305 	for (i = 0; i < num_migrate; i++) {
3306 		if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
3307 				    m[i].old_family) &&
3308 		    xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
3309 				    m[i].old_family))
3310 			return -EINVAL;
3311 		if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
3312 		    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
3313 			return -EINVAL;
3314 
3315 		/* check if there is any duplicated entry */
3316 		for (j = i + 1; j < num_migrate; j++) {
3317 			if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
3318 				    sizeof(m[i].old_daddr)) &&
3319 			    !memcmp(&m[i].old_saddr, &m[j].old_saddr,
3320 				    sizeof(m[i].old_saddr)) &&
3321 			    m[i].proto == m[j].proto &&
3322 			    m[i].mode == m[j].mode &&
3323 			    m[i].reqid == m[j].reqid &&
3324 			    m[i].old_family == m[j].old_family)
3325 				return -EINVAL;
3326 		}
3327 	}
3328 
3329 	return 0;
3330 }
3331 
3332 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3333 		 struct xfrm_migrate *m, int num_migrate,
3334 		 struct xfrm_kmaddress *k, struct net *net)
3335 {
3336 	int i, err, nx_cur = 0, nx_new = 0;
3337 	struct xfrm_policy *pol = NULL;
3338 	struct xfrm_state *x, *xc;
3339 	struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
3340 	struct xfrm_state *x_new[XFRM_MAX_DEPTH];
3341 	struct xfrm_migrate *mp;
3342 
3343 	if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
3344 		goto out;
3345 
3346 	/* Stage 1 - find policy */
3347 	if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
3348 		err = -ENOENT;
3349 		goto out;
3350 	}
3351 
3352 	/* Stage 2 - find and update state(s) */
3353 	for (i = 0, mp = m; i < num_migrate; i++, mp++) {
3354 		if ((x = xfrm_migrate_state_find(mp, net))) {
3355 			x_cur[nx_cur] = x;
3356 			nx_cur++;
3357 			if ((xc = xfrm_state_migrate(x, mp))) {
3358 				x_new[nx_new] = xc;
3359 				nx_new++;
3360 			} else {
3361 				err = -ENODATA;
3362 				goto restore_state;
3363 			}
3364 		}
3365 	}
3366 
3367 	/* Stage 3 - update policy */
3368 	if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
3369 		goto restore_state;
3370 
3371 	/* Stage 4 - delete old state(s) */
3372 	if (nx_cur) {
3373 		xfrm_states_put(x_cur, nx_cur);
3374 		xfrm_states_delete(x_cur, nx_cur);
3375 	}
3376 
3377 	/* Stage 5 - announce */
3378 	km_migrate(sel, dir, type, m, num_migrate, k);
3379 
3380 	xfrm_pol_put(pol);
3381 
3382 	return 0;
3383 out:
3384 	return err;
3385 
3386 restore_state:
3387 	if (pol)
3388 		xfrm_pol_put(pol);
3389 	if (nx_cur)
3390 		xfrm_states_put(x_cur, nx_cur);
3391 	if (nx_new)
3392 		xfrm_states_delete(x_new, nx_new);
3393 
3394 	return err;
3395 }
3396 EXPORT_SYMBOL(xfrm_migrate);
3397 #endif
3398