xref: /openbmc/linux/net/xfrm/xfrm_input.c (revision 023e41632e065d49bcbe31b3c4b336217f96a271)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * xfrm_input.c
4  *
5  * Changes:
6  * 	YOSHIFUJI Hideaki @USAGI
7  * 		Split up af-specific portion
8  *
9  */
10 
11 #include <linux/bottom_half.h>
12 #include <linux/cache.h>
13 #include <linux/interrupt.h>
14 #include <linux/slab.h>
15 #include <linux/module.h>
16 #include <linux/netdevice.h>
17 #include <linux/percpu.h>
18 #include <net/dst.h>
19 #include <net/ip.h>
20 #include <net/xfrm.h>
21 #include <net/ip_tunnels.h>
22 #include <net/ip6_tunnel.h>
23 
/*
 * Deferred-reinjection context; one instance exists per CPU (see the
 * DEFINE_PER_CPU below).  xfrm_trans_queue() appends packets to @queue and
 * schedules @tasklet, whose handler xfrm_trans_reinject() drains the queue.
 */
24 struct xfrm_trans_tasklet {
25 	struct tasklet_struct tasklet;
26 	struct sk_buff_head queue;
27 };
28 
/*
 * Layout laid over the start of skb->cb (via XFRM_TRANS_SKB_CB) while a
 * packet sits on a xfrm_trans_tasklet queue.
 *
 * @header: sized like the IPv4 / IPv6 per-skb parameter blocks; it is never
 *          accessed in this file.  NOTE(review): presumably it only reserves
 *          room so that storing @finish does not overwrite the protocol's
 *          own control-block data - confirm against the callers.
 * @finish: continuation set by xfrm_trans_queue() and invoked by
 *          xfrm_trans_reinject() with a NULL socket.
 */
29 struct xfrm_trans_cb {
30 	union {
31 		struct inet_skb_parm	h4;
32 #if IS_ENABLED(CONFIG_IPV6)
33 		struct inet6_skb_parm	h6;
34 #endif
35 	} header;
36 	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
37 };
38 
/* View the first bytes of an skb's control buffer as a struct xfrm_trans_cb. */
39 #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
40 
/* Taken with bottom halves disabled around every update of xfrm_input_afinfo[]. */
41 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
/* Per-family input handler tables, indexed by address family (0..AF_INET6); readers use RCU. */
42 static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
43 
/* GRO cells and the dummy net_device they are attached to; both set up in xfrm_input_init(). */
44 static struct gro_cells gro_cells;
45 static struct net_device xfrm_napi_dev;
46 
/* Per-CPU reinjection tasklet plus queue used by xfrm_trans_queue(). */
47 static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
48 
49 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
50 {
51 	int err = 0;
52 
53 	if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo)))
54 		return -EAFNOSUPPORT;
55 
56 	spin_lock_bh(&xfrm_input_afinfo_lock);
57 	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
58 		err = -EEXIST;
59 	else
60 		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
61 	spin_unlock_bh(&xfrm_input_afinfo_lock);
62 	return err;
63 }
64 EXPORT_SYMBOL(xfrm_input_register_afinfo);
65 
66 int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo)
67 {
68 	int err = 0;
69 
70 	spin_lock_bh(&xfrm_input_afinfo_lock);
71 	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
72 		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
73 			err = -EINVAL;
74 		else
75 			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
76 	}
77 	spin_unlock_bh(&xfrm_input_afinfo_lock);
78 	synchronize_rcu();
79 	return err;
80 }
81 EXPORT_SYMBOL(xfrm_input_unregister_afinfo);
82 
83 static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
84 {
85 	const struct xfrm_input_afinfo *afinfo;
86 
87 	if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo)))
88 		return NULL;
89 
90 	rcu_read_lock();
91 	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
92 	if (unlikely(!afinfo))
93 		rcu_read_unlock();
94 	return afinfo;
95 }
96 
97 static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
98 		       int err)
99 {
100 	int ret;
101 	const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);
102 
103 	if (!afinfo)
104 		return -EAFNOSUPPORT;
105 
106 	ret = afinfo->callback(skb, protocol, err);
107 	rcu_read_unlock();
108 
109 	return ret;
110 }
111 
112 struct sec_path *secpath_set(struct sk_buff *skb)
113 {
114 	struct sec_path *sp, *tmp = skb_ext_find(skb, SKB_EXT_SEC_PATH);
115 
116 	sp = skb_ext_add(skb, SKB_EXT_SEC_PATH);
117 	if (!sp)
118 		return NULL;
119 
120 	if (tmp) /* reused existing one (was COW'd if needed) */
121 		return sp;
122 
123 	/* allocated new secpath */
124 	memset(sp->ovec, 0, sizeof(sp->ovec));
125 	sp->olen = 0;
126 	sp->len = 0;
127 
128 	return sp;
129 }
130 EXPORT_SYMBOL(secpath_set);
131 
132 /* Fetch spi and seq from ipsec header */
133 
134 int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
135 {
136 	int offset, offset_seq;
137 	int hlen;
138 
139 	switch (nexthdr) {
140 	case IPPROTO_AH:
141 		hlen = sizeof(struct ip_auth_hdr);
142 		offset = offsetof(struct ip_auth_hdr, spi);
143 		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
144 		break;
145 	case IPPROTO_ESP:
146 		hlen = sizeof(struct ip_esp_hdr);
147 		offset = offsetof(struct ip_esp_hdr, spi);
148 		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
149 		break;
150 	case IPPROTO_COMP:
151 		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
152 			return -EINVAL;
153 		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
154 		*seq = 0;
155 		return 0;
156 	default:
157 		return 1;
158 	}
159 
160 	if (!pskb_may_pull(skb, hlen))
161 		return -EINVAL;
162 
163 	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
164 	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
165 	return 0;
166 }
167 EXPORT_SYMBOL(xfrm_parse_spi);
168 
169 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
170 {
171 	struct xfrm_mode *inner_mode = x->inner_mode;
172 	int err;
173 
174 	err = x->outer_mode->afinfo->extract_input(x, skb);
175 	if (err)
176 		return err;
177 
178 	if (x->sel.family == AF_UNSPEC) {
179 		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
180 		if (inner_mode == NULL)
181 			return -EAFNOSUPPORT;
182 	}
183 
184 	skb->protocol = inner_mode->afinfo->eth_proto;
185 	return inner_mode->input2(x, skb);
186 }
187 EXPORT_SYMBOL(xfrm_prepare_input);
188 
/**
 * xfrm_input - run an inbound packet through its chain of xfrm states
 * @skb: packet being received
 * @nexthdr: protocol of the IPsec header to process; it is handed to
 *           xfrm_parse_spi() and xfrm_state_lookup().  On an async resume
 *           (@encap_type == -1) it is instead the value tested at the
 *           "resume" label, where a value <= 0 means the type handler failed.
 * @spi: SPI to look up; when zero on a normal call it is read from the
 *       packet with xfrm_parse_spi()
 * @encap_type: >= 0: encapsulation type that must match the first state's
 *              x->encap (0 when the state has none); -1: async resumption;
 *              < -1: call from GRO
 *
 * Every loop iteration handles one transform: look the state up (skipped
 * when entering at "lock" or "resume"), validate it under x->lock, call the
 * type's input handler, update replay/lifetime accounting and run the inner
 * mode's input hook.  The loop ends after a tunnel-mode state or when
 * xfrm_parse_spi() reports that no further IPsec header follows.
 *
 * On every "drop" path the per-family callback is invoked with err = -1 and
 * the skb is freed with kfree_skb().
 *
 * Return: 0 when the packet was dropped, when the type handler returned
 * -EINPROGRESS, or when a tunnel-mode packet was passed to gro_cells;
 * otherwise the result of the inner mode's transport_finish() hook.
 */
189 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
190 {
191 	struct net *net = dev_net(skb->dev);
192 	int err;
193 	__be32 seq;
194 	__be32 seq_hi;
195 	struct xfrm_state *x = NULL;
196 	xfrm_address_t *daddr;
197 	struct xfrm_mode *inner_mode;
198 	u32 mark = skb->mark;
199 	unsigned int family = AF_UNSPEC;
200 	int decaps = 0;
201 	int async = 0;
202 	bool xfrm_gro = false;
203 	bool crypto_done = false;
204 	struct xfrm_offload *xo = xfrm_offload(skb);
205 	struct sec_path *sp;
206 
	/*
	 * Negative encap_type: the state is obtained from the skb with
	 * xfrm_input_state() instead of being looked up, and control jumps
	 * into the loop body below at "resume" or "lock".
	 */
207 	if (encap_type < 0) {
208 		x = xfrm_input_state(skb);
209 
210 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
211 			if (x->km.state == XFRM_STATE_ACQ)
212 				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
213 			else
214 				XFRM_INC_STATS(net,
215 					       LINUX_MIB_XFRMINSTATEINVALID);
216 			goto drop;
217 		}
218 
219 		family = x->outer_mode->afinfo->family;
220 
221 		/* An encap_type of -1 indicates async resumption. */
222 		if (encap_type == -1) {
223 			async = 1;
224 			seq = XFRM_SKB_CB(skb)->seq.input.low;
225 			goto resume;
226 		}
227 
228 		/* encap_type < -1 indicates a GRO call. */
229 		encap_type = 0;
230 		seq = XFRM_SPI_SKB_CB(skb)->seq;
231 
		/*
		 * The offload state attached to the skb says the crypto work
		 * is already done: turn a failed status into the matching
		 * statistic and drop, otherwise re-read spi/seq from the
		 * packet.
		 */
232 		if (xo && (xo->flags & CRYPTO_DONE)) {
233 			crypto_done = true;
234 			family = XFRM_SPI_SKB_CB(skb)->family;
235 
236 			if (!(xo->status & CRYPTO_SUCCESS)) {
237 				if (xo->status &
238 				    (CRYPTO_TRANSPORT_AH_AUTH_FAILED |
239 				     CRYPTO_TRANSPORT_ESP_AUTH_FAILED |
240 				     CRYPTO_TUNNEL_AH_AUTH_FAILED |
241 				     CRYPTO_TUNNEL_ESP_AUTH_FAILED)) {
242 
243 					xfrm_audit_state_icvfail(x, skb,
244 								 x->type->proto);
245 					x->stats.integrity_failed++;
246 					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
247 					goto drop;
248 				}
249 
250 				if (xo->status & CRYPTO_INVALID_PROTOCOL) {
251 					XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
252 					goto drop;
253 				}
254 
255 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
256 				goto drop;
257 			}
258 
259 			if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
260 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
261 				goto drop;
262 			}
263 		}
264 
		/* Enter the loop body after the state lookup. */
265 		goto lock;
266 	}
267 
268 	family = XFRM_SPI_SKB_CB(skb)->family;
269 
270 	/* if tunnel is present override skb->mark value with tunnel i_key */
271 	switch (family) {
272 	case AF_INET:
273 		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
274 			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
275 		break;
276 	case AF_INET6:
277 		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
278 			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
279 		break;
280 	}
281 
282 	sp = secpath_set(skb);
283 	if (!sp) {
284 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
285 		goto drop;
286 	}
287 
	/* Only parse the header when the caller did not supply an SPI. */
288 	seq = 0;
289 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
290 		secpath_reset(skb);
291 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
292 		goto drop;
293 	}
294 
295 	daddr = (xfrm_address_t *)(skb_network_header(skb) +
296 				   XFRM_SPI_SKB_CB(skb)->daddroff);
	/* One iteration per transform found on the packet. */
297 	do {
298 		sp = skb_sec_path(skb);
299 
		/* No room left in sp->xvec[] for another state. */
300 		if (sp->len == XFRM_MAX_DEPTH) {
301 			secpath_reset(skb);
302 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
303 			goto drop;
304 		}
305 
306 		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
307 		if (x == NULL) {
308 			secpath_reset(skb);
309 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
310 			xfrm_audit_state_notfound(skb, family, spi, seq);
311 			goto drop;
312 		}
313 
314 		skb->mark = xfrm_smark_get(skb->mark, x);
315 
		/* Record the state in the packet's sec_path. */
316 		sp->xvec[sp->len++] = x;
317 
		/* skb_dst_force() may leave the skb without a dst; drop then. */
318 		skb_dst_force(skb);
319 		if (!skb_dst(skb)) {
320 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
321 			goto drop;
322 		}
323 
324 lock:
		/* State, encap type, replay and expiry checks run under x->lock. */
325 		spin_lock(&x->lock);
326 
327 		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
328 			if (x->km.state == XFRM_STATE_ACQ)
329 				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
330 			else
331 				XFRM_INC_STATS(net,
332 					       LINUX_MIB_XFRMINSTATEINVALID);
333 			goto drop_unlock;
334 		}
335 
336 		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
337 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
338 			goto drop_unlock;
339 		}
340 
341 		if (x->repl->check(x, skb, seq)) {
342 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
343 			goto drop_unlock;
344 		}
345 
346 		if (xfrm_state_check_expire(x)) {
347 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
348 			goto drop_unlock;
349 		}
350 
351 		spin_unlock(&x->lock);
352 
353 		if (xfrm_tunnel_check(skb, x, family)) {
354 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
355 			goto drop;
356 		}
357 
358 		seq_hi = htonl(xfrm_replay_seqhi(x, seq));
359 
		/*
		 * Save the sequence number in the skb control block; the
		 * async resume path above reads seq.input.low back from it.
		 */
360 		XFRM_SKB_CB(skb)->seq.input.low = seq;
361 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
362 
		/*
		 * Hold the device across the type handler; the reference is
		 * released at "resume", which an asynchronous completion
		 * reaches through a later call with encap_type == -1.
		 */
363 		dev_hold(skb->dev);
364 
365 		if (crypto_done)
366 			nexthdr = x->type_offload->input_tail(x, skb);
367 		else
368 			nexthdr = x->type->input(x, skb);
369 
		/*
		 * The handler has not finished yet; processing continues when
		 * this function is re-entered with encap_type == -1 (see
		 * xfrm_input_resume()).
		 */
370 		if (nexthdr == -EINPROGRESS)
371 			return 0;
372 resume:
373 		dev_put(skb->dev);
374 
375 		spin_lock(&x->lock);
		/* nexthdr <= 0: the type handler reported a failure. */
376 		if (nexthdr <= 0) {
377 			if (nexthdr == -EBADMSG) {
378 				xfrm_audit_state_icvfail(x, skb,
379 							 x->type->proto);
380 				x->stats.integrity_failed++;
381 			}
382 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
383 			goto drop_unlock;
384 		}
385 
386 		/* only the first xfrm gets the encap type */
387 		encap_type = 0;
388 
		/* The replay state is re-checked only on the async resume path. */
389 		if (async && x->repl->recheck(x, skb, seq)) {
390 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
391 			goto drop_unlock;
392 		}
393 
394 		x->repl->advance(x, seq);
395 
396 		x->curlft.bytes += skb->len;
397 		x->curlft.packets++;
398 
399 		spin_unlock(&x->lock);
400 
401 		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
402 
403 		inner_mode = x->inner_mode;
404 
		/* AF_UNSPEC selector: choose the inner mode from the protocol. */
405 		if (x->sel.family == AF_UNSPEC) {
406 			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
407 			if (inner_mode == NULL) {
408 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
409 				goto drop;
410 			}
411 		}
412 
413 		if (inner_mode->input(x, skb)) {
414 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
415 			goto drop;
416 		}
417 
		/* A tunnel-mode state ends the chain. */
418 		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
419 			decaps = 1;
420 			break;
421 		}
422 
423 		/*
424 		 * We need the inner address.  However, we only get here for
425 		 * transport mode so the outer address is identical.
426 		 */
427 		daddr = &x->id.daddr;
428 		family = x->outer_mode->afinfo->family;
429 
		/*
		 * err == 0: another IPsec header follows, loop again;
		 * err > 0: not an IPsec header, leave the loop;
		 * err < 0: malformed header, drop.
		 */
430 		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
431 		if (err < 0) {
432 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
433 			goto drop;
434 		}
435 		crypto_done = false;
436 	} while (!err);
437 
	/* All transforms succeeded: notify the per-family callback with err = 0. */
438 	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
439 	if (err)
440 		goto drop;
441 
442 	nf_reset(skb);
443 
	/*
	 * Tunnel mode: clear sp->olen, drop the dst and pass the packet to
	 * gro_cells.  Otherwise call the inner mode's transport_finish()
	 * hook, and pass the packet to gro_cells only when XFRM_GRO is set.
	 */
444 	if (decaps) {
445 		sp = skb_sec_path(skb);
446 		if (sp)
447 			sp->olen = 0;
448 		skb_dst_drop(skb);
449 		gro_cells_receive(&gro_cells, skb);
450 		return 0;
451 	} else {
452 		xo = xfrm_offload(skb);
453 		if (xo)
454 			xfrm_gro = xo->flags & XFRM_GRO;
455 
456 		err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
457 		if (xfrm_gro) {
458 			sp = skb_sec_path(skb);
459 			if (sp)
460 				sp->olen = 0;
461 			skb_dst_drop(skb);
462 			gro_cells_receive(&gro_cells, skb);
463 			return err;
464 		}
465 
466 		return err;
467 	}
468 
469 drop_unlock:
470 	spin_unlock(&x->lock);
471 drop:
	/*
	 * x may still be NULL here (drops before the first lookup succeeds),
	 * in which case the caller's nexthdr is reported instead of the
	 * state's protocol.
	 */
472 	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
473 	kfree_skb(skb);
474 	return 0;
475 }
476 EXPORT_SYMBOL(xfrm_input);
477 
/**
 * xfrm_input_resume - continue input processing after an async step
 * @skb: packet whose processing was suspended
 * @nexthdr: value handed to xfrm_input() as its @nexthdr argument
 *
 * Re-enters xfrm_input() with an SPI of 0 and an encap_type of -1, the
 * value xfrm_input() treats as "async resumption".
 *
 * Return: whatever xfrm_input() returns.
 */
int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	const int async_resume = -1;

	return xfrm_input(skb, nexthdr, 0, async_resume);
}
EXPORT_SYMBOL(xfrm_input_resume);
483 
484 static void xfrm_trans_reinject(unsigned long data)
485 {
486 	struct xfrm_trans_tasklet *trans = (void *)data;
487 	struct sk_buff_head queue;
488 	struct sk_buff *skb;
489 
490 	__skb_queue_head_init(&queue);
491 	skb_queue_splice_init(&trans->queue, &queue);
492 
493 	while ((skb = __skb_dequeue(&queue)))
494 		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
495 }
496 
497 int xfrm_trans_queue(struct sk_buff *skb,
498 		     int (*finish)(struct net *, struct sock *,
499 				   struct sk_buff *))
500 {
501 	struct xfrm_trans_tasklet *trans;
502 
503 	trans = this_cpu_ptr(&xfrm_trans_tasklet);
504 
505 	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
506 		return -ENOBUFS;
507 
508 	XFRM_TRANS_SKB_CB(skb)->finish = finish;
509 	__skb_queue_tail(&trans->queue, skb);
510 	tasklet_schedule(&trans->tasklet);
511 	return 0;
512 }
513 EXPORT_SYMBOL(xfrm_trans_queue);
514 
515 void __init xfrm_input_init(void)
516 {
517 	int err;
518 	int i;
519 
520 	init_dummy_netdev(&xfrm_napi_dev);
521 	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
522 	if (err)
523 		gro_cells.cells = NULL;
524 
525 	for_each_possible_cpu(i) {
526 		struct xfrm_trans_tasklet *trans;
527 
528 		trans = &per_cpu(xfrm_trans_tasklet, i);
529 		__skb_queue_head_init(&trans->queue);
530 		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
531 			     (unsigned long)trans);
532 	}
533 }
534