xref: /openbmc/linux/net/rxrpc/local_object.c (revision 74a22e8f)
1 /* Local endpoint object management
2  *
3  * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public Licence
8  * as published by the Free Software Foundation; either version
9  * 2 of the Licence, or (at your option) any later version.
10  */
11 
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 
14 #include <linux/module.h>
15 #include <linux/net.h>
16 #include <linux/skbuff.h>
17 #include <linux/slab.h>
18 #include <linux/udp.h>
19 #include <linux/ip.h>
20 #include <linux/hashtable.h>
21 #include <net/sock.h>
22 #include <net/udp.h>
23 #include <net/af_rxrpc.h>
24 #include "ar-internal.h"
25 
26 static void rxrpc_local_processor(struct work_struct *);
27 static void rxrpc_local_rcu(struct rcu_head *);
28 
29 /*
30  * Compare a local to an address.  Return -ve, 0 or +ve to indicate less than,
31  * same or greater than.
32  *
33  * We explicitly don't compare the RxRPC service ID as we want to reject
34  * conflicting uses by differing services.  Further, we don't want to share
35  * addresses with different options (IPv6), so we don't compare those bits
36  * either.
37  */
38 static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
39 				const struct sockaddr_rxrpc *srx)
40 {
41 	long diff;
42 
43 	diff = ((local->srx.transport_type - srx->transport_type) ?:
44 		(local->srx.transport_len - srx->transport_len) ?:
45 		(local->srx.transport.family - srx->transport.family));
46 	if (diff != 0)
47 		return diff;
48 
49 	switch (srx->transport.family) {
50 	case AF_INET:
51 		/* If the choice of UDP port is left up to the transport, then
52 		 * the endpoint record doesn't match.
53 		 */
54 		return ((u16 __force)local->srx.transport.sin.sin_port -
55 			(u16 __force)srx->transport.sin.sin_port) ?:
56 			memcmp(&local->srx.transport.sin.sin_addr,
57 			       &srx->transport.sin.sin_addr,
58 			       sizeof(struct in_addr));
59 #ifdef CONFIG_AF_RXRPC_IPV6
60 	case AF_INET6:
61 		/* If the choice of UDP6 port is left up to the transport, then
62 		 * the endpoint record doesn't match.
63 		 */
64 		return ((u16 __force)local->srx.transport.sin6.sin6_port -
65 			(u16 __force)srx->transport.sin6.sin6_port) ?:
66 			memcmp(&local->srx.transport.sin6.sin6_addr,
67 			       &srx->transport.sin6.sin6_addr,
68 			       sizeof(struct in6_addr));
69 #endif
70 	default:
71 		BUG();
72 	}
73 }
74 
75 /*
76  * Allocate a new local endpoint.
77  */
78 static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
79 					     const struct sockaddr_rxrpc *srx)
80 {
81 	struct rxrpc_local *local;
82 
83 	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
84 	if (local) {
85 		atomic_set(&local->usage, 1);
86 		local->rxnet = rxnet;
87 		INIT_LIST_HEAD(&local->link);
88 		INIT_WORK(&local->processor, rxrpc_local_processor);
89 		init_rwsem(&local->defrag_sem);
90 		skb_queue_head_init(&local->reject_queue);
91 		skb_queue_head_init(&local->event_queue);
92 		local->client_conns = RB_ROOT;
93 		spin_lock_init(&local->client_conns_lock);
94 		spin_lock_init(&local->lock);
95 		rwlock_init(&local->services_lock);
96 		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
97 		memcpy(&local->srx, srx, sizeof(*srx));
98 		local->srx.srx_service = 0;
99 		trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
100 	}
101 
102 	_leave(" = %p", local);
103 	return local;
104 }
105 
/*
 * create the local socket
 * - must be called with rxrpc_local_mutex locked
 *
 * Creates a kernel UDP/UDP6 socket for the endpoint, installs it as an
 * RxRPC encapsulation socket, optionally binds it to the address held in
 * local->srx and sets the socket options needed for ICMP error and receive
 * timestamp reporting.  Returns 0 or a negative errno; on failure the
 * socket is shut down and released and local->socket is reset to NULL.
 */
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
	struct sock *usk;
	int ret, opt;

	_enter("%p{%d,%d}",
	       local, local->srx.transport_type, local->srx.transport.family);

	/* create a socket to represent the local endpoint */
	ret = sock_create_kern(net, local->srx.transport.family,
			       local->srx.transport_type, 0, &local->socket);
	if (ret < 0) {
		_leave(" = %d [socket]", ret);
		return ret;
	}

	/* set the socket up */
	usk = local->socket->sk;
	inet_sk(usk)->mc_loop = 0;

	/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
	inet_inc_convert_csum(usk);

	/* Point sk_user_data at the endpoint record (published with RCU
	 * assignment semantics) so the encap_rcv hook can find it.
	 */
	rcu_assign_sk_user_data(usk, local);

	/* Divert packets received on this socket into rxrpc_input_packet()
	 * via the UDP encapsulation hooks; GRO is not used.
	 */
	udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC;
	udp_sk(usk)->encap_rcv = rxrpc_input_packet;
	udp_sk(usk)->encap_destroy = NULL;
	udp_sk(usk)->gro_receive = NULL;
	udp_sk(usk)->gro_complete = NULL;

	udp_encap_enable();
#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
	if (local->srx.transport.family == AF_INET6)
		udpv6_encap_enable();
#endif
	usk->sk_error_report = rxrpc_error_report;

	/* if a local address was supplied then bind it */
	if (local->srx.transport_len > sizeof(sa_family_t)) {
		_debug("bind");
		ret = kernel_bind(local->socket,
				  (struct sockaddr *)&local->srx.transport,
				  local->srx.transport_len);
		if (ret < 0) {
			_debug("bind failed %d", ret);
			goto error;
		}
	}

	switch (local->srx.transport.family) {
	case AF_INET6:
		/* we want to receive ICMPv6 errors */
		opt = 1;
		ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

		/* we want to set the don't fragment bit */
		opt = IPV6_PMTUDISC_DO;
		ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER,
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

		/* Fall through and set IPv4 options too otherwise we don't get
		 * errors from IPv4 packets sent through the IPv6 socket.
		 */
		/* Fall through */
	case AF_INET:
		/* we want to receive ICMP errors */
		opt = 1;
		ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

		/* we want to set the don't fragment bit */
		opt = IP_PMTUDISC_DO;
		ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
					(char *) &opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}

		/* We want receive timestamps. */
		opt = 1;
		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
					(char *)&opt, sizeof(opt));
		if (ret < 0) {
			_debug("setsockopt failed");
			goto error;
		}
		break;

	default:
		BUG();
	}

	_leave(" = 0");
	return 0;

error:
	/* Tear the socket back down; the endpoint record itself is left for
	 * the caller to dispose of.
	 */
	kernel_sock_shutdown(local->socket, SHUT_RDWR);
	local->socket->sk->sk_user_data = NULL;
	sock_release(local->socket);
	local->socket = NULL;

	_leave(" = %d", ret);
	return ret;
}
229 
/*
 * Look up or create a new local endpoint using the specified local address.
 *
 * The per-netns endpoint list is kept sorted by rxrpc_local_cmp_key().  A
 * live matching entry is reused (taking a new ref) unless the caller wants
 * a service socket (srx_service != 0), in which case address sharing is
 * refused with -EADDRINUSE.  A dying entry (refcount already at zero) is
 * unlinked and replaced by a freshly allocated endpoint.  Returns the
 * endpoint or an ERR_PTR().
 */
struct rxrpc_local *rxrpc_lookup_local(struct net *net,
				       const struct sockaddr_rxrpc *srx)
{
	struct rxrpc_local *local;
	struct rxrpc_net *rxnet = rxrpc_net(net);
	struct list_head *cursor;
	const char *age;
	long diff;
	int ret;

	_enter("{%d,%d,%pISp}",
	       srx->transport_type, srx->transport.family, &srx->transport);

	mutex_lock(&rxnet->local_mutex);

	/* Walk the sorted list; stop at the first entry >= the key. */
	for (cursor = rxnet->local_endpoints.next;
	     cursor != &rxnet->local_endpoints;
	     cursor = cursor->next) {
		local = list_entry(cursor, struct rxrpc_local, link);

		diff = rxrpc_local_cmp_key(local, srx);
		if (diff < 0)
			continue;
		if (diff > 0)
			break;

		/* Services aren't allowed to share transport sockets, so
		 * reject that here.  It is possible that the object is dying -
		 * but it may also still have the local transport address that
		 * we want bound.
		 */
		if (srx->srx_service) {
			local = NULL;
			goto addr_in_use;
		}

		/* Found a match.  We replace a dying object.  Attempting to
		 * bind the transport socket may still fail if we're attempting
		 * to use a local address that the dying object is still using.
		 */
		if (!rxrpc_get_local_maybe(local)) {
			/* Advance the cursor past the dying entry before
			 * unlinking it, so that the replacement below is
			 * inserted at the same position in the list.
			 */
			cursor = cursor->next;
			list_del_init(&local->link);
			break;
		}

		age = "old";
		goto found;
	}

	local = rxrpc_alloc_local(rxnet, srx);
	if (!local)
		goto nomem;

	ret = rxrpc_open_socket(local, net);
	if (ret < 0)
		goto sock_error;

	/* Insert before the cursor to keep the list sorted. */
	list_add_tail(&local->link, cursor);
	age = "new";

found:
	mutex_unlock(&rxnet->local_mutex);

	_net("LOCAL %s %d {%pISp}",
	     age, local->debug_id, &local->srx.transport);

	_leave(" = %p", local);
	return local;

nomem:
	ret = -ENOMEM;
sock_error:
	mutex_unlock(&rxnet->local_mutex);
	/* The new object was never published, so it can go straight to RCU
	 * disposal without the destroyer path.
	 */
	if (local)
		call_rcu(&local->rcu, rxrpc_local_rcu);
	_leave(" = %d", ret);
	return ERR_PTR(ret);

addr_in_use:
	mutex_unlock(&rxnet->local_mutex);
	_leave(" = -EADDRINUSE");
	return ERR_PTR(-EADDRINUSE);
}
317 
318 /*
319  * Get a ref on a local endpoint.
320  */
321 struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
322 {
323 	const void *here = __builtin_return_address(0);
324 	int n;
325 
326 	n = atomic_inc_return(&local->usage);
327 	trace_rxrpc_local(local, rxrpc_local_got, n, here);
328 	return local;
329 }
330 
331 /*
332  * Get a ref on a local endpoint unless its usage has already reached 0.
333  */
334 struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
335 {
336 	const void *here = __builtin_return_address(0);
337 
338 	if (local) {
339 		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
340 		if (n > 0)
341 			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
342 		else
343 			local = NULL;
344 	}
345 	return local;
346 }
347 
348 /*
349  * Queue a local endpoint.
350  */
351 void rxrpc_queue_local(struct rxrpc_local *local)
352 {
353 	const void *here = __builtin_return_address(0);
354 
355 	if (rxrpc_queue_work(&local->processor))
356 		trace_rxrpc_local(local, rxrpc_local_queued,
357 				  atomic_read(&local->usage), here);
358 }
359 
/*
 * A local endpoint reached its end of life.
 *
 * Actual destruction must happen in process context (closing the socket can
 * sleep), so defer it to the endpoint's work item: the processor sees
 * usage == 0 and calls rxrpc_local_destroyer().
 */
static void __rxrpc_put_local(struct rxrpc_local *local)
{
	_enter("%d", local->debug_id);
	rxrpc_queue_work(&local->processor);
}
368 
369 /*
370  * Drop a ref on a local endpoint.
371  */
372 void rxrpc_put_local(struct rxrpc_local *local)
373 {
374 	const void *here = __builtin_return_address(0);
375 	int n;
376 
377 	if (local) {
378 		n = atomic_dec_return(&local->usage);
379 		trace_rxrpc_local(local, rxrpc_local_put, n, here);
380 
381 		if (n == 0)
382 			__rxrpc_put_local(local);
383 	}
384 }
385 
/*
 * Destroy a local endpoint's socket and then hand the record to RCU to dispose
 * of.
 *
 * Closing the socket cannot be done from bottom half context or RCU callback
 * context because it might sleep.
 */
static void rxrpc_local_destroyer(struct rxrpc_local *local)
{
	struct socket *socket = local->socket;
	struct rxrpc_net *rxnet = local->rxnet;

	_enter("%d", local->debug_id);

	/* We can get a race between an incoming call packet queueing the
	 * processor again and the work processor starting the destruction
	 * process which will shut down the UDP socket.
	 */
	if (local->dead) {
		_leave(" [already dead]");
		return;
	}
	local->dead = true;

	/* Unpublish the endpoint so that rxrpc_lookup_local() can no longer
	 * find it.
	 */
	mutex_lock(&rxnet->local_mutex);
	list_del_init(&local->link);
	mutex_unlock(&rxnet->local_mutex);

	/* All client connections and services should have gone by the time
	 * the last ref was dropped.
	 */
	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
	ASSERT(!local->service);

	if (socket) {
		/* Clear local->socket first, then detach the endpoint from
		 * sk_user_data before releasing the socket.
		 */
		local->socket = NULL;
		kernel_sock_shutdown(socket, SHUT_RDWR);
		socket->sk->sk_user_data = NULL;
		sock_release(socket);
	}

	/* At this point, there should be no more packets coming in to the
	 * local endpoint.
	 */
	rxrpc_purge_queue(&local->reject_queue);
	rxrpc_purge_queue(&local->event_queue);

	/* Defer freeing until any readers still under RCU protection are
	 * done with the object.
	 */
	_debug("rcu local %d", local->debug_id);
	call_rcu(&local->rcu, rxrpc_local_rcu);
}
433 
434 /*
435  * Process events on an endpoint
436  */
437 static void rxrpc_local_processor(struct work_struct *work)
438 {
439 	struct rxrpc_local *local =
440 		container_of(work, struct rxrpc_local, processor);
441 	bool again;
442 
443 	trace_rxrpc_local(local, rxrpc_local_processing,
444 			  atomic_read(&local->usage), NULL);
445 
446 	do {
447 		again = false;
448 		if (atomic_read(&local->usage) == 0)
449 			return rxrpc_local_destroyer(local);
450 
451 		if (!skb_queue_empty(&local->reject_queue)) {
452 			rxrpc_reject_packets(local);
453 			again = true;
454 		}
455 
456 		if (!skb_queue_empty(&local->event_queue)) {
457 			rxrpc_process_local_events(local);
458 			again = true;
459 		}
460 	} while (again);
461 }
462 
/*
 * Destroy a local endpoint after the RCU grace period expires.  Runs in RCU
 * callback context, so it must not sleep - the socket was already closed by
 * rxrpc_local_destroyer().
 */
static void rxrpc_local_rcu(struct rcu_head *rcu)
{
	struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);

	_enter("%d", local->debug_id);

	/* Nothing may requeue the work item by this point. */
	ASSERT(!work_pending(&local->processor));

	_net("DESTROY LOCAL %d", local->debug_id);
	kfree(local);
	_leave("");
}
478 
479 /*
480  * Verify the local endpoint list is empty by this point.
481  */
482 void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
483 {
484 	struct rxrpc_local *local;
485 
486 	_enter("");
487 
488 	flush_workqueue(rxrpc_workqueue);
489 
490 	if (!list_empty(&rxnet->local_endpoints)) {
491 		mutex_lock(&rxnet->local_mutex);
492 		list_for_each_entry(local, &rxnet->local_endpoints, link) {
493 			pr_err("AF_RXRPC: Leaked local %p {%d}\n",
494 			       local, atomic_read(&local->usage));
495 		}
496 		mutex_unlock(&rxnet->local_mutex);
497 		BUG();
498 	}
499 }
500