xref: /openbmc/linux/arch/um/drivers/virtio_uml.c (revision 0eb76ba2)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio vhost-user driver
4  *
5  * Copyright(c) 2019 Intel Corporation
6  *
7  * This driver allows virtio devices to be used over a vhost-user socket.
8  *
9  * Guest devices can be instantiated by kernel module or command line
10  * parameters. One device will be created for each parameter. Syntax:
11  *
12  *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13  * where:
14  *		<socket>	:= vhost-user socket path to connect
15  *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
16  *		<platform_id>	:= (optional) platform device id
17  *
18  * example:
19  *		virtio_uml.device=/var/uml.socket:1
20  *
21  * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22  */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <linux/time-internal.h>
30 #include <shared/as-layout.h>
31 #include <irq_kern.h>
32 #include <init.h>
33 #include <os.h>
34 #include "vhost_user.h"
35 
/* Ring size initially requested for every virtqueue created by this driver. */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Map an embedded struct virtio_device back to its virtio_uml_device. */
#define to_virtio_uml_device(_vdev) \
	container_of((_vdev), struct virtio_uml_device, vdev)
40 
41 struct virtio_uml_platform_data {
42 	u32 virtio_device_id;
43 	const char *socket_path;
44 	struct work_struct conn_broken_wk;
45 	struct platform_device *pdev;
46 };
47 
48 struct virtio_uml_device {
49 	struct virtio_device vdev;
50 	struct platform_device *pdev;
51 
52 	spinlock_t sock_lock;
53 	int sock, req_fd, irq;
54 	u64 features;
55 	u64 protocol_features;
56 	u8 status;
57 	u8 registered:1;
58 };
59 
/* Per-virtqueue private data, stored in vq->priv. */
struct virtio_uml_vq_info {
	/* eventfd written by vu_notify(), or -1 with in-band notifications */
	int kick_fd;
	/* read end of the call pipe, or -1 when calls arrive in-band */
	int call_fd;
	/* "<pdev name>.<pdev id>-<queue name>", used as vring and IRQ name */
	char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/* state needed to replay a deferred interrupt to the real callback */
	struct virtqueue *vq;
	vq_callback_t *callback;
	struct time_travel_event defer;
#endif
};
69 
/* Defined outside this file; used to describe memory to the vhost-user side. */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() against the platform device that backs @vu_dev. */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
73 
74 /* Vhost-user protocol */
75 
/*
 * Send all @len bytes of @buf on @fd, passing @fds along with the first
 * chunk that actually goes out. Retries on short writes and on -EINTR.
 *
 * Returns 0 on success or the negative error from os_sendmsg_fds().
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
			    const int *fds, unsigned int fds_num)
{
	int sent;

	for (;;) {
		sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
		if (sent > 0) {
			buf += sent;
			len -= sent;
			/* the descriptors must not be sent a second time */
			fds = NULL;
			fds_num = 0;
		}

		if (!len)
			break;
		if (sent < 0 && sent != -EINTR)
			break;
	}

	return sent < 0 ? sent : 0;
}
95 
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * Short reads and -EINTR are always retried. -EAGAIN is retried only when
 * @abortable is false; an abortable read hands -EAGAIN back to the caller.
 *
 * Returns 0 on success, -ECONNRESET if os_read_file() returned 0 before
 * everything was read, or the negative error from os_read_file().
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	/*
	 * Messages without payload ask for zero bytes. Without this check
	 * we would still call os_read_file() with a zero length, and a 0
	 * result from that is indistinguishable from end-of-file, so the
	 * empty payload would be reported as -ECONNRESET.
	 */
	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}
114 
115 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
116 {
117 	return full_read(fd, msg, sizeof(msg->header), true);
118 }
119 
120 static int vhost_user_recv(struct virtio_uml_device *vu_dev,
121 			   int fd, struct vhost_user_msg *msg,
122 			   size_t max_payload_size, bool wait)
123 {
124 	size_t size;
125 	int rc;
126 
127 	/*
128 	 * In virtio time-travel mode, we're handling all the vhost-user
129 	 * FDs by polling them whenever appropriate. However, we may get
130 	 * into a situation where we're sending out an interrupt message
131 	 * to a device (e.g. a net device) and need to handle a simulation
132 	 * time message while doing so, e.g. one that tells us to update
133 	 * our idea of how long we can run without scheduling.
134 	 *
135 	 * Thus, we need to not just read() from the given fd, but need
136 	 * to also handle messages for the simulation time - this function
137 	 * does that for us while waiting for the given fd to be readable.
138 	 */
139 	if (wait)
140 		time_travel_wait_readable(fd);
141 
142 	rc = vhost_user_recv_header(fd, msg);
143 
144 	if (rc == -ECONNRESET && vu_dev->registered) {
145 		struct virtio_uml_platform_data *pdata;
146 
147 		pdata = vu_dev->pdev->dev.platform_data;
148 
149 		virtio_break_device(&vu_dev->vdev);
150 		schedule_work(&pdata->conn_broken_wk);
151 	}
152 	if (rc)
153 		return rc;
154 	size = msg->header.size;
155 	if (size > max_payload_size)
156 		return -EPROTO;
157 	return full_read(fd, &msg->payload, size, false);
158 }
159 
160 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
161 				struct vhost_user_msg *msg,
162 				size_t max_payload_size)
163 {
164 	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
165 				 max_payload_size, true);
166 
167 	if (rc)
168 		return rc;
169 
170 	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
171 		return -EPROTO;
172 
173 	return 0;
174 }
175 
176 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
177 			       u64 *value)
178 {
179 	struct vhost_user_msg msg;
180 	int rc = vhost_user_recv_resp(vu_dev, &msg,
181 				      sizeof(msg.payload.integer));
182 
183 	if (rc)
184 		return rc;
185 	if (msg.header.size != sizeof(msg.payload.integer))
186 		return -EPROTO;
187 	*value = msg.payload.integer;
188 	return 0;
189 }
190 
191 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
192 			       struct vhost_user_msg *msg,
193 			       size_t max_payload_size)
194 {
195 	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
196 				 max_payload_size, false);
197 
198 	if (rc)
199 		return rc;
200 
201 	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
202 			VHOST_USER_VERSION)
203 		return -EPROTO;
204 
205 	return 0;
206 }
207 
208 static int vhost_user_send(struct virtio_uml_device *vu_dev,
209 			   bool need_response, struct vhost_user_msg *msg,
210 			   int *fds, size_t num_fds)
211 {
212 	size_t size = sizeof(msg->header) + msg->header.size;
213 	unsigned long flags;
214 	bool request_ack;
215 	int rc;
216 
217 	msg->header.flags |= VHOST_USER_VERSION;
218 
219 	/*
220 	 * The need_response flag indicates that we already need a response,
221 	 * e.g. to read the features. In these cases, don't request an ACK as
222 	 * it is meaningless. Also request an ACK only if supported.
223 	 */
224 	request_ack = !need_response;
225 	if (!(vu_dev->protocol_features &
226 			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
227 		request_ack = false;
228 
229 	if (request_ack)
230 		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
231 
232 	spin_lock_irqsave(&vu_dev->sock_lock, flags);
233 	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
234 	if (rc < 0)
235 		goto out;
236 
237 	if (request_ack) {
238 		uint64_t status;
239 
240 		rc = vhost_user_recv_u64(vu_dev, &status);
241 		if (rc)
242 			goto out;
243 
244 		if (status) {
245 			vu_err(vu_dev, "slave reports error: %llu\n", status);
246 			rc = -EIO;
247 			goto out;
248 		}
249 	}
250 
251 out:
252 	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
253 	return rc;
254 }
255 
256 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
257 				      bool need_response, u32 request)
258 {
259 	struct vhost_user_msg msg = {
260 		.header.request = request,
261 	};
262 
263 	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
264 }
265 
266 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
267 					 u32 request, int fd)
268 {
269 	struct vhost_user_msg msg = {
270 		.header.request = request,
271 	};
272 
273 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
274 }
275 
276 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
277 			       u32 request, u64 value)
278 {
279 	struct vhost_user_msg msg = {
280 		.header.request = request,
281 		.header.size = sizeof(msg.payload.integer),
282 		.payload.integer = value,
283 	};
284 
285 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
286 }
287 
288 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
289 {
290 	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
291 }
292 
293 static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
294 				   u64 *features)
295 {
296 	int rc = vhost_user_send_no_payload(vu_dev, true,
297 					    VHOST_USER_GET_FEATURES);
298 
299 	if (rc)
300 		return rc;
301 	return vhost_user_recv_u64(vu_dev, features);
302 }
303 
304 static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
305 				   u64 features)
306 {
307 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
308 }
309 
310 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
311 					    u64 *protocol_features)
312 {
313 	int rc = vhost_user_send_no_payload(vu_dev, true,
314 			VHOST_USER_GET_PROTOCOL_FEATURES);
315 
316 	if (rc)
317 		return rc;
318 	return vhost_user_recv_u64(vu_dev, protocol_features);
319 }
320 
321 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
322 					    u64 protocol_features)
323 {
324 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
325 				   protocol_features);
326 }
327 
328 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
329 			     struct vhost_user_msg *msg, int response)
330 {
331 	struct vhost_user_msg reply = {
332 		.payload.integer = response,
333 	};
334 	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
335 	int rc;
336 
337 	reply.header = msg->header;
338 	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
339 	reply.header.flags |= VHOST_USER_FLAG_REPLY;
340 	reply.header.size = sizeof(reply.payload.integer);
341 
342 	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
343 
344 	if (rc)
345 		vu_err(vu_dev,
346 		       "sending reply to slave request failed: %d (size %zu)\n",
347 		       rc, size);
348 }
349 
350 static irqreturn_t vu_req_interrupt(int irq, void *data)
351 {
352 	struct virtio_uml_device *vu_dev = data;
353 	struct virtqueue *vq;
354 	int response = 1;
355 	struct {
356 		struct vhost_user_msg msg;
357 		u8 extra_payload[512];
358 	} msg;
359 	int rc;
360 
361 	rc = vhost_user_recv_req(vu_dev, &msg.msg,
362 				 sizeof(msg.msg.payload) +
363 				 sizeof(msg.extra_payload));
364 
365 	if (rc)
366 		return IRQ_NONE;
367 
368 	switch (msg.msg.header.request) {
369 	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
370 		virtio_config_changed(&vu_dev->vdev);
371 		response = 0;
372 		break;
373 	case VHOST_USER_SLAVE_VRING_CALL:
374 		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
375 			if (vq->index == msg.msg.payload.vring_state.index) {
376 				response = 0;
377 				vring_interrupt(0 /* ignored */, vq);
378 				break;
379 			}
380 		}
381 		break;
382 	case VHOST_USER_SLAVE_IOTLB_MSG:
383 		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
384 	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
385 		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
386 	default:
387 		vu_err(vu_dev, "unexpected slave request %d\n",
388 		       msg.msg.header.request);
389 	}
390 
391 	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
392 		vhost_user_reply(vu_dev, &msg.msg, response);
393 
394 	return IRQ_HANDLED;
395 }
396 
397 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
398 {
399 	int rc, req_fds[2];
400 
401 	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
402 	rc = os_pipe(req_fds, true, true);
403 	if (rc < 0)
404 		return rc;
405 	vu_dev->req_fd = req_fds[0];
406 
407 	rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
408 			    vu_req_interrupt, IRQF_SHARED,
409 			    vu_dev->pdev->name, vu_dev);
410 	if (rc < 0)
411 		goto err_close;
412 
413 	vu_dev->irq = rc;
414 
415 	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
416 					   req_fds[1]);
417 	if (rc)
418 		goto err_free_irq;
419 
420 	goto out;
421 
422 err_free_irq:
423 	um_free_irq(vu_dev->irq, vu_dev);
424 err_close:
425 	os_close_file(req_fds[0]);
426 out:
427 	/* Close unused write end of request fds */
428 	os_close_file(req_fds[1]);
429 	return rc;
430 }
431 
432 static int vhost_user_init(struct virtio_uml_device *vu_dev)
433 {
434 	int rc = vhost_user_set_owner(vu_dev);
435 
436 	if (rc)
437 		return rc;
438 	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
439 	if (rc)
440 		return rc;
441 
442 	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
443 		rc = vhost_user_get_protocol_features(vu_dev,
444 				&vu_dev->protocol_features);
445 		if (rc)
446 			return rc;
447 		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
448 		rc = vhost_user_set_protocol_features(vu_dev,
449 				vu_dev->protocol_features);
450 		if (rc)
451 			return rc;
452 	}
453 
454 	if (vu_dev->protocol_features &
455 			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
456 		rc = vhost_user_init_slave_req(vu_dev);
457 		if (rc)
458 			return rc;
459 	}
460 
461 	return 0;
462 }
463 
464 static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
465 				  u32 offset, void *buf, u32 len)
466 {
467 	u32 cfg_size = offset + len;
468 	struct vhost_user_msg *msg;
469 	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
470 	size_t msg_size = sizeof(msg->header) + payload_size;
471 	int rc;
472 
473 	if (!(vu_dev->protocol_features &
474 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
475 		return;
476 
477 	msg = kzalloc(msg_size, GFP_KERNEL);
478 	if (!msg)
479 		return;
480 	msg->header.request = VHOST_USER_GET_CONFIG;
481 	msg->header.size = payload_size;
482 	msg->payload.config.offset = 0;
483 	msg->payload.config.size = cfg_size;
484 
485 	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
486 	if (rc) {
487 		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
488 		       rc);
489 		goto free;
490 	}
491 
492 	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
493 	if (rc) {
494 		vu_err(vu_dev,
495 		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
496 		       rc);
497 		goto free;
498 	}
499 
500 	if (msg->header.size != payload_size ||
501 	    msg->payload.config.size != cfg_size) {
502 		rc = -EPROTO;
503 		vu_err(vu_dev,
504 		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
505 		       msg->header.size, payload_size,
506 		       msg->payload.config.size, cfg_size);
507 		goto free;
508 	}
509 	memcpy(buf, msg->payload.config.payload + offset, len);
510 
511 free:
512 	kfree(msg);
513 }
514 
515 static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
516 				  u32 offset, const void *buf, u32 len)
517 {
518 	struct vhost_user_msg *msg;
519 	size_t payload_size = sizeof(msg->payload.config) + len;
520 	size_t msg_size = sizeof(msg->header) + payload_size;
521 	int rc;
522 
523 	if (!(vu_dev->protocol_features &
524 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
525 		return;
526 
527 	msg = kzalloc(msg_size, GFP_KERNEL);
528 	if (!msg)
529 		return;
530 	msg->header.request = VHOST_USER_SET_CONFIG;
531 	msg->header.size = payload_size;
532 	msg->payload.config.offset = offset;
533 	msg->payload.config.size = len;
534 	memcpy(msg->payload.config.payload, buf, len);
535 
536 	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
537 	if (rc)
538 		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
539 		       rc);
540 
541 	kfree(msg);
542 }
543 
544 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
545 				      struct vhost_user_mem_region *region_out)
546 {
547 	unsigned long long mem_offset;
548 	int rc = phys_mapping(addr, &mem_offset);
549 
550 	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
551 		return -EFAULT;
552 	*fd_out = rc;
553 	region_out->guest_addr = addr;
554 	region_out->user_addr = addr;
555 	region_out->size = size;
556 	region_out->mmap_offset = mem_offset;
557 
558 	/* Ensure mapping is valid for the entire region */
559 	rc = phys_mapping(addr + size - 1, &mem_offset);
560 	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
561 		 addr + size - 1, rc, *fd_out))
562 		return -EFAULT;
563 	return 0;
564 }
565 
566 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
567 {
568 	struct vhost_user_msg msg = {
569 		.header.request = VHOST_USER_SET_MEM_TABLE,
570 		.header.size = sizeof(msg.payload.mem_regions),
571 		.payload.mem_regions.num = 1,
572 	};
573 	unsigned long reserved = uml_reserved - uml_physmem;
574 	int fds[2];
575 	int rc;
576 
577 	/*
578 	 * This is a bit tricky, see also the comment with setup_physmem().
579 	 *
580 	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
581 	 * but the code and data we *already* have is omitted. To us, this
582 	 * is no difference, since they both become part of our address
583 	 * space and memory consumption. To somebody looking in from the
584 	 * outside, however, it is different because the part of our memory
585 	 * consumption that's already part of the binary (code/data) is not
586 	 * mapped from the file, so it's not visible to another mmap from
587 	 * the file descriptor.
588 	 *
589 	 * Thus, don't advertise this space to the vhost-user slave. This
590 	 * means that the slave will likely abort or similar when we give
591 	 * it an address from the hidden range, since it's not marked as
592 	 * a valid address, but at least that way we detect the issue and
593 	 * don't just have the slave read an all-zeroes buffer from the
594 	 * shared memory file, or write something there that we can never
595 	 * see (depending on the direction of the virtqueue traffic.)
596 	 *
597 	 * Since we usually don't want to use .text for virtio buffers,
598 	 * this effectively means that you cannot use
599 	 *  1) global variables, which are in the .bss and not in the shm
600 	 *     file-backed memory
601 	 *  2) the stack in some processes, depending on where they have
602 	 *     their stack (or maybe only no interrupt stack?)
603 	 *
604 	 * The stack is already not typically valid for DMA, so this isn't
605 	 * much of a restriction, but global variables might be encountered.
606 	 *
607 	 * It might be possible to fix it by copying around the data that's
608 	 * between bss_start and where we map the file now, but it's not
609 	 * something that you typically encounter with virtio drivers, so
610 	 * it didn't seem worthwhile.
611 	 */
612 	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
613 					&fds[0],
614 					&msg.payload.mem_regions.regions[0]);
615 
616 	if (rc < 0)
617 		return rc;
618 	if (highmem) {
619 		msg.payload.mem_regions.num++;
620 		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
621 				&fds[1], &msg.payload.mem_regions.regions[1]);
622 		if (rc < 0)
623 			return rc;
624 	}
625 
626 	return vhost_user_send(vu_dev, false, &msg, fds,
627 			       msg.payload.mem_regions.num);
628 }
629 
630 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
631 				      u32 request, u32 index, u32 num)
632 {
633 	struct vhost_user_msg msg = {
634 		.header.request = request,
635 		.header.size = sizeof(msg.payload.vring_state),
636 		.payload.vring_state.index = index,
637 		.payload.vring_state.num = num,
638 	};
639 
640 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
641 }
642 
643 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
644 				    u32 index, u32 num)
645 {
646 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
647 					  index, num);
648 }
649 
650 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
651 				     u32 index, u32 offset)
652 {
653 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
654 					  index, offset);
655 }
656 
657 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
658 				     u32 index, u64 desc, u64 used, u64 avail,
659 				     u64 log)
660 {
661 	struct vhost_user_msg msg = {
662 		.header.request = VHOST_USER_SET_VRING_ADDR,
663 		.header.size = sizeof(msg.payload.vring_addr),
664 		.payload.vring_addr.index = index,
665 		.payload.vring_addr.desc = desc,
666 		.payload.vring_addr.used = used,
667 		.payload.vring_addr.avail = avail,
668 		.payload.vring_addr.log = log,
669 	};
670 
671 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
672 }
673 
674 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
675 				   u32 request, int index, int fd)
676 {
677 	struct vhost_user_msg msg = {
678 		.header.request = request,
679 		.header.size = sizeof(msg.payload.integer),
680 		.payload.integer = index,
681 	};
682 
683 	if (index & ~VHOST_USER_VRING_INDEX_MASK)
684 		return -EINVAL;
685 	if (fd < 0) {
686 		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
687 		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
688 	}
689 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
690 }
691 
692 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
693 				     int index, int fd)
694 {
695 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
696 				       index, fd);
697 }
698 
699 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
700 				     int index, int fd)
701 {
702 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
703 				       index, fd);
704 }
705 
706 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
707 				       u32 index, bool enable)
708 {
709 	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
710 		return 0;
711 
712 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
713 					  index, enable);
714 }
715 
716 
717 /* Virtio interface */
718 
719 static bool vu_notify(struct virtqueue *vq)
720 {
721 	struct virtio_uml_vq_info *info = vq->priv;
722 	const uint64_t n = 1;
723 	int rc;
724 
725 	time_travel_propagate_time();
726 
727 	if (info->kick_fd < 0) {
728 		struct virtio_uml_device *vu_dev;
729 
730 		vu_dev = to_virtio_uml_device(vq->vdev);
731 
732 		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
733 						  vq->index, 0) == 0;
734 	}
735 
736 	do {
737 		rc = os_write_file(info->kick_fd, &n, sizeof(n));
738 	} while (rc == -EINTR);
739 	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
740 }
741 
742 static irqreturn_t vu_interrupt(int irq, void *opaque)
743 {
744 	struct virtqueue *vq = opaque;
745 	struct virtio_uml_vq_info *info = vq->priv;
746 	uint64_t n;
747 	int rc;
748 	irqreturn_t ret = IRQ_NONE;
749 
750 	do {
751 		rc = os_read_file(info->call_fd, &n, sizeof(n));
752 		if (rc == sizeof(n))
753 			ret |= vring_interrupt(irq, vq);
754 	} while (rc == sizeof(n) || rc == -EINTR);
755 	WARN(rc != -EAGAIN, "read returned %d\n", rc);
756 	return ret;
757 }
758 
759 
/* virtio config op: read @len bytes of config space at @offset into @buf. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
767 
/* virtio config op: write @len bytes from @buf to config space at @offset. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
775 
776 static u8 vu_get_status(struct virtio_device *vdev)
777 {
778 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
779 
780 	return vu_dev->status;
781 }
782 
783 static void vu_set_status(struct virtio_device *vdev, u8 status)
784 {
785 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
786 
787 	vu_dev->status = status;
788 }
789 
790 static void vu_reset(struct virtio_device *vdev)
791 {
792 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
793 
794 	vu_dev->status = 0;
795 }
796 
797 static void vu_del_vq(struct virtqueue *vq)
798 {
799 	struct virtio_uml_vq_info *info = vq->priv;
800 
801 	if (info->call_fd >= 0) {
802 		struct virtio_uml_device *vu_dev;
803 
804 		vu_dev = to_virtio_uml_device(vq->vdev);
805 
806 		um_free_irq(vu_dev->irq, vq);
807 		os_close_file(info->call_fd);
808 	}
809 
810 	if (info->kick_fd >= 0)
811 		os_close_file(info->kick_fd);
812 
813 	vring_del_virtqueue(vq);
814 	kfree(info);
815 }
816 
817 static void vu_del_vqs(struct virtio_device *vdev)
818 {
819 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
820 	struct virtqueue *vq, *n;
821 	u64 features;
822 
823 	/* Note: reverse order as a workaround to a decoding bug in snabb */
824 	list_for_each_entry_reverse(vq, &vdev->vqs, list)
825 		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
826 
827 	/* Ensure previous messages have been processed */
828 	WARN_ON(vhost_user_get_features(vu_dev, &features));
829 
830 	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
831 		vu_del_vq(vq);
832 }
833 
834 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
835 			       struct virtqueue *vq)
836 {
837 	struct virtio_uml_vq_info *info = vq->priv;
838 	int call_fds[2];
839 	int rc;
840 
841 	/* no call FD needed/desired in this case */
842 	if (vu_dev->protocol_features &
843 			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
844 	    vu_dev->protocol_features &
845 			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
846 		info->call_fd = -1;
847 		return 0;
848 	}
849 
850 	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
851 	rc = os_pipe(call_fds, true, true);
852 	if (rc < 0)
853 		return rc;
854 
855 	info->call_fd = call_fds[0];
856 	rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
857 			    vu_interrupt, IRQF_SHARED, info->name, vq);
858 	if (rc < 0)
859 		goto close_both;
860 
861 	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
862 	if (rc)
863 		goto release_irq;
864 
865 	goto out;
866 
867 release_irq:
868 	um_free_irq(vu_dev->irq, vq);
869 close_both:
870 	os_close_file(call_fds[0]);
871 out:
872 	/* Close (unused) write end of call fds */
873 	os_close_file(call_fds[1]);
874 
875 	return rc;
876 }
877 
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/* Time-travel event handler: run the queue's real callback. */
static void vu_defer_irq_handle(struct time_travel_event *d)
{
	struct virtio_uml_vq_info *info =
		container_of(d, struct virtio_uml_vq_info, defer);

	info->callback(info->vq);
}

/*
 * Installed as the vring callback instead of the real one: queue the
 * per-vq deferral event rather than calling the real callback directly.
 */
static void vu_defer_irq_callback(struct virtqueue *vq)
{
	struct virtio_uml_vq_info *info = vq->priv;

	time_travel_add_irq_event(&info->defer);
}
#endif
894 
895 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
896 				     unsigned index, vq_callback_t *callback,
897 				     const char *name, bool ctx)
898 {
899 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
900 	struct platform_device *pdev = vu_dev->pdev;
901 	struct virtio_uml_vq_info *info;
902 	struct virtqueue *vq;
903 	int num = MAX_SUPPORTED_QUEUE_SIZE;
904 	int rc;
905 
906 	info = kzalloc(sizeof(*info), GFP_KERNEL);
907 	if (!info) {
908 		rc = -ENOMEM;
909 		goto error_kzalloc;
910 	}
911 	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
912 		 pdev->id, name);
913 
914 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
915 	/*
916 	 * When we get an interrupt, we must bounce it through the simulation
917 	 * calendar (the simtime device), except for the simtime device itself
918 	 * since that's part of the simulation control.
919 	 */
920 	if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
921 		info->callback = callback;
922 		callback = vu_defer_irq_callback;
923 		time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
924 	}
925 #endif
926 
927 	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
928 				    ctx, vu_notify, callback, info->name);
929 	if (!vq) {
930 		rc = -ENOMEM;
931 		goto error_create;
932 	}
933 	vq->priv = info;
934 	num = virtqueue_get_vring_size(vq);
935 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
936 	info->vq = vq;
937 #endif
938 
939 	if (vu_dev->protocol_features &
940 			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
941 		info->kick_fd = -1;
942 	} else {
943 		rc = os_eventfd(0, 0);
944 		if (rc < 0)
945 			goto error_kick;
946 		info->kick_fd = rc;
947 	}
948 
949 	rc = vu_setup_vq_call_fd(vu_dev, vq);
950 	if (rc)
951 		goto error_call;
952 
953 	rc = vhost_user_set_vring_num(vu_dev, index, num);
954 	if (rc)
955 		goto error_setup;
956 
957 	rc = vhost_user_set_vring_base(vu_dev, index, 0);
958 	if (rc)
959 		goto error_setup;
960 
961 	rc = vhost_user_set_vring_addr(vu_dev, index,
962 				       virtqueue_get_desc_addr(vq),
963 				       virtqueue_get_used_addr(vq),
964 				       virtqueue_get_avail_addr(vq),
965 				       (u64) -1);
966 	if (rc)
967 		goto error_setup;
968 
969 	return vq;
970 
971 error_setup:
972 	if (info->call_fd >= 0) {
973 		um_free_irq(vu_dev->irq, vq);
974 		os_close_file(info->call_fd);
975 	}
976 error_call:
977 	if (info->kick_fd >= 0)
978 		os_close_file(info->kick_fd);
979 error_kick:
980 	vring_del_virtqueue(vq);
981 error_create:
982 	kfree(info);
983 error_kzalloc:
984 	return ERR_PTR(rc);
985 }
986 
987 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
988 		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
989 		       const char * const names[], const bool *ctx,
990 		       struct irq_affinity *desc)
991 {
992 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
993 	int i, queue_idx = 0, rc;
994 	struct virtqueue *vq;
995 
996 	rc = vhost_user_set_mem_table(vu_dev);
997 	if (rc)
998 		return rc;
999 
1000 	for (i = 0; i < nvqs; ++i) {
1001 		if (!names[i]) {
1002 			vqs[i] = NULL;
1003 			continue;
1004 		}
1005 
1006 		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1007 				     ctx ? ctx[i] : false);
1008 		if (IS_ERR(vqs[i])) {
1009 			rc = PTR_ERR(vqs[i]);
1010 			goto error_setup;
1011 		}
1012 	}
1013 
1014 	list_for_each_entry(vq, &vdev->vqs, list) {
1015 		struct virtio_uml_vq_info *info = vq->priv;
1016 
1017 		if (info->kick_fd >= 0) {
1018 			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1019 						       info->kick_fd);
1020 			if (rc)
1021 				goto error_setup;
1022 		}
1023 
1024 		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1025 		if (rc)
1026 			goto error_setup;
1027 	}
1028 
1029 	return 0;
1030 
1031 error_setup:
1032 	vu_del_vqs(vdev);
1033 	return rc;
1034 }
1035 
1036 static u64 vu_get_features(struct virtio_device *vdev)
1037 {
1038 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1039 
1040 	return vu_dev->features;
1041 }
1042 
1043 static int vu_finalize_features(struct virtio_device *vdev)
1044 {
1045 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1046 	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1047 
1048 	vring_transport_features(vdev);
1049 	vu_dev->features = vdev->features | supported;
1050 
1051 	return vhost_user_set_features(vu_dev, vu_dev->features);
1052 }
1053 
1054 static const char *vu_bus_name(struct virtio_device *vdev)
1055 {
1056 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1057 
1058 	return vu_dev->pdev->name;
1059 }
1060 
1061 static const struct virtio_config_ops virtio_uml_config_ops = {
1062 	.get = vu_get,
1063 	.set = vu_set,
1064 	.get_status = vu_get_status,
1065 	.set_status = vu_set_status,
1066 	.reset = vu_reset,
1067 	.find_vqs = vu_find_vqs,
1068 	.del_vqs = vu_del_vqs,
1069 	.get_features = vu_get_features,
1070 	.finalize_features = vu_finalize_features,
1071 	.bus_name = vu_bus_name,
1072 };
1073 
1074 static void virtio_uml_release_dev(struct device *d)
1075 {
1076 	struct virtio_device *vdev =
1077 			container_of(d, struct virtio_device, dev);
1078 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1079 
1080 	/* might not have been opened due to not negotiating the feature */
1081 	if (vu_dev->req_fd >= 0) {
1082 		um_free_irq(vu_dev->irq, vu_dev);
1083 		os_close_file(vu_dev->req_fd);
1084 	}
1085 
1086 	os_close_file(vu_dev->sock);
1087 }
1088 
1089 /* Platform device */
1090 
1091 static int virtio_uml_probe(struct platform_device *pdev)
1092 {
1093 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1094 	struct virtio_uml_device *vu_dev;
1095 	int rc;
1096 
1097 	if (!pdata)
1098 		return -EINVAL;
1099 
1100 	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
1101 	if (!vu_dev)
1102 		return -ENOMEM;
1103 
1104 	vu_dev->vdev.dev.parent = &pdev->dev;
1105 	vu_dev->vdev.dev.release = virtio_uml_release_dev;
1106 	vu_dev->vdev.config = &virtio_uml_config_ops;
1107 	vu_dev->vdev.id.device = pdata->virtio_device_id;
1108 	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1109 	vu_dev->pdev = pdev;
1110 	vu_dev->req_fd = -1;
1111 
1112 	do {
1113 		rc = os_connect_socket(pdata->socket_path);
1114 	} while (rc == -EINTR);
1115 	if (rc < 0)
1116 		return rc;
1117 	vu_dev->sock = rc;
1118 
1119 	spin_lock_init(&vu_dev->sock_lock);
1120 
1121 	rc = vhost_user_init(vu_dev);
1122 	if (rc)
1123 		goto error_init;
1124 
1125 	platform_set_drvdata(pdev, vu_dev);
1126 
1127 	rc = register_virtio_device(&vu_dev->vdev);
1128 	if (rc)
1129 		put_device(&vu_dev->vdev.dev);
1130 	vu_dev->registered = 1;
1131 	return rc;
1132 
1133 error_init:
1134 	os_close_file(vu_dev->sock);
1135 	return rc;
1136 }
1137 
1138 static int virtio_uml_remove(struct platform_device *pdev)
1139 {
1140 	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1141 
1142 	unregister_virtio_device(&vu_dev->vdev);
1143 	return 0;
1144 }
1145 
1146 /* Command line device list */
1147 
/*
 * Release callback of vu_cmdline_parent. Intentionally empty: that device is
 * a static object in this file, so there is nothing to free.
 */
static void vu_cmdline_release_dev(struct device *d)
{
}
1151 
/*
 * Parent device under which all platform devices created from the
 * "device" parameter are registered (see vu_cmdline_set()).
 */
static struct device vu_cmdline_parent = {
	.init_name = "virtio-uml-cmdline",
	.release = vu_cmdline_release_dev,
};

/* True once vu_cmdline_parent has been registered successfully. */
static bool vu_cmdline_parent_registered;
/*
 * Platform device id used for the next command-line device; optionally
 * overridden by the parameter and incremented on each registration attempt.
 */
static int vu_cmdline_id;
1159 
1160 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1161 {
1162 	struct platform_device *pdev = to_platform_device(dev);
1163 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1164 
1165 	kfree(pdata->socket_path);
1166 	platform_device_unregister(pdev);
1167 	return 0;
1168 }
1169 
1170 static void vu_conn_broken(struct work_struct *wk)
1171 {
1172 	struct virtio_uml_platform_data *pdata;
1173 
1174 	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1175 	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1176 }
1177 
1178 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1179 {
1180 	const char *ids = strchr(device, ':');
1181 	unsigned int virtio_device_id;
1182 	int processed, consumed, err;
1183 	char *socket_path;
1184 	struct virtio_uml_platform_data pdata, *ppdata;
1185 	struct platform_device *pdev;
1186 
1187 	if (!ids || ids == device)
1188 		return -EINVAL;
1189 
1190 	processed = sscanf(ids, ":%u%n:%d%n",
1191 			   &virtio_device_id, &consumed,
1192 			   &vu_cmdline_id, &consumed);
1193 
1194 	if (processed < 1 || ids[consumed])
1195 		return -EINVAL;
1196 
1197 	if (!vu_cmdline_parent_registered) {
1198 		err = device_register(&vu_cmdline_parent);
1199 		if (err) {
1200 			pr_err("Failed to register parent device!\n");
1201 			put_device(&vu_cmdline_parent);
1202 			return err;
1203 		}
1204 		vu_cmdline_parent_registered = true;
1205 	}
1206 
1207 	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1208 	if (!socket_path)
1209 		return -ENOMEM;
1210 
1211 	pdata.virtio_device_id = (u32) virtio_device_id;
1212 	pdata.socket_path = socket_path;
1213 
1214 	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1215 		vu_cmdline_id, virtio_device_id, socket_path);
1216 
1217 	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1218 					     vu_cmdline_id++, &pdata,
1219 					     sizeof(pdata));
1220 	err = PTR_ERR_OR_ZERO(pdev);
1221 	if (err)
1222 		goto free;
1223 
1224 	ppdata = pdev->dev.platform_data;
1225 	ppdata->pdev = pdev;
1226 	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1227 
1228 	return 0;
1229 
1230 free:
1231 	kfree(socket_path);
1232 	return err;
1233 }
1234 
1235 static int vu_cmdline_get_device(struct device *dev, void *data)
1236 {
1237 	struct platform_device *pdev = to_platform_device(dev);
1238 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1239 	char *buffer = data;
1240 	unsigned int len = strlen(buffer);
1241 
1242 	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1243 		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1244 	return 0;
1245 }
1246 
1247 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1248 {
1249 	buffer[0] = '\0';
1250 	if (vu_cmdline_parent_registered)
1251 		device_for_each_child(&vu_cmdline_parent, buffer,
1252 				      vu_cmdline_get_device);
1253 	return strlen(buffer) + 1;
1254 }
1255 
/* Set/get handlers backing the "device" parameter. */
static const struct kernel_param_ops vu_cmdline_param_ops = {
	.set = vu_cmdline_set,
	.get = vu_cmdline_get,
};
1260 
/*
 * Hook the handlers up as the "device" parameter (S_IRUSR permission) and
 * attach the help text describing its syntax.
 */
device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
__uml_help(vu_cmdline_param_ops,
"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
"    Configure a virtio device over a vhost-user socket.\n"
"    See virtio_ids.h for a list of possible virtio device id values.\n"
"    Optionally use a specific platform_device id.\n\n"
);
1268 
1269 
1270 static void vu_unregister_cmdline_devices(void)
1271 {
1272 	if (vu_cmdline_parent_registered) {
1273 		device_for_each_child(&vu_cmdline_parent, NULL,
1274 				      vu_unregister_cmdline_device);
1275 		device_unregister(&vu_cmdline_parent);
1276 		vu_cmdline_parent_registered = false;
1277 	}
1278 }
1279 
1280 /* Platform driver */
1281 
/* Device-tree compatible strings this driver matches; empty entry terminates. */
static const struct of_device_id virtio_uml_match[] = {
	{ .compatible = "virtio,uml", },
	{ }
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);
1287 
/*
 * Platform driver definition. The driver name "virtio-uml" is the same name
 * vu_cmdline_set() gives to the platform devices it registers.
 */
static struct platform_driver virtio_uml_driver = {
	.probe = virtio_uml_probe,
	.remove = virtio_uml_remove,
	.driver = {
		.name = "virtio-uml",
		.of_match_table = virtio_uml_match,
	},
};
1296 
1297 static int __init virtio_uml_init(void)
1298 {
1299 	return platform_driver_register(&virtio_uml_driver);
1300 }
1301 
/*
 * Module exit: unregister the platform driver first, then remove the
 * command-line devices together with their parent device.
 */
static void __exit virtio_uml_exit(void)
{
	platform_driver_unregister(&virtio_uml_driver);
	vu_unregister_cmdline_devices();
}
1307 
1308 module_init(virtio_uml_init);
1309 module_exit(virtio_uml_exit);
1310 __uml_exitcall(virtio_uml_exit);
1311 
1312 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1313 MODULE_LICENSE("GPL");
1314