xref: /openbmc/linux/arch/um/drivers/virtio_uml.c (revision 82df5b73)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio vhost-user driver
4  *
5  * Copyright(c) 2019 Intel Corporation
6  *
7  * This driver allows virtio devices to be used over a vhost-user socket.
8  *
9  * Guest devices can be instantiated by kernel module or command line
10  * parameters. One device will be created for each parameter. Syntax:
11  *
12  *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13  * where:
14  *		<socket>	:= vhost-user socket path to connect
15  *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
16  *		<platform_id>	:= (optional) platform device id
17  *
18  * example:
19  *		virtio_uml.device=/var/uml.socket:1
20  *
21  * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22  */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <linux/time-internal.h>
30 #include <shared/as-layout.h>
31 #include <irq_kern.h>
32 #include <init.h>
33 #include <os.h>
34 #include "vhost_user.h"
35 
36 /* Workaround due to a conflict between irq_user.h and irqreturn.h */
37 #ifdef IRQ_NONE
38 #undef IRQ_NONE
39 #endif
40 
/* Ring size that vu_setup_vq() asks vring_create_virtqueue() for */
41 #define MAX_SUPPORTED_QUEUE_SIZE	256
42 
/* Get the struct virtio_uml_device that embeds the given struct virtio_device */
43 #define to_virtio_uml_device(_vdev) \
44 	container_of(_vdev, struct virtio_uml_device, vdev)
45 
/*
 * Platform data attached to each virtio-uml platform device.
 *
 * virtio_device_id: virtio device id, copied to vdev.id.device in probe
 * socket_path:      vhost-user socket path that probe connects to
 * conn_broken_wk:   work scheduled by vhost_user_recv() when reading a
 *                   message header fails with -ECONNRESET on a registered
 *                   device
 * pdev:             platform device pointer; not used in the code visible
 *                   here - TODO confirm against the work handler
 */
46 struct virtio_uml_platform_data {
47 	u32 virtio_device_id;
48 	const char *socket_path;
49 	struct work_struct conn_broken_wk;
50 	struct platform_device *pdev;
51 };
52 
/*
 * Per-device state of a vhost-user backed virtio device.
 *
 * vdev:              embedded virtio device (see to_virtio_uml_device())
 * pdev:              owning platform device, also used for vu_err() logging
 * sock_lock:         taken by vhost_user_send() around sending a message and
 *                    reading its optional ACK
 * sock:              connected vhost-user socket
 * req_fd:            read end of the slave request pipe, -1 if not set up
 * features:          feature bits read from the backend, later replaced by
 *                    the set chosen in vu_finalize_features()
 * protocol_features: vhost-user protocol features, masked with
 *                    VHOST_USER_SUPPORTED_PROTOCOL_F in vhost_user_init()
 * status:            virtio status byte as stored by vu_set_status() and
 *                    cleared by vu_reset()
 * registered:        set by probe after register_virtio_device(); checked in
 *                    vhost_user_recv() before reporting a broken connection
 */
53 struct virtio_uml_device {
54 	struct virtio_device vdev;
55 	struct platform_device *pdev;
56 
57 	spinlock_t sock_lock;
58 	int sock, req_fd;
59 	u64 features;
60 	u64 protocol_features;
61 	u8 status;
62 	u8 registered:1;
63 };
64 
/*
 * Per-virtqueue private data, stored in vq->priv.
 *
 * kick_fd:  eventfd written by vu_notify(), or -1 when kicks are sent as
 *           in-band VHOST_USER_VRING_KICK messages
 * call_fd:  read end of the call pipe handled by vu_interrupt(), or -1 when
 *           no call fd is set up
 * name:     "<pdev name>.<pdev id>-<queue name>", used as the virtqueue name
 *           and as the name of the call IRQ
 * vq, callback, defer (time-travel support only): the virtqueue, the
 *           driver's original callback and the time-travel event used by
 *           vu_defer_irq_callback()/vu_defer_irq_handle() to defer it
 */
65 struct virtio_uml_vq_info {
66 	int kick_fd, call_fd;
67 	char name[32];
68 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
69 	struct virtqueue *vq;
70 	vq_callback_t *callback;
71 	struct time_travel_event defer;
72 #endif
73 };
74 
/* Defined outside this file; used by vhost_user_set_mem_table() to size the regions */
75 extern unsigned long long physmem_size, highmem;
76 
/* dev_err() on the platform device behind a struct virtio_uml_device */
77 #define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
78 
79 /* Vhost-user protocol */
80 
81 static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
82 			    const int *fds, unsigned int fds_num)
83 {
84 	int rc;
85 
86 	do {
87 		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
88 		if (rc > 0) {
89 			buf += rc;
90 			len -= rc;
91 			fds = NULL;
92 			fds_num = 0;
93 		}
94 	} while (len && (rc >= 0 || rc == -EINTR));
95 
96 	if (rc < 0)
97 		return rc;
98 	return 0;
99 }
100 
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * Short reads are continued and -EINTR is retried. -EAGAIN is retried too,
 * unless @abortable is set, in which case it is returned to the caller.
 *
 * Returns 0 once everything was read (trivially so for @len == 0),
 * -ECONNRESET if os_read_file() returned 0 before that, or the negative
 * error returned by os_read_file().
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
	int rc;

	/*
	 * Nothing to read, e.g. a message whose header announces an empty
	 * payload. A zero-length read returns 0, which the code below cannot
	 * tell apart from the peer closing the connection and would report
	 * as -ECONNRESET.
	 */
	if (!len)
		return 0;

	do {
		rc = os_read_file(fd, buf, len);
		if (rc > 0) {
			buf += rc;
			len -= rc;
		}
	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));

	if (rc < 0)
		return rc;
	if (rc == 0)
		return -ECONNRESET;
	return 0;
}
119 
120 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
121 {
122 	return full_read(fd, msg, sizeof(msg->header), true);
123 }
124 
125 static int vhost_user_recv(struct virtio_uml_device *vu_dev,
126 			   int fd, struct vhost_user_msg *msg,
127 			   size_t max_payload_size, bool wait)
128 {
129 	size_t size;
130 	int rc;
131 
132 	/*
133 	 * In virtio time-travel mode, we're handling all the vhost-user
134 	 * FDs by polling them whenever appropriate. However, we may get
135 	 * into a situation where we're sending out an interrupt message
136 	 * to a device (e.g. a net device) and need to handle a simulation
137 	 * time message while doing so, e.g. one that tells us to update
138 	 * our idea of how long we can run without scheduling.
139 	 *
140 	 * Thus, we need to not just read() from the given fd, but need
141 	 * to also handle messages for the simulation time - this function
142 	 * does that for us while waiting for the given fd to be readable.
143 	 */
144 	if (wait)
145 		time_travel_wait_readable(fd);
146 
147 	rc = vhost_user_recv_header(fd, msg);
148 
149 	if (rc == -ECONNRESET && vu_dev->registered) {
150 		struct virtio_uml_platform_data *pdata;
151 
152 		pdata = vu_dev->pdev->dev.platform_data;
153 
154 		virtio_break_device(&vu_dev->vdev);
155 		schedule_work(&pdata->conn_broken_wk);
156 	}
157 	if (rc)
158 		return rc;
159 	size = msg->header.size;
160 	if (size > max_payload_size)
161 		return -EPROTO;
162 	return full_read(fd, &msg->payload, size, false);
163 }
164 
165 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
166 				struct vhost_user_msg *msg,
167 				size_t max_payload_size)
168 {
169 	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
170 				 max_payload_size, true);
171 
172 	if (rc)
173 		return rc;
174 
175 	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
176 		return -EPROTO;
177 
178 	return 0;
179 }
180 
181 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
182 			       u64 *value)
183 {
184 	struct vhost_user_msg msg;
185 	int rc = vhost_user_recv_resp(vu_dev, &msg,
186 				      sizeof(msg.payload.integer));
187 
188 	if (rc)
189 		return rc;
190 	if (msg.header.size != sizeof(msg.payload.integer))
191 		return -EPROTO;
192 	*value = msg.payload.integer;
193 	return 0;
194 }
195 
196 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
197 			       struct vhost_user_msg *msg,
198 			       size_t max_payload_size)
199 {
200 	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
201 				 max_payload_size, false);
202 
203 	if (rc)
204 		return rc;
205 
206 	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
207 			VHOST_USER_VERSION)
208 		return -EPROTO;
209 
210 	return 0;
211 }
212 
213 static int vhost_user_send(struct virtio_uml_device *vu_dev,
214 			   bool need_response, struct vhost_user_msg *msg,
215 			   int *fds, size_t num_fds)
216 {
217 	size_t size = sizeof(msg->header) + msg->header.size;
218 	unsigned long flags;
219 	bool request_ack;
220 	int rc;
221 
222 	msg->header.flags |= VHOST_USER_VERSION;
223 
224 	/*
225 	 * The need_response flag indicates that we already need a response,
226 	 * e.g. to read the features. In these cases, don't request an ACK as
227 	 * it is meaningless. Also request an ACK only if supported.
228 	 */
229 	request_ack = !need_response;
230 	if (!(vu_dev->protocol_features &
231 			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
232 		request_ack = false;
233 
234 	if (request_ack)
235 		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
236 
237 	spin_lock_irqsave(&vu_dev->sock_lock, flags);
238 	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
239 	if (rc < 0)
240 		goto out;
241 
242 	if (request_ack) {
243 		uint64_t status;
244 
245 		rc = vhost_user_recv_u64(vu_dev, &status);
246 		if (rc)
247 			goto out;
248 
249 		if (status) {
250 			vu_err(vu_dev, "slave reports error: %llu\n", status);
251 			rc = -EIO;
252 			goto out;
253 		}
254 	}
255 
256 out:
257 	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
258 	return rc;
259 }
260 
261 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
262 				      bool need_response, u32 request)
263 {
264 	struct vhost_user_msg msg = {
265 		.header.request = request,
266 	};
267 
268 	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
269 }
270 
271 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
272 					 u32 request, int fd)
273 {
274 	struct vhost_user_msg msg = {
275 		.header.request = request,
276 	};
277 
278 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
279 }
280 
281 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
282 			       u32 request, u64 value)
283 {
284 	struct vhost_user_msg msg = {
285 		.header.request = request,
286 		.header.size = sizeof(msg.payload.integer),
287 		.payload.integer = value,
288 	};
289 
290 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
291 }
292 
293 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
294 {
295 	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
296 }
297 
298 static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
299 				   u64 *features)
300 {
301 	int rc = vhost_user_send_no_payload(vu_dev, true,
302 					    VHOST_USER_GET_FEATURES);
303 
304 	if (rc)
305 		return rc;
306 	return vhost_user_recv_u64(vu_dev, features);
307 }
308 
309 static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
310 				   u64 features)
311 {
312 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
313 }
314 
315 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
316 					    u64 *protocol_features)
317 {
318 	int rc = vhost_user_send_no_payload(vu_dev, true,
319 			VHOST_USER_GET_PROTOCOL_FEATURES);
320 
321 	if (rc)
322 		return rc;
323 	return vhost_user_recv_u64(vu_dev, protocol_features);
324 }
325 
326 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
327 					    u64 protocol_features)
328 {
329 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
330 				   protocol_features);
331 }
332 
333 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
334 			     struct vhost_user_msg *msg, int response)
335 {
336 	struct vhost_user_msg reply = {
337 		.payload.integer = response,
338 	};
339 	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
340 	int rc;
341 
342 	reply.header = msg->header;
343 	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
344 	reply.header.flags |= VHOST_USER_FLAG_REPLY;
345 	reply.header.size = sizeof(reply.payload.integer);
346 
347 	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
348 
349 	if (rc)
350 		vu_err(vu_dev,
351 		       "sending reply to slave request failed: %d (size %zu)\n",
352 		       rc, size);
353 }
354 
355 static irqreturn_t vu_req_interrupt(int irq, void *data)
356 {
357 	struct virtio_uml_device *vu_dev = data;
358 	struct virtqueue *vq;
359 	int response = 1;
360 	struct {
361 		struct vhost_user_msg msg;
362 		u8 extra_payload[512];
363 	} msg;
364 	int rc;
365 
366 	rc = vhost_user_recv_req(vu_dev, &msg.msg,
367 				 sizeof(msg.msg.payload) +
368 				 sizeof(msg.extra_payload));
369 
370 	if (rc)
371 		return IRQ_NONE;
372 
373 	switch (msg.msg.header.request) {
374 	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
375 		virtio_config_changed(&vu_dev->vdev);
376 		response = 0;
377 		break;
378 	case VHOST_USER_SLAVE_VRING_CALL:
379 		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
380 			if (vq->index == msg.msg.payload.vring_state.index) {
381 				response = 0;
382 				vring_interrupt(0 /* ignored */, vq);
383 				break;
384 			}
385 		}
386 		break;
387 	case VHOST_USER_SLAVE_IOTLB_MSG:
388 		/* not supported - VIRTIO_F_IOMMU_PLATFORM */
389 	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
390 		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
391 	default:
392 		vu_err(vu_dev, "unexpected slave request %d\n",
393 		       msg.msg.header.request);
394 	}
395 
396 	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
397 		vhost_user_reply(vu_dev, &msg.msg, response);
398 
399 	return IRQ_HANDLED;
400 }
401 
402 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
403 {
404 	int rc, req_fds[2];
405 
406 	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
407 	rc = os_pipe(req_fds, true, true);
408 	if (rc < 0)
409 		return rc;
410 	vu_dev->req_fd = req_fds[0];
411 
412 	rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
413 			    vu_req_interrupt, IRQF_SHARED,
414 			    vu_dev->pdev->name, vu_dev);
415 	if (rc)
416 		goto err_close;
417 
418 	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
419 					   req_fds[1]);
420 	if (rc)
421 		goto err_free_irq;
422 
423 	goto out;
424 
425 err_free_irq:
426 	um_free_irq(VIRTIO_IRQ, vu_dev);
427 err_close:
428 	os_close_file(req_fds[0]);
429 out:
430 	/* Close unused write end of request fds */
431 	os_close_file(req_fds[1]);
432 	return rc;
433 }
434 
435 static int vhost_user_init(struct virtio_uml_device *vu_dev)
436 {
437 	int rc = vhost_user_set_owner(vu_dev);
438 
439 	if (rc)
440 		return rc;
441 	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
442 	if (rc)
443 		return rc;
444 
445 	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
446 		rc = vhost_user_get_protocol_features(vu_dev,
447 				&vu_dev->protocol_features);
448 		if (rc)
449 			return rc;
450 		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
451 		rc = vhost_user_set_protocol_features(vu_dev,
452 				vu_dev->protocol_features);
453 		if (rc)
454 			return rc;
455 	}
456 
457 	if (vu_dev->protocol_features &
458 			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
459 		rc = vhost_user_init_slave_req(vu_dev);
460 		if (rc)
461 			return rc;
462 	}
463 
464 	return 0;
465 }
466 
467 static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
468 				  u32 offset, void *buf, u32 len)
469 {
470 	u32 cfg_size = offset + len;
471 	struct vhost_user_msg *msg;
472 	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
473 	size_t msg_size = sizeof(msg->header) + payload_size;
474 	int rc;
475 
476 	if (!(vu_dev->protocol_features &
477 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
478 		return;
479 
480 	msg = kzalloc(msg_size, GFP_KERNEL);
481 	if (!msg)
482 		return;
483 	msg->header.request = VHOST_USER_GET_CONFIG;
484 	msg->header.size = payload_size;
485 	msg->payload.config.offset = 0;
486 	msg->payload.config.size = cfg_size;
487 
488 	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
489 	if (rc) {
490 		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
491 		       rc);
492 		goto free;
493 	}
494 
495 	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
496 	if (rc) {
497 		vu_err(vu_dev,
498 		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
499 		       rc);
500 		goto free;
501 	}
502 
503 	if (msg->header.size != payload_size ||
504 	    msg->payload.config.size != cfg_size) {
505 		rc = -EPROTO;
506 		vu_err(vu_dev,
507 		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
508 		       msg->header.size, payload_size,
509 		       msg->payload.config.size, cfg_size);
510 		goto free;
511 	}
512 	memcpy(buf, msg->payload.config.payload + offset, len);
513 
514 free:
515 	kfree(msg);
516 }
517 
518 static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
519 				  u32 offset, const void *buf, u32 len)
520 {
521 	struct vhost_user_msg *msg;
522 	size_t payload_size = sizeof(msg->payload.config) + len;
523 	size_t msg_size = sizeof(msg->header) + payload_size;
524 	int rc;
525 
526 	if (!(vu_dev->protocol_features &
527 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
528 		return;
529 
530 	msg = kzalloc(msg_size, GFP_KERNEL);
531 	if (!msg)
532 		return;
533 	msg->header.request = VHOST_USER_SET_CONFIG;
534 	msg->header.size = payload_size;
535 	msg->payload.config.offset = offset;
536 	msg->payload.config.size = len;
537 	memcpy(msg->payload.config.payload, buf, len);
538 
539 	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
540 	if (rc)
541 		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
542 		       rc);
543 
544 	kfree(msg);
545 }
546 
547 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
548 				      struct vhost_user_mem_region *region_out)
549 {
550 	unsigned long long mem_offset;
551 	int rc = phys_mapping(addr, &mem_offset);
552 
553 	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
554 		return -EFAULT;
555 	*fd_out = rc;
556 	region_out->guest_addr = addr;
557 	region_out->user_addr = addr;
558 	region_out->size = size;
559 	region_out->mmap_offset = mem_offset;
560 
561 	/* Ensure mapping is valid for the entire region */
562 	rc = phys_mapping(addr + size - 1, &mem_offset);
563 	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
564 		 addr + size - 1, rc, *fd_out))
565 		return -EFAULT;
566 	return 0;
567 }
568 
569 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
570 {
571 	struct vhost_user_msg msg = {
572 		.header.request = VHOST_USER_SET_MEM_TABLE,
573 		.header.size = sizeof(msg.payload.mem_regions),
574 		.payload.mem_regions.num = 1,
575 	};
576 	unsigned long reserved = uml_reserved - uml_physmem;
577 	int fds[2];
578 	int rc;
579 
580 	/*
581 	 * This is a bit tricky, see also the comment with setup_physmem().
582 	 *
583 	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
584 	 * but the code and data we *already* have is omitted. To us, this
585 	 * is no difference, since they both become part of our address
586 	 * space and memory consumption. To somebody looking in from the
587 	 * outside, however, it is different because the part of our memory
588 	 * consumption that's already part of the binary (code/data) is not
589 	 * mapped from the file, so it's not visible to another mmap from
590 	 * the file descriptor.
591 	 *
592 	 * Thus, don't advertise this space to the vhost-user slave. This
593 	 * means that the slave will likely abort or similar when we give
594 	 * it an address from the hidden range, since it's not marked as
595 	 * a valid address, but at least that way we detect the issue and
596 	 * don't just have the slave read an all-zeroes buffer from the
597 	 * shared memory file, or write something there that we can never
598 	 * see (depending on the direction of the virtqueue traffic.)
599 	 *
600 	 * Since we usually don't want to use .text for virtio buffers,
601 	 * this effectively means that you cannot use
602 	 *  1) global variables, which are in the .bss and not in the shm
603 	 *     file-backed memory
604 	 *  2) the stack in some processes, depending on where they have
605 	 *     their stack (or maybe only no interrupt stack?)
606 	 *
607 	 * The stack is already not typically valid for DMA, so this isn't
608 	 * much of a restriction, but global variables might be encountered.
609 	 *
610 	 * It might be possible to fix it by copying around the data that's
611 	 * between bss_start and where we map the file now, but it's not
612 	 * something that you typically encounter with virtio drivers, so
613 	 * it didn't seem worthwhile.
614 	 */
615 	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
616 					&fds[0],
617 					&msg.payload.mem_regions.regions[0]);
618 
619 	if (rc < 0)
620 		return rc;
621 	if (highmem) {
622 		msg.payload.mem_regions.num++;
623 		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
624 				&fds[1], &msg.payload.mem_regions.regions[1]);
625 		if (rc < 0)
626 			return rc;
627 	}
628 
629 	return vhost_user_send(vu_dev, false, &msg, fds,
630 			       msg.payload.mem_regions.num);
631 }
632 
633 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
634 				      u32 request, u32 index, u32 num)
635 {
636 	struct vhost_user_msg msg = {
637 		.header.request = request,
638 		.header.size = sizeof(msg.payload.vring_state),
639 		.payload.vring_state.index = index,
640 		.payload.vring_state.num = num,
641 	};
642 
643 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
644 }
645 
646 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
647 				    u32 index, u32 num)
648 {
649 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
650 					  index, num);
651 }
652 
653 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
654 				     u32 index, u32 offset)
655 {
656 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
657 					  index, offset);
658 }
659 
660 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
661 				     u32 index, u64 desc, u64 used, u64 avail,
662 				     u64 log)
663 {
664 	struct vhost_user_msg msg = {
665 		.header.request = VHOST_USER_SET_VRING_ADDR,
666 		.header.size = sizeof(msg.payload.vring_addr),
667 		.payload.vring_addr.index = index,
668 		.payload.vring_addr.desc = desc,
669 		.payload.vring_addr.used = used,
670 		.payload.vring_addr.avail = avail,
671 		.payload.vring_addr.log = log,
672 	};
673 
674 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
675 }
676 
677 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
678 				   u32 request, int index, int fd)
679 {
680 	struct vhost_user_msg msg = {
681 		.header.request = request,
682 		.header.size = sizeof(msg.payload.integer),
683 		.payload.integer = index,
684 	};
685 
686 	if (index & ~VHOST_USER_VRING_INDEX_MASK)
687 		return -EINVAL;
688 	if (fd < 0) {
689 		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
690 		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
691 	}
692 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
693 }
694 
695 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
696 				     int index, int fd)
697 {
698 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
699 				       index, fd);
700 }
701 
702 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
703 				     int index, int fd)
704 {
705 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
706 				       index, fd);
707 }
708 
709 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
710 				       u32 index, bool enable)
711 {
712 	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
713 		return 0;
714 
715 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
716 					  index, enable);
717 }
718 
719 
720 /* Virtio interface */
721 
722 static bool vu_notify(struct virtqueue *vq)
723 {
724 	struct virtio_uml_vq_info *info = vq->priv;
725 	const uint64_t n = 1;
726 	int rc;
727 
728 	time_travel_propagate_time();
729 
730 	if (info->kick_fd < 0) {
731 		struct virtio_uml_device *vu_dev;
732 
733 		vu_dev = to_virtio_uml_device(vq->vdev);
734 
735 		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
736 						  vq->index, 0) == 0;
737 	}
738 
739 	do {
740 		rc = os_write_file(info->kick_fd, &n, sizeof(n));
741 	} while (rc == -EINTR);
742 	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
743 }
744 
745 static irqreturn_t vu_interrupt(int irq, void *opaque)
746 {
747 	struct virtqueue *vq = opaque;
748 	struct virtio_uml_vq_info *info = vq->priv;
749 	uint64_t n;
750 	int rc;
751 	irqreturn_t ret = IRQ_NONE;
752 
753 	do {
754 		rc = os_read_file(info->call_fd, &n, sizeof(n));
755 		if (rc == sizeof(n))
756 			ret |= vring_interrupt(irq, vq);
757 	} while (rc == sizeof(n) || rc == -EINTR);
758 	WARN(rc != -EAGAIN, "read returned %d\n", rc);
759 	return ret;
760 }
761 
762 
/* virtio config op: read @len bytes of config space at @offset into @buf. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
770 
/* virtio config op: write @len bytes from @buf to config space at @offset. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
778 
779 static u8 vu_get_status(struct virtio_device *vdev)
780 {
781 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
782 
783 	return vu_dev->status;
784 }
785 
786 static void vu_set_status(struct virtio_device *vdev, u8 status)
787 {
788 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
789 
790 	vu_dev->status = status;
791 }
792 
793 static void vu_reset(struct virtio_device *vdev)
794 {
795 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
796 
797 	vu_dev->status = 0;
798 }
799 
800 static void vu_del_vq(struct virtqueue *vq)
801 {
802 	struct virtio_uml_vq_info *info = vq->priv;
803 
804 	if (info->call_fd >= 0) {
805 		um_free_irq(VIRTIO_IRQ, vq);
806 		os_close_file(info->call_fd);
807 	}
808 
809 	if (info->kick_fd >= 0)
810 		os_close_file(info->kick_fd);
811 
812 	vring_del_virtqueue(vq);
813 	kfree(info);
814 }
815 
816 static void vu_del_vqs(struct virtio_device *vdev)
817 {
818 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
819 	struct virtqueue *vq, *n;
820 	u64 features;
821 
822 	/* Note: reverse order as a workaround to a decoding bug in snabb */
823 	list_for_each_entry_reverse(vq, &vdev->vqs, list)
824 		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
825 
826 	/* Ensure previous messages have been processed */
827 	WARN_ON(vhost_user_get_features(vu_dev, &features));
828 
829 	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
830 		vu_del_vq(vq);
831 }
832 
833 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
834 			       struct virtqueue *vq)
835 {
836 	struct virtio_uml_vq_info *info = vq->priv;
837 	int call_fds[2];
838 	int rc;
839 
840 	/* no call FD needed/desired in this case */
841 	if (vu_dev->protocol_features &
842 			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
843 	    vu_dev->protocol_features &
844 			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
845 		info->call_fd = -1;
846 		return 0;
847 	}
848 
849 	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
850 	rc = os_pipe(call_fds, true, true);
851 	if (rc < 0)
852 		return rc;
853 
854 	info->call_fd = call_fds[0];
855 	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
856 			    vu_interrupt, IRQF_SHARED, info->name, vq);
857 	if (rc)
858 		goto close_both;
859 
860 	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
861 	if (rc)
862 		goto release_irq;
863 
864 	goto out;
865 
866 release_irq:
867 	um_free_irq(VIRTIO_IRQ, vq);
868 close_both:
869 	os_close_file(call_fds[0]);
870 out:
871 	/* Close (unused) write end of call fds */
872 	os_close_file(call_fds[1]);
873 
874 	return rc;
875 }
876 
877 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
878 static void vu_defer_irq_handle(struct time_travel_event *d)
879 {
880 	struct virtio_uml_vq_info *info;
881 
882 	info = container_of(d, struct virtio_uml_vq_info, defer);
883 	info->callback(info->vq);
884 }
885 
886 static void vu_defer_irq_callback(struct virtqueue *vq)
887 {
888 	struct virtio_uml_vq_info *info = vq->priv;
889 
890 	time_travel_add_irq_event(&info->defer);
891 }
892 #endif
893 
894 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
895 				     unsigned index, vq_callback_t *callback,
896 				     const char *name, bool ctx)
897 {
898 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
899 	struct platform_device *pdev = vu_dev->pdev;
900 	struct virtio_uml_vq_info *info;
901 	struct virtqueue *vq;
902 	int num = MAX_SUPPORTED_QUEUE_SIZE;
903 	int rc;
904 
905 	info = kzalloc(sizeof(*info), GFP_KERNEL);
906 	if (!info) {
907 		rc = -ENOMEM;
908 		goto error_kzalloc;
909 	}
910 	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
911 		 pdev->id, name);
912 
913 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
914 	/*
915 	 * When we get an interrupt, we must bounce it through the simulation
916 	 * calendar (the simtime device), except for the simtime device itself
917 	 * since that's part of the simulation control.
918 	 */
919 	if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
920 		info->callback = callback;
921 		callback = vu_defer_irq_callback;
922 		time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
923 	}
924 #endif
925 
926 	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
927 				    ctx, vu_notify, callback, info->name);
928 	if (!vq) {
929 		rc = -ENOMEM;
930 		goto error_create;
931 	}
932 	vq->priv = info;
933 	num = virtqueue_get_vring_size(vq);
934 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
935 	info->vq = vq;
936 #endif
937 
938 	if (vu_dev->protocol_features &
939 			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
940 		info->kick_fd = -1;
941 	} else {
942 		rc = os_eventfd(0, 0);
943 		if (rc < 0)
944 			goto error_kick;
945 		info->kick_fd = rc;
946 	}
947 
948 	rc = vu_setup_vq_call_fd(vu_dev, vq);
949 	if (rc)
950 		goto error_call;
951 
952 	rc = vhost_user_set_vring_num(vu_dev, index, num);
953 	if (rc)
954 		goto error_setup;
955 
956 	rc = vhost_user_set_vring_base(vu_dev, index, 0);
957 	if (rc)
958 		goto error_setup;
959 
960 	rc = vhost_user_set_vring_addr(vu_dev, index,
961 				       virtqueue_get_desc_addr(vq),
962 				       virtqueue_get_used_addr(vq),
963 				       virtqueue_get_avail_addr(vq),
964 				       (u64) -1);
965 	if (rc)
966 		goto error_setup;
967 
968 	return vq;
969 
970 error_setup:
971 	if (info->call_fd >= 0) {
972 		um_free_irq(VIRTIO_IRQ, vq);
973 		os_close_file(info->call_fd);
974 	}
975 error_call:
976 	if (info->kick_fd >= 0)
977 		os_close_file(info->kick_fd);
978 error_kick:
979 	vring_del_virtqueue(vq);
980 error_create:
981 	kfree(info);
982 error_kzalloc:
983 	return ERR_PTR(rc);
984 }
985 
986 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
987 		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
988 		       const char * const names[], const bool *ctx,
989 		       struct irq_affinity *desc)
990 {
991 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
992 	int i, queue_idx = 0, rc;
993 	struct virtqueue *vq;
994 
995 	rc = vhost_user_set_mem_table(vu_dev);
996 	if (rc)
997 		return rc;
998 
999 	for (i = 0; i < nvqs; ++i) {
1000 		if (!names[i]) {
1001 			vqs[i] = NULL;
1002 			continue;
1003 		}
1004 
1005 		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1006 				     ctx ? ctx[i] : false);
1007 		if (IS_ERR(vqs[i])) {
1008 			rc = PTR_ERR(vqs[i]);
1009 			goto error_setup;
1010 		}
1011 	}
1012 
1013 	list_for_each_entry(vq, &vdev->vqs, list) {
1014 		struct virtio_uml_vq_info *info = vq->priv;
1015 
1016 		if (info->kick_fd >= 0) {
1017 			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1018 						       info->kick_fd);
1019 			if (rc)
1020 				goto error_setup;
1021 		}
1022 
1023 		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1024 		if (rc)
1025 			goto error_setup;
1026 	}
1027 
1028 	return 0;
1029 
1030 error_setup:
1031 	vu_del_vqs(vdev);
1032 	return rc;
1033 }
1034 
1035 static u64 vu_get_features(struct virtio_device *vdev)
1036 {
1037 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1038 
1039 	return vu_dev->features;
1040 }
1041 
1042 static int vu_finalize_features(struct virtio_device *vdev)
1043 {
1044 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1045 	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1046 
1047 	vring_transport_features(vdev);
1048 	vu_dev->features = vdev->features | supported;
1049 
1050 	return vhost_user_set_features(vu_dev, vu_dev->features);
1051 }
1052 
1053 static const char *vu_bus_name(struct virtio_device *vdev)
1054 {
1055 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1056 
1057 	return vu_dev->pdev->name;
1058 }
1059 
1060 static const struct virtio_config_ops virtio_uml_config_ops = {
1061 	.get = vu_get,
1062 	.set = vu_set,
1063 	.get_status = vu_get_status,
1064 	.set_status = vu_set_status,
1065 	.reset = vu_reset,
1066 	.find_vqs = vu_find_vqs,
1067 	.del_vqs = vu_del_vqs,
1068 	.get_features = vu_get_features,
1069 	.finalize_features = vu_finalize_features,
1070 	.bus_name = vu_bus_name,
1071 };
1072 
1073 static void virtio_uml_release_dev(struct device *d)
1074 {
1075 	struct virtio_device *vdev =
1076 			container_of(d, struct virtio_device, dev);
1077 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1078 
1079 	/* might not have been opened due to not negotiating the feature */
1080 	if (vu_dev->req_fd >= 0) {
1081 		um_free_irq(VIRTIO_IRQ, vu_dev);
1082 		os_close_file(vu_dev->req_fd);
1083 	}
1084 
1085 	os_close_file(vu_dev->sock);
1086 }
1087 
1088 /* Platform device */
1089 
1090 static int virtio_uml_probe(struct platform_device *pdev)
1091 {
1092 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1093 	struct virtio_uml_device *vu_dev;
1094 	int rc;
1095 
1096 	if (!pdata)
1097 		return -EINVAL;
1098 
1099 	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
1100 	if (!vu_dev)
1101 		return -ENOMEM;
1102 
1103 	vu_dev->vdev.dev.parent = &pdev->dev;
1104 	vu_dev->vdev.dev.release = virtio_uml_release_dev;
1105 	vu_dev->vdev.config = &virtio_uml_config_ops;
1106 	vu_dev->vdev.id.device = pdata->virtio_device_id;
1107 	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1108 	vu_dev->pdev = pdev;
1109 	vu_dev->req_fd = -1;
1110 
1111 	do {
1112 		rc = os_connect_socket(pdata->socket_path);
1113 	} while (rc == -EINTR);
1114 	if (rc < 0)
1115 		return rc;
1116 	vu_dev->sock = rc;
1117 
1118 	spin_lock_init(&vu_dev->sock_lock);
1119 
1120 	rc = vhost_user_init(vu_dev);
1121 	if (rc)
1122 		goto error_init;
1123 
1124 	platform_set_drvdata(pdev, vu_dev);
1125 
1126 	rc = register_virtio_device(&vu_dev->vdev);
1127 	if (rc)
1128 		put_device(&vu_dev->vdev.dev);
1129 	vu_dev->registered = 1;
1130 	return rc;
1131 
1132 error_init:
1133 	os_close_file(vu_dev->sock);
1134 	return rc;
1135 }
1136 
1137 static int virtio_uml_remove(struct platform_device *pdev)
1138 {
1139 	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1140 
1141 	unregister_virtio_device(&vu_dev->vdev);
1142 	return 0;
1143 }
1144 
1145 /* Command line device list */
1146 
/*
 * Release callback of vu_cmdline_parent. That device is a static object,
 * so this is intentionally a no-op.
 */
static void vu_cmdline_release_dev(struct device *d)
{
	/* nothing to free */
}
1150 
1151 static struct device vu_cmdline_parent = {
1152 	.init_name = "virtio-uml-cmdline",
1153 	.release = vu_cmdline_release_dev,
1154 };
1155 
1156 static bool vu_cmdline_parent_registered;
1157 static int vu_cmdline_id;
1158 
1159 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1160 {
1161 	struct platform_device *pdev = to_platform_device(dev);
1162 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1163 
1164 	kfree(pdata->socket_path);
1165 	platform_device_unregister(pdev);
1166 	return 0;
1167 }
1168 
1169 static void vu_conn_broken(struct work_struct *wk)
1170 {
1171 	struct virtio_uml_platform_data *pdata;
1172 
1173 	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1174 	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1175 }
1176 
1177 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1178 {
1179 	const char *ids = strchr(device, ':');
1180 	unsigned int virtio_device_id;
1181 	int processed, consumed, err;
1182 	char *socket_path;
1183 	struct virtio_uml_platform_data pdata, *ppdata;
1184 	struct platform_device *pdev;
1185 
1186 	if (!ids || ids == device)
1187 		return -EINVAL;
1188 
1189 	processed = sscanf(ids, ":%u%n:%d%n",
1190 			   &virtio_device_id, &consumed,
1191 			   &vu_cmdline_id, &consumed);
1192 
1193 	if (processed < 1 || ids[consumed])
1194 		return -EINVAL;
1195 
1196 	if (!vu_cmdline_parent_registered) {
1197 		err = device_register(&vu_cmdline_parent);
1198 		if (err) {
1199 			pr_err("Failed to register parent device!\n");
1200 			put_device(&vu_cmdline_parent);
1201 			return err;
1202 		}
1203 		vu_cmdline_parent_registered = true;
1204 	}
1205 
1206 	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1207 	if (!socket_path)
1208 		return -ENOMEM;
1209 
1210 	pdata.virtio_device_id = (u32) virtio_device_id;
1211 	pdata.socket_path = socket_path;
1212 
1213 	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1214 		vu_cmdline_id, virtio_device_id, socket_path);
1215 
1216 	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1217 					     vu_cmdline_id++, &pdata,
1218 					     sizeof(pdata));
1219 	err = PTR_ERR_OR_ZERO(pdev);
1220 	if (err)
1221 		goto free;
1222 
1223 	ppdata = pdev->dev.platform_data;
1224 	ppdata->pdev = pdev;
1225 	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1226 
1227 	return 0;
1228 
1229 free:
1230 	kfree(socket_path);
1231 	return err;
1232 }
1233 
1234 static int vu_cmdline_get_device(struct device *dev, void *data)
1235 {
1236 	struct platform_device *pdev = to_platform_device(dev);
1237 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1238 	char *buffer = data;
1239 	unsigned int len = strlen(buffer);
1240 
1241 	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1242 		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1243 	return 0;
1244 }
1245 
1246 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1247 {
1248 	buffer[0] = '\0';
1249 	if (vu_cmdline_parent_registered)
1250 		device_for_each_child(&vu_cmdline_parent, buffer,
1251 				      vu_cmdline_get_device);
1252 	return strlen(buffer) + 1;
1253 }
1254 
1255 static const struct kernel_param_ops vu_cmdline_param_ops = {
1256 	.set = vu_cmdline_set,
1257 	.get = vu_cmdline_get,
1258 };
1259 
1260 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1261 __uml_help(vu_cmdline_param_ops,
1262 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1263 "    Configure a virtio device over a vhost-user socket.\n"
1264 "    See virtio_ids.h for a list of possible virtio device id values.\n"
1265 "    Optionally use a specific platform_device id.\n\n"
1266 );
1267 
1268 
1269 static void vu_unregister_cmdline_devices(void)
1270 {
1271 	if (vu_cmdline_parent_registered) {
1272 		device_for_each_child(&vu_cmdline_parent, NULL,
1273 				      vu_unregister_cmdline_device);
1274 		device_unregister(&vu_cmdline_parent);
1275 		vu_cmdline_parent_registered = false;
1276 	}
1277 }
1278 
1279 /* Platform driver */
1280 
1281 static const struct of_device_id virtio_uml_match[] = {
1282 	{ .compatible = "virtio,uml", },
1283 	{ }
1284 };
1285 MODULE_DEVICE_TABLE(of, virtio_uml_match);
1286 
1287 static struct platform_driver virtio_uml_driver = {
1288 	.probe = virtio_uml_probe,
1289 	.remove = virtio_uml_remove,
1290 	.driver = {
1291 		.name = "virtio-uml",
1292 		.of_match_table = virtio_uml_match,
1293 	},
1294 };
1295 
1296 static int __init virtio_uml_init(void)
1297 {
1298 	return platform_driver_register(&virtio_uml_driver);
1299 }
1300 
1301 static void __exit virtio_uml_exit(void)
1302 {
1303 	platform_driver_unregister(&virtio_uml_driver);
1304 	vu_unregister_cmdline_devices();
1305 }
1306 
1307 module_init(virtio_uml_init);
1308 module_exit(virtio_uml_exit);
1309 __uml_exitcall(virtio_uml_exit);
1310 
1311 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1312 MODULE_LICENSE("GPL");
1313