xref: /openbmc/linux/arch/um/drivers/virtio_uml.c (revision fcbd8037f7df694aa7bfb7ce82c0c7f5e53e7b7b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio vhost-user driver
4  *
5  * Copyright(c) 2019 Intel Corporation
6  *
7  * This module allows virtio devices to be used over a vhost-user socket.
8  *
9  * Guest devices can be instantiated by kernel module or command line
10  * parameters. One device will be created for each parameter. Syntax:
11  *
12  *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
13  * where:
14  *		<socket>	:= vhost-user socket path to connect
15  *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
16  *		<platform_id>	:= (optional) platform device id
17  *
18  * example:
19  *		virtio_uml.device=/var/uml.socket:1
20  *
21  * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22  */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <shared/as-layout.h>
30 #include <irq_kern.h>
31 #include <init.h>
32 #include <os.h>
33 #include "vhost_user.h"
34 
35 /* Workaround due to a conflict between irq_user.h and irqreturn.h */
36 #ifdef IRQ_NONE
37 #undef IRQ_NONE
38 #endif
39 
/* Ring size handed to vring_create_virtqueue() for every queue. */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Get from an embedded struct virtio_device to its virtio_uml_device. */
#define to_virtio_uml_device(_vd) \
	container_of(_vd, struct virtio_uml_device, vdev)
44 
45 struct virtio_uml_device {
46 	struct virtio_device vdev;
47 	struct platform_device *pdev;
48 
49 	int sock, req_fd;
50 	u64 features;
51 	u64 protocol_features;
52 	u8 status;
53 };
54 
/* Per-virtqueue private data, stored in vq->priv. */
struct virtio_uml_vq_info {
	/* fd written by vu_notify() to kick the other side */
	int kick_fd;
	/* fd read by vu_interrupt() when the other side calls us */
	int call_fd;
	/* name used for the virtqueue and its interrupt */
	char name[32];
};
59 
/* Defined outside this file; used when describing memory to the slave. */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() against the platform device behind a virtio_uml_device. */
#define vu_err(_vu, ...)	dev_err(&(_vu)->pdev->dev, __VA_ARGS__)
63 
64 /* Vhost-user protocol */
65 
66 static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
67 			    const int *fds, unsigned int fds_num)
68 {
69 	int rc;
70 
71 	do {
72 		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
73 		if (rc > 0) {
74 			buf += rc;
75 			len -= rc;
76 			fds = NULL;
77 			fds_num = 0;
78 		}
79 	} while (len && (rc >= 0 || rc == -EINTR));
80 
81 	if (rc < 0)
82 		return rc;
83 	return 0;
84 }
85 
86 static int full_read(int fd, void *buf, int len)
87 {
88 	int rc;
89 
90 	do {
91 		rc = os_read_file(fd, buf, len);
92 		if (rc > 0) {
93 			buf += rc;
94 			len -= rc;
95 		}
96 	} while (len && (rc > 0 || rc == -EINTR));
97 
98 	if (rc < 0)
99 		return rc;
100 	if (rc == 0)
101 		return -ECONNRESET;
102 	return 0;
103 }
104 
105 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
106 {
107 	return full_read(fd, msg, sizeof(msg->header));
108 }
109 
110 static int vhost_user_recv(int fd, struct vhost_user_msg *msg,
111 			   size_t max_payload_size)
112 {
113 	size_t size;
114 	int rc = vhost_user_recv_header(fd, msg);
115 
116 	if (rc)
117 		return rc;
118 	size = msg->header.size;
119 	if (size > max_payload_size)
120 		return -EPROTO;
121 	return full_read(fd, &msg->payload, size);
122 }
123 
124 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
125 				struct vhost_user_msg *msg,
126 				size_t max_payload_size)
127 {
128 	int rc = vhost_user_recv(vu_dev->sock, msg, max_payload_size);
129 
130 	if (rc)
131 		return rc;
132 
133 	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
134 		return -EPROTO;
135 
136 	return 0;
137 }
138 
139 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
140 			       u64 *value)
141 {
142 	struct vhost_user_msg msg;
143 	int rc = vhost_user_recv_resp(vu_dev, &msg,
144 				      sizeof(msg.payload.integer));
145 
146 	if (rc)
147 		return rc;
148 	if (msg.header.size != sizeof(msg.payload.integer))
149 		return -EPROTO;
150 	*value = msg.payload.integer;
151 	return 0;
152 }
153 
154 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
155 			       struct vhost_user_msg *msg,
156 			       size_t max_payload_size)
157 {
158 	int rc = vhost_user_recv(vu_dev->req_fd, msg, max_payload_size);
159 
160 	if (rc)
161 		return rc;
162 
163 	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
164 			VHOST_USER_VERSION)
165 		return -EPROTO;
166 
167 	return 0;
168 }
169 
170 static int vhost_user_send(struct virtio_uml_device *vu_dev,
171 			   bool need_response, struct vhost_user_msg *msg,
172 			   int *fds, size_t num_fds)
173 {
174 	size_t size = sizeof(msg->header) + msg->header.size;
175 	bool request_ack;
176 	int rc;
177 
178 	msg->header.flags |= VHOST_USER_VERSION;
179 
180 	/*
181 	 * The need_response flag indicates that we already need a response,
182 	 * e.g. to read the features. In these cases, don't request an ACK as
183 	 * it is meaningless. Also request an ACK only if supported.
184 	 */
185 	request_ack = !need_response;
186 	if (!(vu_dev->protocol_features &
187 			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
188 		request_ack = false;
189 
190 	if (request_ack)
191 		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
192 
193 	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
194 	if (rc < 0)
195 		return rc;
196 
197 	if (request_ack) {
198 		uint64_t status;
199 
200 		rc = vhost_user_recv_u64(vu_dev, &status);
201 		if (rc)
202 			return rc;
203 
204 		if (status) {
205 			vu_err(vu_dev, "slave reports error: %llu\n", status);
206 			return -EIO;
207 		}
208 	}
209 
210 	return 0;
211 }
212 
213 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
214 				      bool need_response, u32 request)
215 {
216 	struct vhost_user_msg msg = {
217 		.header.request = request,
218 	};
219 
220 	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
221 }
222 
223 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
224 					 u32 request, int fd)
225 {
226 	struct vhost_user_msg msg = {
227 		.header.request = request,
228 	};
229 
230 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
231 }
232 
233 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
234 			       u32 request, u64 value)
235 {
236 	struct vhost_user_msg msg = {
237 		.header.request = request,
238 		.header.size = sizeof(msg.payload.integer),
239 		.payload.integer = value,
240 	};
241 
242 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
243 }
244 
245 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
246 {
247 	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
248 }
249 
250 static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
251 				   u64 *features)
252 {
253 	int rc = vhost_user_send_no_payload(vu_dev, true,
254 					    VHOST_USER_GET_FEATURES);
255 
256 	if (rc)
257 		return rc;
258 	return vhost_user_recv_u64(vu_dev, features);
259 }
260 
261 static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
262 				   u64 features)
263 {
264 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
265 }
266 
267 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
268 					    u64 *protocol_features)
269 {
270 	int rc = vhost_user_send_no_payload(vu_dev, true,
271 			VHOST_USER_GET_PROTOCOL_FEATURES);
272 
273 	if (rc)
274 		return rc;
275 	return vhost_user_recv_u64(vu_dev, protocol_features);
276 }
277 
278 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
279 					    u64 protocol_features)
280 {
281 	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
282 				   protocol_features);
283 }
284 
285 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
286 			     struct vhost_user_msg *msg, int response)
287 {
288 	struct vhost_user_msg reply = {
289 		.payload.integer = response,
290 	};
291 	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
292 	int rc;
293 
294 	reply.header = msg->header;
295 	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
296 	reply.header.flags |= VHOST_USER_FLAG_REPLY;
297 	reply.header.size = sizeof(reply.payload.integer);
298 
299 	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
300 
301 	if (rc)
302 		vu_err(vu_dev,
303 		       "sending reply to slave request failed: %d (size %zu)\n",
304 		       rc, size);
305 }
306 
307 static irqreturn_t vu_req_interrupt(int irq, void *data)
308 {
309 	struct virtio_uml_device *vu_dev = data;
310 	int response = 1;
311 	struct {
312 		struct vhost_user_msg msg;
313 		u8 extra_payload[512];
314 	} msg;
315 	int rc;
316 
317 	rc = vhost_user_recv_req(vu_dev, &msg.msg,
318 				 sizeof(msg.msg.payload) +
319 				 sizeof(msg.extra_payload));
320 
321 	if (rc)
322 		return IRQ_NONE;
323 
324 	switch (msg.msg.header.request) {
325 	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
326 		virtio_config_changed(&vu_dev->vdev);
327 		response = 0;
328 		break;
329 	case VHOST_USER_SLAVE_IOTLB_MSG:
330 		/* not supported - VIRTIO_F_IOMMU_PLATFORM */
331 	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
332 		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
333 	default:
334 		vu_err(vu_dev, "unexpected slave request %d\n",
335 		       msg.msg.header.request);
336 	}
337 
338 	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
339 		vhost_user_reply(vu_dev, &msg.msg, response);
340 
341 	return IRQ_HANDLED;
342 }
343 
344 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
345 {
346 	int rc, req_fds[2];
347 
348 	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
349 	rc = os_pipe(req_fds, true, true);
350 	if (rc < 0)
351 		return rc;
352 	vu_dev->req_fd = req_fds[0];
353 
354 	rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
355 			    vu_req_interrupt, IRQF_SHARED,
356 			    vu_dev->pdev->name, vu_dev);
357 	if (rc)
358 		goto err_close;
359 
360 	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
361 					   req_fds[1]);
362 	if (rc)
363 		goto err_free_irq;
364 
365 	goto out;
366 
367 err_free_irq:
368 	um_free_irq(VIRTIO_IRQ, vu_dev);
369 err_close:
370 	os_close_file(req_fds[0]);
371 out:
372 	/* Close unused write end of request fds */
373 	os_close_file(req_fds[1]);
374 	return rc;
375 }
376 
377 static int vhost_user_init(struct virtio_uml_device *vu_dev)
378 {
379 	int rc = vhost_user_set_owner(vu_dev);
380 
381 	if (rc)
382 		return rc;
383 	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
384 	if (rc)
385 		return rc;
386 
387 	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
388 		rc = vhost_user_get_protocol_features(vu_dev,
389 				&vu_dev->protocol_features);
390 		if (rc)
391 			return rc;
392 		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
393 		rc = vhost_user_set_protocol_features(vu_dev,
394 				vu_dev->protocol_features);
395 		if (rc)
396 			return rc;
397 	}
398 
399 	if (vu_dev->protocol_features &
400 			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
401 		rc = vhost_user_init_slave_req(vu_dev);
402 		if (rc)
403 			return rc;
404 	}
405 
406 	return 0;
407 }
408 
409 static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
410 				  u32 offset, void *buf, u32 len)
411 {
412 	u32 cfg_size = offset + len;
413 	struct vhost_user_msg *msg;
414 	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
415 	size_t msg_size = sizeof(msg->header) + payload_size;
416 	int rc;
417 
418 	if (!(vu_dev->protocol_features &
419 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
420 		return;
421 
422 	msg = kzalloc(msg_size, GFP_KERNEL);
423 	if (!msg)
424 		return;
425 	msg->header.request = VHOST_USER_GET_CONFIG;
426 	msg->header.size = payload_size;
427 	msg->payload.config.offset = 0;
428 	msg->payload.config.size = cfg_size;
429 
430 	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
431 	if (rc) {
432 		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
433 		       rc);
434 		goto free;
435 	}
436 
437 	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
438 	if (rc) {
439 		vu_err(vu_dev,
440 		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
441 		       rc);
442 		goto free;
443 	}
444 
445 	if (msg->header.size != payload_size ||
446 	    msg->payload.config.size != cfg_size) {
447 		rc = -EPROTO;
448 		vu_err(vu_dev,
449 		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
450 		       msg->header.size, payload_size,
451 		       msg->payload.config.size, cfg_size);
452 		goto free;
453 	}
454 	memcpy(buf, msg->payload.config.payload + offset, len);
455 
456 free:
457 	kfree(msg);
458 }
459 
460 static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
461 				  u32 offset, const void *buf, u32 len)
462 {
463 	struct vhost_user_msg *msg;
464 	size_t payload_size = sizeof(msg->payload.config) + len;
465 	size_t msg_size = sizeof(msg->header) + payload_size;
466 	int rc;
467 
468 	if (!(vu_dev->protocol_features &
469 	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
470 		return;
471 
472 	msg = kzalloc(msg_size, GFP_KERNEL);
473 	if (!msg)
474 		return;
475 	msg->header.request = VHOST_USER_SET_CONFIG;
476 	msg->header.size = payload_size;
477 	msg->payload.config.offset = offset;
478 	msg->payload.config.size = len;
479 	memcpy(msg->payload.config.payload, buf, len);
480 
481 	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
482 	if (rc)
483 		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
484 		       rc);
485 
486 	kfree(msg);
487 }
488 
489 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
490 				      struct vhost_user_mem_region *region_out)
491 {
492 	unsigned long long mem_offset;
493 	int rc = phys_mapping(addr, &mem_offset);
494 
495 	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
496 		return -EFAULT;
497 	*fd_out = rc;
498 	region_out->guest_addr = addr;
499 	region_out->user_addr = addr;
500 	region_out->size = size;
501 	region_out->mmap_offset = mem_offset;
502 
503 	/* Ensure mapping is valid for the entire region */
504 	rc = phys_mapping(addr + size - 1, &mem_offset);
505 	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
506 		 addr + size - 1, rc, *fd_out))
507 		return -EFAULT;
508 	return 0;
509 }
510 
511 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
512 {
513 	struct vhost_user_msg msg = {
514 		.header.request = VHOST_USER_SET_MEM_TABLE,
515 		.header.size = sizeof(msg.payload.mem_regions),
516 		.payload.mem_regions.num = 1,
517 	};
518 	unsigned long reserved = uml_reserved - uml_physmem;
519 	int fds[2];
520 	int rc;
521 
522 	/*
523 	 * This is a bit tricky, see also the comment with setup_physmem().
524 	 *
525 	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
526 	 * but the code and data we *already* have is omitted. To us, this
527 	 * is no difference, since they both become part of our address
528 	 * space and memory consumption. To somebody looking in from the
529 	 * outside, however, it is different because the part of our memory
530 	 * consumption that's already part of the binary (code/data) is not
531 	 * mapped from the file, so it's not visible to another mmap from
532 	 * the file descriptor.
533 	 *
534 	 * Thus, don't advertise this space to the vhost-user slave. This
535 	 * means that the slave will likely abort or similar when we give
536 	 * it an address from the hidden range, since it's not marked as
537 	 * a valid address, but at least that way we detect the issue and
538 	 * don't just have the slave read an all-zeroes buffer from the
539 	 * shared memory file, or write something there that we can never
540 	 * see (depending on the direction of the virtqueue traffic.)
541 	 *
542 	 * Since we usually don't want to use .text for virtio buffers,
543 	 * this effectively means that you cannot use
544 	 *  1) global variables, which are in the .bss and not in the shm
545 	 *     file-backed memory
546 	 *  2) the stack in some processes, depending on where they have
547 	 *     their stack (or maybe only no interrupt stack?)
548 	 *
549 	 * The stack is already not typically valid for DMA, so this isn't
550 	 * much of a restriction, but global variables might be encountered.
551 	 *
552 	 * It might be possible to fix it by copying around the data that's
553 	 * between bss_start and where we map the file now, but it's not
554 	 * something that you typically encounter with virtio drivers, so
555 	 * it didn't seem worthwhile.
556 	 */
557 	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
558 					&fds[0],
559 					&msg.payload.mem_regions.regions[0]);
560 
561 	if (rc < 0)
562 		return rc;
563 	if (highmem) {
564 		msg.payload.mem_regions.num++;
565 		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
566 				&fds[1], &msg.payload.mem_regions.regions[1]);
567 		if (rc < 0)
568 			return rc;
569 	}
570 
571 	return vhost_user_send(vu_dev, false, &msg, fds,
572 			       msg.payload.mem_regions.num);
573 }
574 
575 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
576 				      u32 request, u32 index, u32 num)
577 {
578 	struct vhost_user_msg msg = {
579 		.header.request = request,
580 		.header.size = sizeof(msg.payload.vring_state),
581 		.payload.vring_state.index = index,
582 		.payload.vring_state.num = num,
583 	};
584 
585 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
586 }
587 
588 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
589 				    u32 index, u32 num)
590 {
591 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
592 					  index, num);
593 }
594 
595 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
596 				     u32 index, u32 offset)
597 {
598 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
599 					  index, offset);
600 }
601 
602 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
603 				     u32 index, u64 desc, u64 used, u64 avail,
604 				     u64 log)
605 {
606 	struct vhost_user_msg msg = {
607 		.header.request = VHOST_USER_SET_VRING_ADDR,
608 		.header.size = sizeof(msg.payload.vring_addr),
609 		.payload.vring_addr.index = index,
610 		.payload.vring_addr.desc = desc,
611 		.payload.vring_addr.used = used,
612 		.payload.vring_addr.avail = avail,
613 		.payload.vring_addr.log = log,
614 	};
615 
616 	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
617 }
618 
619 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
620 				   u32 request, int index, int fd)
621 {
622 	struct vhost_user_msg msg = {
623 		.header.request = request,
624 		.header.size = sizeof(msg.payload.integer),
625 		.payload.integer = index,
626 	};
627 
628 	if (index & ~VHOST_USER_VRING_INDEX_MASK)
629 		return -EINVAL;
630 	if (fd < 0) {
631 		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
632 		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
633 	}
634 	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
635 }
636 
637 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
638 				     int index, int fd)
639 {
640 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
641 				       index, fd);
642 }
643 
644 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
645 				     int index, int fd)
646 {
647 	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
648 				       index, fd);
649 }
650 
651 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
652 				       u32 index, bool enable)
653 {
654 	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
655 		return 0;
656 
657 	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
658 					  index, enable);
659 }
660 
661 
662 /* Virtio interface */
663 
664 static bool vu_notify(struct virtqueue *vq)
665 {
666 	struct virtio_uml_vq_info *info = vq->priv;
667 	const uint64_t n = 1;
668 	int rc;
669 
670 	do {
671 		rc = os_write_file(info->kick_fd, &n, sizeof(n));
672 	} while (rc == -EINTR);
673 	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
674 }
675 
676 static irqreturn_t vu_interrupt(int irq, void *opaque)
677 {
678 	struct virtqueue *vq = opaque;
679 	struct virtio_uml_vq_info *info = vq->priv;
680 	uint64_t n;
681 	int rc;
682 	irqreturn_t ret = IRQ_NONE;
683 
684 	do {
685 		rc = os_read_file(info->call_fd, &n, sizeof(n));
686 		if (rc == sizeof(n))
687 			ret |= vring_interrupt(irq, vq);
688 	} while (rc == sizeof(n) || rc == -EINTR);
689 	WARN(rc != -EAGAIN, "read returned %d\n", rc);
690 	return ret;
691 }
692 
693 
/* virtio_config_ops.get: read config space through vhost-user. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
701 
/* virtio_config_ops.set: write config space through vhost-user. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
709 
710 static u8 vu_get_status(struct virtio_device *vdev)
711 {
712 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
713 
714 	return vu_dev->status;
715 }
716 
717 static void vu_set_status(struct virtio_device *vdev, u8 status)
718 {
719 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
720 
721 	vu_dev->status = status;
722 }
723 
724 static void vu_reset(struct virtio_device *vdev)
725 {
726 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
727 
728 	vu_dev->status = 0;
729 }
730 
731 static void vu_del_vq(struct virtqueue *vq)
732 {
733 	struct virtio_uml_vq_info *info = vq->priv;
734 
735 	um_free_irq(VIRTIO_IRQ, vq);
736 
737 	os_close_file(info->call_fd);
738 	os_close_file(info->kick_fd);
739 
740 	vring_del_virtqueue(vq);
741 	kfree(info);
742 }
743 
744 static void vu_del_vqs(struct virtio_device *vdev)
745 {
746 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
747 	struct virtqueue *vq, *n;
748 	u64 features;
749 
750 	/* Note: reverse order as a workaround to a decoding bug in snabb */
751 	list_for_each_entry_reverse(vq, &vdev->vqs, list)
752 		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
753 
754 	/* Ensure previous messages have been processed */
755 	WARN_ON(vhost_user_get_features(vu_dev, &features));
756 
757 	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
758 		vu_del_vq(vq);
759 }
760 
761 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
762 			       struct virtqueue *vq)
763 {
764 	struct virtio_uml_vq_info *info = vq->priv;
765 	int call_fds[2];
766 	int rc;
767 
768 	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
769 	rc = os_pipe(call_fds, true, true);
770 	if (rc < 0)
771 		return rc;
772 
773 	info->call_fd = call_fds[0];
774 	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
775 			    vu_interrupt, IRQF_SHARED, info->name, vq);
776 	if (rc)
777 		goto close_both;
778 
779 	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
780 	if (rc)
781 		goto release_irq;
782 
783 	goto out;
784 
785 release_irq:
786 	um_free_irq(VIRTIO_IRQ, vq);
787 close_both:
788 	os_close_file(call_fds[0]);
789 out:
790 	/* Close (unused) write end of call fds */
791 	os_close_file(call_fds[1]);
792 
793 	return rc;
794 }
795 
796 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
797 				     unsigned index, vq_callback_t *callback,
798 				     const char *name, bool ctx)
799 {
800 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
801 	struct platform_device *pdev = vu_dev->pdev;
802 	struct virtio_uml_vq_info *info;
803 	struct virtqueue *vq;
804 	int num = MAX_SUPPORTED_QUEUE_SIZE;
805 	int rc;
806 
807 	info = kzalloc(sizeof(*info), GFP_KERNEL);
808 	if (!info) {
809 		rc = -ENOMEM;
810 		goto error_kzalloc;
811 	}
812 	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
813 		 pdev->id, name);
814 
815 	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
816 				    ctx, vu_notify, callback, info->name);
817 	if (!vq) {
818 		rc = -ENOMEM;
819 		goto error_create;
820 	}
821 	vq->priv = info;
822 	num = virtqueue_get_vring_size(vq);
823 
824 	rc = os_eventfd(0, 0);
825 	if (rc < 0)
826 		goto error_kick;
827 	info->kick_fd = rc;
828 
829 	rc = vu_setup_vq_call_fd(vu_dev, vq);
830 	if (rc)
831 		goto error_call;
832 
833 	rc = vhost_user_set_vring_num(vu_dev, index, num);
834 	if (rc)
835 		goto error_setup;
836 
837 	rc = vhost_user_set_vring_base(vu_dev, index, 0);
838 	if (rc)
839 		goto error_setup;
840 
841 	rc = vhost_user_set_vring_addr(vu_dev, index,
842 				       virtqueue_get_desc_addr(vq),
843 				       virtqueue_get_used_addr(vq),
844 				       virtqueue_get_avail_addr(vq),
845 				       (u64) -1);
846 	if (rc)
847 		goto error_setup;
848 
849 	return vq;
850 
851 error_setup:
852 	um_free_irq(VIRTIO_IRQ, vq);
853 	os_close_file(info->call_fd);
854 error_call:
855 	os_close_file(info->kick_fd);
856 error_kick:
857 	vring_del_virtqueue(vq);
858 error_create:
859 	kfree(info);
860 error_kzalloc:
861 	return ERR_PTR(rc);
862 }
863 
864 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
865 		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
866 		       const char * const names[], const bool *ctx,
867 		       struct irq_affinity *desc)
868 {
869 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
870 	int i, queue_idx = 0, rc;
871 	struct virtqueue *vq;
872 
873 	rc = vhost_user_set_mem_table(vu_dev);
874 	if (rc)
875 		return rc;
876 
877 	for (i = 0; i < nvqs; ++i) {
878 		if (!names[i]) {
879 			vqs[i] = NULL;
880 			continue;
881 		}
882 
883 		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
884 				     ctx ? ctx[i] : false);
885 		if (IS_ERR(vqs[i])) {
886 			rc = PTR_ERR(vqs[i]);
887 			goto error_setup;
888 		}
889 	}
890 
891 	list_for_each_entry(vq, &vdev->vqs, list) {
892 		struct virtio_uml_vq_info *info = vq->priv;
893 
894 		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
895 					       info->kick_fd);
896 		if (rc)
897 			goto error_setup;
898 
899 		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
900 		if (rc)
901 			goto error_setup;
902 	}
903 
904 	return 0;
905 
906 error_setup:
907 	vu_del_vqs(vdev);
908 	return rc;
909 }
910 
911 static u64 vu_get_features(struct virtio_device *vdev)
912 {
913 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
914 
915 	return vu_dev->features;
916 }
917 
918 static int vu_finalize_features(struct virtio_device *vdev)
919 {
920 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
921 	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
922 
923 	vring_transport_features(vdev);
924 	vu_dev->features = vdev->features | supported;
925 
926 	return vhost_user_set_features(vu_dev, vu_dev->features);
927 }
928 
929 static const char *vu_bus_name(struct virtio_device *vdev)
930 {
931 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
932 
933 	return vu_dev->pdev->name;
934 }
935 
936 static const struct virtio_config_ops virtio_uml_config_ops = {
937 	.get = vu_get,
938 	.set = vu_set,
939 	.get_status = vu_get_status,
940 	.set_status = vu_set_status,
941 	.reset = vu_reset,
942 	.find_vqs = vu_find_vqs,
943 	.del_vqs = vu_del_vqs,
944 	.get_features = vu_get_features,
945 	.finalize_features = vu_finalize_features,
946 	.bus_name = vu_bus_name,
947 };
948 
949 static void virtio_uml_release_dev(struct device *d)
950 {
951 	struct virtio_device *vdev =
952 			container_of(d, struct virtio_device, dev);
953 	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
954 
955 	/* might not have been opened due to not negotiating the feature */
956 	if (vu_dev->req_fd >= 0) {
957 		um_free_irq(VIRTIO_IRQ, vu_dev);
958 		os_close_file(vu_dev->req_fd);
959 	}
960 
961 	os_close_file(vu_dev->sock);
962 }
963 
964 /* Platform device */
965 
966 struct virtio_uml_platform_data {
967 	u32 virtio_device_id;
968 	const char *socket_path;
969 };
970 
971 static int virtio_uml_probe(struct platform_device *pdev)
972 {
973 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
974 	struct virtio_uml_device *vu_dev;
975 	int rc;
976 
977 	if (!pdata)
978 		return -EINVAL;
979 
980 	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
981 	if (!vu_dev)
982 		return -ENOMEM;
983 
984 	vu_dev->vdev.dev.parent = &pdev->dev;
985 	vu_dev->vdev.dev.release = virtio_uml_release_dev;
986 	vu_dev->vdev.config = &virtio_uml_config_ops;
987 	vu_dev->vdev.id.device = pdata->virtio_device_id;
988 	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
989 	vu_dev->pdev = pdev;
990 	vu_dev->req_fd = -1;
991 
992 	do {
993 		rc = os_connect_socket(pdata->socket_path);
994 	} while (rc == -EINTR);
995 	if (rc < 0)
996 		return rc;
997 	vu_dev->sock = rc;
998 
999 	rc = vhost_user_init(vu_dev);
1000 	if (rc)
1001 		goto error_init;
1002 
1003 	platform_set_drvdata(pdev, vu_dev);
1004 
1005 	rc = register_virtio_device(&vu_dev->vdev);
1006 	if (rc)
1007 		put_device(&vu_dev->vdev.dev);
1008 	return rc;
1009 
1010 error_init:
1011 	os_close_file(vu_dev->sock);
1012 	return rc;
1013 }
1014 
1015 static int virtio_uml_remove(struct platform_device *pdev)
1016 {
1017 	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1018 
1019 	unregister_virtio_device(&vu_dev->vdev);
1020 	return 0;
1021 }
1022 
1023 /* Command line device list */
1024 
/*
 * Release callback for vu_cmdline_parent. That device is a static
 * object, so there is nothing to free here.
 */
static void vu_cmdline_release_dev(struct device *d)
{
	/* intentionally empty */
}
1028 
1029 static struct device vu_cmdline_parent = {
1030 	.init_name = "virtio-uml-cmdline",
1031 	.release = vu_cmdline_release_dev,
1032 };
1033 
1034 static bool vu_cmdline_parent_registered;
1035 static int vu_cmdline_id;
1036 
1037 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1038 {
1039 	const char *ids = strchr(device, ':');
1040 	unsigned int virtio_device_id;
1041 	int processed, consumed, err;
1042 	char *socket_path;
1043 	struct virtio_uml_platform_data pdata;
1044 	struct platform_device *pdev;
1045 
1046 	if (!ids || ids == device)
1047 		return -EINVAL;
1048 
1049 	processed = sscanf(ids, ":%u%n:%d%n",
1050 			   &virtio_device_id, &consumed,
1051 			   &vu_cmdline_id, &consumed);
1052 
1053 	if (processed < 1 || ids[consumed])
1054 		return -EINVAL;
1055 
1056 	if (!vu_cmdline_parent_registered) {
1057 		err = device_register(&vu_cmdline_parent);
1058 		if (err) {
1059 			pr_err("Failed to register parent device!\n");
1060 			put_device(&vu_cmdline_parent);
1061 			return err;
1062 		}
1063 		vu_cmdline_parent_registered = true;
1064 	}
1065 
1066 	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1067 	if (!socket_path)
1068 		return -ENOMEM;
1069 
1070 	pdata.virtio_device_id = (u32) virtio_device_id;
1071 	pdata.socket_path = socket_path;
1072 
1073 	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1074 		vu_cmdline_id, virtio_device_id, socket_path);
1075 
1076 	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1077 					     vu_cmdline_id++, &pdata,
1078 					     sizeof(pdata));
1079 	err = PTR_ERR_OR_ZERO(pdev);
1080 	if (err)
1081 		goto free;
1082 	return 0;
1083 
1084 free:
1085 	kfree(socket_path);
1086 	return err;
1087 }
1088 
1089 static int vu_cmdline_get_device(struct device *dev, void *data)
1090 {
1091 	struct platform_device *pdev = to_platform_device(dev);
1092 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1093 	char *buffer = data;
1094 	unsigned int len = strlen(buffer);
1095 
1096 	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1097 		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1098 	return 0;
1099 }
1100 
1101 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1102 {
1103 	buffer[0] = '\0';
1104 	if (vu_cmdline_parent_registered)
1105 		device_for_each_child(&vu_cmdline_parent, buffer,
1106 				      vu_cmdline_get_device);
1107 	return strlen(buffer) + 1;
1108 }
1109 
1110 static const struct kernel_param_ops vu_cmdline_param_ops = {
1111 	.set = vu_cmdline_set,
1112 	.get = vu_cmdline_get,
1113 };
1114 
1115 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1116 __uml_help(vu_cmdline_param_ops,
1117 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1118 "    Configure a virtio device over a vhost-user socket.\n"
1119 "    See virtio_ids.h for a list of possible virtio device id values.\n"
1120 "    Optionally use a specific platform_device id.\n\n"
1121 );
1122 
1123 
1124 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1125 {
1126 	struct platform_device *pdev = to_platform_device(dev);
1127 	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1128 
1129 	kfree(pdata->socket_path);
1130 	platform_device_unregister(pdev);
1131 	return 0;
1132 }
1133 
1134 static void vu_unregister_cmdline_devices(void)
1135 {
1136 	if (vu_cmdline_parent_registered) {
1137 		device_for_each_child(&vu_cmdline_parent, NULL,
1138 				      vu_unregister_cmdline_device);
1139 		device_unregister(&vu_cmdline_parent);
1140 		vu_cmdline_parent_registered = false;
1141 	}
1142 }
1143 
1144 /* Platform driver */
1145 
1146 static const struct of_device_id virtio_uml_match[] = {
1147 	{ .compatible = "virtio,uml", },
1148 	{ }
1149 };
1150 MODULE_DEVICE_TABLE(of, virtio_uml_match);
1151 
1152 static struct platform_driver virtio_uml_driver = {
1153 	.probe = virtio_uml_probe,
1154 	.remove = virtio_uml_remove,
1155 	.driver = {
1156 		.name = "virtio-uml",
1157 		.of_match_table = virtio_uml_match,
1158 	},
1159 };
1160 
1161 static int __init virtio_uml_init(void)
1162 {
1163 	return platform_driver_register(&virtio_uml_driver);
1164 }
1165 
1166 static void __exit virtio_uml_exit(void)
1167 {
1168 	platform_driver_unregister(&virtio_uml_driver);
1169 	vu_unregister_cmdline_devices();
1170 }
1171 
1172 module_init(virtio_uml_init);
1173 module_exit(virtio_uml_exit);
1174 __uml_exitcall(virtio_uml_exit);
1175 
1176 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1177 MODULE_LICENSE("GPL");
1178