1 /*
2  * VMware VMCI Driver
3  *
4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation version 2 and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * for more details.
14  */
15 
16 #include <linux/vmw_vmci_defs.h>
17 #include <linux/vmw_vmci_api.h>
18 #include <linux/highmem.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/module.h>
22 #include <linux/mutex.h>
23 #include <linux/pagemap.h>
24 #include <linux/sched.h>
25 #include <linux/slab.h>
26 #include <linux/uio.h>
27 #include <linux/wait.h>
28 #include <linux/vmalloc.h>
29 
30 #include "vmci_handle_array.h"
31 #include "vmci_queue_pair.h"
32 #include "vmci_datagram.h"
33 #include "vmci_resource.h"
34 #include "vmci_context.h"
35 #include "vmci_driver.h"
36 #include "vmci_event.h"
37 #include "vmci_route.h"
38 
39 /*
40  * In the following, we will distinguish between two kinds of VMX processes -
41  * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
 * VMCI page files in the VMX to support VM to VM communication, and the
43  * newer ones that use the guest memory directly. We will in the following
44  * refer to the older VMX versions as old-style VMX'en, and the newer ones as
45  * new-style VMX'en.
46  *
 * The state transition diagram is as follows (the VMCIQPB_ prefix has been
 * removed for readability) - see below for more details on the transitions:
49  *
50  *            --------------  NEW  -------------
51  *            |                                |
52  *           \_/                              \_/
53  *     CREATED_NO_MEM <-----------------> CREATED_MEM
54  *            |    |                           |
55  *            |    o-----------------------o   |
56  *            |                            |   |
57  *           \_/                          \_/ \_/
58  *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
59  *            |                            |   |
60  *            |     o----------------------o   |
61  *            |     |                          |
62  *           \_/   \_/                        \_/
63  *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
64  *            |                                |
65  *            |                                |
66  *            -------------> gone <-------------
67  *
68  * In more detail. When a VMCI queue pair is first created, it will be in the
69  * VMCIQPB_NEW state. It will then move into one of the following states:
70  *
71  * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
72  *
 *     - the create was performed by a host endpoint, in which case there is
74  *       no backing memory yet.
75  *
76  *     - the create was initiated by an old-style VMX, that uses
77  *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
78  *       a later point in time. This state can be distinguished from the one
79  *       above by the context ID of the creator. A host side is not allowed to
80  *       attach until the page store has been set.
81  *
82  * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
83  *     is created by a VMX using the queue pair device backend that
84  *     sets the UVAs of the queue pair immediately and stores the
85  *     information for later attachers. At this point, it is ready for
86  *     the host side to attach to it.
87  *
88  * Once the queue pair is in one of the created states (with the exception of
89  * the case mentioned for older VMX'en above), it is possible to attach to the
90  * queue pair. Again we have two new states possible:
91  *
92  * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
93  *   paths:
94  *
95  *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
96  *       pair, and attaches to a queue pair previously created by the host side.
97  *
98  *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
99  *       already created by a guest.
100  *
101  *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
102  *       vmci_qp_broker_set_page_store (see below).
103  *
 * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
 *     VMCIQPB_CREATED_NO_MEM state due to a host side create, an old-style
 *     VMX will bring the queue pair into this state. Once
 *     vmci_qp_broker_set_page_store is called to register the user memory,
 *     the VMCIQPB_ATTACHED_MEM state will be entered.
109  *
110  * From the attached queue pair, the queue pair can enter the shutdown states
111  * when either side of the queue pair detaches. If the guest side detaches
112  * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
113  * the content of the queue pair will no longer be available. If the host
114  * side detaches first, the queue pair will either enter the
115  * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
116  * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
117  * (e.g., the host detaches while a guest is stunned).
118  *
119  * New-style VMX'en will also unmap guest memory, if the guest is
120  * quiesced, e.g., during a snapshot operation. In that case, the guest
121  * memory will no longer be available, and the queue pair will transition from
122  * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
123  * in which case the queue pair will transition from the *_NO_MEM state at that
124  * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
125  * since the peer may have either attached or detached in the meantime. The
126  * values are laid out such that ++ on a state will move from a *_NO_MEM to a
127  * *_MEM state, and vice versa.
128  */
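
/*
 * As an illustration of the above (one possible path, not an exhaustive
 * list): for a new-style VMX that creates a queue pair, a host endpoint
 * that attaches to it, and a guest that is later quiesced and resumed,
 * the broker state would move through
 *
 *   NEW -> CREATED_MEM -> ATTACHED_MEM        (create, host attach)
 *   ATTACHED_MEM -> ATTACHED_NO_MEM           (guest memory unmapped)
 *   ATTACHED_NO_MEM -> ATTACHED_MEM           (guest memory mapped again)
 *   ATTACHED_MEM -> SHUTDOWN_[NO_]MEM -> gone (first and second detach)
 */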
129 
130 /*
131  * VMCIMemcpy{To,From}QueueFunc() prototypes.  Functions of these
132  * types are passed around to enqueue and dequeue routines.  Note that
133  * often the functions passed are simply wrappers around memcpy
134  * itself.
135  *
136  * Note: In order for the memcpy typedefs to be compatible with the VMKernel,
137  * there's an unused last parameter for the hosted side.  In
138  * ESX, that parameter holds a buffer type.
139  */
140 typedef int vmci_memcpy_to_queue_func(struct vmci_queue *queue,
141 				      u64 queue_offset, const void *src,
142 				      size_t src_offset, size_t size);
143 typedef int vmci_memcpy_from_queue_func(void *dest, size_t dest_offset,
144 					const struct vmci_queue *queue,
145 					u64 queue_offset, size_t size);
146 
147 /* The Kernel specific component of the struct vmci_queue structure. */
148 struct vmci_queue_kern_if {
149 	struct page **page;
150 	struct page **header_page;
151 	void *va;
152 	struct mutex __mutex;	/* Protects the queue. */
153 	struct mutex *mutex;	/* Shared by producer and consumer queues. */
154 	bool host;
155 	size_t num_pages;
156 	bool mapped;
157 };
158 
159 /*
160  * This structure is opaque to the clients.
161  */
162 struct vmci_qp {
163 	struct vmci_handle handle;
164 	struct vmci_queue *produce_q;
165 	struct vmci_queue *consume_q;
166 	u64 produce_q_size;
167 	u64 consume_q_size;
168 	u32 peer;
169 	u32 flags;
170 	u32 priv_flags;
171 	bool guest_endpoint;
172 	unsigned int blocked;
173 	unsigned int generation;
174 	wait_queue_head_t event;
175 };
176 
177 enum qp_broker_state {
178 	VMCIQPB_NEW,
179 	VMCIQPB_CREATED_NO_MEM,
180 	VMCIQPB_CREATED_MEM,
181 	VMCIQPB_ATTACHED_NO_MEM,
182 	VMCIQPB_ATTACHED_MEM,
183 	VMCIQPB_SHUTDOWN_NO_MEM,
184 	VMCIQPB_SHUTDOWN_MEM,
185 	VMCIQPB_GONE
186 };
187 
188 #define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
189 				     _qpb->state == VMCIQPB_ATTACHED_MEM || \
190 				     _qpb->state == VMCIQPB_SHUTDOWN_MEM)
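
/*
 * As noted in the comment at the top of this file, the enum values are laid
 * out so that each *_NO_MEM state immediately precedes its *_MEM
 * counterpart.  Code that maps or unmaps the guest memory backing a queue
 * pair can therefore toggle between the two flavors arithmetically; a
 * minimal sketch (illustrative only, not a helper defined in this file):
 *
 *	if (QPBROKERSTATE_HAS_MEM(entry))
 *		entry->state--;		// e.g. ATTACHED_MEM -> ATTACHED_NO_MEM
 *	else
 *		entry->state++;		// e.g. ATTACHED_NO_MEM -> ATTACHED_MEM
 */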
191 
192 /*
193  * In the queue pair broker, we always use the guest point of view for
194  * the produce and consume queue values and references, e.g., the
 * produce queue size stored is the guest's produce queue size. The
196  * host endpoint will need to swap these around. The only exception is
197  * the local queue pairs on the host, in which case the host endpoint
198  * that creates the queue pair will have the right orientation, and
199  * the attaching host endpoint will need to swap.
200  */
201 struct qp_entry {
202 	struct list_head list_item;
203 	struct vmci_handle handle;
204 	u32 peer;
205 	u32 flags;
206 	u64 produce_size;
207 	u64 consume_size;
208 	u32 ref_count;
209 };
210 
211 struct qp_broker_entry {
212 	struct vmci_resource resource;
213 	struct qp_entry qp;
214 	u32 create_id;
215 	u32 attach_id;
216 	enum qp_broker_state state;
217 	bool require_trusted_attach;
218 	bool created_by_trusted;
219 	bool vmci_page_files;	/* Created by VMX using VMCI page files */
220 	struct vmci_queue *produce_q;
221 	struct vmci_queue *consume_q;
222 	struct vmci_queue_header saved_produce_q;
223 	struct vmci_queue_header saved_consume_q;
224 	vmci_event_release_cb wakeup_cb;
225 	void *client_data;
226 	void *local_mem;	/* Kernel memory for local queue pair */
227 };
228 
229 struct qp_guest_endpoint {
230 	struct vmci_resource resource;
231 	struct qp_entry qp;
232 	u64 num_ppns;
233 	void *produce_q;
234 	void *consume_q;
235 	struct ppn_set ppn_set;
236 };
237 
238 struct qp_list {
239 	struct list_head head;
240 	struct mutex mutex;	/* Protect queue list. */
241 };
242 
243 static struct qp_list qp_broker_list = {
244 	.head = LIST_HEAD_INIT(qp_broker_list.head),
245 	.mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
246 };
247 
248 static struct qp_list qp_guest_endpoints = {
249 	.head = LIST_HEAD_INIT(qp_guest_endpoints.head),
250 	.mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
251 };
252 
253 #define INVALID_VMCI_GUEST_MEM_ID  0
254 #define QPE_NUM_PAGES(_QPE) ((u32) \
255 			     (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
256 			      DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
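
/*
 * Worked example for QPE_NUM_PAGES, assuming 4 KiB pages: an entry with a
 * 64 KiB produce queue and a 32 KiB consume queue needs
 * 16 + 8 + 2 = 26 pages, where the final two pages hold the produce and
 * consume queue headers.
 */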
257 
258 
259 /*
260  * Frees kernel VA space for a given queue and its queue header, and
261  * frees physical data pages.
262  */
263 static void qp_free_queue(void *q, u64 size)
264 {
265 	struct vmci_queue *queue = q;
266 
267 	if (queue) {
268 		u64 i = DIV_ROUND_UP(size, PAGE_SIZE);
269 
270 		if (queue->kernel_if->mapped) {
271 			vunmap(queue->kernel_if->va);
272 			queue->kernel_if->va = NULL;
273 		}
274 
275 		while (i)
276 			__free_page(queue->kernel_if->page[--i]);
277 
278 		vfree(queue->q_header);
279 	}
280 }
281 
282 /*
283  * Allocates kernel VA space of specified size, plus space for the
284  * queue structure/kernel interface and the queue header.  Allocates
285  * physical pages for the queue data pages.
286  *
287  * PAGE m:      struct vmci_queue_header (struct vmci_queue->q_header)
288  * PAGE m+1:    struct vmci_queue
289  * PAGE m+1+q:  struct vmci_queue_kern_if (struct vmci_queue->kernel_if)
290  * PAGE n-size: Data pages (struct vmci_queue->kernel_if->page[])
291  */
292 static void *qp_alloc_queue(u64 size, u32 flags)
293 {
294 	u64 i;
295 	struct vmci_queue *queue;
296 	struct vmci_queue_header *q_header;
297 	const u64 num_data_pages = DIV_ROUND_UP(size, PAGE_SIZE);
298 	const uint queue_size =
299 	    PAGE_SIZE +
300 	    sizeof(*queue) + sizeof(*(queue->kernel_if)) +
301 	    num_data_pages * sizeof(*(queue->kernel_if->page));
302 
303 	q_header = vmalloc(queue_size);
304 	if (!q_header)
305 		return NULL;
306 
307 	queue = (void *)q_header + PAGE_SIZE;
308 	queue->q_header = q_header;
309 	queue->saved_header = NULL;
310 	queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
311 	queue->kernel_if->header_page = NULL;	/* Unused in guest. */
312 	queue->kernel_if->page = (struct page **)(queue->kernel_if + 1);
313 	queue->kernel_if->host = false;
314 	queue->kernel_if->va = NULL;
315 	queue->kernel_if->mapped = false;
316 
317 	for (i = 0; i < num_data_pages; i++) {
318 		queue->kernel_if->page[i] = alloc_pages(GFP_KERNEL, 0);
319 		if (!queue->kernel_if->page[i])
320 			goto fail;
321 	}
322 
323 	if (vmci_qp_pinned(flags)) {
324 		queue->kernel_if->va =
325 		    vmap(queue->kernel_if->page, num_data_pages, VM_MAP,
326 			 PAGE_KERNEL);
327 		if (!queue->kernel_if->va)
328 			goto fail;
329 
330 		queue->kernel_if->mapped = true;
331 	}
332 
333 	return (void *)queue;
334 
335  fail:
336 	qp_free_queue(queue, i * PAGE_SIZE);
337 	return NULL;
338 }
339 
340 /*
341  * Copies from a given buffer or iovector to a VMCI Queue.  Uses
342  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
343  * by traversing the offset -> page translation structure for the queue.
344  * Assumes that offset + size does not wrap around in the queue.
345  */
346 static int __qp_memcpy_to_queue(struct vmci_queue *queue,
347 				u64 queue_offset,
348 				const void *src,
349 				size_t size,
350 				bool is_iovec)
351 {
352 	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
353 	size_t bytes_copied = 0;
354 
355 	while (bytes_copied < size) {
356 		u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
357 		size_t page_offset =
358 		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
359 		void *va;
360 		size_t to_copy;
361 
362 		if (!kernel_if->mapped)
363 			va = kmap(kernel_if->page[page_index]);
364 		else
365 			va = (void *)((u8 *)kernel_if->va +
366 				      (page_index * PAGE_SIZE));
367 
368 		if (size - bytes_copied > PAGE_SIZE - page_offset)
369 			/* Enough payload to fill up from this page. */
370 			to_copy = PAGE_SIZE - page_offset;
371 		else
372 			to_copy = size - bytes_copied;
373 
374 		if (is_iovec) {
375 			struct iovec *iov = (struct iovec *)src;
376 			int err;
377 
378 			/* The iovec will track bytes_copied internally. */
379 			err = memcpy_fromiovec((u8 *)va + page_offset,
380 					       iov, to_copy);
381 			if (err != 0) {
382 				kunmap(kernel_if->page[page_index]);
383 				return VMCI_ERROR_INVALID_ARGS;
384 			}
385 		} else {
386 			memcpy((u8 *)va + page_offset,
387 			       (u8 *)src + bytes_copied, to_copy);
388 		}
389 
390 		bytes_copied += to_copy;
391 		if (!kernel_if->mapped)
392 			kunmap(kernel_if->page[page_index]);
393 	}
394 
395 	return VMCI_SUCCESS;
396 }
397 
398 /*
399  * Copies to a given buffer or iovector from a VMCI Queue.  Uses
400  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
401  * by traversing the offset -> page translation structure for the queue.
402  * Assumes that offset + size does not wrap around in the queue.
403  */
404 static int __qp_memcpy_from_queue(void *dest,
405 				  const struct vmci_queue *queue,
406 				  u64 queue_offset,
407 				  size_t size,
408 				  bool is_iovec)
409 {
410 	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
411 	size_t bytes_copied = 0;
412 
413 	while (bytes_copied < size) {
414 		u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE;
415 		size_t page_offset =
416 		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
417 		void *va;
418 		size_t to_copy;
419 
420 		if (!kernel_if->mapped)
421 			va = kmap(kernel_if->page[page_index]);
422 		else
423 			va = (void *)((u8 *)kernel_if->va +
424 				      (page_index * PAGE_SIZE));
425 
426 		if (size - bytes_copied > PAGE_SIZE - page_offset)
427 			/* Enough payload to fill up this page. */
428 			to_copy = PAGE_SIZE - page_offset;
429 		else
430 			to_copy = size - bytes_copied;
431 
432 		if (is_iovec) {
433 			struct iovec *iov = (struct iovec *)dest;
434 			int err;
435 
436 			/* The iovec will track bytes_copied internally. */
437 			err = memcpy_toiovec(iov, (u8 *)va + page_offset,
438 					     to_copy);
439 			if (err != 0) {
440 				kunmap(kernel_if->page[page_index]);
441 				return VMCI_ERROR_INVALID_ARGS;
442 			}
443 		} else {
444 			memcpy((u8 *)dest + bytes_copied,
445 			       (u8 *)va + page_offset, to_copy);
446 		}
447 
448 		bytes_copied += to_copy;
449 		if (!kernel_if->mapped)
450 			kunmap(kernel_if->page[page_index]);
451 	}
452 
453 	return VMCI_SUCCESS;
454 }
455 
456 /*
 * Allocates two lists of PPNs: one for the pages in the produce queue,
 * and the other for the pages in the consume queue. Initializes the lists
459  * of PPNs with the page frame numbers of the KVA for the two queues (and
460  * the queue headers).
461  */
462 static int qp_alloc_ppn_set(void *prod_q,
463 			    u64 num_produce_pages,
464 			    void *cons_q,
465 			    u64 num_consume_pages, struct ppn_set *ppn_set)
466 {
467 	u32 *produce_ppns;
468 	u32 *consume_ppns;
469 	struct vmci_queue *produce_q = prod_q;
470 	struct vmci_queue *consume_q = cons_q;
471 	u64 i;
472 
473 	if (!produce_q || !num_produce_pages || !consume_q ||
474 	    !num_consume_pages || !ppn_set)
475 		return VMCI_ERROR_INVALID_ARGS;
476 
477 	if (ppn_set->initialized)
478 		return VMCI_ERROR_ALREADY_EXISTS;
479 
480 	produce_ppns =
481 	    kmalloc(num_produce_pages * sizeof(*produce_ppns), GFP_KERNEL);
482 	if (!produce_ppns)
483 		return VMCI_ERROR_NO_MEM;
484 
485 	consume_ppns =
486 	    kmalloc(num_consume_pages * sizeof(*consume_ppns), GFP_KERNEL);
487 	if (!consume_ppns) {
488 		kfree(produce_ppns);
489 		return VMCI_ERROR_NO_MEM;
490 	}
491 
492 	produce_ppns[0] = page_to_pfn(vmalloc_to_page(produce_q->q_header));
493 	for (i = 1; i < num_produce_pages; i++) {
494 		unsigned long pfn;
495 
496 		produce_ppns[i] =
497 		    page_to_pfn(produce_q->kernel_if->page[i - 1]);
498 		pfn = produce_ppns[i];
499 
500 		/* Fail allocation if PFN isn't supported by hypervisor. */
501 		if (sizeof(pfn) > sizeof(*produce_ppns)
502 		    && pfn != produce_ppns[i])
503 			goto ppn_error;
504 	}
505 
506 	consume_ppns[0] = page_to_pfn(vmalloc_to_page(consume_q->q_header));
507 	for (i = 1; i < num_consume_pages; i++) {
508 		unsigned long pfn;
509 
510 		consume_ppns[i] =
511 		    page_to_pfn(consume_q->kernel_if->page[i - 1]);
512 		pfn = consume_ppns[i];
513 
514 		/* Fail allocation if PFN isn't supported by hypervisor. */
515 		if (sizeof(pfn) > sizeof(*consume_ppns)
516 		    && pfn != consume_ppns[i])
517 			goto ppn_error;
518 	}
519 
520 	ppn_set->num_produce_pages = num_produce_pages;
521 	ppn_set->num_consume_pages = num_consume_pages;
522 	ppn_set->produce_ppns = produce_ppns;
523 	ppn_set->consume_ppns = consume_ppns;
524 	ppn_set->initialized = true;
525 	return VMCI_SUCCESS;
526 
527  ppn_error:
528 	kfree(produce_ppns);
529 	kfree(consume_ppns);
530 	return VMCI_ERROR_INVALID_ARGS;
531 }
532 
533 /*
 * Frees the two lists of PPNs for a queue pair.
535  */
536 static void qp_free_ppn_set(struct ppn_set *ppn_set)
537 {
538 	if (ppn_set->initialized) {
539 		/* Do not call these functions on NULL inputs. */
540 		kfree(ppn_set->produce_ppns);
541 		kfree(ppn_set->consume_ppns);
542 	}
543 	memset(ppn_set, 0, sizeof(*ppn_set));
544 }
545 
546 /*
 * Populates the list of PPNs in the hypercall structure with the PPNs
548  * of the produce queue and the consume queue.
549  */
550 static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
551 {
552 	memcpy(call_buf, ppn_set->produce_ppns,
553 	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
554 	memcpy(call_buf +
555 	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
556 	       ppn_set->consume_ppns,
557 	       ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
558 
559 	return VMCI_SUCCESS;
560 }
561 
562 static int qp_memcpy_to_queue(struct vmci_queue *queue,
563 			      u64 queue_offset,
564 			      const void *src, size_t src_offset, size_t size)
565 {
566 	return __qp_memcpy_to_queue(queue, queue_offset,
567 				    (u8 *)src + src_offset, size, false);
568 }
569 
570 static int qp_memcpy_from_queue(void *dest,
571 				size_t dest_offset,
572 				const struct vmci_queue *queue,
573 				u64 queue_offset, size_t size)
574 {
575 	return __qp_memcpy_from_queue((u8 *)dest + dest_offset,
576 				      queue, queue_offset, size, false);
577 }
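
/*
 * qp_memcpy_to_queue() and qp_memcpy_from_queue() match the
 * vmci_memcpy_to_queue_func and vmci_memcpy_from_queue_func typedefs above,
 * so they can be handed to the generic enqueue/dequeue routines as the copy
 * callbacks for plain kernel buffers; the _iov variants below play the same
 * role when the source/destination is a struct iovec.  A sketch of such a
 * call site (the enqueue helper named here is illustrative, not defined in
 * this file):
 *
 *	result = enqueue_locked(produce_q, consume_q, produce_q_size,
 *				buf, buf_size, qp_memcpy_to_queue);
 */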
578 
579 /*
 * Copies from a given iovec to a VMCI Queue.
581  */
582 static int qp_memcpy_to_queue_iov(struct vmci_queue *queue,
583 				  u64 queue_offset,
584 				  const void *src,
585 				  size_t src_offset, size_t size)
586 {
587 
588 	/*
589 	 * We ignore src_offset because src is really a struct iovec * and will
590 	 * maintain offset internally.
591 	 */
592 	return __qp_memcpy_to_queue(queue, queue_offset, src, size, true);
593 }
594 
595 /*
596  * Copies to a given iovec from a VMCI Queue.
597  */
598 static int qp_memcpy_from_queue_iov(void *dest,
599 				    size_t dest_offset,
600 				    const struct vmci_queue *queue,
601 				    u64 queue_offset, size_t size)
602 {
603 	/*
604 	 * We ignore dest_offset because dest is really a struct iovec * and
605 	 * will maintain offset internally.
606 	 */
607 	return __qp_memcpy_from_queue(dest, queue, queue_offset, size, true);
608 }
609 
610 /*
611  * Allocates kernel VA space of specified size plus space for the queue
612  * and kernel interface.  This is different from the guest queue allocator,
613  * because we do not allocate our own queue header/data pages here but
614  * share those of the guest.
615  */
616 static struct vmci_queue *qp_host_alloc_queue(u64 size)
617 {
618 	struct vmci_queue *queue;
619 	const size_t num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
620 	const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
621 	const size_t queue_page_size =
622 	    num_pages * sizeof(*queue->kernel_if->page);
623 
624 	queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
625 	if (queue) {
626 		queue->q_header = NULL;
627 		queue->saved_header = NULL;
628 		queue->kernel_if =
629 		    (struct vmci_queue_kern_if *)((u8 *)queue +
630 						  sizeof(*queue));
631 		queue->kernel_if->host = true;
632 		queue->kernel_if->mutex = NULL;
633 		queue->kernel_if->num_pages = num_pages;
634 		queue->kernel_if->header_page =
635 		    (struct page **)((u8 *)queue + queue_size);
636 		queue->kernel_if->page = &queue->kernel_if->header_page[1];
637 		queue->kernel_if->va = NULL;
638 		queue->kernel_if->mapped = false;
639 	}
640 
641 	return queue;
642 }
643 
644 /*
645  * Frees kernel memory for a given queue (header plus translation
646  * structure).
647  */
648 static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
649 {
650 	kfree(queue);
651 }
652 
653 /*
654  * Initialize the mutex for the pair of queues.  This mutex is used to
655  * protect the q_header and the buffer from changing out from under any
656  * users of either queue.  Of course, it's only any good if the mutexes
 * are actually acquired.  The queue structure must lie in non-paged
 * memory or we cannot guarantee access to the mutex.
659  */
660 static void qp_init_queue_mutex(struct vmci_queue *produce_q,
661 				struct vmci_queue *consume_q)
662 {
663 	/*
664 	 * Only the host queue has shared state - the guest queues do not
665 	 * need to synchronize access using a queue mutex.
666 	 */
667 
668 	if (produce_q->kernel_if->host) {
669 		produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
670 		consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
671 		mutex_init(produce_q->kernel_if->mutex);
672 	}
673 }
674 
675 /*
676  * Cleans up the mutex for the pair of queues.
677  */
678 static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
679 				   struct vmci_queue *consume_q)
680 {
681 	if (produce_q->kernel_if->host) {
682 		produce_q->kernel_if->mutex = NULL;
683 		consume_q->kernel_if->mutex = NULL;
684 	}
685 }
686 
687 /*
688  * Acquire the mutex for the queue.  Note that the produce_q and
 * the consume_q share a mutex.  So, only one of the two needs to
690  * be passed in to this routine.  Either will work just fine.
691  */
692 static void qp_acquire_queue_mutex(struct vmci_queue *queue)
693 {
694 	if (queue->kernel_if->host)
695 		mutex_lock(queue->kernel_if->mutex);
696 }
697 
698 /*
699  * Release the mutex for the queue.  Note that the produce_q and
 * the consume_q share a mutex.  So, only one of the two needs to
701  * be passed in to this routine.  Either will work just fine.
702  */
703 static void qp_release_queue_mutex(struct vmci_queue *queue)
704 {
705 	if (queue->kernel_if->host)
706 		mutex_unlock(queue->kernel_if->mutex);
707 }
708 
709 /*
 * Helper function to release pages previously obtained using
 * get_user_pages.
712  */
713 static void qp_release_pages(struct page **pages,
714 			     u64 num_pages, bool dirty)
715 {
716 	int i;
717 
718 	for (i = 0; i < num_pages; i++) {
719 		if (dirty)
720 			set_page_dirty(pages[i]);
721 
722 		page_cache_release(pages[i]);
723 		pages[i] = NULL;
724 	}
725 }
726 
727 /*
 * Lock the user pages backing the produce and consume queues into
 * memory and populate the header_page arrays of their kernel_if
 * structures with them.
731  */
732 static int qp_host_get_user_memory(u64 produce_uva,
733 				   u64 consume_uva,
734 				   struct vmci_queue *produce_q,
735 				   struct vmci_queue *consume_q)
736 {
737 	int retval;
738 	int err = VMCI_SUCCESS;
739 
740 	down_write(&current->mm->mmap_sem);
741 	retval = get_user_pages(current,
742 				current->mm,
743 				(uintptr_t) produce_uva,
744 				produce_q->kernel_if->num_pages,
745 				1, 0, produce_q->kernel_if->header_page, NULL);
746 	if (retval < produce_q->kernel_if->num_pages) {
747 		pr_warn("get_user_pages(produce) failed (retval=%d)", retval);
748 		qp_release_pages(produce_q->kernel_if->header_page, retval,
749 				 false);
750 		err = VMCI_ERROR_NO_MEM;
751 		goto out;
752 	}
753 
754 	retval = get_user_pages(current,
755 				current->mm,
756 				(uintptr_t) consume_uva,
757 				consume_q->kernel_if->num_pages,
758 				1, 0, consume_q->kernel_if->header_page, NULL);
759 	if (retval < consume_q->kernel_if->num_pages) {
760 		pr_warn("get_user_pages(consume) failed (retval=%d)", retval);
761 		qp_release_pages(consume_q->kernel_if->header_page, retval,
762 				 false);
763 		qp_release_pages(produce_q->kernel_if->header_page,
764 				 produce_q->kernel_if->num_pages, false);
765 		err = VMCI_ERROR_NO_MEM;
766 	}
767 
768  out:
769 	up_write(&current->mm->mmap_sem);
770 
771 	return err;
772 }
773 
774 /*
775  * Registers the specification of the user pages used for backing a queue
776  * pair. Enough information to map in pages is stored in the OS specific
777  * part of the struct vmci_queue structure.
778  */
779 static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
780 					struct vmci_queue *produce_q,
781 					struct vmci_queue *consume_q)
782 {
783 	u64 produce_uva;
784 	u64 consume_uva;
785 
786 	/*
	 * The new style and the old style mapping differ only in
	 * that we get either a single UVA or two UVAs, so we split the
789 	 * single UVA range at the appropriate spot.
790 	 */
791 	produce_uva = page_store->pages;
792 	consume_uva = page_store->pages +
793 	    produce_q->kernel_if->num_pages * PAGE_SIZE;
794 	return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
795 				       consume_q);
796 }
797 
798 /*
799  * Releases and removes the references to user pages stored in the attach
800  * struct.  Pages are released from the page cache and may become
801  * swappable again.
802  */
803 static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
804 					   struct vmci_queue *consume_q)
805 {
806 	qp_release_pages(produce_q->kernel_if->header_page,
807 			 produce_q->kernel_if->num_pages, true);
808 	memset(produce_q->kernel_if->header_page, 0,
809 	       sizeof(*produce_q->kernel_if->header_page) *
810 	       produce_q->kernel_if->num_pages);
811 	qp_release_pages(consume_q->kernel_if->header_page,
812 			 consume_q->kernel_if->num_pages, true);
813 	memset(consume_q->kernel_if->header_page, 0,
814 	       sizeof(*consume_q->kernel_if->header_page) *
815 	       consume_q->kernel_if->num_pages);
816 }
817 
818 /*
819  * Once qp_host_register_user_memory has been performed on a
820  * queue, the queue pair headers can be mapped into the
821  * kernel. Once mapped, they must be unmapped with
822  * qp_host_unmap_queues prior to calling
823  * qp_host_unregister_user_memory.
824  * Pages are pinned.
825  */
826 static int qp_host_map_queues(struct vmci_queue *produce_q,
827 			      struct vmci_queue *consume_q)
828 {
829 	int result;
830 
831 	if (!produce_q->q_header || !consume_q->q_header) {
832 		struct page *headers[2];
833 
834 		if (produce_q->q_header != consume_q->q_header)
835 			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
836 
837 		if (produce_q->kernel_if->header_page == NULL ||
838 		    *produce_q->kernel_if->header_page == NULL)
839 			return VMCI_ERROR_UNAVAILABLE;
840 
841 		headers[0] = *produce_q->kernel_if->header_page;
842 		headers[1] = *consume_q->kernel_if->header_page;
843 
844 		produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
845 		if (produce_q->q_header != NULL) {
846 			consume_q->q_header =
847 			    (struct vmci_queue_header *)((u8 *)
848 							 produce_q->q_header +
849 							 PAGE_SIZE);
850 			result = VMCI_SUCCESS;
851 		} else {
852 			pr_warn("vmap failed\n");
853 			result = VMCI_ERROR_NO_MEM;
854 		}
855 	} else {
856 		result = VMCI_SUCCESS;
857 	}
858 
859 	return result;
860 }
861 
862 /*
863  * Unmaps previously mapped queue pair headers from the kernel.
864  * Pages are unpinned.
865  */
866 static int qp_host_unmap_queues(u32 gid,
867 				struct vmci_queue *produce_q,
868 				struct vmci_queue *consume_q)
869 {
870 	if (produce_q->q_header) {
871 		if (produce_q->q_header < consume_q->q_header)
872 			vunmap(produce_q->q_header);
873 		else
874 			vunmap(consume_q->q_header);
875 
876 		produce_q->q_header = NULL;
877 		consume_q->q_header = NULL;
878 	}
879 
880 	return VMCI_SUCCESS;
881 }
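
/*
 * Taken together with the comments above, the expected host-side ordering
 * for guest-backed memory is register, map, unmap, unregister.  A condensed
 * sketch (error handling omitted):
 *
 *	result = qp_host_register_user_memory(page_store, produce_q, consume_q);
 *	result = qp_host_map_queues(produce_q, consume_q);
 *	... access produce_q->q_header and consume_q->q_header ...
 *	qp_host_unmap_queues(gid, produce_q, consume_q);
 *	qp_host_unregister_user_memory(produce_q, consume_q);
 */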
882 
883 /*
884  * Finds the entry in the list corresponding to a given handle. Assumes
885  * that the list is locked.
886  */
887 static struct qp_entry *qp_list_find(struct qp_list *qp_list,
888 				     struct vmci_handle handle)
889 {
890 	struct qp_entry *entry;
891 
892 	if (vmci_handle_is_invalid(handle))
893 		return NULL;
894 
895 	list_for_each_entry(entry, &qp_list->head, list_item) {
896 		if (vmci_handle_is_equal(entry->handle, handle))
897 			return entry;
898 	}
899 
900 	return NULL;
901 }
902 
903 /*
904  * Finds the entry in the list corresponding to a given handle.
905  */
906 static struct qp_guest_endpoint *
907 qp_guest_handle_to_entry(struct vmci_handle handle)
908 {
909 	struct qp_guest_endpoint *entry;
910 	struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
911 
912 	entry = qp ? container_of(
913 		qp, struct qp_guest_endpoint, qp) : NULL;
914 	return entry;
915 }
916 
917 /*
918  * Finds the entry in the list corresponding to a given handle.
919  */
920 static struct qp_broker_entry *
921 qp_broker_handle_to_entry(struct vmci_handle handle)
922 {
923 	struct qp_broker_entry *entry;
924 	struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
925 
926 	entry = qp ? container_of(
927 		qp, struct qp_broker_entry, qp) : NULL;
928 	return entry;
929 }
930 
931 /*
932  * Dispatches a queue pair event message directly into the local event
933  * queue.
934  */
935 static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
936 {
937 	u32 context_id = vmci_get_context_id();
938 	struct vmci_event_qp ev;
939 
940 	ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
941 	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
942 					  VMCI_CONTEXT_RESOURCE_ID);
943 	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
944 	ev.msg.event_data.event =
945 	    attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
946 	ev.payload.peer_id = context_id;
947 	ev.payload.handle = handle;
948 
949 	return vmci_event_dispatch(&ev.msg.hdr);
950 }
951 
952 /*
953  * Allocates and initializes a qp_guest_endpoint structure.
954  * Allocates a queue_pair rid (and handle) iff the given entry has
955  * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
956  * are reserved handles.  Assumes that the QP list mutex is held
957  * by the caller.
958  */
959 static struct qp_guest_endpoint *
960 qp_guest_endpoint_create(struct vmci_handle handle,
961 			 u32 peer,
962 			 u32 flags,
963 			 u64 produce_size,
964 			 u64 consume_size,
965 			 void *produce_q,
966 			 void *consume_q)
967 {
968 	int result;
969 	struct qp_guest_endpoint *entry;
970 	/* One page each for the queue headers. */
971 	const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
972 	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
973 
974 	if (vmci_handle_is_invalid(handle)) {
975 		u32 context_id = vmci_get_context_id();
976 
977 		handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
978 	}
979 
980 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
981 	if (entry) {
982 		entry->qp.peer = peer;
983 		entry->qp.flags = flags;
984 		entry->qp.produce_size = produce_size;
985 		entry->qp.consume_size = consume_size;
986 		entry->qp.ref_count = 0;
987 		entry->num_ppns = num_ppns;
988 		entry->produce_q = produce_q;
989 		entry->consume_q = consume_q;
990 		INIT_LIST_HEAD(&entry->qp.list_item);
991 
992 		/* Add resource obj */
993 		result = vmci_resource_add(&entry->resource,
994 					   VMCI_RESOURCE_TYPE_QPAIR_GUEST,
995 					   handle);
996 		entry->qp.handle = vmci_resource_handle(&entry->resource);
997 		if ((result != VMCI_SUCCESS) ||
998 		    qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
999 			pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1000 				handle.context, handle.resource, result);
1001 			kfree(entry);
1002 			entry = NULL;
1003 		}
1004 	}
1005 	return entry;
1006 }
1007 
1008 /*
1009  * Frees a qp_guest_endpoint structure.
1010  */
1011 static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
1012 {
1013 	qp_free_ppn_set(&entry->ppn_set);
1014 	qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
1015 	qp_free_queue(entry->produce_q, entry->qp.produce_size);
1016 	qp_free_queue(entry->consume_q, entry->qp.consume_size);
1017 	/* Unlink from resource hash table and free callback */
1018 	vmci_resource_remove(&entry->resource);
1019 
1020 	kfree(entry);
1021 }
1022 
1023 /*
 * Helper to make a VMCI_QUEUEPAIR_ALLOC hypercall when the driver is
1025  * supporting a guest device.
1026  */
1027 static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
1028 {
1029 	struct vmci_qp_alloc_msg *alloc_msg;
1030 	size_t msg_size;
1031 	int result;
1032 
1033 	if (!entry || entry->num_ppns <= 2)
1034 		return VMCI_ERROR_INVALID_ARGS;
1035 
1036 	msg_size = sizeof(*alloc_msg) +
1037 	    (size_t) entry->num_ppns * sizeof(u32);
1038 	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
1039 	if (!alloc_msg)
1040 		return VMCI_ERROR_NO_MEM;
1041 
1042 	alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1043 					      VMCI_QUEUEPAIR_ALLOC);
1044 	alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
1045 	alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
1046 	alloc_msg->handle = entry->qp.handle;
1047 	alloc_msg->peer = entry->qp.peer;
1048 	alloc_msg->flags = entry->qp.flags;
1049 	alloc_msg->produce_size = entry->qp.produce_size;
1050 	alloc_msg->consume_size = entry->qp.consume_size;
1051 	alloc_msg->num_ppns = entry->num_ppns;
1052 
1053 	result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
1054 				     &entry->ppn_set);
1055 	if (result == VMCI_SUCCESS)
1056 		result = vmci_send_datagram(&alloc_msg->hdr);
1057 
1058 	kfree(alloc_msg);
1059 
1060 	return result;
1061 }
1062 
1063 /*
 * Helper to make a VMCI_QUEUEPAIR_DETACH hypercall when the driver is
1065  * supporting a guest device.
1066  */
1067 static int qp_detatch_hypercall(struct vmci_handle handle)
1068 {
1069 	struct vmci_qp_detach_msg detach_msg;
1070 
1071 	detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1072 					      VMCI_QUEUEPAIR_DETACH);
1073 	detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
1074 	detach_msg.hdr.payload_size = sizeof(handle);
1075 	detach_msg.handle = handle;
1076 
1077 	return vmci_send_datagram(&detach_msg.hdr);
1078 }
1079 
1080 /*
1081  * Adds the given entry to the list. Assumes that the list is locked.
1082  */
1083 static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
1084 {
1085 	if (entry)
1086 		list_add(&entry->list_item, &qp_list->head);
1087 }
1088 
1089 /*
1090  * Removes the given entry from the list. Assumes that the list is locked.
1091  */
1092 static void qp_list_remove_entry(struct qp_list *qp_list,
1093 				 struct qp_entry *entry)
1094 {
1095 	if (entry)
1096 		list_del(&entry->list_item);
1097 }
1098 
1099 /*
1100  * Helper for VMCI queue_pair detach interface. Frees the physical
1101  * pages for the queue pair.
1102  */
1103 static int qp_detatch_guest_work(struct vmci_handle handle)
1104 {
1105 	int result;
1106 	struct qp_guest_endpoint *entry;
1107 	u32 ref_count = ~0;	/* To avoid compiler warning below */
1108 
1109 	mutex_lock(&qp_guest_endpoints.mutex);
1110 
1111 	entry = qp_guest_handle_to_entry(handle);
1112 	if (!entry) {
1113 		mutex_unlock(&qp_guest_endpoints.mutex);
1114 		return VMCI_ERROR_NOT_FOUND;
1115 	}
1116 
1117 	if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1118 		result = VMCI_SUCCESS;
1119 
1120 		if (entry->qp.ref_count > 1) {
1121 			result = qp_notify_peer_local(false, handle);
1122 			/*
1123 			 * We can fail to notify a local queuepair
1124 			 * because we can't allocate.  We still want
1125 			 * to release the entry if that happens, so
1126 			 * don't bail out yet.
1127 			 */
1128 		}
1129 	} else {
1130 		result = qp_detatch_hypercall(handle);
1131 		if (result < VMCI_SUCCESS) {
1132 			/*
1133 			 * We failed to notify a non-local queuepair.
1134 			 * That other queuepair might still be
1135 			 * accessing the shared memory, so don't
1136 			 * release the entry yet.  It will get cleaned
1137 			 * up by VMCIqueue_pair_Exit() if necessary
1138 			 * (assuming we are going away, otherwise why
1139 			 * did this fail?).
1140 			 */
1141 
1142 			mutex_unlock(&qp_guest_endpoints.mutex);
1143 			return result;
1144 		}
1145 	}
1146 
1147 	/*
1148 	 * If we get here then we either failed to notify a local queuepair, or
1149 	 * we succeeded in all cases.  Release the entry if required.
1150 	 */
1151 
1152 	entry->qp.ref_count--;
1153 	if (entry->qp.ref_count == 0)
1154 		qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
1155 
1156 	/* If we didn't remove the entry, this could change once we unlock. */
1157 	if (entry)
1158 		ref_count = entry->qp.ref_count;
1159 
1160 	mutex_unlock(&qp_guest_endpoints.mutex);
1161 
1162 	if (ref_count == 0)
1163 		qp_guest_endpoint_destroy(entry);
1164 
1165 	return result;
1166 }
1167 
1168 /*
 * This function handles the actual allocation of a VMCI queue
1170  * pair guest endpoint. Allocates physical pages for the queue
1171  * pair. It makes OS dependent calls through generic wrappers.
1172  */
1173 static int qp_alloc_guest_work(struct vmci_handle *handle,
1174 			       struct vmci_queue **produce_q,
1175 			       u64 produce_size,
1176 			       struct vmci_queue **consume_q,
1177 			       u64 consume_size,
1178 			       u32 peer,
1179 			       u32 flags,
1180 			       u32 priv_flags)
1181 {
1182 	const u64 num_produce_pages =
1183 	    DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
1184 	const u64 num_consume_pages =
1185 	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
1186 	void *my_produce_q = NULL;
1187 	void *my_consume_q = NULL;
1188 	int result;
1189 	struct qp_guest_endpoint *queue_pair_entry = NULL;
1190 
1191 	if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
1192 		return VMCI_ERROR_NO_ACCESS;
1193 
1194 	mutex_lock(&qp_guest_endpoints.mutex);
1195 
1196 	queue_pair_entry = qp_guest_handle_to_entry(*handle);
1197 	if (queue_pair_entry) {
1198 		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1199 			/* Local attach case. */
1200 			if (queue_pair_entry->qp.ref_count > 1) {
1201 				pr_devel("Error attempting to attach more than once\n");
1202 				result = VMCI_ERROR_UNAVAILABLE;
1203 				goto error_keep_entry;
1204 			}
1205 
1206 			if (queue_pair_entry->qp.produce_size != consume_size ||
1207 			    queue_pair_entry->qp.consume_size !=
1208 			    produce_size ||
1209 			    queue_pair_entry->qp.flags !=
1210 			    (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
1211 				pr_devel("Error mismatched queue pair in local attach\n");
1212 				result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
1213 				goto error_keep_entry;
1214 			}
1215 
1216 			/*
1217 			 * Do a local attach.  We swap the consume and
1218 			 * produce queues for the attacher and deliver
1219 			 * an attach event.
1220 			 */
1221 			result = qp_notify_peer_local(true, *handle);
1222 			if (result < VMCI_SUCCESS)
1223 				goto error_keep_entry;
1224 
1225 			my_produce_q = queue_pair_entry->consume_q;
1226 			my_consume_q = queue_pair_entry->produce_q;
1227 			goto out;
1228 		}
1229 
1230 		result = VMCI_ERROR_ALREADY_EXISTS;
1231 		goto error_keep_entry;
1232 	}
1233 
1234 	my_produce_q = qp_alloc_queue(produce_size, flags);
1235 	if (!my_produce_q) {
1236 		pr_warn("Error allocating pages for produce queue\n");
1237 		result = VMCI_ERROR_NO_MEM;
1238 		goto error;
1239 	}
1240 
1241 	my_consume_q = qp_alloc_queue(consume_size, flags);
1242 	if (!my_consume_q) {
1243 		pr_warn("Error allocating pages for consume queue\n");
1244 		result = VMCI_ERROR_NO_MEM;
1245 		goto error;
1246 	}
1247 
1248 	queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
1249 						    produce_size, consume_size,
1250 						    my_produce_q, my_consume_q);
1251 	if (!queue_pair_entry) {
1252 		pr_warn("Error allocating memory in %s\n", __func__);
1253 		result = VMCI_ERROR_NO_MEM;
1254 		goto error;
1255 	}
1256 
1257 	result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
1258 				  num_consume_pages,
1259 				  &queue_pair_entry->ppn_set);
1260 	if (result < VMCI_SUCCESS) {
1261 		pr_warn("qp_alloc_ppn_set failed\n");
1262 		goto error;
1263 	}
1264 
1265 	/*
1266 	 * It's only necessary to notify the host if this queue pair will be
1267 	 * attached to from another context.
1268 	 */
1269 	if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1270 		/* Local create case. */
1271 		u32 context_id = vmci_get_context_id();
1272 
1273 		/*
1274 		 * Enforce similar checks on local queue pairs as we
1275 		 * do for regular ones.  The handle's context must
1276 		 * match the creator or attacher context id (here they
1277 		 * are both the current context id) and the
1278 		 * attach-only flag cannot exist during create.  We
1279 		 * also ensure specified peer is this context or an
1280 		 * invalid one.
1281 		 */
1282 		if (queue_pair_entry->qp.handle.context != context_id ||
1283 		    (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
1284 		     queue_pair_entry->qp.peer != context_id)) {
1285 			result = VMCI_ERROR_NO_ACCESS;
1286 			goto error;
1287 		}
1288 
1289 		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
1290 			result = VMCI_ERROR_NOT_FOUND;
1291 			goto error;
1292 		}
1293 	} else {
1294 		result = qp_alloc_hypercall(queue_pair_entry);
1295 		if (result < VMCI_SUCCESS) {
1296 			pr_warn("qp_alloc_hypercall result = %d\n", result);
1297 			goto error;
1298 		}
1299 	}
1300 
1301 	qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
1302 			    (struct vmci_queue *)my_consume_q);
1303 
1304 	qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
1305 
1306  out:
1307 	queue_pair_entry->qp.ref_count++;
1308 	*handle = queue_pair_entry->qp.handle;
1309 	*produce_q = (struct vmci_queue *)my_produce_q;
1310 	*consume_q = (struct vmci_queue *)my_consume_q;
1311 
1312 	/*
1313 	 * We should initialize the queue pair header pages on a local
1314 	 * queue pair create.  For non-local queue pairs, the
1315 	 * hypervisor initializes the header pages in the create step.
1316 	 */
1317 	if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
1318 	    queue_pair_entry->qp.ref_count == 1) {
1319 		vmci_q_header_init((*produce_q)->q_header, *handle);
1320 		vmci_q_header_init((*consume_q)->q_header, *handle);
1321 	}
1322 
1323 	mutex_unlock(&qp_guest_endpoints.mutex);
1324 
1325 	return VMCI_SUCCESS;
1326 
1327  error:
1328 	mutex_unlock(&qp_guest_endpoints.mutex);
1329 	if (queue_pair_entry) {
1330 		/* The queues will be freed inside the destroy routine. */
1331 		qp_guest_endpoint_destroy(queue_pair_entry);
1332 	} else {
1333 		qp_free_queue(my_produce_q, produce_size);
1334 		qp_free_queue(my_consume_q, consume_size);
1335 	}
1336 	return result;
1337 
1338  error_keep_entry:
1339 	/* This path should only be used when an existing entry was found. */
1340 	mutex_unlock(&qp_guest_endpoints.mutex);
1341 	return result;
1342 }
1343 
1344 /*
1345  * The first endpoint issuing a queue pair allocation will create the state
1346  * of the queue pair in the queue pair broker.
1347  *
1348  * If the creator is a guest, it will associate a VMX virtual address range
1349  * with the queue pair as specified by the page_store. For compatibility with
 * older VMX'en, which used a separate step to set the VMX virtual
1351  * address range, the virtual address range can be registered later using
1352  * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
1353  * used.
1354  *
1355  * If the creator is the host, a page_store of NULL should be used as well,
1356  * since the host is not able to supply a page store for the queue pair.
1357  *
1358  * For older VMX and host callers, the queue pair will be created in the
1359  * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
 * created in the VMCIQPB_CREATED_MEM state.
1361  */
1362 static int qp_broker_create(struct vmci_handle handle,
1363 			    u32 peer,
1364 			    u32 flags,
1365 			    u32 priv_flags,
1366 			    u64 produce_size,
1367 			    u64 consume_size,
1368 			    struct vmci_qp_page_store *page_store,
1369 			    struct vmci_ctx *context,
1370 			    vmci_event_release_cb wakeup_cb,
1371 			    void *client_data, struct qp_broker_entry **ent)
1372 {
1373 	struct qp_broker_entry *entry = NULL;
1374 	const u32 context_id = vmci_ctx_get_id(context);
1375 	bool is_local = flags & VMCI_QPFLAG_LOCAL;
1376 	int result;
1377 	u64 guest_produce_size;
1378 	u64 guest_consume_size;
1379 
1380 	/* Do not create if the caller asked not to. */
1381 	if (flags & VMCI_QPFLAG_ATTACH_ONLY)
1382 		return VMCI_ERROR_NOT_FOUND;
1383 
1384 	/*
1385 	 * Creator's context ID should match handle's context ID or the creator
1386 	 * must allow the context in handle's context ID as the "peer".
1387 	 */
1388 	if (handle.context != context_id && handle.context != peer)
1389 		return VMCI_ERROR_NO_ACCESS;
1390 
1391 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
1392 		return VMCI_ERROR_DST_UNREACHABLE;
1393 
1394 	/*
1395 	 * Creator's context ID for local queue pairs should match the
1396 	 * peer, if a peer is specified.
1397 	 */
1398 	if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
1399 		return VMCI_ERROR_NO_ACCESS;
1400 
1401 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
1402 	if (!entry)
1403 		return VMCI_ERROR_NO_MEM;
1404 
1405 	if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
1406 		/*
1407 		 * The queue pair broker entry stores values from the guest
1408 		 * point of view, so a creating host side endpoint should swap
1409 		 * produce and consume values -- unless it is a local queue
1410 		 * pair, in which case no swapping is necessary, since the local
1411 		 * attacher will swap queues.
1412 		 */
1413 
1414 		guest_produce_size = consume_size;
1415 		guest_consume_size = produce_size;
1416 	} else {
1417 		guest_produce_size = produce_size;
1418 		guest_consume_size = consume_size;
1419 	}
1420 
1421 	entry->qp.handle = handle;
1422 	entry->qp.peer = peer;
1423 	entry->qp.flags = flags;
1424 	entry->qp.produce_size = guest_produce_size;
1425 	entry->qp.consume_size = guest_consume_size;
1426 	entry->qp.ref_count = 1;
1427 	entry->create_id = context_id;
1428 	entry->attach_id = VMCI_INVALID_ID;
1429 	entry->state = VMCIQPB_NEW;
1430 	entry->require_trusted_attach =
1431 	    !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
1432 	entry->created_by_trusted =
1433 	    !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
1434 	entry->vmci_page_files = false;
1435 	entry->wakeup_cb = wakeup_cb;
1436 	entry->client_data = client_data;
1437 	entry->produce_q = qp_host_alloc_queue(guest_produce_size);
1438 	if (entry->produce_q == NULL) {
1439 		result = VMCI_ERROR_NO_MEM;
1440 		goto error;
1441 	}
1442 	entry->consume_q = qp_host_alloc_queue(guest_consume_size);
1443 	if (entry->consume_q == NULL) {
1444 		result = VMCI_ERROR_NO_MEM;
1445 		goto error;
1446 	}
1447 
1448 	qp_init_queue_mutex(entry->produce_q, entry->consume_q);
1449 
1450 	INIT_LIST_HEAD(&entry->qp.list_item);
1451 
1452 	if (is_local) {
1453 		u8 *tmp;
1454 
1455 		entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
1456 					   PAGE_SIZE, GFP_KERNEL);
1457 		if (entry->local_mem == NULL) {
1458 			result = VMCI_ERROR_NO_MEM;
1459 			goto error;
1460 		}
1461 		entry->state = VMCIQPB_CREATED_MEM;
1462 		entry->produce_q->q_header = entry->local_mem;
1463 		tmp = (u8 *)entry->local_mem + PAGE_SIZE *
1464 		    (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
1465 		entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
1466 	} else if (page_store) {
1467 		/*
1468 		 * The VMX already initialized the queue pair headers, so no
1469 		 * need for the kernel side to do that.
1470 		 */
1471 		result = qp_host_register_user_memory(page_store,
1472 						      entry->produce_q,
1473 						      entry->consume_q);
1474 		if (result < VMCI_SUCCESS)
1475 			goto error;
1476 
1477 		entry->state = VMCIQPB_CREATED_MEM;
1478 	} else {
1479 		/*
1480 		 * A create without a page_store may be either a host
1481 		 * side create (in which case we are waiting for the
1482 		 * guest side to supply the memory) or an old style
1483 		 * queue pair create (in which case we will expect a
1484 		 * set page store call as the next step).
1485 		 */
1486 		entry->state = VMCIQPB_CREATED_NO_MEM;
1487 	}
1488 
1489 	qp_list_add_entry(&qp_broker_list, &entry->qp);
1490 	if (ent != NULL)
1491 		*ent = entry;
1492 
1493 	/* Add to resource obj */
1494 	result = vmci_resource_add(&entry->resource,
1495 				   VMCI_RESOURCE_TYPE_QPAIR_HOST,
1496 				   handle);
1497 	if (result != VMCI_SUCCESS) {
1498 		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1499 			handle.context, handle.resource, result);
1500 		goto error;
1501 	}
1502 
1503 	entry->qp.handle = vmci_resource_handle(&entry->resource);
1504 	if (is_local) {
1505 		vmci_q_header_init(entry->produce_q->q_header,
1506 				   entry->qp.handle);
1507 		vmci_q_header_init(entry->consume_q->q_header,
1508 				   entry->qp.handle);
1509 	}
1510 
1511 	vmci_ctx_qp_create(context, entry->qp.handle);
1512 
1513 	return VMCI_SUCCESS;
1514 
1515  error:
1516 	if (entry != NULL) {
1517 		qp_host_free_queue(entry->produce_q, guest_produce_size);
1518 		qp_host_free_queue(entry->consume_q, guest_consume_size);
1519 		kfree(entry);
1520 	}
1521 
1522 	return result;
1523 }
1524 
1525 /*
1526  * Enqueues an event datagram to notify the peer VM attached to
1527  * the given queue pair handle about attach/detach event by the
1528  * given VM.  Returns Payload size of datagram enqueued on
1529  * success, error code otherwise.
1530  */
1531 static int qp_notify_peer(bool attach,
1532 			  struct vmci_handle handle,
1533 			  u32 my_id,
1534 			  u32 peer_id)
1535 {
1536 	int rv;
1537 	struct vmci_event_qp ev;
1538 
1539 	if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
1540 	    peer_id == VMCI_INVALID_ID)
1541 		return VMCI_ERROR_INVALID_ARGS;
1542 
1543 	/*
1544 	 * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
	 * the number of pending events from the hypervisor to a given VM;
	 * otherwise a rogue VM could do an arbitrary number of attach
	 * and detach operations, causing memory pressure in the host
1548 	 * kernel.
1549 	 */
1550 
1551 	ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
1552 	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1553 					  VMCI_CONTEXT_RESOURCE_ID);
1554 	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
1555 	ev.msg.event_data.event = attach ?
1556 	    VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
1557 	ev.payload.handle = handle;
1558 	ev.payload.peer_id = my_id;
1559 
1560 	rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
1561 				    &ev.msg.hdr, false);
1562 	if (rv < VMCI_SUCCESS)
1563 		pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
1564 			attach ? "ATTACH" : "DETACH", peer_id);
1565 
1566 	return rv;
1567 }
1568 
1569 /*
1570  * The second endpoint issuing a queue pair allocation will attach to
1571  * the queue pair registered with the queue pair broker.
1572  *
1573  * If the attacher is a guest, it will associate a VMX virtual address
1574  * range with the queue pair as specified by the page_store. At this
 * point, the already attached host endpoint may start using the queue
1576  * pair, and an attach event is sent to it. For compatibility with
 * older VMX'en, which used a separate step to set the VMX virtual
1578  * address range, the virtual address range can be registered later
1579  * using vmci_qp_broker_set_page_store. In that case, a page_store of
1580  * NULL should be used, and the attach event will be generated once
1581  * the actual page store has been set.
1582  *
1583  * If the attacher is the host, a page_store of NULL should be used as
1584  * well, since the page store information is already set by the guest.
1585  *
1586  * For new VMX and host callers, the queue pair will be moved to the
1587  * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
 * moved to the VMCIQPB_ATTACHED_NO_MEM state.
1589  */
1590 static int qp_broker_attach(struct qp_broker_entry *entry,
1591 			    u32 peer,
1592 			    u32 flags,
1593 			    u32 priv_flags,
1594 			    u64 produce_size,
1595 			    u64 consume_size,
1596 			    struct vmci_qp_page_store *page_store,
1597 			    struct vmci_ctx *context,
1598 			    vmci_event_release_cb wakeup_cb,
1599 			    void *client_data,
1600 			    struct qp_broker_entry **ent)
1601 {
1602 	const u32 context_id = vmci_ctx_get_id(context);
1603 	bool is_local = flags & VMCI_QPFLAG_LOCAL;
1604 	int result;
1605 
1606 	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
1607 	    entry->state != VMCIQPB_CREATED_MEM)
1608 		return VMCI_ERROR_UNAVAILABLE;
1609 
1610 	if (is_local) {
1611 		if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
1612 		    context_id != entry->create_id) {
1613 			return VMCI_ERROR_INVALID_ARGS;
1614 		}
1615 	} else if (context_id == entry->create_id ||
1616 		   context_id == entry->attach_id) {
1617 		return VMCI_ERROR_ALREADY_EXISTS;
1618 	}
1619 
1620 	if (VMCI_CONTEXT_IS_VM(context_id) &&
1621 	    VMCI_CONTEXT_IS_VM(entry->create_id))
1622 		return VMCI_ERROR_DST_UNREACHABLE;
1623 
1624 	/*
1625 	 * If we are attaching from a restricted context then the queuepair
1626 	 * must have been created by a trusted endpoint.
1627 	 */
1628 	if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
1629 	    !entry->created_by_trusted)
1630 		return VMCI_ERROR_NO_ACCESS;
1631 
1632 	/*
1633 	 * If we are attaching to a queuepair that was created by a restricted
1634 	 * context then we must be trusted.
1635 	 */
1636 	if (entry->require_trusted_attach &&
1637 	    (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
1638 		return VMCI_ERROR_NO_ACCESS;
1639 
1640 	/*
1641 	 * If the creator specifies VMCI_INVALID_ID in "peer" field, access
1642 	 * control check is not performed.
1643 	 */
1644 	if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
1645 		return VMCI_ERROR_NO_ACCESS;
1646 
1647 	if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
1648 		/*
1649 		 * Do not attach if the caller doesn't support Host Queue Pairs
1650 		 * and a host created this queue pair.
1651 		 */
1652 
1653 		if (!vmci_ctx_supports_host_qp(context))
1654 			return VMCI_ERROR_INVALID_RESOURCE;
1655 
1656 	} else if (context_id == VMCI_HOST_CONTEXT_ID) {
1657 		struct vmci_ctx *create_context;
1658 		bool supports_host_qp;
1659 
1660 		/*
1661 		 * Do not attach a host to a user created queue pair if that
1662 		 * user doesn't support host queue pair end points.
1663 		 */
1664 
1665 		create_context = vmci_ctx_get(entry->create_id);
1666 		supports_host_qp = vmci_ctx_supports_host_qp(create_context);
1667 		vmci_ctx_put(create_context);
1668 
1669 		if (!supports_host_qp)
1670 			return VMCI_ERROR_INVALID_RESOURCE;
1671 	}
1672 
1673 	if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
1674 		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1675 
1676 	if (context_id != VMCI_HOST_CONTEXT_ID) {
1677 		/*
1678 		 * The queue pair broker entry stores values from the guest
1679 		 * point of view, so an attaching guest should match the values
1680 		 * stored in the entry.
1681 		 */
1682 
1683 		if (entry->qp.produce_size != produce_size ||
1684 		    entry->qp.consume_size != consume_size) {
1685 			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1686 		}
1687 	} else if (entry->qp.produce_size != consume_size ||
1688 		   entry->qp.consume_size != produce_size) {
1689 		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1690 	}
1691 
1692 	if (context_id != VMCI_HOST_CONTEXT_ID) {
1693 		/*
1694 		 * If a guest attached to a queue pair, it will supply
1695 		 * the backing memory.  If this is a pre NOVMVM vmx,
1696 		 * the backing memory will be supplied by calling
1697 		 * vmci_qp_broker_set_page_store() following the
1698 		 * return of the vmci_qp_broker_alloc() call. If it is
1699 		 * a vmx of version NOVMVM or later, the page store
1700 		 * must be supplied as part of the
		 * vmci_qp_broker_alloc call.  In all cases, the
		 * initially created queue pair must not have any
		 * memory associated with it yet.
1704 		 */
1705 
1706 		if (entry->state != VMCIQPB_CREATED_NO_MEM)
1707 			return VMCI_ERROR_INVALID_ARGS;
1708 
1709 		if (page_store != NULL) {
1710 			/*
1711 			 * Patch up host state to point to guest
1712 			 * supplied memory. The VMX already
1713 			 * initialized the queue pair headers, so no
1714 			 * need for the kernel side to do that.
1715 			 */
1716 
1717 			result = qp_host_register_user_memory(page_store,
1718 							      entry->produce_q,
1719 							      entry->consume_q);
1720 			if (result < VMCI_SUCCESS)
1721 				return result;
1722 
1723 			/*
1724 			 * Preemptively load in the headers if non-blocking to
1725 			 * prevent blocking later.
1726 			 */
1727 			if (entry->qp.flags & VMCI_QPFLAG_NONBLOCK) {
1728 				result = qp_host_map_queues(entry->produce_q,
1729 							    entry->consume_q);
1730 				if (result < VMCI_SUCCESS) {
1731 					qp_host_unregister_user_memory(
1732 						entry->produce_q,
1733 						entry->consume_q);
1734 					return result;
1735 				}
1736 			}
1737 
1738 			entry->state = VMCIQPB_ATTACHED_MEM;
1739 		} else {
1740 			entry->state = VMCIQPB_ATTACHED_NO_MEM;
1741 		}
1742 	} else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
1743 		/*
1744 		 * The host side is attempting to attach to a queue
1745 		 * pair that doesn't have any memory associated with
1746 		 * it. This must be a pre NOVMVM vmx that hasn't set
1747 		 * the page store information yet, or a quiesced VM.
1748 		 */
1749 
1750 		return VMCI_ERROR_UNAVAILABLE;
1751 	} else {
1752 		/*
1753 		 * For non-blocking queue pairs, we cannot rely on
1754 		 * enqueue/dequeue to map in the pages on the
1755 		 * host-side, since it may block, so we make an
1756 		 * attempt here.
1757 		 */
1758 
1759 		if (flags & VMCI_QPFLAG_NONBLOCK) {
1760 			result =
1761 			    qp_host_map_queues(entry->produce_q,
1762 					       entry->consume_q);
1763 			if (result < VMCI_SUCCESS)
1764 				return result;
1765 
1766 			entry->qp.flags |= flags &
1767 			    (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED);
1768 		}
1769 
1770 		/* The host side has successfully attached to a queue pair. */
1771 		entry->state = VMCIQPB_ATTACHED_MEM;
1772 	}
1773 
1774 	if (entry->state == VMCIQPB_ATTACHED_MEM) {
1775 		result =
1776 		    qp_notify_peer(true, entry->qp.handle, context_id,
1777 				   entry->create_id);
1778 		if (result < VMCI_SUCCESS)
1779 			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
1780 				entry->create_id, entry->qp.handle.context,
1781 				entry->qp.handle.resource);
1782 	}
1783 
1784 	entry->attach_id = context_id;
1785 	entry->qp.ref_count++;
1786 	if (wakeup_cb) {
1787 		entry->wakeup_cb = wakeup_cb;
1788 		entry->client_data = client_data;
1789 	}
1790 
1791 	/*
1792 	 * When attaching to local queue pairs, the context already has
1793 	 * an entry tracking the queue pair, so don't add another one.
1794 	 */
1795 	if (!is_local)
1796 		vmci_ctx_qp_create(context, entry->qp.handle);
1797 
1798 	if (ent != NULL)
1799 		*ent = entry;
1800 
1801 	return VMCI_SUCCESS;
1802 }
1803 
1804 /*
 * Queue pair alloc for use when setting up queue pair endpoints
1806  * on the host.
1807  */
1808 static int qp_broker_alloc(struct vmci_handle handle,
1809 			   u32 peer,
1810 			   u32 flags,
1811 			   u32 priv_flags,
1812 			   u64 produce_size,
1813 			   u64 consume_size,
1814 			   struct vmci_qp_page_store *page_store,
1815 			   struct vmci_ctx *context,
1816 			   vmci_event_release_cb wakeup_cb,
1817 			   void *client_data,
1818 			   struct qp_broker_entry **ent,
1819 			   bool *swap)
1820 {
1821 	const u32 context_id = vmci_ctx_get_id(context);
1822 	bool create;
1823 	struct qp_broker_entry *entry = NULL;
1824 	bool is_local = flags & VMCI_QPFLAG_LOCAL;
1825 	int result;
1826 
1827 	if (vmci_handle_is_invalid(handle) ||
1828 	    (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
1829 	    !(produce_size || consume_size) ||
1830 	    !context || context_id == VMCI_INVALID_ID ||
1831 	    handle.context == VMCI_INVALID_ID) {
1832 		return VMCI_ERROR_INVALID_ARGS;
1833 	}
1834 
1835 	if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
1836 		return VMCI_ERROR_INVALID_ARGS;
1837 
1838 	/*
1839 	 * In the initial argument check, we ensure that non-vmkernel hosts
1840 	 * are not allowed to create local queue pairs.
1841 	 */
1842 
1843 	mutex_lock(&qp_broker_list.mutex);
1844 
1845 	if (!is_local && vmci_ctx_qp_exists(context, handle)) {
1846 		pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
1847 			 context_id, handle.context, handle.resource);
1848 		mutex_unlock(&qp_broker_list.mutex);
1849 		return VMCI_ERROR_ALREADY_EXISTS;
1850 	}
1851 
1852 	if (handle.resource != VMCI_INVALID_ID)
1853 		entry = qp_broker_handle_to_entry(handle);
1854 
1855 	if (!entry) {
1856 		create = true;
1857 		result =
1858 		    qp_broker_create(handle, peer, flags, priv_flags,
1859 				     produce_size, consume_size, page_store,
1860 				     context, wakeup_cb, client_data, ent);
1861 	} else {
1862 		create = false;
1863 		result =
1864 		    qp_broker_attach(entry, peer, flags, priv_flags,
1865 				     produce_size, consume_size, page_store,
1866 				     context, wakeup_cb, client_data, ent);
1867 	}
1868 
1869 	mutex_unlock(&qp_broker_list.mutex);
1870 
1871 	if (swap)
1872 		*swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
1873 		    !(create && is_local);
1874 
1875 	return result;
1876 }
1877 
1878 /*
1879  * This function implements the kernel API for allocating a queue
1880  * pair.
1881  */
1882 static int qp_alloc_host_work(struct vmci_handle *handle,
1883 			      struct vmci_queue **produce_q,
1884 			      u64 produce_size,
1885 			      struct vmci_queue **consume_q,
1886 			      u64 consume_size,
1887 			      u32 peer,
1888 			      u32 flags,
1889 			      u32 priv_flags,
1890 			      vmci_event_release_cb wakeup_cb,
1891 			      void *client_data)
1892 {
1893 	struct vmci_handle new_handle;
1894 	struct vmci_ctx *context;
1895 	struct qp_broker_entry *entry;
1896 	int result;
1897 	bool swap;
1898 
	if (vmci_handle_is_invalid(*handle)) {
		new_handle = vmci_make_handle(
			VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
	} else {
		new_handle = *handle;
	}
1904 
1905 	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1906 	entry = NULL;
1907 	result =
1908 	    qp_broker_alloc(new_handle, peer, flags, priv_flags,
1909 			    produce_size, consume_size, NULL, context,
1910 			    wakeup_cb, client_data, &entry, &swap);
1911 	if (result == VMCI_SUCCESS) {
1912 		if (swap) {
1913 			/*
1914 			 * If this is a local queue pair, the attacher
1915 			 * will swap around produce and consume
1916 			 * queues.
1917 			 */
1918 
1919 			*produce_q = entry->consume_q;
1920 			*consume_q = entry->produce_q;
1921 		} else {
1922 			*produce_q = entry->produce_q;
1923 			*consume_q = entry->consume_q;
1924 		}
1925 
1926 		*handle = vmci_resource_handle(&entry->resource);
1927 	} else {
1928 		*handle = VMCI_INVALID_HANDLE;
1929 		pr_devel("queue pair broker failed to alloc (result=%d)\n",
1930 			 result);
1931 	}
1932 	vmci_ctx_put(context);
1933 	return result;
1934 }
1935 
1936 /*
1937  * Allocates a VMCI queue_pair. Only checks validity of input
1938  * arguments. The real work is done in the host or guest
1939  * specific function.
1940  */
1941 int vmci_qp_alloc(struct vmci_handle *handle,
1942 		  struct vmci_queue **produce_q,
1943 		  u64 produce_size,
1944 		  struct vmci_queue **consume_q,
1945 		  u64 consume_size,
1946 		  u32 peer,
1947 		  u32 flags,
1948 		  u32 priv_flags,
1949 		  bool guest_endpoint,
1950 		  vmci_event_release_cb wakeup_cb,
1951 		  void *client_data)
1952 {
1953 	if (!handle || !produce_q || !consume_q ||
1954 	    (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
1955 		return VMCI_ERROR_INVALID_ARGS;
1956 
1957 	if (guest_endpoint) {
1958 		return qp_alloc_guest_work(handle, produce_q,
1959 					   produce_size, consume_q,
1960 					   consume_size, peer,
1961 					   flags, priv_flags);
1962 	} else {
1963 		return qp_alloc_host_work(handle, produce_q,
1964 					  produce_size, consume_q,
1965 					  consume_size, peer, flags,
1966 					  priv_flags, wakeup_cb, client_data);
1967 	}
1968 }
1969 
1970 /*
1971  * This function implements the host kernel API for detaching from
1972  * a queue pair.
1973  */
1974 static int qp_detatch_host_work(struct vmci_handle handle)
1975 {
1976 	int result;
1977 	struct vmci_ctx *context;
1978 
1979 	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1980 
1981 	result = vmci_qp_broker_detach(handle, context);
1982 
1983 	vmci_ctx_put(context);
1984 	return result;
1985 }
1986 
1987 /*
1988  * Detaches from a VMCI queue_pair. Only checks validity of input argument.
1989  * Real work is done in the host or guest specific function.
1990  */
1991 static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
1992 {
1993 	if (vmci_handle_is_invalid(handle))
1994 		return VMCI_ERROR_INVALID_ARGS;
1995 
1996 	if (guest_endpoint)
1997 		return qp_detatch_guest_work(handle);
1998 	else
1999 		return qp_detatch_host_work(handle);
2000 }
2001 
2002 /*
2003  * Returns the entry from the head of the list. Assumes that the list is
2004  * locked.
2005  */
2006 static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
2007 {
2008 	if (!list_empty(&qp_list->head)) {
2009 		struct qp_entry *entry =
2010 		    list_first_entry(&qp_list->head, struct qp_entry,
2011 				     list_item);
2012 		return entry;
2013 	}
2014 
2015 	return NULL;
2016 }
2017 
2018 void vmci_qp_broker_exit(void)
2019 {
2020 	struct qp_entry *entry;
2021 	struct qp_broker_entry *be;
2022 
2023 	mutex_lock(&qp_broker_list.mutex);
2024 
2025 	while ((entry = qp_list_get_head(&qp_broker_list))) {
2026 		be = (struct qp_broker_entry *)entry;
2027 
2028 		qp_list_remove_entry(&qp_broker_list, entry);
2029 		kfree(be);
2030 	}
2031 
2032 	mutex_unlock(&qp_broker_list.mutex);
2033 }
2034 
2035 /*
2036  * Requests that a queue pair be allocated with the VMCI queue
2037  * pair broker. Allocates a queue pair entry if one does not
2038  * exist. Attaches to one if it exists, and retrieves the page
 * files backing that queue_pair.  The queue pair broker lock is
 * acquired internally.
2041  */
2042 int vmci_qp_broker_alloc(struct vmci_handle handle,
2043 			 u32 peer,
2044 			 u32 flags,
2045 			 u32 priv_flags,
2046 			 u64 produce_size,
2047 			 u64 consume_size,
2048 			 struct vmci_qp_page_store *page_store,
2049 			 struct vmci_ctx *context)
2050 {
2051 	return qp_broker_alloc(handle, peer, flags, priv_flags,
2052 			       produce_size, consume_size,
2053 			       page_store, context, NULL, NULL, NULL, NULL);
2054 }
2055 
2056 /*
2057  * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
2058  * step to add the UVAs of the VMX mapping of the queue pair. This function
2059  * provides backwards compatibility with such VMX'en, and takes care of
2060  * registering the page store for a queue pair previously allocated by the
2061  * VMX during create or attach. This function will move the queue pair state
 * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
 * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
2064  * attached state with memory, the queue pair is ready to be used by the
2065  * host peer, and an attached event will be generated.
2066  *
 * The queue pair broker lock is acquired internally.
2068  *
2069  * This function is only used by the hosted platform, since there is no
2070  * issue with backwards compatibility for vmkernel.
2071  */
2072 int vmci_qp_broker_set_page_store(struct vmci_handle handle,
2073 				  u64 produce_uva,
2074 				  u64 consume_uva,
2075 				  struct vmci_ctx *context)
2076 {
2077 	struct qp_broker_entry *entry;
2078 	int result;
2079 	const u32 context_id = vmci_ctx_get_id(context);
2080 
2081 	if (vmci_handle_is_invalid(handle) || !context ||
2082 	    context_id == VMCI_INVALID_ID)
2083 		return VMCI_ERROR_INVALID_ARGS;
2084 
2085 	/*
2086 	 * We only support guest to host queue pairs, so the VMX must
2087 	 * supply UVAs for the mapped page files.
2088 	 */
2089 
2090 	if (produce_uva == 0 || consume_uva == 0)
2091 		return VMCI_ERROR_INVALID_ARGS;
2092 
2093 	mutex_lock(&qp_broker_list.mutex);
2094 
2095 	if (!vmci_ctx_qp_exists(context, handle)) {
2096 		pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2097 			context_id, handle.context, handle.resource);
2098 		result = VMCI_ERROR_NOT_FOUND;
2099 		goto out;
2100 	}
2101 
2102 	entry = qp_broker_handle_to_entry(handle);
2103 	if (!entry) {
2104 		result = VMCI_ERROR_NOT_FOUND;
2105 		goto out;
2106 	}
2107 
2108 	/*
2109 	 * If I'm the owner then I can set the page store.
2110 	 *
2111 	 * Or, if a host created the queue_pair and I'm the attached peer
2112 	 * then I can set the page store.
2113 	 */
2114 	if (entry->create_id != context_id &&
2115 	    (entry->create_id != VMCI_HOST_CONTEXT_ID ||
2116 	     entry->attach_id != context_id)) {
2117 		result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
2118 		goto out;
2119 	}
2120 
2121 	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
2122 	    entry->state != VMCIQPB_ATTACHED_NO_MEM) {
2123 		result = VMCI_ERROR_UNAVAILABLE;
2124 		goto out;
2125 	}
2126 
2127 	result = qp_host_get_user_memory(produce_uva, consume_uva,
2128 					 entry->produce_q, entry->consume_q);
2129 	if (result < VMCI_SUCCESS)
2130 		goto out;
2131 
2132 	result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2133 	if (result < VMCI_SUCCESS) {
2134 		qp_host_unregister_user_memory(entry->produce_q,
2135 					       entry->consume_q);
2136 		goto out;
2137 	}
2138 
2139 	if (entry->state == VMCIQPB_CREATED_NO_MEM)
2140 		entry->state = VMCIQPB_CREATED_MEM;
2141 	else
2142 		entry->state = VMCIQPB_ATTACHED_MEM;
2143 
2144 	entry->vmci_page_files = true;
2145 
2146 	if (entry->state == VMCIQPB_ATTACHED_MEM) {
2147 		result =
2148 		    qp_notify_peer(true, handle, context_id, entry->create_id);
2149 		if (result < VMCI_SUCCESS) {
2150 			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
2151 				entry->create_id, entry->qp.handle.context,
2152 				entry->qp.handle.resource);
2153 		}
2154 	}
2155 
2156 	result = VMCI_SUCCESS;
2157  out:
2158 	mutex_unlock(&qp_broker_list.mutex);
2159 	return result;
2160 }
2161 
2162 /*
2163  * Resets saved queue headers for the given QP broker
2164  * entry. Should be used when guest memory becomes available
2165  * again, or the guest detaches.
2166  */
2167 static void qp_reset_saved_headers(struct qp_broker_entry *entry)
2168 {
2169 	entry->produce_q->saved_header = NULL;
2170 	entry->consume_q->saved_header = NULL;
2171 }
2172 
2173 /*
2174  * The main entry point for detaching from a queue pair registered with the
2175  * queue pair broker. If more than one endpoint is attached to the queue
2176  * pair, the first endpoint will mainly decrement a reference count and
2177  * generate a notification to its peer. The last endpoint will clean up
2178  * the queue pair state registered with the broker.
2179  *
2180  * When a guest endpoint detaches, it will unmap and unregister the guest
2181  * memory backing the queue pair. If the host is still attached, it will
2182  * no longer be able to access the queue pair content.
2183  *
2184  * If the queue pair is already in a state where there is no memory
2185  * registered for the queue pair (any *_NO_MEM state), it will transition to
 * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen if a guest
2187  * endpoint is the first of two endpoints to detach. If the host endpoint is
2188  * the first out of two to detach, the queue pair will move to the
2189  * VMCIQPB_SHUTDOWN_MEM state.
2190  */
2191 int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
2192 {
2193 	struct qp_broker_entry *entry;
2194 	const u32 context_id = vmci_ctx_get_id(context);
2195 	u32 peer_id;
2196 	bool is_local = false;
2197 	int result;
2198 
2199 	if (vmci_handle_is_invalid(handle) || !context ||
2200 	    context_id == VMCI_INVALID_ID) {
2201 		return VMCI_ERROR_INVALID_ARGS;
2202 	}
2203 
2204 	mutex_lock(&qp_broker_list.mutex);
2205 
2206 	if (!vmci_ctx_qp_exists(context, handle)) {
2207 		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2208 			 context_id, handle.context, handle.resource);
2209 		result = VMCI_ERROR_NOT_FOUND;
2210 		goto out;
2211 	}
2212 
2213 	entry = qp_broker_handle_to_entry(handle);
2214 	if (!entry) {
2215 		pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n",
2216 			 context_id, handle.context, handle.resource);
2217 		result = VMCI_ERROR_NOT_FOUND;
2218 		goto out;
2219 	}
2220 
2221 	if (context_id != entry->create_id && context_id != entry->attach_id) {
2222 		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2223 		goto out;
2224 	}
2225 
2226 	if (context_id == entry->create_id) {
2227 		peer_id = entry->attach_id;
2228 		entry->create_id = VMCI_INVALID_ID;
2229 	} else {
2230 		peer_id = entry->create_id;
2231 		entry->attach_id = VMCI_INVALID_ID;
2232 	}
2233 	entry->qp.ref_count--;
2234 
2235 	is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2236 
2237 	if (context_id != VMCI_HOST_CONTEXT_ID) {
2238 		bool headers_mapped;
2239 
2240 		/*
2241 		 * Pre NOVMVM vmx'en may detach from a queue pair
2242 		 * before setting the page store, and in that case
2243 		 * there is no user memory to detach from. Also, more
2244 		 * recent VMX'en may detach from a queue pair in the
2245 		 * quiesced state.
2246 		 */
2247 
2248 		qp_acquire_queue_mutex(entry->produce_q);
2249 		headers_mapped = entry->produce_q->q_header ||
2250 		    entry->consume_q->q_header;
2251 		if (QPBROKERSTATE_HAS_MEM(entry)) {
2252 			result =
2253 			    qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
2254 						 entry->produce_q,
2255 						 entry->consume_q);
2256 			if (result < VMCI_SUCCESS)
2257 				pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2258 					handle.context, handle.resource,
2259 					result);
2260 
			qp_host_unregister_user_memory(entry->produce_q,
						       entry->consume_q);
2269 
2270 		}
2271 
2272 		if (!headers_mapped)
2273 			qp_reset_saved_headers(entry);
2274 
2275 		qp_release_queue_mutex(entry->produce_q);
2276 
2277 		if (!headers_mapped && entry->wakeup_cb)
2278 			entry->wakeup_cb(entry->client_data);
2279 
2280 	} else {
2281 		if (entry->wakeup_cb) {
2282 			entry->wakeup_cb = NULL;
2283 			entry->client_data = NULL;
2284 		}
2285 	}
2286 
2287 	if (entry->qp.ref_count == 0) {
2288 		qp_list_remove_entry(&qp_broker_list, &entry->qp);
2289 
2290 		if (is_local)
2291 			kfree(entry->local_mem);
2292 
2293 		qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
2294 		qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
2295 		qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
2296 		/* Unlink from resource hash table and free callback */
2297 		vmci_resource_remove(&entry->resource);
2298 
2299 		kfree(entry);
2300 
2301 		vmci_ctx_qp_destroy(context, handle);
2302 	} else {
2303 		qp_notify_peer(false, handle, context_id, peer_id);
2304 		if (context_id == VMCI_HOST_CONTEXT_ID &&
2305 		    QPBROKERSTATE_HAS_MEM(entry)) {
2306 			entry->state = VMCIQPB_SHUTDOWN_MEM;
2307 		} else {
2308 			entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
2309 		}
2310 
2311 		if (!is_local)
2312 			vmci_ctx_qp_destroy(context, handle);
2313 
2314 	}
2315 	result = VMCI_SUCCESS;
2316  out:
2317 	mutex_unlock(&qp_broker_list.mutex);
2318 	return result;
2319 }
2320 
2321 /*
2322  * Establishes the necessary mappings for a queue pair given a
2323  * reference to the queue pair guest memory. This is usually
2324  * called when a guest is unquiesced and the VMX is allowed to
2325  * map guest memory once again.
2326  */
2327 int vmci_qp_broker_map(struct vmci_handle handle,
2328 		       struct vmci_ctx *context,
2329 		       u64 guest_mem)
2330 {
2331 	struct qp_broker_entry *entry;
2332 	const u32 context_id = vmci_ctx_get_id(context);
2333 	bool is_local = false;
2334 	int result;
2335 
2336 	if (vmci_handle_is_invalid(handle) || !context ||
2337 	    context_id == VMCI_INVALID_ID)
2338 		return VMCI_ERROR_INVALID_ARGS;
2339 
2340 	mutex_lock(&qp_broker_list.mutex);
2341 
2342 	if (!vmci_ctx_qp_exists(context, handle)) {
2343 		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2344 			 context_id, handle.context, handle.resource);
2345 		result = VMCI_ERROR_NOT_FOUND;
2346 		goto out;
2347 	}
2348 
2349 	entry = qp_broker_handle_to_entry(handle);
2350 	if (!entry) {
2351 		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2352 			 context_id, handle.context, handle.resource);
2353 		result = VMCI_ERROR_NOT_FOUND;
2354 		goto out;
2355 	}
2356 
2357 	if (context_id != entry->create_id && context_id != entry->attach_id) {
2358 		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2359 		goto out;
2360 	}
2361 
2362 	is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2363 	result = VMCI_SUCCESS;
2364 
2365 	if (context_id != VMCI_HOST_CONTEXT_ID) {
2366 		struct vmci_qp_page_store page_store;
2367 
2368 		page_store.pages = guest_mem;
2369 		page_store.len = QPE_NUM_PAGES(entry->qp);
2370 
2371 		qp_acquire_queue_mutex(entry->produce_q);
2372 		qp_reset_saved_headers(entry);
2373 		result =
2374 		    qp_host_register_user_memory(&page_store,
2375 						 entry->produce_q,
2376 						 entry->consume_q);
2377 		qp_release_queue_mutex(entry->produce_q);
2378 		if (result == VMCI_SUCCESS) {
2379 			/* Move state from *_NO_MEM to *_MEM */
2380 
2381 			entry->state++;
2382 
2383 			if (entry->wakeup_cb)
2384 				entry->wakeup_cb(entry->client_data);
2385 		}
2386 	}
2387 
2388  out:
2389 	mutex_unlock(&qp_broker_list.mutex);
2390 	return result;
2391 }
2392 
2393 /*
2394  * Saves a snapshot of the queue headers for the given QP broker
2395  * entry. Should be used when guest memory is unmapped.
2396  * Results:
2397  * VMCI_SUCCESS on success, appropriate error code if guest memory
 * can't be accessed.
2399  */
2400 static int qp_save_headers(struct qp_broker_entry *entry)
2401 {
2402 	int result;
2403 
2404 	if (entry->produce_q->saved_header != NULL &&
2405 	    entry->consume_q->saved_header != NULL) {
2406 		/*
2407 		 *  If the headers have already been saved, we don't need to do
2408 		 *  it again, and we don't want to map in the headers
2409 		 *  unnecessarily.
2410 		 */
2411 
2412 		return VMCI_SUCCESS;
2413 	}
2414 
2415 	if (NULL == entry->produce_q->q_header ||
2416 	    NULL == entry->consume_q->q_header) {
2417 		result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2418 		if (result < VMCI_SUCCESS)
2419 			return result;
2420 	}
2421 
2422 	memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
2423 	       sizeof(entry->saved_produce_q));
2424 	entry->produce_q->saved_header = &entry->saved_produce_q;
2425 	memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
2426 	       sizeof(entry->saved_consume_q));
2427 	entry->consume_q->saved_header = &entry->saved_consume_q;
2428 
2429 	return VMCI_SUCCESS;
2430 }
2431 
2432 /*
2433  * Removes all references to the guest memory of a given queue pair, and
2434  * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
 * called when a VM is being quiesced and access to guest memory should be
 * avoided.
2437  */
2438 int vmci_qp_broker_unmap(struct vmci_handle handle,
2439 			 struct vmci_ctx *context,
2440 			 u32 gid)
2441 {
2442 	struct qp_broker_entry *entry;
2443 	const u32 context_id = vmci_ctx_get_id(context);
2444 	bool is_local = false;
2445 	int result;
2446 
2447 	if (vmci_handle_is_invalid(handle) || !context ||
2448 	    context_id == VMCI_INVALID_ID)
2449 		return VMCI_ERROR_INVALID_ARGS;
2450 
2451 	mutex_lock(&qp_broker_list.mutex);
2452 
2453 	if (!vmci_ctx_qp_exists(context, handle)) {
2454 		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2455 			 context_id, handle.context, handle.resource);
2456 		result = VMCI_ERROR_NOT_FOUND;
2457 		goto out;
2458 	}
2459 
2460 	entry = qp_broker_handle_to_entry(handle);
2461 	if (!entry) {
2462 		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2463 			 context_id, handle.context, handle.resource);
2464 		result = VMCI_ERROR_NOT_FOUND;
2465 		goto out;
2466 	}
2467 
2468 	if (context_id != entry->create_id && context_id != entry->attach_id) {
2469 		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2470 		goto out;
2471 	}
2472 
2473 	is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2474 
2475 	if (context_id != VMCI_HOST_CONTEXT_ID) {
2476 		qp_acquire_queue_mutex(entry->produce_q);
2477 		result = qp_save_headers(entry);
2478 		if (result < VMCI_SUCCESS)
2479 			pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2480 				handle.context, handle.resource, result);
2481 
2482 		qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
2483 
2484 		/*
2485 		 * On hosted, when we unmap queue pairs, the VMX will also
2486 		 * unmap the guest memory, so we invalidate the previously
2487 		 * registered memory. If the queue pair is mapped again at a
2488 		 * later point in time, we will need to reregister the user
2489 		 * memory with a possibly new user VA.
2490 		 */
2491 		qp_host_unregister_user_memory(entry->produce_q,
2492 					       entry->consume_q);
2493 
2494 		/*
2495 		 * Move state from *_MEM to *_NO_MEM.
2496 		 */
2497 		entry->state--;
2498 
2499 		qp_release_queue_mutex(entry->produce_q);
2500 	}
2501 
2502 	result = VMCI_SUCCESS;
2503 
2504  out:
2505 	mutex_unlock(&qp_broker_list.mutex);
2506 	return result;
2507 }
2508 
2509 /*
2510  * Destroys all guest queue pair endpoints. If active guest queue
 * pairs still exist, hypercalls attempting to detach from these
2512  * queue pairs will be made. Any failure to detach is silently
2513  * ignored.
2514  */
2515 void vmci_qp_guest_endpoints_exit(void)
2516 {
2517 	struct qp_entry *entry;
2518 	struct qp_guest_endpoint *ep;
2519 
2520 	mutex_lock(&qp_guest_endpoints.mutex);
2521 
2522 	while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
2523 		ep = (struct qp_guest_endpoint *)entry;
2524 
2525 		/* Don't make a hypercall for local queue_pairs. */
2526 		if (!(entry->flags & VMCI_QPFLAG_LOCAL))
2527 			qp_detatch_hypercall(entry->handle);
2528 
2529 		/* We cannot fail the exit, so let's reset ref_count. */
2530 		entry->ref_count = 0;
2531 		qp_list_remove_entry(&qp_guest_endpoints, entry);
2532 
2533 		qp_guest_endpoint_destroy(ep);
2534 	}
2535 
2536 	mutex_unlock(&qp_guest_endpoints.mutex);
2537 }
2538 
2539 /*
2540  * Helper routine that will lock the queue pair before subsequent
2541  * operations.
2542  * Note: Non-blocking on the host side is currently only implemented in ESX.
 * Since non-blocking isn't yet implemented on the host personality, we
 * have no reason to acquire a spin lock, so to avoid taking an
 * unnecessary lock we only acquire the mutex if we can block.
 * Note: It is assumed that QPFLAG_PINNED implies QPFLAG_NONBLOCK.  Therefore
 * we can use the same locking function for access to both the queue
 * and the queue headers as it is the same logic.  Assert this behavior.
2549  */
2550 static void qp_lock(const struct vmci_qp *qpair)
2551 {
2552 	if (vmci_can_block(qpair->flags))
2553 		qp_acquire_queue_mutex(qpair->produce_q);
2554 }
2555 
2556 /*
2557  * Helper routine that unlocks the queue pair after calling
2558  * qp_lock.  Respects non-blocking and pinning flags.
2559  */
2560 static void qp_unlock(const struct vmci_qp *qpair)
2561 {
2562 	if (vmci_can_block(qpair->flags))
2563 		qp_release_queue_mutex(qpair->produce_q);
2564 }
2565 
2566 /*
2567  * The queue headers may not be mapped at all times. If a queue is
 * currently not mapped, an attempt will be made to map it.
2569  */
2570 static int qp_map_queue_headers(struct vmci_queue *produce_q,
2571 				struct vmci_queue *consume_q,
2572 				bool can_block)
2573 {
2574 	int result;
2575 
2576 	if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
2577 		if (can_block)
2578 			result = qp_host_map_queues(produce_q, consume_q);
2579 		else
2580 			result = VMCI_ERROR_QUEUEPAIR_NOT_READY;
2581 
2582 		if (result < VMCI_SUCCESS)
2583 			return (produce_q->saved_header &&
2584 				consume_q->saved_header) ?
2585 			    VMCI_ERROR_QUEUEPAIR_NOT_READY :
2586 			    VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2587 	}
2588 
2589 	return VMCI_SUCCESS;
2590 }
2591 
2592 /*
2593  * Helper routine that will retrieve the produce and consume
2594  * headers of a given queue pair. If the guest memory of the
2595  * queue pair is currently not available, the saved queue headers
2596  * will be returned, if these are available.
2597  */
2598 static int qp_get_queue_headers(const struct vmci_qp *qpair,
2599 				struct vmci_queue_header **produce_q_header,
2600 				struct vmci_queue_header **consume_q_header)
2601 {
2602 	int result;
2603 
2604 	result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q,
2605 				      vmci_can_block(qpair->flags));
2606 	if (result == VMCI_SUCCESS) {
2607 		*produce_q_header = qpair->produce_q->q_header;
2608 		*consume_q_header = qpair->consume_q->q_header;
2609 	} else if (qpair->produce_q->saved_header &&
2610 		   qpair->consume_q->saved_header) {
2611 		*produce_q_header = qpair->produce_q->saved_header;
2612 		*consume_q_header = qpair->consume_q->saved_header;
2613 		result = VMCI_SUCCESS;
2614 	}
2615 
2616 	return result;
2617 }
2618 
2619 /*
2620  * Callback from VMCI queue pair broker indicating that a queue
2621  * pair that was previously not ready, now either is ready or
2622  * gone forever.
2623  */
2624 static int qp_wakeup_cb(void *client_data)
2625 {
2626 	struct vmci_qp *qpair = (struct vmci_qp *)client_data;
2627 
2628 	qp_lock(qpair);
2629 	while (qpair->blocked > 0) {
2630 		qpair->blocked--;
2631 		qpair->generation++;
2632 		wake_up(&qpair->event);
2633 	}
2634 	qp_unlock(qpair);
2635 
2636 	return VMCI_SUCCESS;
2637 }
2638 
2639 /*
2640  * Makes the calling thread wait for the queue pair to become
2641  * ready for host side access.  Returns true when thread is
2642  * woken up after queue pair state change, false otherwise.
2643  */
2644 static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
2645 {
2646 	unsigned int generation;
2647 
2648 	if (qpair->flags & VMCI_QPFLAG_NONBLOCK)
2649 		return false;
2650 
2651 	qpair->blocked++;
2652 	generation = qpair->generation;
2653 	qp_unlock(qpair);
2654 	wait_event(qpair->event, generation != qpair->generation);
2655 	qp_lock(qpair);
2656 
2657 	return true;
2658 }
2659 
2660 /*
2661  * Enqueues a given buffer to the produce queue using the provided
2662  * function. As many bytes as possible (space available in the queue)
2663  * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
2664  * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
2665  * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
2666  * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
 * an error occurred when accessing the buffer,
2668  * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
2669  * available.  Otherwise, the number of bytes written to the queue is
2670  * returned.  Updates the tail pointer of the produce queue.
2671  */
2672 static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
2673 				 struct vmci_queue *consume_q,
2674 				 const u64 produce_q_size,
2675 				 const void *buf,
2676 				 size_t buf_size,
2677 				 vmci_memcpy_to_queue_func memcpy_to_queue,
2678 				 bool can_block)
2679 {
2680 	s64 free_space;
2681 	u64 tail;
2682 	size_t written;
2683 	ssize_t result;
2684 
2685 	result = qp_map_queue_headers(produce_q, consume_q, can_block);
2686 	if (unlikely(result != VMCI_SUCCESS))
2687 		return result;
2688 
2689 	free_space = vmci_q_header_free_space(produce_q->q_header,
2690 					      consume_q->q_header,
2691 					      produce_q_size);
2692 	if (free_space == 0)
2693 		return VMCI_ERROR_QUEUEPAIR_NOSPACE;
2694 
2695 	if (free_space < VMCI_SUCCESS)
2696 		return (ssize_t) free_space;
2697 
2698 	written = (size_t) (free_space > buf_size ? buf_size : free_space);
2699 	tail = vmci_q_header_producer_tail(produce_q->q_header);
2700 	if (likely(tail + written < produce_q_size)) {
2701 		result = memcpy_to_queue(produce_q, tail, buf, 0, written);
2702 	} else {
2703 		/* Tail pointer wraps around. */
2704 
2705 		const size_t tmp = (size_t) (produce_q_size - tail);
2706 
2707 		result = memcpy_to_queue(produce_q, tail, buf, 0, tmp);
2708 		if (result >= VMCI_SUCCESS)
2709 			result = memcpy_to_queue(produce_q, 0, buf, tmp,
2710 						 written - tmp);
2711 	}
2712 
2713 	if (result < VMCI_SUCCESS)
2714 		return result;
2715 
2716 	vmci_q_header_add_producer_tail(produce_q->q_header, written,
2717 					produce_q_size);
2718 	return written;
2719 }
2720 
2721 /*
2722  * Dequeues data (if available) from the given consume queue. Writes data
2723  * to the user provided buffer using the provided function.
2724  * Assumes the queue->mutex has been acquired.
2725  * Results:
2726  * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
2727  * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
2728  * (as defined by the queue size).
 * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
2730  * Otherwise the number of bytes dequeued is returned.
2731  * Side effects:
2732  * Updates the head pointer of the consume queue.
2733  */
2734 static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
2735 				 struct vmci_queue *consume_q,
2736 				 const u64 consume_q_size,
2737 				 void *buf,
2738 				 size_t buf_size,
2739 				 vmci_memcpy_from_queue_func memcpy_from_queue,
2740 				 bool update_consumer,
2741 				 bool can_block)
2742 {
2743 	s64 buf_ready;
2744 	u64 head;
2745 	size_t read;
2746 	ssize_t result;
2747 
2748 	result = qp_map_queue_headers(produce_q, consume_q, can_block);
2749 	if (unlikely(result != VMCI_SUCCESS))
2750 		return result;
2751 
2752 	buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
2753 					    produce_q->q_header,
2754 					    consume_q_size);
2755 	if (buf_ready == 0)
2756 		return VMCI_ERROR_QUEUEPAIR_NODATA;
2757 
2758 	if (buf_ready < VMCI_SUCCESS)
2759 		return (ssize_t) buf_ready;
2760 
2761 	read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
2762 	head = vmci_q_header_consumer_head(produce_q->q_header);
2763 	if (likely(head + read < consume_q_size)) {
2764 		result = memcpy_from_queue(buf, 0, consume_q, head, read);
2765 	} else {
2766 		/* Head pointer wraps around. */
2767 
2768 		const size_t tmp = (size_t) (consume_q_size - head);
2769 
2770 		result = memcpy_from_queue(buf, 0, consume_q, head, tmp);
2771 		if (result >= VMCI_SUCCESS)
2772 			result = memcpy_from_queue(buf, tmp, consume_q, 0,
2773 						   read - tmp);
2774 
2775 	}
2776 
2777 	if (result < VMCI_SUCCESS)
2778 		return result;
2779 
2780 	if (update_consumer)
2781 		vmci_q_header_add_consumer_head(produce_q->q_header,
2782 						read, consume_q_size);
2783 
2784 	return read;
2785 }
2786 
2787 /*
2788  * vmci_qpair_alloc() - Allocates a queue pair.
2789  * @qpair:      Pointer for the new vmci_qp struct.
2790  * @handle:     Handle to track the resource.
2791  * @produce_qsize:      Desired size of the producer queue.
2792  * @consume_qsize:      Desired size of the consumer queue.
2793  * @peer:       ContextID of the peer.
2794  * @flags:      VMCI flags.
 * @priv_flags: VMCI privilege flags.
2796  *
2797  * This is the client interface for allocating the memory for a
2798  * vmci_qp structure and then attaching to the underlying
2799  * queue.  If an error occurs allocating the memory for the
2800  * vmci_qp structure no attempt is made to attach.  If an
2801  * error occurs attaching, then the structure is freed.
2802  */
2803 int vmci_qpair_alloc(struct vmci_qp **qpair,
2804 		     struct vmci_handle *handle,
2805 		     u64 produce_qsize,
2806 		     u64 consume_qsize,
2807 		     u32 peer,
2808 		     u32 flags,
2809 		     u32 priv_flags)
2810 {
2811 	struct vmci_qp *my_qpair;
2812 	int retval;
2813 	struct vmci_handle src = VMCI_INVALID_HANDLE;
2814 	struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
2815 	enum vmci_route route;
2816 	vmci_event_release_cb wakeup_cb;
2817 	void *client_data;
2818 
2819 	/*
2820 	 * Restrict the size of a queuepair.  The device already
2821 	 * enforces a limit on the total amount of memory that can be
2822 	 * allocated to queuepairs for a guest.  However, we try to
2823 	 * allocate this memory before we make the queuepair
2824 	 * allocation hypercall.  On Linux, we allocate each page
2825 	 * separately, which means rather than fail, the guest will
2826 	 * thrash while it tries to allocate, and will become
2827 	 * increasingly unresponsive to the point where it appears to
2828 	 * be hung.  So we place a limit on the size of an individual
2829 	 * queuepair here, and leave the device to enforce the
2830 	 * restriction on total queuepair memory.  (Note that this
2831 	 * doesn't prevent all cases; a user with only this much
2832 	 * physical memory could still get into trouble.)  The error
2833 	 * used by the device is NO_RESOURCES, so use that here too.
2834 	 */
2835 
2836 	if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
2837 	    produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
2838 		return VMCI_ERROR_NO_RESOURCES;
2839 
2840 	retval = vmci_route(&src, &dst, false, &route);
2841 	if (retval < VMCI_SUCCESS)
2842 		route = vmci_guest_code_active() ?
2843 		    VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
2844 
2845 	/* If NONBLOCK or PINNED is set, we better be the guest personality. */
2846 	if ((!vmci_can_block(flags) || vmci_qp_pinned(flags)) &&
2847 	    VMCI_ROUTE_AS_GUEST != route) {
2848 		pr_devel("Not guest personality w/ NONBLOCK OR PINNED set");
2849 		return VMCI_ERROR_INVALID_ARGS;
2850 	}
2851 
2852 	/*
2853 	 * Limit the size of pinned QPs and check sanity.
2854 	 *
2855 	 * Pinned pages implies non-blocking mode.  Mutexes aren't acquired
2856 	 * when the NONBLOCK flag is set in qpair code; and also should not be
2857 	 * acquired when the PINNED flagged is set.  Since pinning pages
2858 	 * implies we want speed, it makes no sense not to have NONBLOCK
2859 	 * set if PINNED is set.  Hence enforce this implication.
2860 	 */
2861 	if (vmci_qp_pinned(flags)) {
2862 		if (vmci_can_block(flags)) {
2863 			pr_err("Attempted to enable pinning w/o non-blocking");
2864 			return VMCI_ERROR_INVALID_ARGS;
2865 		}
2866 
2867 		if (produce_qsize + consume_qsize > VMCI_MAX_PINNED_QP_MEMORY)
2868 			return VMCI_ERROR_NO_RESOURCES;
2869 	}
2870 
2871 	my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
2872 	if (!my_qpair)
2873 		return VMCI_ERROR_NO_MEM;
2874 
2875 	my_qpair->produce_q_size = produce_qsize;
2876 	my_qpair->consume_q_size = consume_qsize;
2877 	my_qpair->peer = peer;
2878 	my_qpair->flags = flags;
2879 	my_qpair->priv_flags = priv_flags;
2880 
2881 	wakeup_cb = NULL;
2882 	client_data = NULL;
2883 
2884 	if (VMCI_ROUTE_AS_HOST == route) {
2885 		my_qpair->guest_endpoint = false;
2886 		if (!(flags & VMCI_QPFLAG_LOCAL)) {
2887 			my_qpair->blocked = 0;
2888 			my_qpair->generation = 0;
2889 			init_waitqueue_head(&my_qpair->event);
2890 			wakeup_cb = qp_wakeup_cb;
2891 			client_data = (void *)my_qpair;
2892 		}
2893 	} else {
2894 		my_qpair->guest_endpoint = true;
2895 	}
2896 
2897 	retval = vmci_qp_alloc(handle,
2898 			       &my_qpair->produce_q,
2899 			       my_qpair->produce_q_size,
2900 			       &my_qpair->consume_q,
2901 			       my_qpair->consume_q_size,
2902 			       my_qpair->peer,
2903 			       my_qpair->flags,
2904 			       my_qpair->priv_flags,
2905 			       my_qpair->guest_endpoint,
2906 			       wakeup_cb, client_data);
2907 
2908 	if (retval < VMCI_SUCCESS) {
2909 		kfree(my_qpair);
2910 		return retval;
2911 	}
2912 
2913 	*qpair = my_qpair;
2914 	my_qpair->handle = *handle;
2915 
2916 	return retval;
2917 }
2918 EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
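
/*
 * A minimal, illustrative usage sketch for the client API (the peer ID,
 * queue sizes and flags below are placeholder choices, not requirements
 * of the interface):
 *
 *	struct vmci_qp *qpair;
 *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
 *	int rv;
 *
 *	rv = vmci_qpair_alloc(&qpair, &handle, 4096, 4096,
 *			      VMCI_HOST_CONTEXT_ID, 0,
 *			      VMCI_NO_PRIVILEGE_FLAGS);
 *	if (rv < VMCI_SUCCESS)
 *		return rv;
 *
 *	... exchange data with vmci_qpair_enqueue()/vmci_qpair_dequeue() ...
 *
 *	vmci_qpair_detach(&qpair);
 */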
2919 
2920 /*
 * vmci_qpair_detach() - Detaches the client from a queue pair.
2922  * @qpair:      Reference of a pointer to the qpair struct.
2923  *
2924  * This is the client interface for detaching from a VMCIQPair.
2925  * Note that this routine will free the memory allocated for the
2926  * vmci_qp structure too.
2927  */
2928 int vmci_qpair_detach(struct vmci_qp **qpair)
2929 {
2930 	int result;
2931 	struct vmci_qp *old_qpair;
2932 
2933 	if (!qpair || !(*qpair))
2934 		return VMCI_ERROR_INVALID_ARGS;
2935 
2936 	old_qpair = *qpair;
2937 	result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
2938 
2939 	/*
2940 	 * The guest can fail to detach for a number of reasons, and
	 * if it does so, it will clean up the entry (if there is one).
	 * The host can fail too, but it won't clean up the entry
	 * immediately; it will do that later when the context is
2944 	 * freed.  Either way, we need to release the qpair struct
2945 	 * here; there isn't much the caller can do, and we don't want
2946 	 * to leak.
2947 	 */
2948 
2949 	memset(old_qpair, 0, sizeof(*old_qpair));
2950 	old_qpair->handle = VMCI_INVALID_HANDLE;
2951 	old_qpair->peer = VMCI_INVALID_ID;
2952 	kfree(old_qpair);
2953 	*qpair = NULL;
2954 
2955 	return result;
2956 }
2957 EXPORT_SYMBOL_GPL(vmci_qpair_detach);
2958 
2959 /*
2960  * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
2961  * @qpair:      Pointer to the queue pair struct.
2962  * @producer_tail:      Reference used for storing producer tail index.
2963  * @consumer_head:      Reference used for storing the consumer head index.
2964  *
2965  * This is the client interface for getting the current indexes of the
 * QPair from the point of view of the caller as the producer.
2967  */
2968 int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
2969 				   u64 *producer_tail,
2970 				   u64 *consumer_head)
2971 {
2972 	struct vmci_queue_header *produce_q_header;
2973 	struct vmci_queue_header *consume_q_header;
2974 	int result;
2975 
2976 	if (!qpair)
2977 		return VMCI_ERROR_INVALID_ARGS;
2978 
2979 	qp_lock(qpair);
2980 	result =
2981 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2982 	if (result == VMCI_SUCCESS)
2983 		vmci_q_header_get_pointers(produce_q_header, consume_q_header,
2984 					   producer_tail, consumer_head);
2985 	qp_unlock(qpair);
2986 
2987 	if (result == VMCI_SUCCESS &&
2988 	    ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
2989 	     (consumer_head && *consumer_head >= qpair->produce_q_size)))
2990 		return VMCI_ERROR_INVALID_SIZE;
2991 
2992 	return result;
2993 }
2994 EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
2995 
2996 /*
 * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
2998  * @qpair:      Pointer to the queue pair struct.
2999  * @consumer_tail:      Reference used for storing consumer tail index.
3000  * @producer_head:      Reference used for storing the producer head index.
3001  *
3002  * This is the client interface for getting the current indexes of the
3003  * QPair from the point of the view of the caller as the consumer.
3004  */
3005 int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
3006 				   u64 *consumer_tail,
3007 				   u64 *producer_head)
3008 {
3009 	struct vmci_queue_header *produce_q_header;
3010 	struct vmci_queue_header *consume_q_header;
3011 	int result;
3012 
3013 	if (!qpair)
3014 		return VMCI_ERROR_INVALID_ARGS;
3015 
3016 	qp_lock(qpair);
3017 	result =
3018 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3019 	if (result == VMCI_SUCCESS)
3020 		vmci_q_header_get_pointers(consume_q_header, produce_q_header,
3021 					   consumer_tail, producer_head);
3022 	qp_unlock(qpair);
3023 
3024 	if (result == VMCI_SUCCESS &&
3025 	    ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
3026 	     (producer_head && *producer_head >= qpair->consume_q_size)))
3027 		return VMCI_ERROR_INVALID_SIZE;
3028 
3029 	return result;
3030 }
3031 EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
3032 
3033 /*
3034  * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
3035  * @qpair:      Pointer to the queue pair struct.
3036  *
3037  * This is the client interface for getting the amount of free
 * space in the QPair from the point of view of the caller as
 * the producer, which is the common case.  Returns < 0 on error,
 * else the number of bytes into which data can be enqueued.
3041  */
3042 s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
3043 {
3044 	struct vmci_queue_header *produce_q_header;
3045 	struct vmci_queue_header *consume_q_header;
3046 	s64 result;
3047 
3048 	if (!qpair)
3049 		return VMCI_ERROR_INVALID_ARGS;
3050 
3051 	qp_lock(qpair);
3052 	result =
3053 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3054 	if (result == VMCI_SUCCESS)
3055 		result = vmci_q_header_free_space(produce_q_header,
3056 						  consume_q_header,
3057 						  qpair->produce_q_size);
3058 	else
3059 		result = 0;
3060 
3061 	qp_unlock(qpair);
3062 
3063 	return result;
3064 }
3065 EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
3066 
3067 /*
3068  * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
3069  * @qpair:      Pointer to the queue pair struct.
3070  *
3071  * This is the client interface for getting the amount of free
 * space in the QPair from the point of view of the caller as
 * the consumer, which is not the common case.  Returns < 0 on error,
 * else the number of bytes into which data can be enqueued.
3075  */
3076 s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
3077 {
3078 	struct vmci_queue_header *produce_q_header;
3079 	struct vmci_queue_header *consume_q_header;
3080 	s64 result;
3081 
3082 	if (!qpair)
3083 		return VMCI_ERROR_INVALID_ARGS;
3084 
3085 	qp_lock(qpair);
3086 	result =
3087 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3088 	if (result == VMCI_SUCCESS)
3089 		result = vmci_q_header_free_space(consume_q_header,
3090 						  produce_q_header,
3091 						  qpair->consume_q_size);
3092 	else
3093 		result = 0;
3094 
3095 	qp_unlock(qpair);
3096 
3097 	return result;
3098 }
3099 EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
3100 
3101 /*
3102  * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
3103  * producer queue.
3104  * @qpair:      Pointer to the queue pair struct.
3105  *
3106  * This is the client interface for getting the amount of
 * enqueued data in the QPair from the point of view of the
 * caller as the producer, which is not the common case.  Returns < 0 on
 * error, else the number of bytes that may be read.
3110  */
3111 s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
3112 {
3113 	struct vmci_queue_header *produce_q_header;
3114 	struct vmci_queue_header *consume_q_header;
3115 	s64 result;
3116 
3117 	if (!qpair)
3118 		return VMCI_ERROR_INVALID_ARGS;
3119 
3120 	qp_lock(qpair);
3121 	result =
3122 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3123 	if (result == VMCI_SUCCESS)
3124 		result = vmci_q_header_buf_ready(produce_q_header,
3125 						 consume_q_header,
3126 						 qpair->produce_q_size);
3127 	else
3128 		result = 0;
3129 
3130 	qp_unlock(qpair);
3131 
3132 	return result;
3133 }
3134 EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
3135 
3136 /*
3137  * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
3138  * consumer queue.
3139  * @qpair:      Pointer to the queue pair struct.
3140  *
3141  * This is the client interface for getting the amount of
 * enqueued data in the QPair from the point of view of the
 * caller as the consumer, which is the normal case.  Returns < 0 on
 * error, else the number of bytes that may be read.
3145  */
3146 s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
3147 {
3148 	struct vmci_queue_header *produce_q_header;
3149 	struct vmci_queue_header *consume_q_header;
3150 	s64 result;
3151 
3152 	if (!qpair)
3153 		return VMCI_ERROR_INVALID_ARGS;
3154 
3155 	qp_lock(qpair);
3156 	result =
3157 	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3158 	if (result == VMCI_SUCCESS)
3159 		result = vmci_q_header_buf_ready(consume_q_header,
3160 						 produce_q_header,
3161 						 qpair->consume_q_size);
3162 	else
3163 		result = 0;
3164 
3165 	qp_unlock(qpair);
3166 
3167 	return result;
3168 }
3169 EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
3170 
3171 /*
3172  * vmci_qpair_enqueue() - Throw data on the queue.
3173  * @qpair:      Pointer to the queue pair struct.
3174  * @buf:        Pointer to buffer containing data
3175  * @buf_size:   Length of buffer.
3176  * @buf_type:   Buffer type (Unused).
3177  *
3178  * This is the client interface for enqueueing data into the queue.
3179  * Returns number of bytes enqueued or < 0 on error.
3180  */
3181 ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
3182 			   const void *buf,
3183 			   size_t buf_size,
3184 			   int buf_type)
3185 {
3186 	ssize_t result;
3187 
3188 	if (!qpair || !buf)
3189 		return VMCI_ERROR_INVALID_ARGS;
3190 
3191 	qp_lock(qpair);
3192 
3193 	do {
3194 		result = qp_enqueue_locked(qpair->produce_q,
3195 					   qpair->consume_q,
3196 					   qpair->produce_q_size,
3197 					   buf, buf_size,
3198 					   qp_memcpy_to_queue,
3199 					   vmci_can_block(qpair->flags));
3200 
3201 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3202 		    !qp_wait_for_ready_queue(qpair))
3203 			result = VMCI_ERROR_WOULD_BLOCK;
3204 
3205 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3206 
3207 	qp_unlock(qpair);
3208 
3209 	return result;
3210 }
3211 EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
3212 
3213 /*
3214  * vmci_qpair_dequeue() - Get data from the queue.
3215  * @qpair:      Pointer to the queue pair struct.
3216  * @buf:        Pointer to buffer for the data
3217  * @buf_size:   Length of buffer.
3218  * @buf_type:   Buffer type (Unused).
3219  *
3220  * This is the client interface for dequeueing data from the queue.
3221  * Returns number of bytes dequeued or < 0 on error.
3222  */
3223 ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
3224 			   void *buf,
3225 			   size_t buf_size,
3226 			   int buf_type)
3227 {
3228 	ssize_t result;
3229 
3230 	if (!qpair || !buf)
3231 		return VMCI_ERROR_INVALID_ARGS;
3232 
3233 	qp_lock(qpair);
3234 
3235 	do {
3236 		result = qp_dequeue_locked(qpair->produce_q,
3237 					   qpair->consume_q,
3238 					   qpair->consume_q_size,
3239 					   buf, buf_size,
3240 					   qp_memcpy_from_queue, true,
3241 					   vmci_can_block(qpair->flags));
3242 
3243 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3244 		    !qp_wait_for_ready_queue(qpair))
3245 			result = VMCI_ERROR_WOULD_BLOCK;
3246 
3247 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3248 
3249 	qp_unlock(qpair);
3250 
3251 	return result;
3252 }
3253 EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
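
/*
 * An illustrative round trip through an attached queue pair using the
 * enqueue/dequeue calls above (the buffer contents and the lack of
 * error handling are placeholder choices, not API requirements):
 *
 *	char msg[] = "ping";
 *	char reply[sizeof(msg)];
 *	ssize_t n;
 *
 *	n = vmci_qpair_enqueue(qpair, msg, sizeof(msg), 0);
 *	if (n >= VMCI_SUCCESS)
 *		n = vmci_qpair_dequeue(qpair, reply, sizeof(reply), 0);
 *
 * A return of VMCI_ERROR_QUEUEPAIR_NOSPACE from enqueue or
 * VMCI_ERROR_QUEUEPAIR_NODATA from dequeue simply means the ring is
 * full or empty, respectively; the final argument (buf_type) is unused.
 */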
3254 
3255 /*
3256  * vmci_qpair_peek() - Peek at the data in the queue.
3257  * @qpair:      Pointer to the queue pair struct.
3258  * @buf:        Pointer to buffer for the data
3259  * @buf_size:   Length of buffer.
3260  * @buf_type:   Buffer type (Unused on Linux).
3261  *
3262  * This is the client interface for peeking into a queue.  (I.e.,
3263  * copy data from the queue without updating the head pointer.)
3264  * Returns number of bytes dequeued or < 0 on error.
3265  */
3266 ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
3267 			void *buf,
3268 			size_t buf_size,
3269 			int buf_type)
3270 {
3271 	ssize_t result;
3272 
3273 	if (!qpair || !buf)
3274 		return VMCI_ERROR_INVALID_ARGS;
3275 
3276 	qp_lock(qpair);
3277 
3278 	do {
3279 		result = qp_dequeue_locked(qpair->produce_q,
3280 					   qpair->consume_q,
3281 					   qpair->consume_q_size,
3282 					   buf, buf_size,
3283 					   qp_memcpy_from_queue, false,
3284 					   vmci_can_block(qpair->flags));
3285 
3286 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3287 		    !qp_wait_for_ready_queue(qpair))
3288 			result = VMCI_ERROR_WOULD_BLOCK;
3289 
3290 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3291 
3292 	qp_unlock(qpair);
3293 
3294 	return result;
3295 }
3296 EXPORT_SYMBOL_GPL(vmci_qpair_peek);
3297 
3298 /*
3299  * vmci_qpair_enquev() - Throw data on the queue using iov.
3300  * @qpair:      Pointer to the queue pair struct.
3301  * @iov:        Pointer to buffer containing data
3302  * @iov_size:   Length of buffer.
3303  * @buf_type:   Buffer type (Unused).
3304  *
3305  * This is the client interface for enqueueing data into the queue.
3306  * This function uses IO vectors to handle the work. Returns number
3307  * of bytes enqueued or < 0 on error.
3308  */
3309 ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3310 			  void *iov,
3311 			  size_t iov_size,
3312 			  int buf_type)
3313 {
3314 	ssize_t result;
3315 
3316 	if (!qpair || !iov)
3317 		return VMCI_ERROR_INVALID_ARGS;
3318 
3319 	qp_lock(qpair);
3320 
3321 	do {
3322 		result = qp_enqueue_locked(qpair->produce_q,
3323 					   qpair->consume_q,
3324 					   qpair->produce_q_size,
3325 					   iov, iov_size,
3326 					   qp_memcpy_to_queue_iov,
3327 					   vmci_can_block(qpair->flags));
3328 
3329 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3330 		    !qp_wait_for_ready_queue(qpair))
3331 			result = VMCI_ERROR_WOULD_BLOCK;
3332 
3333 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3334 
3335 	qp_unlock(qpair);
3336 
3337 	return result;
3338 }
3339 EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
3340 
3341 /*
3342  * vmci_qpair_dequev() - Get data from the queue using iov.
3343  * @qpair:      Pointer to the queue pair struct.
3344  * @iov:        Pointer to buffer for the data
3345  * @iov_size:   Length of buffer.
3346  * @buf_type:   Buffer type (Unused).
3347  *
3348  * This is the client interface for dequeueing data from the queue.
3349  * This function uses IO vectors to handle the work. Returns number
3350  * of bytes dequeued or < 0 on error.
3351  */
3352 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
3353 			  void *iov,
3354 			  size_t iov_size,
3355 			  int buf_type)
3356 {
3357 	ssize_t result;
3358 
3359 	if (!qpair || !iov)
3360 		return VMCI_ERROR_INVALID_ARGS;
3361 
3362 	qp_lock(qpair);
3363 
3364 	do {
3365 		result = qp_dequeue_locked(qpair->produce_q,
3366 					   qpair->consume_q,
3367 					   qpair->consume_q_size,
3368 					   iov, iov_size,
3369 					   qp_memcpy_from_queue_iov,
3370 					   true, vmci_can_block(qpair->flags));
3371 
3372 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3373 		    !qp_wait_for_ready_queue(qpair))
3374 			result = VMCI_ERROR_WOULD_BLOCK;
3375 
3376 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3377 
3378 	qp_unlock(qpair);
3379 
3380 	return result;
3381 }
3382 EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
3383 
3384 /*
3385  * vmci_qpair_peekv() - Peek at the data in the queue using iov.
3386  * @qpair:      Pointer to the queue pair struct.
3387  * @iov:        Pointer to buffer for the data
3388  * @iov_size:   Length of buffer.
3389  * @buf_type:   Buffer type (Unused on Linux).
3390  *
3391  * This is the client interface for peeking into a queue.  (I.e.,
3392  * copy data from the queue without updating the head pointer.)
3393  * This function uses IO vectors to handle the work. Returns number
3394  * of bytes peeked or < 0 on error.
3395  */
3396 ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
3397 			 void *iov,
3398 			 size_t iov_size,
3399 			 int buf_type)
3400 {
3401 	ssize_t result;
3402 
3403 	if (!qpair || !iov)
3404 		return VMCI_ERROR_INVALID_ARGS;
3405 
3406 	qp_lock(qpair);
3407 
3408 	do {
3409 		result = qp_dequeue_locked(qpair->produce_q,
3410 					   qpair->consume_q,
3411 					   qpair->consume_q_size,
3412 					   iov, iov_size,
3413 					   qp_memcpy_from_queue_iov,
3414 					   false, vmci_can_block(qpair->flags));
3415 
3416 		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3417 		    !qp_wait_for_ready_queue(qpair))
3418 			result = VMCI_ERROR_WOULD_BLOCK;
3419 
3420 	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3421 
3422 	qp_unlock(qpair);
3423 	return result;
3424 }
3425 EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
3426