1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VMware VMCI Driver
4  *
5  * Copyright (C) 2012 VMware, Inc. All rights reserved.
6  */
7 
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/slab.h>
#include <linux/string.h>
16 
17 #include "vmci_queue_pair.h"
18 #include "vmci_datagram.h"
19 #include "vmci_doorbell.h"
20 #include "vmci_context.h"
21 #include "vmci_driver.h"
22 #include "vmci_event.h"
23 
/*
 * List of current VMCI contexts.  Contexts are added by
 * vmci_ctx_create() and removed by vmci_ctx_destroy().
 * Additions and removals are serialized by the embedded spinlock;
 * the lookups in this file walk the list under rcu_read_lock().
 */
static struct {
	struct list_head head;
	spinlock_t lock; /* Spinlock for context list operations */
} ctx_list = {
	.head = LIST_HEAD_INIT(ctx_list.head),
	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
};
37 
/*
 * Fallback notify flag.  A context's ->notify pointer is aimed here by
 * vmci_ctx_create() and again by vmci_ctx_unset_notify(), so the
 * signal/clear helpers always have a valid bool to write to even when
 * the context did not set up a notify flag pointer of its own.
 */
static bool ctx_dummy_notify;
40 
41 static void ctx_signal_notify(struct vmci_ctx *context)
42 {
43 	*context->notify = true;
44 }
45 
46 static void ctx_clear_notify(struct vmci_ctx *context)
47 {
48 	*context->notify = false;
49 }
50 
51 /*
52  * If nothing requires the attention of the guest, clears both
53  * notify flag and call.
54  */
55 static void ctx_clear_notify_call(struct vmci_ctx *context)
56 {
57 	if (context->pending_datagrams == 0 &&
58 	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
59 		ctx_clear_notify(context);
60 }
61 
62 /*
63  * Sets the context's notify flag iff datagrams are pending for this
64  * context.  Called from vmci_setup_notify().
65  */
66 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
67 {
68 	spin_lock(&context->lock);
69 	if (context->pending_datagrams)
70 		ctx_signal_notify(context);
71 	spin_unlock(&context->lock);
72 }
73 
74 /*
75  * Allocates and initializes a VMCI context.
76  */
77 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
78 				 uintptr_t event_hnd,
79 				 int user_version,
80 				 const struct cred *cred)
81 {
82 	struct vmci_ctx *context;
83 	int error;
84 
85 	if (cid == VMCI_INVALID_ID) {
86 		pr_devel("Invalid context ID for VMCI context\n");
87 		error = -EINVAL;
88 		goto err_out;
89 	}
90 
91 	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
92 		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
93 			 priv_flags);
94 		error = -EINVAL;
95 		goto err_out;
96 	}
97 
98 	if (user_version == 0) {
99 		pr_devel("Invalid suer_version %d\n", user_version);
100 		error = -EINVAL;
101 		goto err_out;
102 	}
103 
104 	context = kzalloc(sizeof(*context), GFP_KERNEL);
105 	if (!context) {
106 		pr_warn("Failed to allocate memory for VMCI context\n");
107 		error = -EINVAL;
108 		goto err_out;
109 	}
110 
111 	kref_init(&context->kref);
112 	spin_lock_init(&context->lock);
113 	INIT_LIST_HEAD(&context->list_item);
114 	INIT_LIST_HEAD(&context->datagram_queue);
115 	INIT_LIST_HEAD(&context->notifier_list);
116 
117 	/* Initialize host-specific VMCI context. */
118 	init_waitqueue_head(&context->host_context.wait_queue);
119 
120 	context->queue_pair_array = vmci_handle_arr_create(0);
121 	if (!context->queue_pair_array) {
122 		error = -ENOMEM;
123 		goto err_free_ctx;
124 	}
125 
126 	context->doorbell_array = vmci_handle_arr_create(0);
127 	if (!context->doorbell_array) {
128 		error = -ENOMEM;
129 		goto err_free_qp_array;
130 	}
131 
132 	context->pending_doorbell_array = vmci_handle_arr_create(0);
133 	if (!context->pending_doorbell_array) {
134 		error = -ENOMEM;
135 		goto err_free_db_array;
136 	}
137 
138 	context->user_version = user_version;
139 
140 	context->priv_flags = priv_flags;
141 
142 	if (cred)
143 		context->cred = get_cred(cred);
144 
145 	context->notify = &ctx_dummy_notify;
146 	context->notify_page = NULL;
147 
148 	/*
149 	 * If we collide with an existing context we generate a new
150 	 * and use it instead. The VMX will determine if regeneration
151 	 * is okay. Since there isn't 4B - 16 VMs running on a given
152 	 * host, the below loop will terminate.
153 	 */
154 	spin_lock(&ctx_list.lock);
155 
156 	while (vmci_ctx_exists(cid)) {
157 		/* We reserve the lowest 16 ids for fixed contexts. */
158 		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
159 		if (cid == VMCI_INVALID_ID)
160 			cid = VMCI_RESERVED_CID_LIMIT;
161 	}
162 	context->cid = cid;
163 
164 	list_add_tail_rcu(&context->list_item, &ctx_list.head);
165 	spin_unlock(&ctx_list.lock);
166 
167 	return context;
168 
169  err_free_db_array:
170 	vmci_handle_arr_destroy(context->doorbell_array);
171  err_free_qp_array:
172 	vmci_handle_arr_destroy(context->queue_pair_array);
173  err_free_ctx:
174 	kfree(context);
175  err_out:
176 	return ERR_PTR(error);
177 }
178 
179 /*
180  * Destroy VMCI context.
181  */
182 void vmci_ctx_destroy(struct vmci_ctx *context)
183 {
184 	spin_lock(&ctx_list.lock);
185 	list_del_rcu(&context->list_item);
186 	spin_unlock(&ctx_list.lock);
187 	synchronize_rcu();
188 
189 	vmci_ctx_put(context);
190 }
191 
192 /*
193  * Fire notification for all contexts interested in given cid.
194  */
195 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
196 {
197 	u32 i, array_size;
198 	struct vmci_ctx *sub_ctx;
199 	struct vmci_handle_arr *subscriber_array;
200 	struct vmci_handle context_handle =
201 		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
202 
203 	/*
204 	 * We create an array to hold the subscribers we find when
205 	 * scanning through all contexts.
206 	 */
207 	subscriber_array = vmci_handle_arr_create(0);
208 	if (subscriber_array == NULL)
209 		return VMCI_ERROR_NO_MEM;
210 
211 	/*
212 	 * Scan all contexts to find who is interested in being
213 	 * notified about given contextID.
214 	 */
215 	rcu_read_lock();
216 	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
217 		struct vmci_handle_list *node;
218 
219 		/*
220 		 * We only deliver notifications of the removal of
221 		 * contexts, if the two contexts are allowed to
222 		 * interact.
223 		 */
224 		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
225 			continue;
226 
227 		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
228 			if (!vmci_handle_is_equal(node->handle, context_handle))
229 				continue;
230 
231 			vmci_handle_arr_append_entry(&subscriber_array,
232 					vmci_make_handle(sub_ctx->cid,
233 							 VMCI_EVENT_HANDLER));
234 		}
235 	}
236 	rcu_read_unlock();
237 
238 	/* Fire event to all subscribers. */
239 	array_size = vmci_handle_arr_get_size(subscriber_array);
240 	for (i = 0; i < array_size; i++) {
241 		int result;
242 		struct vmci_event_ctx ev;
243 
244 		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
245 		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
246 						  VMCI_CONTEXT_RESOURCE_ID);
247 		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
248 		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
249 		ev.payload.context_id = context_id;
250 
251 		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
252 						&ev.msg.hdr, false);
253 		if (result < VMCI_SUCCESS) {
254 			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
255 				 ev.msg.event_data.event,
256 				 ev.msg.hdr.dst.context);
257 			/* We continue to enqueue on next subscriber. */
258 		}
259 	}
260 	vmci_handle_arr_destroy(subscriber_array);
261 
262 	return VMCI_SUCCESS;
263 }
264 
265 /*
266  * Returns the current number of pending datagrams. The call may
267  * also serve as a synchronization point for the datagram queue,
268  * as no enqueue operations can occur concurrently.
269  */
270 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
271 {
272 	struct vmci_ctx *context;
273 
274 	context = vmci_ctx_get(cid);
275 	if (context == NULL)
276 		return VMCI_ERROR_INVALID_ARGS;
277 
278 	spin_lock(&context->lock);
279 	if (pending)
280 		*pending = context->pending_datagrams;
281 	spin_unlock(&context->lock);
282 	vmci_ctx_put(context);
283 
284 	return VMCI_SUCCESS;
285 }
286 
287 /*
288  * Queues a VMCI datagram for the appropriate target VM context.
289  */
290 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
291 {
292 	struct vmci_datagram_queue_entry *dq_entry;
293 	struct vmci_ctx *context;
294 	struct vmci_handle dg_src;
295 	size_t vmci_dg_size;
296 
297 	vmci_dg_size = VMCI_DG_SIZE(dg);
298 	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
299 		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
300 		return VMCI_ERROR_INVALID_ARGS;
301 	}
302 
303 	/* Get the target VM's VMCI context. */
304 	context = vmci_ctx_get(cid);
305 	if (!context) {
306 		pr_devel("Invalid context (ID=0x%x)\n", cid);
307 		return VMCI_ERROR_INVALID_ARGS;
308 	}
309 
310 	/* Allocate guest call entry and add it to the target VM's queue. */
311 	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
312 	if (dq_entry == NULL) {
313 		pr_warn("Failed to allocate memory for datagram\n");
314 		vmci_ctx_put(context);
315 		return VMCI_ERROR_NO_MEM;
316 	}
317 	dq_entry->dg = dg;
318 	dq_entry->dg_size = vmci_dg_size;
319 	dg_src = dg->src;
320 	INIT_LIST_HEAD(&dq_entry->list_item);
321 
322 	spin_lock(&context->lock);
323 
324 	/*
325 	 * We put a higher limit on datagrams from the hypervisor.  If
326 	 * the pending datagram is not from hypervisor, then we check
327 	 * if enqueueing it would exceed the
328 	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
329 	 * the pending datagram is from hypervisor, we allow it to be
330 	 * queued at the destination side provided we don't reach the
331 	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
332 	 */
333 	if (context->datagram_queue_size + vmci_dg_size >=
334 	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
335 	    (!vmci_handle_is_equal(dg_src,
336 				vmci_make_handle
337 				(VMCI_HYPERVISOR_CONTEXT_ID,
338 				 VMCI_CONTEXT_RESOURCE_ID)) ||
339 	     context->datagram_queue_size + vmci_dg_size >=
340 	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
341 		spin_unlock(&context->lock);
342 		vmci_ctx_put(context);
343 		kfree(dq_entry);
344 		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
345 		return VMCI_ERROR_NO_RESOURCES;
346 	}
347 
348 	list_add(&dq_entry->list_item, &context->datagram_queue);
349 	context->pending_datagrams++;
350 	context->datagram_queue_size += vmci_dg_size;
351 	ctx_signal_notify(context);
352 	wake_up(&context->host_context.wait_queue);
353 	spin_unlock(&context->lock);
354 	vmci_ctx_put(context);
355 
356 	return vmci_dg_size;
357 }
358 
359 /*
360  * Verifies whether a context with the specified context ID exists.
361  * FIXME: utility is dubious as no decisions can be reliably made
362  * using this data as context can appear and disappear at any time.
363  */
364 bool vmci_ctx_exists(u32 cid)
365 {
366 	struct vmci_ctx *context;
367 	bool exists = false;
368 
369 	rcu_read_lock();
370 
371 	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
372 		if (context->cid == cid) {
373 			exists = true;
374 			break;
375 		}
376 	}
377 
378 	rcu_read_unlock();
379 	return exists;
380 }
381 
382 /*
383  * Retrieves VMCI context corresponding to the given cid.
384  */
385 struct vmci_ctx *vmci_ctx_get(u32 cid)
386 {
387 	struct vmci_ctx *c, *context = NULL;
388 
389 	if (cid == VMCI_INVALID_ID)
390 		return NULL;
391 
392 	rcu_read_lock();
393 	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
394 		if (c->cid == cid) {
395 			/*
396 			 * The context owner drops its own reference to the
397 			 * context only after removing it from the list and
398 			 * waiting for RCU grace period to expire. This
399 			 * means that we are not about to increase the
400 			 * reference count of something that is in the
401 			 * process of being destroyed.
402 			 */
403 			context = c;
404 			kref_get(&context->kref);
405 			break;
406 		}
407 	}
408 	rcu_read_unlock();
409 
410 	return context;
411 }
412 
413 /*
414  * Deallocates all parts of a context data structure. This
415  * function doesn't lock the context, because it assumes that
416  * the caller was holding the last reference to context.
417  */
418 static void ctx_free_ctx(struct kref *kref)
419 {
420 	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
421 	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
422 	struct vmci_handle temp_handle;
423 	struct vmci_handle_list *notifier, *tmp;
424 
425 	/*
426 	 * Fire event to all contexts interested in knowing this
427 	 * context is dying.
428 	 */
429 	ctx_fire_notification(context->cid, context->priv_flags);
430 
431 	/*
432 	 * Cleanup all queue pair resources attached to context.  If
433 	 * the VM dies without cleaning up, this code will make sure
434 	 * that no resources are leaked.
435 	 */
436 	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
437 	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
438 		if (vmci_qp_broker_detach(temp_handle,
439 					  context) < VMCI_SUCCESS) {
440 			/*
441 			 * When vmci_qp_broker_detach() succeeds it
442 			 * removes the handle from the array.  If
443 			 * detach fails, we must remove the handle
444 			 * ourselves.
445 			 */
446 			vmci_handle_arr_remove_entry(context->queue_pair_array,
447 						     temp_handle);
448 		}
449 		temp_handle =
450 		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
451 	}
452 
453 	/*
454 	 * It is fine to destroy this without locking the callQueue, as
455 	 * this is the only thread having a reference to the context.
456 	 */
457 	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
458 				 &context->datagram_queue, list_item) {
459 		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
460 		list_del(&dq_entry->list_item);
461 		kfree(dq_entry->dg);
462 		kfree(dq_entry);
463 	}
464 
465 	list_for_each_entry_safe(notifier, tmp,
466 				 &context->notifier_list, node) {
467 		list_del(&notifier->node);
468 		kfree(notifier);
469 	}
470 
471 	vmci_handle_arr_destroy(context->queue_pair_array);
472 	vmci_handle_arr_destroy(context->doorbell_array);
473 	vmci_handle_arr_destroy(context->pending_doorbell_array);
474 	vmci_ctx_unset_notify(context);
475 	if (context->cred)
476 		put_cred(context->cred);
477 	kfree(context);
478 }
479 
480 /*
481  * Drops reference to VMCI context. If this is the last reference to
482  * the context it will be deallocated. A context is created with
483  * a reference count of one, and on destroy, it is removed from
484  * the context list before its reference count is decremented. Thus,
485  * if we reach zero, we are sure that nobody else are about to increment
486  * it (they need the entry in the context list for that), and so there
487  * is no need for locking.
488  */
489 void vmci_ctx_put(struct vmci_ctx *context)
490 {
491 	kref_put(&context->kref, ctx_free_ctx);
492 }
493 
494 /*
495  * Dequeues the next datagram and returns it to caller.
496  * The caller passes in a pointer to the max size datagram
497  * it can handle and the datagram is only unqueued if the
498  * size is less than max_size. If larger max_size is set to
499  * the size of the datagram to give the caller a chance to
500  * set up a larger buffer for the guestcall.
501  */
502 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
503 			      size_t *max_size,
504 			      struct vmci_datagram **dg)
505 {
506 	struct vmci_datagram_queue_entry *dq_entry;
507 	struct list_head *list_item;
508 	int rv;
509 
510 	/* Dequeue the next datagram entry. */
511 	spin_lock(&context->lock);
512 	if (context->pending_datagrams == 0) {
513 		ctx_clear_notify_call(context);
514 		spin_unlock(&context->lock);
515 		pr_devel("No datagrams pending\n");
516 		return VMCI_ERROR_NO_MORE_DATAGRAMS;
517 	}
518 
519 	list_item = context->datagram_queue.next;
520 
521 	dq_entry =
522 	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
523 
524 	/* Check size of caller's buffer. */
525 	if (*max_size < dq_entry->dg_size) {
526 		*max_size = dq_entry->dg_size;
527 		spin_unlock(&context->lock);
528 		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
529 			 (u32) *max_size);
530 		return VMCI_ERROR_NO_MEM;
531 	}
532 
533 	list_del(list_item);
534 	context->pending_datagrams--;
535 	context->datagram_queue_size -= dq_entry->dg_size;
536 	if (context->pending_datagrams == 0) {
537 		ctx_clear_notify_call(context);
538 		rv = VMCI_SUCCESS;
539 	} else {
540 		/*
541 		 * Return the size of the next datagram.
542 		 */
543 		struct vmci_datagram_queue_entry *next_entry;
544 
545 		list_item = context->datagram_queue.next;
546 		next_entry =
547 		    list_entry(list_item, struct vmci_datagram_queue_entry,
548 			       list_item);
549 
550 		/*
551 		 * The following size_t -> int truncation is fine as
552 		 * the maximum size of a (routable) datagram is 68KB.
553 		 */
554 		rv = (int)next_entry->dg_size;
555 	}
556 	spin_unlock(&context->lock);
557 
558 	/* Caller must free datagram. */
559 	*dg = dq_entry->dg;
560 	dq_entry->dg = NULL;
561 	kfree(dq_entry);
562 
563 	return rv;
564 }
565 
566 /*
567  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
568  * page mapped/locked by vmci_setup_notify().
569  */
570 void vmci_ctx_unset_notify(struct vmci_ctx *context)
571 {
572 	struct page *notify_page;
573 
574 	spin_lock(&context->lock);
575 
576 	notify_page = context->notify_page;
577 	context->notify = &ctx_dummy_notify;
578 	context->notify_page = NULL;
579 
580 	spin_unlock(&context->lock);
581 
582 	if (notify_page) {
583 		kunmap(notify_page);
584 		put_page(notify_page);
585 	}
586 }
587 
588 /*
589  * Add remote_cid to list of contexts current contexts wants
590  * notifications from/about.
591  */
592 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
593 {
594 	struct vmci_ctx *context;
595 	struct vmci_handle_list *notifier, *n;
596 	int result;
597 	bool exists = false;
598 
599 	context = vmci_ctx_get(context_id);
600 	if (!context)
601 		return VMCI_ERROR_NOT_FOUND;
602 
603 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
604 		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
605 			 context_id, remote_cid);
606 		result = VMCI_ERROR_DST_UNREACHABLE;
607 		goto out;
608 	}
609 
610 	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
611 		result = VMCI_ERROR_NO_ACCESS;
612 		goto out;
613 	}
614 
615 	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
616 	if (!notifier) {
617 		result = VMCI_ERROR_NO_MEM;
618 		goto out;
619 	}
620 
621 	INIT_LIST_HEAD(&notifier->node);
622 	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
623 
624 	spin_lock(&context->lock);
625 
626 	list_for_each_entry(n, &context->notifier_list, node) {
627 		if (vmci_handle_is_equal(n->handle, notifier->handle)) {
628 			exists = true;
629 			break;
630 		}
631 	}
632 
633 	if (exists) {
634 		kfree(notifier);
635 		result = VMCI_ERROR_ALREADY_EXISTS;
636 	} else {
637 		list_add_tail_rcu(&notifier->node, &context->notifier_list);
638 		context->n_notifiers++;
639 		result = VMCI_SUCCESS;
640 	}
641 
642 	spin_unlock(&context->lock);
643 
644  out:
645 	vmci_ctx_put(context);
646 	return result;
647 }
648 
649 /*
650  * Remove remote_cid from current context's list of contexts it is
651  * interested in getting notifications from/about.
652  */
653 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
654 {
655 	struct vmci_ctx *context;
656 	struct vmci_handle_list *notifier, *tmp;
657 	struct vmci_handle handle;
658 	bool found = false;
659 
660 	context = vmci_ctx_get(context_id);
661 	if (!context)
662 		return VMCI_ERROR_NOT_FOUND;
663 
664 	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
665 
666 	spin_lock(&context->lock);
667 	list_for_each_entry_safe(notifier, tmp,
668 				 &context->notifier_list, node) {
669 		if (vmci_handle_is_equal(notifier->handle, handle)) {
670 			list_del_rcu(&notifier->node);
671 			context->n_notifiers--;
672 			found = true;
673 			break;
674 		}
675 	}
676 	spin_unlock(&context->lock);
677 
678 	if (found) {
679 		synchronize_rcu();
680 		kfree(notifier);
681 	}
682 
683 	vmci_ctx_put(context);
684 
685 	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
686 }
687 
688 static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
689 					u32 *buf_size, void **pbuf)
690 {
691 	u32 *notifiers;
692 	size_t data_size;
693 	struct vmci_handle_list *entry;
694 	int i = 0;
695 
696 	if (context->n_notifiers == 0) {
697 		*buf_size = 0;
698 		*pbuf = NULL;
699 		return VMCI_SUCCESS;
700 	}
701 
702 	data_size = context->n_notifiers * sizeof(*notifiers);
703 	if (*buf_size < data_size) {
704 		*buf_size = data_size;
705 		return VMCI_ERROR_MORE_DATA;
706 	}
707 
708 	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
709 	if (!notifiers)
710 		return VMCI_ERROR_NO_MEM;
711 
712 	list_for_each_entry(entry, &context->notifier_list, node)
713 		notifiers[i++] = entry->handle.context;
714 
715 	*buf_size = data_size;
716 	*pbuf = notifiers;
717 	return VMCI_SUCCESS;
718 }
719 
720 static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
721 					u32 *buf_size, void **pbuf)
722 {
723 	struct dbell_cpt_state *dbells;
724 	size_t n_doorbells;
725 	int i;
726 
727 	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
728 	if (n_doorbells > 0) {
729 		size_t data_size = n_doorbells * sizeof(*dbells);
730 		if (*buf_size < data_size) {
731 			*buf_size = data_size;
732 			return VMCI_ERROR_MORE_DATA;
733 		}
734 
735 		dbells = kmalloc(data_size, GFP_ATOMIC);
736 		if (!dbells)
737 			return VMCI_ERROR_NO_MEM;
738 
739 		for (i = 0; i < n_doorbells; i++)
740 			dbells[i].handle = vmci_handle_arr_get_entry(
741 						context->doorbell_array, i);
742 
743 		*buf_size = data_size;
744 		*pbuf = dbells;
745 	} else {
746 		*buf_size = 0;
747 		*pbuf = NULL;
748 	}
749 
750 	return VMCI_SUCCESS;
751 }
752 
753 /*
754  * Get current context's checkpoint state of given type.
755  */
756 int vmci_ctx_get_chkpt_state(u32 context_id,
757 			     u32 cpt_type,
758 			     u32 *buf_size,
759 			     void **pbuf)
760 {
761 	struct vmci_ctx *context;
762 	int result;
763 
764 	context = vmci_ctx_get(context_id);
765 	if (!context)
766 		return VMCI_ERROR_NOT_FOUND;
767 
768 	spin_lock(&context->lock);
769 
770 	switch (cpt_type) {
771 	case VMCI_NOTIFICATION_CPT_STATE:
772 		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
773 		break;
774 
775 	case VMCI_WELLKNOWN_CPT_STATE:
776 		/*
777 		 * For compatibility with VMX'en with VM to VM communication, we
778 		 * always return zero wellknown handles.
779 		 */
780 
781 		*buf_size = 0;
782 		*pbuf = NULL;
783 		result = VMCI_SUCCESS;
784 		break;
785 
786 	case VMCI_DOORBELL_CPT_STATE:
787 		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
788 		break;
789 
790 	default:
791 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
792 		result = VMCI_ERROR_INVALID_ARGS;
793 		break;
794 	}
795 
796 	spin_unlock(&context->lock);
797 	vmci_ctx_put(context);
798 
799 	return result;
800 }
801 
802 /*
803  * Set current context's checkpoint state of given type.
804  */
805 int vmci_ctx_set_chkpt_state(u32 context_id,
806 			     u32 cpt_type,
807 			     u32 buf_size,
808 			     void *cpt_buf)
809 {
810 	u32 i;
811 	u32 current_id;
812 	int result = VMCI_SUCCESS;
813 	u32 num_ids = buf_size / sizeof(u32);
814 
815 	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
816 		/*
817 		 * We would end up here if VMX with VM to VM communication
818 		 * attempts to restore a checkpoint with wellknown handles.
819 		 */
820 		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
821 		return VMCI_ERROR_OBSOLETE;
822 	}
823 
824 	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
825 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
826 		return VMCI_ERROR_INVALID_ARGS;
827 	}
828 
829 	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
830 		current_id = ((u32 *)cpt_buf)[i];
831 		result = vmci_ctx_add_notification(context_id, current_id);
832 		if (result != VMCI_SUCCESS)
833 			break;
834 	}
835 	if (result != VMCI_SUCCESS)
836 		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
837 			 cpt_type, result);
838 
839 	return result;
840 }
841 
842 /*
843  * Retrieves the specified context's pending notifications in the
844  * form of a handle array. The handle arrays returned are the
845  * actual data - not a copy and should not be modified by the
846  * caller. They must be released using
847  * vmci_ctx_rcv_notifications_release.
848  */
849 int vmci_ctx_rcv_notifications_get(u32 context_id,
850 				   struct vmci_handle_arr **db_handle_array,
851 				   struct vmci_handle_arr **qp_handle_array)
852 {
853 	struct vmci_ctx *context;
854 	int result = VMCI_SUCCESS;
855 
856 	context = vmci_ctx_get(context_id);
857 	if (context == NULL)
858 		return VMCI_ERROR_NOT_FOUND;
859 
860 	spin_lock(&context->lock);
861 
862 	*db_handle_array = context->pending_doorbell_array;
863 	context->pending_doorbell_array = vmci_handle_arr_create(0);
864 	if (!context->pending_doorbell_array) {
865 		context->pending_doorbell_array = *db_handle_array;
866 		*db_handle_array = NULL;
867 		result = VMCI_ERROR_NO_MEM;
868 	}
869 	*qp_handle_array = NULL;
870 
871 	spin_unlock(&context->lock);
872 	vmci_ctx_put(context);
873 
874 	return result;
875 }
876 
877 /*
878  * Releases handle arrays with pending notifications previously
879  * retrieved using vmci_ctx_rcv_notifications_get. If the
880  * notifications were not successfully handed over to the guest,
881  * success must be false.
882  */
883 void vmci_ctx_rcv_notifications_release(u32 context_id,
884 					struct vmci_handle_arr *db_handle_array,
885 					struct vmci_handle_arr *qp_handle_array,
886 					bool success)
887 {
888 	struct vmci_ctx *context = vmci_ctx_get(context_id);
889 
890 	spin_lock(&context->lock);
891 	if (!success) {
892 		struct vmci_handle handle;
893 
894 		/*
895 		 * New notifications may have been added while we were not
896 		 * holding the context lock, so we transfer any new pending
897 		 * doorbell notifications to the old array, and reinstate the
898 		 * old array.
899 		 */
900 
901 		handle = vmci_handle_arr_remove_tail(
902 					context->pending_doorbell_array);
903 		while (!vmci_handle_is_invalid(handle)) {
904 			if (!vmci_handle_arr_has_entry(db_handle_array,
905 						       handle)) {
906 				vmci_handle_arr_append_entry(
907 						&db_handle_array, handle);
908 			}
909 			handle = vmci_handle_arr_remove_tail(
910 					context->pending_doorbell_array);
911 		}
912 		vmci_handle_arr_destroy(context->pending_doorbell_array);
913 		context->pending_doorbell_array = db_handle_array;
914 		db_handle_array = NULL;
915 	} else {
916 		ctx_clear_notify_call(context);
917 	}
918 	spin_unlock(&context->lock);
919 	vmci_ctx_put(context);
920 
921 	if (db_handle_array)
922 		vmci_handle_arr_destroy(db_handle_array);
923 
924 	if (qp_handle_array)
925 		vmci_handle_arr_destroy(qp_handle_array);
926 }
927 
928 /*
929  * Registers that a new doorbell handle has been allocated by the
930  * context. Only doorbell handles registered can be notified.
931  */
932 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
933 {
934 	struct vmci_ctx *context;
935 	int result;
936 
937 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
938 		return VMCI_ERROR_INVALID_ARGS;
939 
940 	context = vmci_ctx_get(context_id);
941 	if (context == NULL)
942 		return VMCI_ERROR_NOT_FOUND;
943 
944 	spin_lock(&context->lock);
945 	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
946 		vmci_handle_arr_append_entry(&context->doorbell_array, handle);
947 		result = VMCI_SUCCESS;
948 	} else {
949 		result = VMCI_ERROR_DUPLICATE_ENTRY;
950 	}
951 
952 	spin_unlock(&context->lock);
953 	vmci_ctx_put(context);
954 
955 	return result;
956 }
957 
958 /*
959  * Unregisters a doorbell handle that was previously registered
960  * with vmci_ctx_dbell_create.
961  */
962 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
963 {
964 	struct vmci_ctx *context;
965 	struct vmci_handle removed_handle;
966 
967 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
968 		return VMCI_ERROR_INVALID_ARGS;
969 
970 	context = vmci_ctx_get(context_id);
971 	if (context == NULL)
972 		return VMCI_ERROR_NOT_FOUND;
973 
974 	spin_lock(&context->lock);
975 	removed_handle =
976 	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
977 	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
978 	spin_unlock(&context->lock);
979 
980 	vmci_ctx_put(context);
981 
982 	return vmci_handle_is_invalid(removed_handle) ?
983 	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
984 }
985 
986 /*
987  * Unregisters all doorbell handles that were previously
988  * registered with vmci_ctx_dbell_create.
989  */
990 int vmci_ctx_dbell_destroy_all(u32 context_id)
991 {
992 	struct vmci_ctx *context;
993 	struct vmci_handle handle;
994 
995 	if (context_id == VMCI_INVALID_ID)
996 		return VMCI_ERROR_INVALID_ARGS;
997 
998 	context = vmci_ctx_get(context_id);
999 	if (context == NULL)
1000 		return VMCI_ERROR_NOT_FOUND;
1001 
1002 	spin_lock(&context->lock);
1003 	do {
1004 		struct vmci_handle_arr *arr = context->doorbell_array;
1005 		handle = vmci_handle_arr_remove_tail(arr);
1006 	} while (!vmci_handle_is_invalid(handle));
1007 	do {
1008 		struct vmci_handle_arr *arr = context->pending_doorbell_array;
1009 		handle = vmci_handle_arr_remove_tail(arr);
1010 	} while (!vmci_handle_is_invalid(handle));
1011 	spin_unlock(&context->lock);
1012 
1013 	vmci_ctx_put(context);
1014 
1015 	return VMCI_SUCCESS;
1016 }
1017 
1018 /*
1019  * Registers a notification of a doorbell handle initiated by the
1020  * specified source context. The notification of doorbells are
1021  * subject to the same isolation rules as datagram delivery. To
1022  * allow host side senders of notifications a finer granularity
1023  * of sender rights than those assigned to the sending context
1024  * itself, the host context is required to specify a different
1025  * set of privilege flags that will override the privileges of
1026  * the source context.
1027  */
1028 int vmci_ctx_notify_dbell(u32 src_cid,
1029 			  struct vmci_handle handle,
1030 			  u32 src_priv_flags)
1031 {
1032 	struct vmci_ctx *dst_context;
1033 	int result;
1034 
1035 	if (vmci_handle_is_invalid(handle))
1036 		return VMCI_ERROR_INVALID_ARGS;
1037 
1038 	/* Get the target VM's VMCI context. */
1039 	dst_context = vmci_ctx_get(handle.context);
1040 	if (!dst_context) {
1041 		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1042 		return VMCI_ERROR_NOT_FOUND;
1043 	}
1044 
1045 	if (src_cid != handle.context) {
1046 		u32 dst_priv_flags;
1047 
1048 		if (VMCI_CONTEXT_IS_VM(src_cid) &&
1049 		    VMCI_CONTEXT_IS_VM(handle.context)) {
1050 			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1051 				 src_cid, handle.context);
1052 			result = VMCI_ERROR_DST_UNREACHABLE;
1053 			goto out;
1054 		}
1055 
1056 		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1057 		if (result < VMCI_SUCCESS) {
1058 			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1059 				handle.context, handle.resource);
1060 			goto out;
1061 		}
1062 
1063 		if (src_cid != VMCI_HOST_CONTEXT_ID ||
1064 		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1065 			src_priv_flags = vmci_context_get_priv_flags(src_cid);
1066 		}
1067 
1068 		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1069 			result = VMCI_ERROR_NO_ACCESS;
1070 			goto out;
1071 		}
1072 	}
1073 
1074 	if (handle.context == VMCI_HOST_CONTEXT_ID) {
1075 		result = vmci_dbell_host_context_notify(src_cid, handle);
1076 	} else {
1077 		spin_lock(&dst_context->lock);
1078 
1079 		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1080 					       handle)) {
1081 			result = VMCI_ERROR_NOT_FOUND;
1082 		} else {
1083 			if (!vmci_handle_arr_has_entry(
1084 					dst_context->pending_doorbell_array,
1085 					handle)) {
1086 				vmci_handle_arr_append_entry(
1087 					&dst_context->pending_doorbell_array,
1088 					handle);
1089 
1090 				ctx_signal_notify(dst_context);
1091 				wake_up(&dst_context->host_context.wait_queue);
1092 
1093 			}
1094 			result = VMCI_SUCCESS;
1095 		}
1096 		spin_unlock(&dst_context->lock);
1097 	}
1098 
1099  out:
1100 	vmci_ctx_put(dst_context);
1101 
1102 	return result;
1103 }
1104 
1105 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1106 {
1107 	return context && context->user_version >= VMCI_VERSION_HOSTQP;
1108 }
1109 
1110 /*
1111  * Registers that a new queue pair handle has been allocated by
1112  * the context.
1113  */
1114 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1115 {
1116 	int result;
1117 
1118 	if (context == NULL || vmci_handle_is_invalid(handle))
1119 		return VMCI_ERROR_INVALID_ARGS;
1120 
1121 	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
1122 		vmci_handle_arr_append_entry(&context->queue_pair_array,
1123 					     handle);
1124 		result = VMCI_SUCCESS;
1125 	} else {
1126 		result = VMCI_ERROR_DUPLICATE_ENTRY;
1127 	}
1128 
1129 	return result;
1130 }
1131 
1132 /*
1133  * Unregisters a queue pair handle that was previously registered
1134  * with vmci_ctx_qp_create.
1135  */
1136 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1137 {
1138 	struct vmci_handle hndl;
1139 
1140 	if (context == NULL || vmci_handle_is_invalid(handle))
1141 		return VMCI_ERROR_INVALID_ARGS;
1142 
1143 	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1144 
1145 	return vmci_handle_is_invalid(hndl) ?
1146 		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1147 }
1148 
1149 /*
1150  * Determines whether a given queue pair handle is registered
1151  * with the given context.
1152  */
1153 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1154 {
1155 	if (context == NULL || vmci_handle_is_invalid(handle))
1156 		return false;
1157 
1158 	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1159 }
1160 
1161 /*
1162  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1163  * @context_id: The context ID of the VMCI context.
1164  *
1165  * Retrieves privilege flags of the given VMCI context ID.
1166  */
1167 u32 vmci_context_get_priv_flags(u32 context_id)
1168 {
1169 	if (vmci_host_code_active()) {
1170 		u32 flags;
1171 		struct vmci_ctx *context;
1172 
1173 		context = vmci_ctx_get(context_id);
1174 		if (!context)
1175 			return VMCI_LEAST_PRIVILEGE_FLAGS;
1176 
1177 		flags = context->priv_flags;
1178 		vmci_ctx_put(context);
1179 		return flags;
1180 	}
1181 	return VMCI_NO_PRIVILEGE_FLAGS;
1182 }
1183 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1184 
1185 /*
1186  * vmci_is_context_owner() - Determimnes if user is the context owner
1187  * @context_id: The context ID of the VMCI context.
1188  * @uid:        The host user id (real kernel value).
1189  *
1190  * Determines whether a given UID is the owner of given VMCI context.
1191  */
1192 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1193 {
1194 	bool is_owner = false;
1195 
1196 	if (vmci_host_code_active()) {
1197 		struct vmci_ctx *context = vmci_ctx_get(context_id);
1198 		if (context) {
1199 			if (context->cred)
1200 				is_owner = uid_eq(context->cred->uid, uid);
1201 			vmci_ctx_put(context);
1202 		}
1203 	}
1204 
1205 	return is_owner;
1206 }
1207 EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1208