1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VMware VMCI Driver
4  *
5  * Copyright (C) 2012 VMware, Inc. All rights reserved.
6  */
7 
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/slab.h>
16 
17 #include "vmci_queue_pair.h"
18 #include "vmci_datagram.h"
19 #include "vmci_doorbell.h"
20 #include "vmci_context.h"
21 #include "vmci_driver.h"
22 #include "vmci_event.h"
23 
24 /* Use a wide upper bound for the maximum contexts. */
25 #define VMCI_MAX_CONTEXTS 2000
26 
27 /*
28  * List of current VMCI contexts.  Contexts can be added by
29  * vmci_ctx_create() and removed via vmci_ctx_destroy().
30  * These, along with context lookup, are protected by the
31  * list structure's lock.
32  */
33 static struct {
34 	struct list_head head;
35 	spinlock_t lock; /* Spinlock for context list operations */
36 } ctx_list = {
37 	.head = LIST_HEAD_INIT(ctx_list.head),
38 	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
39 };
40 
41 /* Used by contexts that did not set up notify flag pointers */
42 static bool ctx_dummy_notify;
43 
44 static void ctx_signal_notify(struct vmci_ctx *context)
45 {
46 	*context->notify = true;
47 }
48 
49 static void ctx_clear_notify(struct vmci_ctx *context)
50 {
51 	*context->notify = false;
52 }
53 
54 /*
55  * If nothing requires the attention of the guest, clears both
56  * notify flag and call.
57  */
58 static void ctx_clear_notify_call(struct vmci_ctx *context)
59 {
60 	if (context->pending_datagrams == 0 &&
61 	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
62 		ctx_clear_notify(context);
63 }
64 
65 /*
66  * Sets the context's notify flag iff datagrams are pending for this
67  * context.  Called from vmci_setup_notify().
68  */
69 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
70 {
71 	spin_lock(&context->lock);
72 	if (context->pending_datagrams)
73 		ctx_signal_notify(context);
74 	spin_unlock(&context->lock);
75 }
76 
77 /*
78  * Allocates and initializes a VMCI context.
79  */
80 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
81 				 uintptr_t event_hnd,
82 				 int user_version,
83 				 const struct cred *cred)
84 {
85 	struct vmci_ctx *context;
86 	int error;
87 
88 	if (cid == VMCI_INVALID_ID) {
89 		pr_devel("Invalid context ID for VMCI context\n");
90 		error = -EINVAL;
91 		goto err_out;
92 	}
93 
94 	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
95 		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
96 			 priv_flags);
97 		error = -EINVAL;
98 		goto err_out;
99 	}
100 
101 	if (user_version == 0) {
102 		pr_devel("Invalid suer_version %d\n", user_version);
103 		error = -EINVAL;
104 		goto err_out;
105 	}
106 
107 	context = kzalloc(sizeof(*context), GFP_KERNEL);
108 	if (!context) {
109 		pr_warn("Failed to allocate memory for VMCI context\n");
110 		error = -ENOMEM;
111 		goto err_out;
112 	}
113 
114 	kref_init(&context->kref);
115 	spin_lock_init(&context->lock);
116 	INIT_LIST_HEAD(&context->list_item);
117 	INIT_LIST_HEAD(&context->datagram_queue);
118 	INIT_LIST_HEAD(&context->notifier_list);
119 
120 	/* Initialize host-specific VMCI context. */
121 	init_waitqueue_head(&context->host_context.wait_queue);
122 
123 	context->queue_pair_array =
124 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
125 	if (!context->queue_pair_array) {
126 		error = -ENOMEM;
127 		goto err_free_ctx;
128 	}
129 
130 	context->doorbell_array =
131 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
132 	if (!context->doorbell_array) {
133 		error = -ENOMEM;
134 		goto err_free_qp_array;
135 	}
136 
137 	context->pending_doorbell_array =
138 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
139 	if (!context->pending_doorbell_array) {
140 		error = -ENOMEM;
141 		goto err_free_db_array;
142 	}
143 
144 	context->user_version = user_version;
145 
146 	context->priv_flags = priv_flags;
147 
148 	if (cred)
149 		context->cred = get_cred(cred);
150 
151 	context->notify = &ctx_dummy_notify;
152 	context->notify_page = NULL;
153 
154 	/*
155 	 * If we collide with an existing context we generate a new
156 	 * and use it instead. The VMX will determine if regeneration
157 	 * is okay. Since there isn't 4B - 16 VMs running on a given
158 	 * host, the below loop will terminate.
159 	 */
160 	spin_lock(&ctx_list.lock);
161 
162 	while (vmci_ctx_exists(cid)) {
163 		/* We reserve the lowest 16 ids for fixed contexts. */
164 		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
165 		if (cid == VMCI_INVALID_ID)
166 			cid = VMCI_RESERVED_CID_LIMIT;
167 	}
168 	context->cid = cid;
169 
170 	list_add_tail_rcu(&context->list_item, &ctx_list.head);
171 	spin_unlock(&ctx_list.lock);
172 
173 	return context;
174 
175  err_free_db_array:
176 	vmci_handle_arr_destroy(context->doorbell_array);
177  err_free_qp_array:
178 	vmci_handle_arr_destroy(context->queue_pair_array);
179  err_free_ctx:
180 	kfree(context);
181  err_out:
182 	return ERR_PTR(error);
183 }
184 
185 /*
186  * Destroy VMCI context.
187  */
188 void vmci_ctx_destroy(struct vmci_ctx *context)
189 {
190 	spin_lock(&ctx_list.lock);
191 	list_del_rcu(&context->list_item);
192 	spin_unlock(&ctx_list.lock);
193 	synchronize_rcu();
194 
195 	vmci_ctx_put(context);
196 }
197 
198 /*
199  * Fire notification for all contexts interested in given cid.
200  */
201 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
202 {
203 	u32 i, array_size;
204 	struct vmci_ctx *sub_ctx;
205 	struct vmci_handle_arr *subscriber_array;
206 	struct vmci_handle context_handle =
207 		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
208 
209 	/*
210 	 * We create an array to hold the subscribers we find when
211 	 * scanning through all contexts.
212 	 */
213 	subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
214 	if (subscriber_array == NULL)
215 		return VMCI_ERROR_NO_MEM;
216 
217 	/*
218 	 * Scan all contexts to find who is interested in being
219 	 * notified about given contextID.
220 	 */
221 	rcu_read_lock();
222 	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
223 		struct vmci_handle_list *node;
224 
225 		/*
226 		 * We only deliver notifications of the removal of
227 		 * contexts, if the two contexts are allowed to
228 		 * interact.
229 		 */
230 		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
231 			continue;
232 
233 		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
234 			if (!vmci_handle_is_equal(node->handle, context_handle))
235 				continue;
236 
237 			vmci_handle_arr_append_entry(&subscriber_array,
238 					vmci_make_handle(sub_ctx->cid,
239 							 VMCI_EVENT_HANDLER));
240 		}
241 	}
242 	rcu_read_unlock();
243 
244 	/* Fire event to all subscribers. */
245 	array_size = vmci_handle_arr_get_size(subscriber_array);
246 	for (i = 0; i < array_size; i++) {
247 		int result;
248 		struct vmci_event_ctx ev;
249 
250 		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
251 		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
252 						  VMCI_CONTEXT_RESOURCE_ID);
253 		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
254 		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
255 		ev.payload.context_id = context_id;
256 
257 		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
258 						&ev.msg.hdr, false);
259 		if (result < VMCI_SUCCESS) {
260 			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
261 				 ev.msg.event_data.event,
262 				 ev.msg.hdr.dst.context);
263 			/* We continue to enqueue on next subscriber. */
264 		}
265 	}
266 	vmci_handle_arr_destroy(subscriber_array);
267 
268 	return VMCI_SUCCESS;
269 }
270 
271 /*
272  * Returns the current number of pending datagrams. The call may
273  * also serve as a synchronization point for the datagram queue,
274  * as no enqueue operations can occur concurrently.
275  */
276 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
277 {
278 	struct vmci_ctx *context;
279 
280 	context = vmci_ctx_get(cid);
281 	if (context == NULL)
282 		return VMCI_ERROR_INVALID_ARGS;
283 
284 	spin_lock(&context->lock);
285 	if (pending)
286 		*pending = context->pending_datagrams;
287 	spin_unlock(&context->lock);
288 	vmci_ctx_put(context);
289 
290 	return VMCI_SUCCESS;
291 }
292 
293 /*
294  * Queues a VMCI datagram for the appropriate target VM context.
295  */
296 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
297 {
298 	struct vmci_datagram_queue_entry *dq_entry;
299 	struct vmci_ctx *context;
300 	struct vmci_handle dg_src;
301 	size_t vmci_dg_size;
302 
303 	vmci_dg_size = VMCI_DG_SIZE(dg);
304 	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
305 		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
306 		return VMCI_ERROR_INVALID_ARGS;
307 	}
308 
309 	/* Get the target VM's VMCI context. */
310 	context = vmci_ctx_get(cid);
311 	if (!context) {
312 		pr_devel("Invalid context (ID=0x%x)\n", cid);
313 		return VMCI_ERROR_INVALID_ARGS;
314 	}
315 
316 	/* Allocate guest call entry and add it to the target VM's queue. */
317 	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
318 	if (dq_entry == NULL) {
319 		pr_warn("Failed to allocate memory for datagram\n");
320 		vmci_ctx_put(context);
321 		return VMCI_ERROR_NO_MEM;
322 	}
323 	dq_entry->dg = dg;
324 	dq_entry->dg_size = vmci_dg_size;
325 	dg_src = dg->src;
326 	INIT_LIST_HEAD(&dq_entry->list_item);
327 
328 	spin_lock(&context->lock);
329 
330 	/*
331 	 * We put a higher limit on datagrams from the hypervisor.  If
332 	 * the pending datagram is not from hypervisor, then we check
333 	 * if enqueueing it would exceed the
334 	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
335 	 * the pending datagram is from hypervisor, we allow it to be
336 	 * queued at the destination side provided we don't reach the
337 	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
338 	 */
339 	if (context->datagram_queue_size + vmci_dg_size >=
340 	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
341 	    (!vmci_handle_is_equal(dg_src,
342 				vmci_make_handle
343 				(VMCI_HYPERVISOR_CONTEXT_ID,
344 				 VMCI_CONTEXT_RESOURCE_ID)) ||
345 	     context->datagram_queue_size + vmci_dg_size >=
346 	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
347 		spin_unlock(&context->lock);
348 		vmci_ctx_put(context);
349 		kfree(dq_entry);
350 		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
351 		return VMCI_ERROR_NO_RESOURCES;
352 	}
353 
354 	list_add(&dq_entry->list_item, &context->datagram_queue);
355 	context->pending_datagrams++;
356 	context->datagram_queue_size += vmci_dg_size;
357 	ctx_signal_notify(context);
358 	wake_up(&context->host_context.wait_queue);
359 	spin_unlock(&context->lock);
360 	vmci_ctx_put(context);
361 
362 	return vmci_dg_size;
363 }
364 
365 /*
366  * Verifies whether a context with the specified context ID exists.
367  * FIXME: utility is dubious as no decisions can be reliably made
368  * using this data as context can appear and disappear at any time.
369  */
370 bool vmci_ctx_exists(u32 cid)
371 {
372 	struct vmci_ctx *context;
373 	bool exists = false;
374 
375 	rcu_read_lock();
376 
377 	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
378 		if (context->cid == cid) {
379 			exists = true;
380 			break;
381 		}
382 	}
383 
384 	rcu_read_unlock();
385 	return exists;
386 }
387 
388 /*
389  * Retrieves VMCI context corresponding to the given cid.
390  */
391 struct vmci_ctx *vmci_ctx_get(u32 cid)
392 {
393 	struct vmci_ctx *c, *context = NULL;
394 
395 	if (cid == VMCI_INVALID_ID)
396 		return NULL;
397 
398 	rcu_read_lock();
399 	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
400 		if (c->cid == cid) {
401 			/*
402 			 * The context owner drops its own reference to the
403 			 * context only after removing it from the list and
404 			 * waiting for RCU grace period to expire. This
405 			 * means that we are not about to increase the
406 			 * reference count of something that is in the
407 			 * process of being destroyed.
408 			 */
409 			context = c;
410 			kref_get(&context->kref);
411 			break;
412 		}
413 	}
414 	rcu_read_unlock();
415 
416 	return context;
417 }
418 
419 /*
420  * Deallocates all parts of a context data structure. This
421  * function doesn't lock the context, because it assumes that
422  * the caller was holding the last reference to context.
423  */
424 static void ctx_free_ctx(struct kref *kref)
425 {
426 	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
427 	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
428 	struct vmci_handle temp_handle;
429 	struct vmci_handle_list *notifier, *tmp;
430 
431 	/*
432 	 * Fire event to all contexts interested in knowing this
433 	 * context is dying.
434 	 */
435 	ctx_fire_notification(context->cid, context->priv_flags);
436 
437 	/*
438 	 * Cleanup all queue pair resources attached to context.  If
439 	 * the VM dies without cleaning up, this code will make sure
440 	 * that no resources are leaked.
441 	 */
442 	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
443 	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
444 		if (vmci_qp_broker_detach(temp_handle,
445 					  context) < VMCI_SUCCESS) {
446 			/*
447 			 * When vmci_qp_broker_detach() succeeds it
448 			 * removes the handle from the array.  If
449 			 * detach fails, we must remove the handle
450 			 * ourselves.
451 			 */
452 			vmci_handle_arr_remove_entry(context->queue_pair_array,
453 						     temp_handle);
454 		}
455 		temp_handle =
456 		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
457 	}
458 
459 	/*
460 	 * It is fine to destroy this without locking the callQueue, as
461 	 * this is the only thread having a reference to the context.
462 	 */
463 	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
464 				 &context->datagram_queue, list_item) {
465 		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
466 		list_del(&dq_entry->list_item);
467 		kfree(dq_entry->dg);
468 		kfree(dq_entry);
469 	}
470 
471 	list_for_each_entry_safe(notifier, tmp,
472 				 &context->notifier_list, node) {
473 		list_del(&notifier->node);
474 		kfree(notifier);
475 	}
476 
477 	vmci_handle_arr_destroy(context->queue_pair_array);
478 	vmci_handle_arr_destroy(context->doorbell_array);
479 	vmci_handle_arr_destroy(context->pending_doorbell_array);
480 	vmci_ctx_unset_notify(context);
481 	if (context->cred)
482 		put_cred(context->cred);
483 	kfree(context);
484 }
485 
486 /*
487  * Drops reference to VMCI context. If this is the last reference to
488  * the context it will be deallocated. A context is created with
489  * a reference count of one, and on destroy, it is removed from
490  * the context list before its reference count is decremented. Thus,
491  * if we reach zero, we are sure that nobody else are about to increment
492  * it (they need the entry in the context list for that), and so there
493  * is no need for locking.
494  */
495 void vmci_ctx_put(struct vmci_ctx *context)
496 {
497 	kref_put(&context->kref, ctx_free_ctx);
498 }
499 
500 /*
501  * Dequeues the next datagram and returns it to caller.
502  * The caller passes in a pointer to the max size datagram
503  * it can handle and the datagram is only unqueued if the
504  * size is less than max_size. If larger max_size is set to
505  * the size of the datagram to give the caller a chance to
506  * set up a larger buffer for the guestcall.
507  */
508 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
509 			      size_t *max_size,
510 			      struct vmci_datagram **dg)
511 {
512 	struct vmci_datagram_queue_entry *dq_entry;
513 	struct list_head *list_item;
514 	int rv;
515 
516 	/* Dequeue the next datagram entry. */
517 	spin_lock(&context->lock);
518 	if (context->pending_datagrams == 0) {
519 		ctx_clear_notify_call(context);
520 		spin_unlock(&context->lock);
521 		pr_devel("No datagrams pending\n");
522 		return VMCI_ERROR_NO_MORE_DATAGRAMS;
523 	}
524 
525 	list_item = context->datagram_queue.next;
526 
527 	dq_entry =
528 	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
529 
530 	/* Check size of caller's buffer. */
531 	if (*max_size < dq_entry->dg_size) {
532 		*max_size = dq_entry->dg_size;
533 		spin_unlock(&context->lock);
534 		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
535 			 (u32) *max_size);
536 		return VMCI_ERROR_NO_MEM;
537 	}
538 
539 	list_del(list_item);
540 	context->pending_datagrams--;
541 	context->datagram_queue_size -= dq_entry->dg_size;
542 	if (context->pending_datagrams == 0) {
543 		ctx_clear_notify_call(context);
544 		rv = VMCI_SUCCESS;
545 	} else {
546 		/*
547 		 * Return the size of the next datagram.
548 		 */
549 		struct vmci_datagram_queue_entry *next_entry;
550 
551 		list_item = context->datagram_queue.next;
552 		next_entry =
553 		    list_entry(list_item, struct vmci_datagram_queue_entry,
554 			       list_item);
555 
556 		/*
557 		 * The following size_t -> int truncation is fine as
558 		 * the maximum size of a (routable) datagram is 68KB.
559 		 */
560 		rv = (int)next_entry->dg_size;
561 	}
562 	spin_unlock(&context->lock);
563 
564 	/* Caller must free datagram. */
565 	*dg = dq_entry->dg;
566 	dq_entry->dg = NULL;
567 	kfree(dq_entry);
568 
569 	return rv;
570 }
571 
572 /*
573  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
574  * page mapped/locked by vmci_setup_notify().
575  */
576 void vmci_ctx_unset_notify(struct vmci_ctx *context)
577 {
578 	struct page *notify_page;
579 
580 	spin_lock(&context->lock);
581 
582 	notify_page = context->notify_page;
583 	context->notify = &ctx_dummy_notify;
584 	context->notify_page = NULL;
585 
586 	spin_unlock(&context->lock);
587 
588 	if (notify_page) {
589 		kunmap(notify_page);
590 		put_page(notify_page);
591 	}
592 }
593 
594 /*
595  * Add remote_cid to list of contexts current contexts wants
596  * notifications from/about.
597  */
598 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
599 {
600 	struct vmci_ctx *context;
601 	struct vmci_handle_list *notifier, *n;
602 	int result;
603 	bool exists = false;
604 
605 	context = vmci_ctx_get(context_id);
606 	if (!context)
607 		return VMCI_ERROR_NOT_FOUND;
608 
609 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
610 		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
611 			 context_id, remote_cid);
612 		result = VMCI_ERROR_DST_UNREACHABLE;
613 		goto out;
614 	}
615 
616 	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
617 		result = VMCI_ERROR_NO_ACCESS;
618 		goto out;
619 	}
620 
621 	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
622 	if (!notifier) {
623 		result = VMCI_ERROR_NO_MEM;
624 		goto out;
625 	}
626 
627 	INIT_LIST_HEAD(&notifier->node);
628 	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
629 
630 	spin_lock(&context->lock);
631 
632 	if (context->n_notifiers < VMCI_MAX_CONTEXTS) {
633 		list_for_each_entry(n, &context->notifier_list, node) {
634 			if (vmci_handle_is_equal(n->handle, notifier->handle)) {
635 				exists = true;
636 				break;
637 			}
638 		}
639 
640 		if (exists) {
641 			kfree(notifier);
642 			result = VMCI_ERROR_ALREADY_EXISTS;
643 		} else {
644 			list_add_tail_rcu(&notifier->node,
645 					  &context->notifier_list);
646 			context->n_notifiers++;
647 			result = VMCI_SUCCESS;
648 		}
649 	} else {
650 		kfree(notifier);
651 		result = VMCI_ERROR_NO_MEM;
652 	}
653 
654 	spin_unlock(&context->lock);
655 
656  out:
657 	vmci_ctx_put(context);
658 	return result;
659 }
660 
661 /*
662  * Remove remote_cid from current context's list of contexts it is
663  * interested in getting notifications from/about.
664  */
665 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
666 {
667 	struct vmci_ctx *context;
668 	struct vmci_handle_list *notifier, *tmp;
669 	struct vmci_handle handle;
670 	bool found = false;
671 
672 	context = vmci_ctx_get(context_id);
673 	if (!context)
674 		return VMCI_ERROR_NOT_FOUND;
675 
676 	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
677 
678 	spin_lock(&context->lock);
679 	list_for_each_entry_safe(notifier, tmp,
680 				 &context->notifier_list, node) {
681 		if (vmci_handle_is_equal(notifier->handle, handle)) {
682 			list_del_rcu(&notifier->node);
683 			context->n_notifiers--;
684 			found = true;
685 			break;
686 		}
687 	}
688 	spin_unlock(&context->lock);
689 
690 	if (found) {
691 		synchronize_rcu();
692 		kfree(notifier);
693 	}
694 
695 	vmci_ctx_put(context);
696 
697 	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
698 }
699 
700 static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
701 					u32 *buf_size, void **pbuf)
702 {
703 	u32 *notifiers;
704 	size_t data_size;
705 	struct vmci_handle_list *entry;
706 	int i = 0;
707 
708 	if (context->n_notifiers == 0) {
709 		*buf_size = 0;
710 		*pbuf = NULL;
711 		return VMCI_SUCCESS;
712 	}
713 
714 	data_size = context->n_notifiers * sizeof(*notifiers);
715 	if (*buf_size < data_size) {
716 		*buf_size = data_size;
717 		return VMCI_ERROR_MORE_DATA;
718 	}
719 
720 	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
721 	if (!notifiers)
722 		return VMCI_ERROR_NO_MEM;
723 
724 	list_for_each_entry(entry, &context->notifier_list, node)
725 		notifiers[i++] = entry->handle.context;
726 
727 	*buf_size = data_size;
728 	*pbuf = notifiers;
729 	return VMCI_SUCCESS;
730 }
731 
732 static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
733 					u32 *buf_size, void **pbuf)
734 {
735 	struct dbell_cpt_state *dbells;
736 	u32 i, n_doorbells;
737 
738 	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
739 	if (n_doorbells > 0) {
740 		size_t data_size = n_doorbells * sizeof(*dbells);
741 		if (*buf_size < data_size) {
742 			*buf_size = data_size;
743 			return VMCI_ERROR_MORE_DATA;
744 		}
745 
746 		dbells = kzalloc(data_size, GFP_ATOMIC);
747 		if (!dbells)
748 			return VMCI_ERROR_NO_MEM;
749 
750 		for (i = 0; i < n_doorbells; i++)
751 			dbells[i].handle = vmci_handle_arr_get_entry(
752 						context->doorbell_array, i);
753 
754 		*buf_size = data_size;
755 		*pbuf = dbells;
756 	} else {
757 		*buf_size = 0;
758 		*pbuf = NULL;
759 	}
760 
761 	return VMCI_SUCCESS;
762 }
763 
764 /*
765  * Get current context's checkpoint state of given type.
766  */
767 int vmci_ctx_get_chkpt_state(u32 context_id,
768 			     u32 cpt_type,
769 			     u32 *buf_size,
770 			     void **pbuf)
771 {
772 	struct vmci_ctx *context;
773 	int result;
774 
775 	context = vmci_ctx_get(context_id);
776 	if (!context)
777 		return VMCI_ERROR_NOT_FOUND;
778 
779 	spin_lock(&context->lock);
780 
781 	switch (cpt_type) {
782 	case VMCI_NOTIFICATION_CPT_STATE:
783 		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
784 		break;
785 
786 	case VMCI_WELLKNOWN_CPT_STATE:
787 		/*
788 		 * For compatibility with VMX'en with VM to VM communication, we
789 		 * always return zero wellknown handles.
790 		 */
791 
792 		*buf_size = 0;
793 		*pbuf = NULL;
794 		result = VMCI_SUCCESS;
795 		break;
796 
797 	case VMCI_DOORBELL_CPT_STATE:
798 		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
799 		break;
800 
801 	default:
802 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
803 		result = VMCI_ERROR_INVALID_ARGS;
804 		break;
805 	}
806 
807 	spin_unlock(&context->lock);
808 	vmci_ctx_put(context);
809 
810 	return result;
811 }
812 
813 /*
814  * Set current context's checkpoint state of given type.
815  */
816 int vmci_ctx_set_chkpt_state(u32 context_id,
817 			     u32 cpt_type,
818 			     u32 buf_size,
819 			     void *cpt_buf)
820 {
821 	u32 i;
822 	u32 current_id;
823 	int result = VMCI_SUCCESS;
824 	u32 num_ids = buf_size / sizeof(u32);
825 
826 	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
827 		/*
828 		 * We would end up here if VMX with VM to VM communication
829 		 * attempts to restore a checkpoint with wellknown handles.
830 		 */
831 		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
832 		return VMCI_ERROR_OBSOLETE;
833 	}
834 
835 	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
836 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
837 		return VMCI_ERROR_INVALID_ARGS;
838 	}
839 
840 	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
841 		current_id = ((u32 *)cpt_buf)[i];
842 		result = vmci_ctx_add_notification(context_id, current_id);
843 		if (result != VMCI_SUCCESS)
844 			break;
845 	}
846 	if (result != VMCI_SUCCESS)
847 		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
848 			 cpt_type, result);
849 
850 	return result;
851 }
852 
853 /*
854  * Retrieves the specified context's pending notifications in the
855  * form of a handle array. The handle arrays returned are the
856  * actual data - not a copy and should not be modified by the
857  * caller. They must be released using
858  * vmci_ctx_rcv_notifications_release.
859  */
860 int vmci_ctx_rcv_notifications_get(u32 context_id,
861 				   struct vmci_handle_arr **db_handle_array,
862 				   struct vmci_handle_arr **qp_handle_array)
863 {
864 	struct vmci_ctx *context;
865 	int result = VMCI_SUCCESS;
866 
867 	context = vmci_ctx_get(context_id);
868 	if (context == NULL)
869 		return VMCI_ERROR_NOT_FOUND;
870 
871 	spin_lock(&context->lock);
872 
873 	*db_handle_array = context->pending_doorbell_array;
874 	context->pending_doorbell_array =
875 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
876 	if (!context->pending_doorbell_array) {
877 		context->pending_doorbell_array = *db_handle_array;
878 		*db_handle_array = NULL;
879 		result = VMCI_ERROR_NO_MEM;
880 	}
881 	*qp_handle_array = NULL;
882 
883 	spin_unlock(&context->lock);
884 	vmci_ctx_put(context);
885 
886 	return result;
887 }
888 
889 /*
890  * Releases handle arrays with pending notifications previously
891  * retrieved using vmci_ctx_rcv_notifications_get. If the
892  * notifications were not successfully handed over to the guest,
893  * success must be false.
894  */
895 void vmci_ctx_rcv_notifications_release(u32 context_id,
896 					struct vmci_handle_arr *db_handle_array,
897 					struct vmci_handle_arr *qp_handle_array,
898 					bool success)
899 {
900 	struct vmci_ctx *context = vmci_ctx_get(context_id);
901 
902 	spin_lock(&context->lock);
903 	if (!success) {
904 		struct vmci_handle handle;
905 
906 		/*
907 		 * New notifications may have been added while we were not
908 		 * holding the context lock, so we transfer any new pending
909 		 * doorbell notifications to the old array, and reinstate the
910 		 * old array.
911 		 */
912 
913 		handle = vmci_handle_arr_remove_tail(
914 					context->pending_doorbell_array);
915 		while (!vmci_handle_is_invalid(handle)) {
916 			if (!vmci_handle_arr_has_entry(db_handle_array,
917 						       handle)) {
918 				vmci_handle_arr_append_entry(
919 						&db_handle_array, handle);
920 			}
921 			handle = vmci_handle_arr_remove_tail(
922 					context->pending_doorbell_array);
923 		}
924 		vmci_handle_arr_destroy(context->pending_doorbell_array);
925 		context->pending_doorbell_array = db_handle_array;
926 		db_handle_array = NULL;
927 	} else {
928 		ctx_clear_notify_call(context);
929 	}
930 	spin_unlock(&context->lock);
931 	vmci_ctx_put(context);
932 
933 	if (db_handle_array)
934 		vmci_handle_arr_destroy(db_handle_array);
935 
936 	if (qp_handle_array)
937 		vmci_handle_arr_destroy(qp_handle_array);
938 }
939 
940 /*
941  * Registers that a new doorbell handle has been allocated by the
942  * context. Only doorbell handles registered can be notified.
943  */
944 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
945 {
946 	struct vmci_ctx *context;
947 	int result;
948 
949 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
950 		return VMCI_ERROR_INVALID_ARGS;
951 
952 	context = vmci_ctx_get(context_id);
953 	if (context == NULL)
954 		return VMCI_ERROR_NOT_FOUND;
955 
956 	spin_lock(&context->lock);
957 	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle))
958 		result = vmci_handle_arr_append_entry(&context->doorbell_array,
959 						      handle);
960 	else
961 		result = VMCI_ERROR_DUPLICATE_ENTRY;
962 
963 	spin_unlock(&context->lock);
964 	vmci_ctx_put(context);
965 
966 	return result;
967 }
968 
969 /*
970  * Unregisters a doorbell handle that was previously registered
971  * with vmci_ctx_dbell_create.
972  */
973 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
974 {
975 	struct vmci_ctx *context;
976 	struct vmci_handle removed_handle;
977 
978 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
979 		return VMCI_ERROR_INVALID_ARGS;
980 
981 	context = vmci_ctx_get(context_id);
982 	if (context == NULL)
983 		return VMCI_ERROR_NOT_FOUND;
984 
985 	spin_lock(&context->lock);
986 	removed_handle =
987 	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
988 	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
989 	spin_unlock(&context->lock);
990 
991 	vmci_ctx_put(context);
992 
993 	return vmci_handle_is_invalid(removed_handle) ?
994 	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
995 }
996 
997 /*
998  * Unregisters all doorbell handles that were previously
999  * registered with vmci_ctx_dbell_create.
1000  */
1001 int vmci_ctx_dbell_destroy_all(u32 context_id)
1002 {
1003 	struct vmci_ctx *context;
1004 	struct vmci_handle handle;
1005 
1006 	if (context_id == VMCI_INVALID_ID)
1007 		return VMCI_ERROR_INVALID_ARGS;
1008 
1009 	context = vmci_ctx_get(context_id);
1010 	if (context == NULL)
1011 		return VMCI_ERROR_NOT_FOUND;
1012 
1013 	spin_lock(&context->lock);
1014 	do {
1015 		struct vmci_handle_arr *arr = context->doorbell_array;
1016 		handle = vmci_handle_arr_remove_tail(arr);
1017 	} while (!vmci_handle_is_invalid(handle));
1018 	do {
1019 		struct vmci_handle_arr *arr = context->pending_doorbell_array;
1020 		handle = vmci_handle_arr_remove_tail(arr);
1021 	} while (!vmci_handle_is_invalid(handle));
1022 	spin_unlock(&context->lock);
1023 
1024 	vmci_ctx_put(context);
1025 
1026 	return VMCI_SUCCESS;
1027 }
1028 
1029 /*
1030  * Registers a notification of a doorbell handle initiated by the
1031  * specified source context. The notification of doorbells are
1032  * subject to the same isolation rules as datagram delivery. To
1033  * allow host side senders of notifications a finer granularity
1034  * of sender rights than those assigned to the sending context
1035  * itself, the host context is required to specify a different
1036  * set of privilege flags that will override the privileges of
1037  * the source context.
1038  */
1039 int vmci_ctx_notify_dbell(u32 src_cid,
1040 			  struct vmci_handle handle,
1041 			  u32 src_priv_flags)
1042 {
1043 	struct vmci_ctx *dst_context;
1044 	int result;
1045 
1046 	if (vmci_handle_is_invalid(handle))
1047 		return VMCI_ERROR_INVALID_ARGS;
1048 
1049 	/* Get the target VM's VMCI context. */
1050 	dst_context = vmci_ctx_get(handle.context);
1051 	if (!dst_context) {
1052 		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1053 		return VMCI_ERROR_NOT_FOUND;
1054 	}
1055 
1056 	if (src_cid != handle.context) {
1057 		u32 dst_priv_flags;
1058 
1059 		if (VMCI_CONTEXT_IS_VM(src_cid) &&
1060 		    VMCI_CONTEXT_IS_VM(handle.context)) {
1061 			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1062 				 src_cid, handle.context);
1063 			result = VMCI_ERROR_DST_UNREACHABLE;
1064 			goto out;
1065 		}
1066 
1067 		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1068 		if (result < VMCI_SUCCESS) {
1069 			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1070 				handle.context, handle.resource);
1071 			goto out;
1072 		}
1073 
1074 		if (src_cid != VMCI_HOST_CONTEXT_ID ||
1075 		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1076 			src_priv_flags = vmci_context_get_priv_flags(src_cid);
1077 		}
1078 
1079 		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1080 			result = VMCI_ERROR_NO_ACCESS;
1081 			goto out;
1082 		}
1083 	}
1084 
1085 	if (handle.context == VMCI_HOST_CONTEXT_ID) {
1086 		result = vmci_dbell_host_context_notify(src_cid, handle);
1087 	} else {
1088 		spin_lock(&dst_context->lock);
1089 
1090 		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1091 					       handle)) {
1092 			result = VMCI_ERROR_NOT_FOUND;
1093 		} else {
1094 			if (!vmci_handle_arr_has_entry(
1095 					dst_context->pending_doorbell_array,
1096 					handle)) {
1097 				result = vmci_handle_arr_append_entry(
1098 					&dst_context->pending_doorbell_array,
1099 					handle);
1100 				if (result == VMCI_SUCCESS) {
1101 					ctx_signal_notify(dst_context);
1102 					wake_up(&dst_context->host_context.wait_queue);
1103 				}
1104 			} else {
1105 				result = VMCI_SUCCESS;
1106 			}
1107 		}
1108 		spin_unlock(&dst_context->lock);
1109 	}
1110 
1111  out:
1112 	vmci_ctx_put(dst_context);
1113 
1114 	return result;
1115 }
1116 
1117 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1118 {
1119 	return context && context->user_version >= VMCI_VERSION_HOSTQP;
1120 }
1121 
1122 /*
1123  * Registers that a new queue pair handle has been allocated by
1124  * the context.
1125  */
1126 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1127 {
1128 	int result;
1129 
1130 	if (context == NULL || vmci_handle_is_invalid(handle))
1131 		return VMCI_ERROR_INVALID_ARGS;
1132 
1133 	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle))
1134 		result = vmci_handle_arr_append_entry(
1135 			&context->queue_pair_array, handle);
1136 	else
1137 		result = VMCI_ERROR_DUPLICATE_ENTRY;
1138 
1139 	return result;
1140 }
1141 
1142 /*
1143  * Unregisters a queue pair handle that was previously registered
1144  * with vmci_ctx_qp_create.
1145  */
1146 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1147 {
1148 	struct vmci_handle hndl;
1149 
1150 	if (context == NULL || vmci_handle_is_invalid(handle))
1151 		return VMCI_ERROR_INVALID_ARGS;
1152 
1153 	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1154 
1155 	return vmci_handle_is_invalid(hndl) ?
1156 		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1157 }
1158 
1159 /*
1160  * Determines whether a given queue pair handle is registered
1161  * with the given context.
1162  */
1163 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1164 {
1165 	if (context == NULL || vmci_handle_is_invalid(handle))
1166 		return false;
1167 
1168 	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1169 }
1170 
1171 /*
1172  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1173  * @context_id: The context ID of the VMCI context.
1174  *
1175  * Retrieves privilege flags of the given VMCI context ID.
1176  */
1177 u32 vmci_context_get_priv_flags(u32 context_id)
1178 {
1179 	if (vmci_host_code_active()) {
1180 		u32 flags;
1181 		struct vmci_ctx *context;
1182 
1183 		context = vmci_ctx_get(context_id);
1184 		if (!context)
1185 			return VMCI_LEAST_PRIVILEGE_FLAGS;
1186 
1187 		flags = context->priv_flags;
1188 		vmci_ctx_put(context);
1189 		return flags;
1190 	}
1191 	return VMCI_NO_PRIVILEGE_FLAGS;
1192 }
1193 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1194 
1195 /*
1196  * vmci_is_context_owner() - Determimnes if user is the context owner
1197  * @context_id: The context ID of the VMCI context.
1198  * @uid:        The host user id (real kernel value).
1199  *
1200  * Determines whether a given UID is the owner of given VMCI context.
1201  */
1202 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1203 {
1204 	bool is_owner = false;
1205 
1206 	if (vmci_host_code_active()) {
1207 		struct vmci_ctx *context = vmci_ctx_get(context_id);
1208 		if (context) {
1209 			if (context->cred)
1210 				is_owner = uid_eq(context->cred->uid, uid);
1211 			vmci_ctx_put(context);
1212 		}
1213 	}
1214 
1215 	return is_owner;
1216 }
1217 EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1218