xref: /openbmc/linux/drivers/hv/channel.c (revision e721eb06)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2009, Microsoft Corporation.
4  *
5  * Authors:
6  *   Haiyang Zhang <haiyangz@microsoft.com>
7  *   Hank Janssen  <hjanssen@microsoft.com>
8  */
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/wait.h>
14 #include <linux/mm.h>
15 #include <linux/slab.h>
16 #include <linux/module.h>
17 #include <linux/hyperv.h>
18 #include <linux/uio.h>
19 #include <linux/interrupt.h>
20 #include <asm/page.h>
21 
22 #include "hyperv_vmbus.h"
23 
/*
 * NUM_PAGES_SPANNED - number of pages touched by the byte range that starts
 * at @addr and is @len bytes long.
 *
 * Both arguments are fully parenthesized so that callers may pass arbitrary
 * expressions (e.g. "base | flag") without operator-precedence surprises.
 * Note that @addr is evaluated twice, so it must be free of side effects.
 */
#define NUM_PAGES_SPANNED(addr, len) \
((PAGE_ALIGN((addr) + (len)) >> PAGE_SHIFT) - ((addr) >> PAGE_SHIFT))
26 
27 static unsigned long virt_to_hvpfn(void *addr)
28 {
29 	phys_addr_t paddr;
30 
31 	if (is_vmalloc_addr(addr))
32 		paddr = page_to_phys(vmalloc_to_page(addr)) +
33 					 offset_in_page(addr);
34 	else
35 		paddr = __pa(addr);
36 
37 	return  paddr >> PAGE_SHIFT;
38 }
39 
40 /*
41  * vmbus_setevent- Trigger an event notification on the specified
42  * channel.
43  */
44 void vmbus_setevent(struct vmbus_channel *channel)
45 {
46 	struct hv_monitor_page *monitorpage;
47 
48 	trace_vmbus_setevent(channel);
49 
50 	/*
51 	 * For channels marked as in "low latency" mode
52 	 * bypass the monitor page mechanism.
53 	 */
54 	if (channel->offermsg.monitor_allocated && !channel->low_latency) {
55 		vmbus_send_interrupt(channel->offermsg.child_relid);
56 
57 		/* Get the child to parent monitor page */
58 		monitorpage = vmbus_connection.monitor_pages[1];
59 
60 		sync_set_bit(channel->monitor_bit,
61 			(unsigned long *)&monitorpage->trigger_group
62 					[channel->monitor_grp].pending);
63 
64 	} else {
65 		vmbus_set_event(channel);
66 	}
67 }
68 EXPORT_SYMBOL_GPL(vmbus_setevent);
69 
70 /* vmbus_free_ring - drop mapping of ring buffer */
71 void vmbus_free_ring(struct vmbus_channel *channel)
72 {
73 	hv_ringbuffer_cleanup(&channel->outbound);
74 	hv_ringbuffer_cleanup(&channel->inbound);
75 
76 	if (channel->ringbuffer_page) {
77 		__free_pages(channel->ringbuffer_page,
78 			     get_order(channel->ringbuffer_pagecount
79 				       << PAGE_SHIFT));
80 		channel->ringbuffer_page = NULL;
81 	}
82 }
83 EXPORT_SYMBOL_GPL(vmbus_free_ring);
84 
85 /* vmbus_alloc_ring - allocate and map pages for ring buffer */
86 int vmbus_alloc_ring(struct vmbus_channel *newchannel,
87 		     u32 send_size, u32 recv_size)
88 {
89 	struct page *page;
90 	int order;
91 
92 	if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
93 		return -EINVAL;
94 
95 	/* Allocate the ring buffer */
96 	order = get_order(send_size + recv_size);
97 	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
98 				GFP_KERNEL|__GFP_ZERO, order);
99 
100 	if (!page)
101 		page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
102 
103 	if (!page)
104 		return -ENOMEM;
105 
106 	newchannel->ringbuffer_page = page;
107 	newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT;
108 	newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
109 
110 	return 0;
111 }
112 EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
113 
114 static int __vmbus_open(struct vmbus_channel *newchannel,
115 		       void *userdata, u32 userdatalen,
116 		       void (*onchannelcallback)(void *context), void *context)
117 {
118 	struct vmbus_channel_open_channel *open_msg;
119 	struct vmbus_channel_msginfo *open_info = NULL;
120 	struct page *page = newchannel->ringbuffer_page;
121 	u32 send_pages, recv_pages;
122 	unsigned long flags;
123 	int err;
124 
125 	if (userdatalen > MAX_USER_DEFINED_BYTES)
126 		return -EINVAL;
127 
128 	send_pages = newchannel->ringbuffer_send_offset;
129 	recv_pages = newchannel->ringbuffer_pagecount - send_pages;
130 
131 	spin_lock_irqsave(&newchannel->lock, flags);
132 	if (newchannel->state != CHANNEL_OPEN_STATE) {
133 		spin_unlock_irqrestore(&newchannel->lock, flags);
134 		return -EINVAL;
135 	}
136 	spin_unlock_irqrestore(&newchannel->lock, flags);
137 
138 	newchannel->state = CHANNEL_OPENING_STATE;
139 	newchannel->onchannel_callback = onchannelcallback;
140 	newchannel->channel_callback_context = context;
141 
142 	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
143 	if (err)
144 		goto error_clean_ring;
145 
146 	err = hv_ringbuffer_init(&newchannel->inbound,
147 				 &page[send_pages], recv_pages);
148 	if (err)
149 		goto error_clean_ring;
150 
151 	/* Establish the gpadl for the ring buffer */
152 	newchannel->ringbuffer_gpadlhandle = 0;
153 
154 	err = vmbus_establish_gpadl(newchannel,
155 				    page_address(newchannel->ringbuffer_page),
156 				    (send_pages + recv_pages) << PAGE_SHIFT,
157 				    &newchannel->ringbuffer_gpadlhandle);
158 	if (err)
159 		goto error_clean_ring;
160 
161 	/* Create and init the channel open message */
162 	open_info = kmalloc(sizeof(*open_info) +
163 			   sizeof(struct vmbus_channel_open_channel),
164 			   GFP_KERNEL);
165 	if (!open_info) {
166 		err = -ENOMEM;
167 		goto error_free_gpadl;
168 	}
169 
170 	init_completion(&open_info->waitevent);
171 	open_info->waiting_channel = newchannel;
172 
173 	open_msg = (struct vmbus_channel_open_channel *)open_info->msg;
174 	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
175 	open_msg->openid = newchannel->offermsg.child_relid;
176 	open_msg->child_relid = newchannel->offermsg.child_relid;
177 	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
178 	open_msg->downstream_ringbuffer_pageoffset = newchannel->ringbuffer_send_offset;
179 	open_msg->target_vp = newchannel->target_vp;
180 
181 	if (userdatalen)
182 		memcpy(open_msg->userdata, userdata, userdatalen);
183 
184 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
185 	list_add_tail(&open_info->msglistentry,
186 		      &vmbus_connection.chn_msg_list);
187 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
188 
189 	if (newchannel->rescind) {
190 		err = -ENODEV;
191 		goto error_free_info;
192 	}
193 
194 	err = vmbus_post_msg(open_msg,
195 			     sizeof(struct vmbus_channel_open_channel), true);
196 
197 	trace_vmbus_open(open_msg, err);
198 
199 	if (err != 0)
200 		goto error_clean_msglist;
201 
202 	wait_for_completion(&open_info->waitevent);
203 
204 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
205 	list_del(&open_info->msglistentry);
206 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
207 
208 	if (newchannel->rescind) {
209 		err = -ENODEV;
210 		goto error_free_info;
211 	}
212 
213 	if (open_info->response.open_result.status) {
214 		err = -EAGAIN;
215 		goto error_free_info;
216 	}
217 
218 	newchannel->state = CHANNEL_OPENED_STATE;
219 	kfree(open_info);
220 	return 0;
221 
222 error_clean_msglist:
223 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
224 	list_del(&open_info->msglistentry);
225 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
226 error_free_info:
227 	kfree(open_info);
228 error_free_gpadl:
229 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
230 	newchannel->ringbuffer_gpadlhandle = 0;
231 error_clean_ring:
232 	hv_ringbuffer_cleanup(&newchannel->outbound);
233 	hv_ringbuffer_cleanup(&newchannel->inbound);
234 	newchannel->state = CHANNEL_OPEN_STATE;
235 	return err;
236 }
237 
238 /*
239  * vmbus_connect_ring - Open the channel but reuse ring buffer
240  */
241 int vmbus_connect_ring(struct vmbus_channel *newchannel,
242 		       void (*onchannelcallback)(void *context), void *context)
243 {
244 	return  __vmbus_open(newchannel, NULL, 0, onchannelcallback, context);
245 }
246 EXPORT_SYMBOL_GPL(vmbus_connect_ring);
247 
248 /*
249  * vmbus_open - Open the specified channel.
250  */
251 int vmbus_open(struct vmbus_channel *newchannel,
252 	       u32 send_ringbuffer_size, u32 recv_ringbuffer_size,
253 	       void *userdata, u32 userdatalen,
254 	       void (*onchannelcallback)(void *context), void *context)
255 {
256 	int err;
257 
258 	err = vmbus_alloc_ring(newchannel, send_ringbuffer_size,
259 			       recv_ringbuffer_size);
260 	if (err)
261 		return err;
262 
263 	err = __vmbus_open(newchannel, userdata, userdatalen,
264 			   onchannelcallback, context);
265 	if (err)
266 		vmbus_free_ring(newchannel);
267 
268 	return err;
269 }
270 EXPORT_SYMBOL_GPL(vmbus_open);
271 
272 /* Used for Hyper-V Socket: a guest client's connect() to the host */
273 int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
274 				  const guid_t *shv_host_servie_id)
275 {
276 	struct vmbus_channel_tl_connect_request conn_msg;
277 	int ret;
278 
279 	memset(&conn_msg, 0, sizeof(conn_msg));
280 	conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST;
281 	conn_msg.guest_endpoint_id = *shv_guest_servie_id;
282 	conn_msg.host_service_id = *shv_host_servie_id;
283 
284 	ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
285 
286 	trace_vmbus_send_tl_connect_request(&conn_msg, ret);
287 
288 	return ret;
289 }
290 EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
291 
292 /*
293  * Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
294  *
295  * CHANNELMSG_MODIFYCHANNEL messages are aynchronous.  Also, Hyper-V does not
296  * ACK such messages.  IOW we can't know when the host will stop interrupting
297  * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
298  *
299  * The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
300  * VERSION_WIN10_V4_1.
301  */
302 int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
303 {
304 	struct vmbus_channel_modifychannel conn_msg;
305 	int ret;
306 
307 	memset(&conn_msg, 0, sizeof(conn_msg));
308 	conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
309 	conn_msg.child_relid = child_relid;
310 	conn_msg.target_vp = target_vp;
311 
312 	ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
313 
314 	trace_vmbus_send_modifychannel(&conn_msg, ret);
315 
316 	return ret;
317 }
318 EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
319 
320 /*
321  * create_gpadl_header - Creates a gpadl for the specified buffer
322  */
323 static int create_gpadl_header(void *kbuffer, u32 size,
324 			       struct vmbus_channel_msginfo **msginfo)
325 {
326 	int i;
327 	int pagecount;
328 	struct vmbus_channel_gpadl_header *gpadl_header;
329 	struct vmbus_channel_gpadl_body *gpadl_body;
330 	struct vmbus_channel_msginfo *msgheader;
331 	struct vmbus_channel_msginfo *msgbody = NULL;
332 	u32 msgsize;
333 
334 	int pfnsum, pfncount, pfnleft, pfncurr, pfnsize;
335 
336 	pagecount = size >> PAGE_SHIFT;
337 
338 	/* do we need a gpadl body msg */
339 	pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
340 		  sizeof(struct vmbus_channel_gpadl_header) -
341 		  sizeof(struct gpa_range);
342 	pfncount = pfnsize / sizeof(u64);
343 
344 	if (pagecount > pfncount) {
345 		/* we need a gpadl body */
346 		/* fill in the header */
347 		msgsize = sizeof(struct vmbus_channel_msginfo) +
348 			  sizeof(struct vmbus_channel_gpadl_header) +
349 			  sizeof(struct gpa_range) + pfncount * sizeof(u64);
350 		msgheader =  kzalloc(msgsize, GFP_KERNEL);
351 		if (!msgheader)
352 			goto nomem;
353 
354 		INIT_LIST_HEAD(&msgheader->submsglist);
355 		msgheader->msgsize = msgsize;
356 
357 		gpadl_header = (struct vmbus_channel_gpadl_header *)
358 			msgheader->msg;
359 		gpadl_header->rangecount = 1;
360 		gpadl_header->range_buflen = sizeof(struct gpa_range) +
361 					 pagecount * sizeof(u64);
362 		gpadl_header->range[0].byte_offset = 0;
363 		gpadl_header->range[0].byte_count = size;
364 		for (i = 0; i < pfncount; i++)
365 			gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn(
366 				kbuffer + PAGE_SIZE * i);
367 		*msginfo = msgheader;
368 
369 		pfnsum = pfncount;
370 		pfnleft = pagecount - pfncount;
371 
372 		/* how many pfns can we fit */
373 		pfnsize = MAX_SIZE_CHANNEL_MESSAGE -
374 			  sizeof(struct vmbus_channel_gpadl_body);
375 		pfncount = pfnsize / sizeof(u64);
376 
377 		/* fill in the body */
378 		while (pfnleft) {
379 			if (pfnleft > pfncount)
380 				pfncurr = pfncount;
381 			else
382 				pfncurr = pfnleft;
383 
384 			msgsize = sizeof(struct vmbus_channel_msginfo) +
385 				  sizeof(struct vmbus_channel_gpadl_body) +
386 				  pfncurr * sizeof(u64);
387 			msgbody = kzalloc(msgsize, GFP_KERNEL);
388 
389 			if (!msgbody) {
390 				struct vmbus_channel_msginfo *pos = NULL;
391 				struct vmbus_channel_msginfo *tmp = NULL;
392 				/*
393 				 * Free up all the allocated messages.
394 				 */
395 				list_for_each_entry_safe(pos, tmp,
396 					&msgheader->submsglist,
397 					msglistentry) {
398 
399 					list_del(&pos->msglistentry);
400 					kfree(pos);
401 				}
402 
403 				goto nomem;
404 			}
405 
406 			msgbody->msgsize = msgsize;
407 			gpadl_body =
408 				(struct vmbus_channel_gpadl_body *)msgbody->msg;
409 
410 			/*
411 			 * Gpadl is u32 and we are using a pointer which could
412 			 * be 64-bit
413 			 * This is governed by the guest/host protocol and
414 			 * so the hypervisor guarantees that this is ok.
415 			 */
416 			for (i = 0; i < pfncurr; i++)
417 				gpadl_body->pfn[i] = virt_to_hvpfn(
418 					kbuffer + PAGE_SIZE * (pfnsum + i));
419 
420 			/* add to msg header */
421 			list_add_tail(&msgbody->msglistentry,
422 				      &msgheader->submsglist);
423 			pfnsum += pfncurr;
424 			pfnleft -= pfncurr;
425 		}
426 	} else {
427 		/* everything fits in a header */
428 		msgsize = sizeof(struct vmbus_channel_msginfo) +
429 			  sizeof(struct vmbus_channel_gpadl_header) +
430 			  sizeof(struct gpa_range) + pagecount * sizeof(u64);
431 		msgheader = kzalloc(msgsize, GFP_KERNEL);
432 		if (msgheader == NULL)
433 			goto nomem;
434 
435 		INIT_LIST_HEAD(&msgheader->submsglist);
436 		msgheader->msgsize = msgsize;
437 
438 		gpadl_header = (struct vmbus_channel_gpadl_header *)
439 			msgheader->msg;
440 		gpadl_header->rangecount = 1;
441 		gpadl_header->range_buflen = sizeof(struct gpa_range) +
442 					 pagecount * sizeof(u64);
443 		gpadl_header->range[0].byte_offset = 0;
444 		gpadl_header->range[0].byte_count = size;
445 		for (i = 0; i < pagecount; i++)
446 			gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn(
447 				kbuffer + PAGE_SIZE * i);
448 
449 		*msginfo = msgheader;
450 	}
451 
452 	return 0;
453 nomem:
454 	kfree(msgheader);
455 	kfree(msgbody);
456 	return -ENOMEM;
457 }
458 
459 /*
460  * vmbus_establish_gpadl - Establish a GPADL for the specified buffer
461  *
462  * @channel: a channel
463  * @kbuffer: from kmalloc or vmalloc
464  * @size: page-size multiple
465  * @gpadl_handle: some funky thing
466  */
467 int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
468 			       u32 size, u32 *gpadl_handle)
469 {
470 	struct vmbus_channel_gpadl_header *gpadlmsg;
471 	struct vmbus_channel_gpadl_body *gpadl_body;
472 	struct vmbus_channel_msginfo *msginfo = NULL;
473 	struct vmbus_channel_msginfo *submsginfo, *tmp;
474 	struct list_head *curr;
475 	u32 next_gpadl_handle;
476 	unsigned long flags;
477 	int ret = 0;
478 
479 	next_gpadl_handle =
480 		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
481 
482 	ret = create_gpadl_header(kbuffer, size, &msginfo);
483 	if (ret)
484 		return ret;
485 
486 	init_completion(&msginfo->waitevent);
487 	msginfo->waiting_channel = channel;
488 
489 	gpadlmsg = (struct vmbus_channel_gpadl_header *)msginfo->msg;
490 	gpadlmsg->header.msgtype = CHANNELMSG_GPADL_HEADER;
491 	gpadlmsg->child_relid = channel->offermsg.child_relid;
492 	gpadlmsg->gpadl = next_gpadl_handle;
493 
494 
495 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
496 	list_add_tail(&msginfo->msglistentry,
497 		      &vmbus_connection.chn_msg_list);
498 
499 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
500 
501 	if (channel->rescind) {
502 		ret = -ENODEV;
503 		goto cleanup;
504 	}
505 
506 	ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
507 			     sizeof(*msginfo), true);
508 
509 	trace_vmbus_establish_gpadl_header(gpadlmsg, ret);
510 
511 	if (ret != 0)
512 		goto cleanup;
513 
514 	list_for_each(curr, &msginfo->submsglist) {
515 		submsginfo = (struct vmbus_channel_msginfo *)curr;
516 		gpadl_body =
517 			(struct vmbus_channel_gpadl_body *)submsginfo->msg;
518 
519 		gpadl_body->header.msgtype =
520 			CHANNELMSG_GPADL_BODY;
521 		gpadl_body->gpadl = next_gpadl_handle;
522 
523 		ret = vmbus_post_msg(gpadl_body,
524 				     submsginfo->msgsize - sizeof(*submsginfo),
525 				     true);
526 
527 		trace_vmbus_establish_gpadl_body(gpadl_body, ret);
528 
529 		if (ret != 0)
530 			goto cleanup;
531 
532 	}
533 	wait_for_completion(&msginfo->waitevent);
534 
535 	if (msginfo->response.gpadl_created.creation_status != 0) {
536 		pr_err("Failed to establish GPADL: err = 0x%x\n",
537 		       msginfo->response.gpadl_created.creation_status);
538 
539 		ret = -EDQUOT;
540 		goto cleanup;
541 	}
542 
543 	if (channel->rescind) {
544 		ret = -ENODEV;
545 		goto cleanup;
546 	}
547 
548 	/* At this point, we received the gpadl created msg */
549 	*gpadl_handle = gpadlmsg->gpadl;
550 
551 cleanup:
552 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
553 	list_del(&msginfo->msglistentry);
554 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
555 	list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
556 				 msglistentry) {
557 		kfree(submsginfo);
558 	}
559 
560 	kfree(msginfo);
561 	return ret;
562 }
563 EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
564 
565 /*
566  * vmbus_teardown_gpadl -Teardown the specified GPADL handle
567  */
568 int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
569 {
570 	struct vmbus_channel_gpadl_teardown *msg;
571 	struct vmbus_channel_msginfo *info;
572 	unsigned long flags;
573 	int ret;
574 
575 	info = kmalloc(sizeof(*info) +
576 		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
577 	if (!info)
578 		return -ENOMEM;
579 
580 	init_completion(&info->waitevent);
581 	info->waiting_channel = channel;
582 
583 	msg = (struct vmbus_channel_gpadl_teardown *)info->msg;
584 
585 	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
586 	msg->child_relid = channel->offermsg.child_relid;
587 	msg->gpadl = gpadl_handle;
588 
589 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
590 	list_add_tail(&info->msglistentry,
591 		      &vmbus_connection.chn_msg_list);
592 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
593 
594 	if (channel->rescind)
595 		goto post_msg_err;
596 
597 	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
598 			     true);
599 
600 	trace_vmbus_teardown_gpadl(msg, ret);
601 
602 	if (ret)
603 		goto post_msg_err;
604 
605 	wait_for_completion(&info->waitevent);
606 
607 post_msg_err:
608 	/*
609 	 * If the channel has been rescinded;
610 	 * we will be awakened by the rescind
611 	 * handler; set the error code to zero so we don't leak memory.
612 	 */
613 	if (channel->rescind)
614 		ret = 0;
615 
616 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
617 	list_del(&info->msglistentry);
618 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
619 
620 	kfree(info);
621 	return ret;
622 }
623 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
624 
625 void vmbus_reset_channel_cb(struct vmbus_channel *channel)
626 {
627 	unsigned long flags;
628 
629 	/*
630 	 * vmbus_on_event(), running in the per-channel tasklet, can race
631 	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
632 	 * the former is accessing channel->inbound.ring_buffer, the latter
633 	 * could be freeing the ring_buffer pages, so here we must stop it
634 	 * first.
635 	 *
636 	 * vmbus_chan_sched() might call the netvsc driver callback function
637 	 * that ends up scheduling NAPI work that accesses the ring buffer.
638 	 * At this point, we have to ensure that any such work is completed
639 	 * and that the channel ring buffer is no longer being accessed, cf.
640 	 * the calls to napi_disable() in netvsc_device_remove().
641 	 */
642 	tasklet_disable(&channel->callback_event);
643 
644 	/* See the inline comments in vmbus_chan_sched(). */
645 	spin_lock_irqsave(&channel->sched_lock, flags);
646 	channel->onchannel_callback = NULL;
647 	spin_unlock_irqrestore(&channel->sched_lock, flags);
648 
649 	channel->sc_creation_callback = NULL;
650 
651 	/* Re-enable tasklet for use on re-open */
652 	tasklet_enable(&channel->callback_event);
653 }
654 
655 static int vmbus_close_internal(struct vmbus_channel *channel)
656 {
657 	struct vmbus_channel_close_channel *msg;
658 	int ret;
659 
660 	vmbus_reset_channel_cb(channel);
661 
662 	/*
663 	 * In case a device driver's probe() fails (e.g.,
664 	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
665 	 * rescinded later (e.g., we dynamically disable an Integrated Service
666 	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
667 	 * here we should skip most of the below cleanup work.
668 	 */
669 	if (channel->state != CHANNEL_OPENED_STATE)
670 		return -EINVAL;
671 
672 	channel->state = CHANNEL_OPEN_STATE;
673 
674 	/* Send a closing message */
675 
676 	msg = &channel->close_msg.msg;
677 
678 	msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
679 	msg->child_relid = channel->offermsg.child_relid;
680 
681 	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel),
682 			     true);
683 
684 	trace_vmbus_close_internal(msg, ret);
685 
686 	if (ret) {
687 		pr_err("Close failed: close post msg return is %d\n", ret);
688 		/*
689 		 * If we failed to post the close msg,
690 		 * it is perhaps better to leak memory.
691 		 */
692 	}
693 
694 	/* Tear down the gpadl for the channel's ring buffer */
695 	else if (channel->ringbuffer_gpadlhandle) {
696 		ret = vmbus_teardown_gpadl(channel,
697 					   channel->ringbuffer_gpadlhandle);
698 		if (ret) {
699 			pr_err("Close failed: teardown gpadl return %d\n", ret);
700 			/*
701 			 * If we failed to teardown gpadl,
702 			 * it is perhaps better to leak memory.
703 			 */
704 		}
705 
706 		channel->ringbuffer_gpadlhandle = 0;
707 	}
708 
709 	return ret;
710 }
711 
712 /* disconnect ring - close all channels */
713 int vmbus_disconnect_ring(struct vmbus_channel *channel)
714 {
715 	struct vmbus_channel *cur_channel, *tmp;
716 	int ret;
717 
718 	if (channel->primary_channel != NULL)
719 		return -EINVAL;
720 
721 	list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) {
722 		if (cur_channel->rescind)
723 			wait_for_completion(&cur_channel->rescind_event);
724 
725 		mutex_lock(&vmbus_connection.channel_mutex);
726 		if (vmbus_close_internal(cur_channel) == 0) {
727 			vmbus_free_ring(cur_channel);
728 
729 			if (cur_channel->rescind)
730 				hv_process_channel_removal(cur_channel);
731 		}
732 		mutex_unlock(&vmbus_connection.channel_mutex);
733 	}
734 
735 	/*
736 	 * Now close the primary.
737 	 */
738 	mutex_lock(&vmbus_connection.channel_mutex);
739 	ret = vmbus_close_internal(channel);
740 	mutex_unlock(&vmbus_connection.channel_mutex);
741 
742 	return ret;
743 }
744 EXPORT_SYMBOL_GPL(vmbus_disconnect_ring);
745 
/*
 * vmbus_close - Close the specified channel
 *
 * Disconnects the channel (and its sub-channels) and, only when that
 * succeeds, frees the channel's ring buffer.
 */
void vmbus_close(struct vmbus_channel *channel)
{
	int ret = vmbus_disconnect_ring(channel);

	if (ret)
		return;

	vmbus_free_ring(channel);
}
EXPORT_SYMBOL_GPL(vmbus_close);
755 
756 /**
757  * vmbus_sendpacket() - Send the specified buffer on the given channel
758  * @channel: Pointer to vmbus_channel structure
759  * @buffer: Pointer to the buffer you want to send the data from.
760  * @bufferlen: Maximum size of what the buffer holds.
761  * @requestid: Identifier of the request
762  * @type: Type of packet that is being sent e.g. negotiate, time
763  *	  packet etc.
764  * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
765  *
766  * Sends data in @buffer directly to Hyper-V via the vmbus.
767  * This will send the data unparsed to Hyper-V.
768  *
769  * Mainly used by Hyper-V drivers.
770  */
771 int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
772 			   u32 bufferlen, u64 requestid,
773 			   enum vmbus_packet_type type, u32 flags)
774 {
775 	struct vmpacket_descriptor desc;
776 	u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
777 	u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64));
778 	struct kvec bufferlist[3];
779 	u64 aligned_data = 0;
780 	int num_vecs = ((bufferlen != 0) ? 3 : 1);
781 
782 
783 	/* Setup the descriptor */
784 	desc.type = type; /* VmbusPacketTypeDataInBand; */
785 	desc.flags = flags; /* VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; */
786 	/* in 8-bytes granularity */
787 	desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3;
788 	desc.len8 = (u16)(packetlen_aligned >> 3);
789 	desc.trans_id = requestid;
790 
791 	bufferlist[0].iov_base = &desc;
792 	bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor);
793 	bufferlist[1].iov_base = buffer;
794 	bufferlist[1].iov_len = bufferlen;
795 	bufferlist[2].iov_base = &aligned_data;
796 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
797 
798 	return hv_ringbuffer_write(channel, bufferlist, num_vecs);
799 }
800 EXPORT_SYMBOL(vmbus_sendpacket);
801 
802 /*
803  * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
804  * packets using a GPADL Direct packet type. This interface allows you
805  * to control notifying the host. This will be useful for sending
806  * batched data. Also the sender can control the send flags
807  * explicitly.
808  */
809 int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
810 				struct hv_page_buffer pagebuffers[],
811 				u32 pagecount, void *buffer, u32 bufferlen,
812 				u64 requestid)
813 {
814 	int i;
815 	struct vmbus_channel_packet_page_buffer desc;
816 	u32 descsize;
817 	u32 packetlen;
818 	u32 packetlen_aligned;
819 	struct kvec bufferlist[3];
820 	u64 aligned_data = 0;
821 
822 	if (pagecount > MAX_PAGE_BUFFER_COUNT)
823 		return -EINVAL;
824 
825 	/*
826 	 * Adjust the size down since vmbus_channel_packet_page_buffer is the
827 	 * largest size we support
828 	 */
829 	descsize = sizeof(struct vmbus_channel_packet_page_buffer) -
830 			  ((MAX_PAGE_BUFFER_COUNT - pagecount) *
831 			  sizeof(struct hv_page_buffer));
832 	packetlen = descsize + bufferlen;
833 	packetlen_aligned = ALIGN(packetlen, sizeof(u64));
834 
835 	/* Setup the descriptor */
836 	desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
837 	desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
838 	desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
839 	desc.length8 = (u16)(packetlen_aligned >> 3);
840 	desc.transactionid = requestid;
841 	desc.reserved = 0;
842 	desc.rangecount = pagecount;
843 
844 	for (i = 0; i < pagecount; i++) {
845 		desc.range[i].len = pagebuffers[i].len;
846 		desc.range[i].offset = pagebuffers[i].offset;
847 		desc.range[i].pfn	 = pagebuffers[i].pfn;
848 	}
849 
850 	bufferlist[0].iov_base = &desc;
851 	bufferlist[0].iov_len = descsize;
852 	bufferlist[1].iov_base = buffer;
853 	bufferlist[1].iov_len = bufferlen;
854 	bufferlist[2].iov_base = &aligned_data;
855 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
856 
857 	return hv_ringbuffer_write(channel, bufferlist, 3);
858 }
859 EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
860 
861 /*
862  * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
863  * using a GPADL Direct packet type.
864  * The buffer includes the vmbus descriptor.
865  */
866 int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
867 			      struct vmbus_packet_mpb_array *desc,
868 			      u32 desc_size,
869 			      void *buffer, u32 bufferlen, u64 requestid)
870 {
871 	u32 packetlen;
872 	u32 packetlen_aligned;
873 	struct kvec bufferlist[3];
874 	u64 aligned_data = 0;
875 
876 	packetlen = desc_size + bufferlen;
877 	packetlen_aligned = ALIGN(packetlen, sizeof(u64));
878 
879 	/* Setup the descriptor */
880 	desc->type = VM_PKT_DATA_USING_GPA_DIRECT;
881 	desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
882 	desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
883 	desc->length8 = (u16)(packetlen_aligned >> 3);
884 	desc->transactionid = requestid;
885 	desc->reserved = 0;
886 	desc->rangecount = 1;
887 
888 	bufferlist[0].iov_base = desc;
889 	bufferlist[0].iov_len = desc_size;
890 	bufferlist[1].iov_base = buffer;
891 	bufferlist[1].iov_len = bufferlen;
892 	bufferlist[2].iov_base = &aligned_data;
893 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
894 
895 	return hv_ringbuffer_write(channel, bufferlist, 3);
896 }
897 EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
898 
899 /**
900  * __vmbus_recvpacket() - Retrieve the user packet on the specified channel
901  * @channel: Pointer to vmbus_channel structure
902  * @buffer: Pointer to the buffer you want to receive the data into.
903  * @bufferlen: Maximum size of what the buffer can hold.
904  * @buffer_actual_len: The actual size of the data after it was received.
905  * @requestid: Identifier of the request
906  * @raw: true means keep the vmpacket_descriptor header in the received data.
907  *
908  * Receives directly from the hyper-v vmbus and puts the data it received
909  * into Buffer. This will receive the data unparsed from hyper-v.
910  *
911  * Mainly used by Hyper-V drivers.
912  */
913 static inline int
914 __vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
915 		   u32 bufferlen, u32 *buffer_actual_len, u64 *requestid,
916 		   bool raw)
917 {
918 	return hv_ringbuffer_read(channel, buffer, bufferlen,
919 				  buffer_actual_len, requestid, raw);
920 
921 }
922 
923 int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer,
924 		     u32 bufferlen, u32 *buffer_actual_len,
925 		     u64 *requestid)
926 {
927 	return __vmbus_recvpacket(channel, buffer, bufferlen,
928 				  buffer_actual_len, requestid, false);
929 }
930 EXPORT_SYMBOL(vmbus_recvpacket);
931 
932 /*
933  * vmbus_recvpacket_raw - Retrieve the raw packet on the specified channel
934  */
935 int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer,
936 			      u32 bufferlen, u32 *buffer_actual_len,
937 			      u64 *requestid)
938 {
939 	return __vmbus_recvpacket(channel, buffer, bufferlen,
940 				  buffer_actual_len, requestid, true);
941 }
942 EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
943