/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic Mouse */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKOWN,
	  .perf_device = false,
	},
};

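/*
 * Map a channel offer's interface GUID to the matching index in
 * vmbus_devs[]. Returns HV_UNKOWN when the GUID is not recognized
 * (the spelling of HV_UNKOWN follows the enum in the hyperv headers).
 */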
static u16 hv_get_dev_type(const uuid_le *guid)
{
	u16 i;

	for (i = HV_IDE; i < HV_UNKOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		    (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

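	/*
	 * The negotiate packet carries icframe_vercnt framework
	 * versions followed by icmsg_vercnt service versions, so the
	 * service versions start at index icframe_vercnt.
	 */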
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		    (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

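/*
 * Example of a typical caller (a sketch only; "buf" is the raw channel
 * payload and fw_version/srv_version are the caller's own version
 * constants):
 *
 *	struct icmsg_hdr *icmsghdrp;
 *	struct icmsg_negotiate *negop = NULL;
 *
 *	icmsghdrp = (struct icmsg_hdr *)&buf[sizeof(struct vmbuspipe_hdr)];
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, negop, buf,
 *					  fw_version, srv_version);
 */
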
/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	static atomic_t chan_num = ATOMIC_INIT(0);
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->id = atomic_inc_return(&chan_num);
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

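/*
 * percpu_channel_enq()/percpu_channel_deq() run on the channel's
 * target CPU (via smp_call_function_single(), or locally with
 * preemption disabled), so the per-cpu list needs no locking.
 */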
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	vmbus_release_relid(relid);

	BUG_ON(!channel->rescind);

	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		mutex_lock(&vmbus_connection.channel_mutex);
		list_del(&channel->listentry);
		mutex_unlock(&vmbus_connection.channel_mutex);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * We need to free the bit for init_vp_index() to work in the
	 * case of sub-channels, when we reload drivers like hv_netvsc.
	 */
	cpumask_clear_cpu(channel->target_cpu,
			  &primary_channel->alloced_cpus_in_node);

	free_channel(channel);
}

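/*
 * vmbus_free_channels - Tear down all channels on the vmbus driver's
 * cleanup path. Each channel is marked rescinded first so that
 * hv_process_channel_removal() can proceed when the device unregisters.
 */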
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);

	init_vp_index(newchannel, dev_type);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add().
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	vmbus_release_relid(newchannel->offermsg.child_relid);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * first distribute the primary channels across available NUMA nodes,
 * then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance-critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes, and within the assigned NUMA node we assign the
	 * first available CPU to the primary channel.
	 * The sub-channels are assigned evenly to the CPUs available in
	 * the NUMA node.
	 */
	if (!primary) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

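	/*
	 * available_mask = CPUs in this NUMA node that have not been
	 * allocated yet (alloced_mask is always a subset of the node
	 * mask, so the XOR leaves exactly the unused CPUs).
	 */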
	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	/*
	 * Normally the Hyper-V host doesn't create more subchannels
	 * than there are VCPUs on the node, but it is possible when
	 * not all present VCPUs on the node are initialized by the
	 * guest. Clear the alloced_cpus_in_node to start over.
	 */
	if (cpumask_equal(&primary->alloced_cpus_in_node,
			  cpumask_of_node(primary->numa_node)))
		cpumask_clear(&primary->alloced_cpus_in_node);

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		/*
		 * NOTE: in the case of sub-channels, we clear the
		 * sub-channel related bit(s) in
		 * primary->alloced_cpus_in_node in
		 * hv_process_channel_removal(), so when we reload drivers
		 * like hv_netvsc in an SMP guest, here we're able to
		 * re-allocate the bit from primary->alloced_cpus_in_node.
		 */
		if (!cpumask_test_cpu(cur_cpu,
				&primary->alloced_cpus_in_node)) {
			cpumask_set_cpu(cur_cpu,
					&primary->alloced_cpus_in_node);
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}

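/*
 * Wait for the host's CHANNELMSG_UNLOAD_RESPONSE by polling the SynIC
 * message page directly. This is used on the crash path, where we may
 * be in interrupt context and cannot sleep or rely on the normal
 * message-delivery machinery.
 */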
static void vmbus_wait_for_unload(void)
{
	int cpu = smp_processor_id();
	void *page_addr = hv_context.synic_message_page[cpu];
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
	struct vmbus_channel_message_header *hdr;
	bool unloaded = false;

	while (1) {
		if (msg->header.message_type == HVMSG_NONE) {
			mdelay(10);
			continue;
		}

		hdr = (struct vmbus_channel_message_header *)msg->u.payload;
		if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
			unloaded = true;

		msg->header.message_type = HVMSG_NONE;
		/*
		 * header.message_type needs to be written before we do
		 * the wrmsrl() below.
		 */
		mb();

		if (msg->header.message_flags.msg_pending)
			wrmsrl(HV_X64_MSR_EOM, 0);

		if (unloaded)
			break;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wake up the waiting thread.
	 * Once we successfully unload, we can clean up the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	/*
	 * vmbus_initiate_unload() is also called on crash, and the crash
	 * can happen in interrupt context, where scheduling is impossible.
	 */
	if (!in_interrupt())
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we set up the state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Set up the state for signalling the host: the signal
	 * structure is carved out of sig_buf so that it is aligned to
	 * HV_HYPERCALL_PARAM_ALIGN, as required for hypercall
	 * parameters.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should be impossible, because in
		 * vmbus_process_offer() we have already invoked
		 * vmbus_release_relid() on error.
		 */
		return;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}
}

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the teardown msg, copy the result and signal/unblock the
	 * wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
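/*
 * The second field is the handler type: entries marked 1 identify
 * non-blocking handlers that may be invoked directly from the message
 * handling context, while entries marked 0 need process context (see
 * the VMHT_BLOCKING/VMHT_NON_BLOCKING handler types in
 * hyperv_vmbus.h).
 */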
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
			   hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
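
/*
 * Typical use (a sketch; the driver-side names are illustrative): a
 * driver with sub-channels picks a channel per request, e.g.
 *
 *	channel = vmbus_get_outgoing_channel(device->channel);
 *	ret = vmbus_sendpacket(channel, ...);
 *
 * A sub-channel whose target VP matches the calling CPU is preferred,
 * falling back to a simple round robin over the opened sub-channels.
 */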

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
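
/*
 * A primary-channel owner registers the callback before opening
 * sub-channels, e.g. (a sketch; my_sc_open is a hypothetical driver
 * callback):
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_open);
 */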

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1109