xref: /openbmc/linux/drivers/misc/sgi-xp/xpc_uv.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8 
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15 
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/err.h>
22 #include <asm/uv/uv_hub.h>
23 #if defined CONFIG_X86_64
24 #include <asm/uv/bios.h>
25 #include <asm/uv/uv_irq.h>
26 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
27 #include <asm/sn/intr.h>
28 #include <asm/sn/sn_sal.h>
29 #endif
30 #include "../sgi-gru/gru.h"
31 #include "../sgi-gru/grukservices.h"
32 #include "xpc.h"
33 
/* heartbeat value; incremented and sent out by xpc_send_heartbeat_uv() */
static atomic64_t xpc_heartbeat_uv;
/* one bit per partid; set bits select which partitions get heartbeat msgs */
static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);

/*
 * Activate mq sizing: each message is one GRU cache line and the queue is
 * sized for four such messages per possible partition.
 */
#define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
#define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
					 XPC_ACTIVATE_MSG_SIZE_UV)
#define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"

/*
 * Notify mq sizing: each message is two GRU cache lines and the queue is
 * sized for four such messages per possible partition.
 */
#define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
#define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
					 XPC_NOTIFY_MSG_SIZE_UV)
#define XPC_NOTIFY_IRQ_NAME		"xpc_notify"

/* descriptors of the local activate and notify GRU message queues */
static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
49 
50 static int
51 xpc_setup_partitions_sn_uv(void)
52 {
53 	short partid;
54 	struct xpc_partition_uv *part_uv;
55 
56 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
57 		part_uv = &xpc_partitions[partid].sn.uv;
58 
59 		spin_lock_init(&part_uv->flags_lock);
60 		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
61 	}
62 	return 0;
63 }
64 
65 static int
66 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
67 {
68 #if defined CONFIG_X86_64
69 	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
70 	if (mq->irq < 0) {
71 		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
72 			mq->irq);
73 	}
74 
75 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
76 	int mmr_pnode;
77 	unsigned long mmr_value;
78 
79 	if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
80 		mq->irq = SGI_XPC_ACTIVATE;
81 	else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
82 		mq->irq = SGI_XPC_NOTIFY;
83 	else
84 		return -EINVAL;
85 
86 	mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
87 	mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
88 
89 	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
90 #else
91 	#error not a supported configuration
92 #endif
93 
94 	return 0;
95 }
96 
97 static void
98 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
99 {
100 #if defined CONFIG_X86_64
101 	uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
102 
103 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
104 	int mmr_pnode;
105 	unsigned long mmr_value;
106 
107 	mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
108 	mmr_value = 1UL << 16;
109 
110 	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
111 #else
112 	#error not a supported configuration
113 #endif
114 }
115 
116 static int
117 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
118 {
119 	int ret;
120 
121 #if defined CONFIG_X86_64
122 	ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
123 					 mq->order, &mq->mmr_offset);
124 	if (ret < 0) {
125 		dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
126 			"ret=%d\n", ret);
127 		return ret;
128 	}
129 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
130 	ret = sn_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
131 				    mq->order, &mq->mmr_offset);
132 	if (ret < 0) {
133 		dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
134 			ret);
135 		return -EBUSY;
136 	}
137 #else
138 	#error not a supported configuration
139 #endif
140 
141 	mq->watchlist_num = ret;
142 	return 0;
143 }
144 
145 static void
146 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
147 {
148 	int ret;
149 
150 #if defined CONFIG_X86_64
151 	ret = uv_bios_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
152 	BUG_ON(ret != BIOS_STATUS_SUCCESS);
153 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
154 	ret = sn_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
155 	BUG_ON(ret != SALRET_OK);
156 #else
157 	#error not a supported configuration
158 #endif
159 }
160 
161 static struct xpc_gru_mq_uv *
162 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
163 		     irq_handler_t irq_handler)
164 {
165 	enum xp_retval xp_ret;
166 	int ret;
167 	int nid;
168 	int pg_order;
169 	struct page *page;
170 	struct xpc_gru_mq_uv *mq;
171 
172 	mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
173 	if (mq == NULL) {
174 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
175 			"a xpc_gru_mq_uv structure\n");
176 		ret = -ENOMEM;
177 		goto out_1;
178 	}
179 
180 	pg_order = get_order(mq_size);
181 	mq->order = pg_order + PAGE_SHIFT;
182 	mq_size = 1UL << mq->order;
183 
184 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
185 
186 	nid = cpu_to_node(cpu);
187 	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
188 				pg_order);
189 	if (page == NULL) {
190 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
191 			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
192 		ret = -ENOMEM;
193 		goto out_2;
194 	}
195 	mq->address = page_address(page);
196 
197 	ret = gru_create_message_queue(mq->address, mq_size);
198 	if (ret != 0) {
199 		dev_err(xpc_part, "gru_create_message_queue() returned "
200 			"error=%d\n", ret);
201 		ret = -EINVAL;
202 		goto out_3;
203 	}
204 
205 	/* enable generation of irq when GRU mq operation occurs to this mq */
206 	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
207 	if (ret != 0)
208 		goto out_3;
209 
210 	ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
211 	if (ret != 0)
212 		goto out_4;
213 
214 	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
215 	if (ret != 0) {
216 		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
217 			mq->irq, ret);
218 		goto out_5;
219 	}
220 
221 	/* allow other partitions to access this GRU mq */
222 	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
223 	if (xp_ret != xpSuccess) {
224 		ret = -EACCES;
225 		goto out_6;
226 	}
227 
228 	return mq;
229 
230 	/* something went wrong */
231 out_6:
232 	free_irq(mq->irq, NULL);
233 out_5:
234 	xpc_release_gru_mq_irq_uv(mq);
235 out_4:
236 	xpc_gru_mq_watchlist_free_uv(mq);
237 out_3:
238 	free_pages((unsigned long)mq->address, pg_order);
239 out_2:
240 	kfree(mq);
241 out_1:
242 	return ERR_PTR(ret);
243 }
244 
245 static void
246 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
247 {
248 	unsigned int mq_size;
249 	int pg_order;
250 	int ret;
251 
252 	/* disallow other partitions to access GRU mq */
253 	mq_size = 1UL << mq->order;
254 	ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
255 	BUG_ON(ret != xpSuccess);
256 
257 	/* unregister irq handler and release mq irq/vector mapping */
258 	free_irq(mq->irq, NULL);
259 	xpc_release_gru_mq_irq_uv(mq);
260 
261 	/* disable generation of irq when GRU mq op occurs to this mq */
262 	xpc_gru_mq_watchlist_free_uv(mq);
263 
264 	pg_order = mq->order - PAGE_SHIFT;
265 	free_pages((unsigned long)mq->address, pg_order);
266 
267 	kfree(mq);
268 }
269 
270 static enum xp_retval
271 xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size)
272 {
273 	enum xp_retval xp_ret;
274 	int ret;
275 
276 	while (1) {
277 		ret = gru_send_message_gpa(mq_gpa, msg, msg_size);
278 		if (ret == MQE_OK) {
279 			xp_ret = xpSuccess;
280 			break;
281 		}
282 
283 		if (ret == MQE_QUEUE_FULL) {
284 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
285 				"error=MQE_QUEUE_FULL\n");
286 			/* !!! handle QLimit reached; delay & try again */
287 			/* ??? Do we add a limit to the number of retries? */
288 			(void)msleep_interruptible(10);
289 		} else if (ret == MQE_CONGESTION) {
290 			dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
291 				"error=MQE_CONGESTION\n");
292 			/* !!! handle LB Overflow; simply try again */
293 			/* ??? Do we add a limit to the number of retries? */
294 		} else {
295 			/* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
296 			dev_err(xpc_chan, "gru_send_message_gpa() returned "
297 				"error=%d\n", ret);
298 			xp_ret = xpGruSendMqError;
299 			break;
300 		}
301 	}
302 	return xp_ret;
303 }
304 
305 static void
306 xpc_process_activate_IRQ_rcvd_uv(void)
307 {
308 	unsigned long irq_flags;
309 	short partid;
310 	struct xpc_partition *part;
311 	u8 act_state_req;
312 
313 	DBUG_ON(xpc_activate_IRQ_rcvd == 0);
314 
315 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
316 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
317 		part = &xpc_partitions[partid];
318 
319 		if (part->sn.uv.act_state_req == 0)
320 			continue;
321 
322 		xpc_activate_IRQ_rcvd--;
323 		BUG_ON(xpc_activate_IRQ_rcvd < 0);
324 
325 		act_state_req = part->sn.uv.act_state_req;
326 		part->sn.uv.act_state_req = 0;
327 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
328 
329 		if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
330 			if (part->act_state == XPC_P_AS_INACTIVE)
331 				xpc_activate_partition(part);
332 			else if (part->act_state == XPC_P_AS_DEACTIVATING)
333 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
334 
335 		} else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
336 			if (part->act_state == XPC_P_AS_INACTIVE)
337 				xpc_activate_partition(part);
338 			else
339 				XPC_DEACTIVATE_PARTITION(part, xpReactivating);
340 
341 		} else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
342 			XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
343 
344 		} else {
345 			BUG();
346 		}
347 
348 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
349 		if (xpc_activate_IRQ_rcvd == 0)
350 			break;
351 	}
352 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
353 
354 }
355 
356 static void
357 xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
358 			      struct xpc_activate_mq_msghdr_uv *msg_hdr,
359 			      int *wakeup_hb_checker)
360 {
361 	unsigned long irq_flags;
362 	struct xpc_partition_uv *part_uv = &part->sn.uv;
363 	struct xpc_openclose_args *args;
364 
365 	part_uv->remote_act_state = msg_hdr->act_state;
366 
367 	switch (msg_hdr->type) {
368 	case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
369 		/* syncing of remote_act_state was just done above */
370 		break;
371 
372 	case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
373 		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
374 
375 		msg = container_of(msg_hdr,
376 				   struct xpc_activate_mq_msg_heartbeat_req_uv,
377 				   hdr);
378 		part_uv->heartbeat = msg->heartbeat;
379 		break;
380 	}
381 	case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
382 		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
383 
384 		msg = container_of(msg_hdr,
385 				   struct xpc_activate_mq_msg_heartbeat_req_uv,
386 				   hdr);
387 		part_uv->heartbeat = msg->heartbeat;
388 
389 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
390 		part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
391 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
392 		break;
393 	}
394 	case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
395 		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
396 
397 		msg = container_of(msg_hdr,
398 				   struct xpc_activate_mq_msg_heartbeat_req_uv,
399 				   hdr);
400 		part_uv->heartbeat = msg->heartbeat;
401 
402 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
403 		part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
404 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
405 		break;
406 	}
407 	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
408 		struct xpc_activate_mq_msg_activate_req_uv *msg;
409 
410 		/*
411 		 * ??? Do we deal here with ts_jiffies being different
412 		 * ??? if act_state != XPC_P_AS_INACTIVE instead of
413 		 * ??? below?
414 		 */
415 		msg = container_of(msg_hdr, struct
416 				   xpc_activate_mq_msg_activate_req_uv, hdr);
417 
418 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
419 		if (part_uv->act_state_req == 0)
420 			xpc_activate_IRQ_rcvd++;
421 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
422 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
423 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
424 		part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
425 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
426 
427 		(*wakeup_hb_checker)++;
428 		break;
429 	}
430 	case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
431 		struct xpc_activate_mq_msg_deactivate_req_uv *msg;
432 
433 		msg = container_of(msg_hdr, struct
434 				   xpc_activate_mq_msg_deactivate_req_uv, hdr);
435 
436 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
437 		if (part_uv->act_state_req == 0)
438 			xpc_activate_IRQ_rcvd++;
439 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
440 		part_uv->reason = msg->reason;
441 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
442 
443 		(*wakeup_hb_checker)++;
444 		return;
445 	}
446 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
447 		struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
448 
449 		msg = container_of(msg_hdr, struct
450 				   xpc_activate_mq_msg_chctl_closerequest_uv,
451 				   hdr);
452 		args = &part->remote_openclose_args[msg->ch_number];
453 		args->reason = msg->reason;
454 
455 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
456 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
457 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
458 
459 		xpc_wakeup_channel_mgr(part);
460 		break;
461 	}
462 	case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
463 		struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
464 
465 		msg = container_of(msg_hdr, struct
466 				   xpc_activate_mq_msg_chctl_closereply_uv,
467 				   hdr);
468 
469 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
470 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
471 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
472 
473 		xpc_wakeup_channel_mgr(part);
474 		break;
475 	}
476 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
477 		struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
478 
479 		msg = container_of(msg_hdr, struct
480 				   xpc_activate_mq_msg_chctl_openrequest_uv,
481 				   hdr);
482 		args = &part->remote_openclose_args[msg->ch_number];
483 		args->entry_size = msg->entry_size;
484 		args->local_nentries = msg->local_nentries;
485 
486 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
487 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
488 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
489 
490 		xpc_wakeup_channel_mgr(part);
491 		break;
492 	}
493 	case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
494 		struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
495 
496 		msg = container_of(msg_hdr, struct
497 				   xpc_activate_mq_msg_chctl_openreply_uv, hdr);
498 		args = &part->remote_openclose_args[msg->ch_number];
499 		args->remote_nentries = msg->remote_nentries;
500 		args->local_nentries = msg->local_nentries;
501 		args->local_msgqueue_pa = msg->local_notify_mq_gpa;
502 
503 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
504 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
505 		spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
506 
507 		xpc_wakeup_channel_mgr(part);
508 		break;
509 	}
510 	case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
511 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
512 		part_uv->flags |= XPC_P_ENGAGED_UV;
513 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
514 		break;
515 
516 	case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
517 		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
518 		part_uv->flags &= ~XPC_P_ENGAGED_UV;
519 		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
520 		break;
521 
522 	default:
523 		dev_err(xpc_part, "received unknown activate_mq msg type=%d "
524 			"from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
525 
526 		/* get hb checker to deactivate from the remote partition */
527 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
528 		if (part_uv->act_state_req == 0)
529 			xpc_activate_IRQ_rcvd++;
530 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
531 		part_uv->reason = xpBadMsgType;
532 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
533 
534 		(*wakeup_hb_checker)++;
535 		return;
536 	}
537 
538 	if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
539 	    part->remote_rp_ts_jiffies != 0) {
540 		/*
541 		 * ??? Does what we do here need to be sensitive to
542 		 * ??? act_state or remote_act_state?
543 		 */
544 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
545 		if (part_uv->act_state_req == 0)
546 			xpc_activate_IRQ_rcvd++;
547 		part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
548 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
549 
550 		(*wakeup_hb_checker)++;
551 	}
552 }
553 
554 static irqreturn_t
555 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
556 {
557 	struct xpc_activate_mq_msghdr_uv *msg_hdr;
558 	short partid;
559 	struct xpc_partition *part;
560 	int wakeup_hb_checker = 0;
561 
562 	while (1) {
563 		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->address);
564 		if (msg_hdr == NULL)
565 			break;
566 
567 		partid = msg_hdr->partid;
568 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
569 			dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
570 				"received invalid partid=0x%x in message\n",
571 				partid);
572 		} else {
573 			part = &xpc_partitions[partid];
574 			if (xpc_part_ref(part)) {
575 				xpc_handle_activate_mq_msg_uv(part, msg_hdr,
576 							    &wakeup_hb_checker);
577 				xpc_part_deref(part);
578 			}
579 		}
580 
581 		gru_free_message(xpc_activate_mq_uv->address, msg_hdr);
582 	}
583 
584 	if (wakeup_hb_checker)
585 		wake_up_interruptible(&xpc_activate_IRQ_wq);
586 
587 	return IRQ_HANDLED;
588 }
589 
590 static enum xp_retval
591 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
592 			 int msg_type)
593 {
594 	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
595 
596 	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
597 
598 	msg_hdr->type = msg_type;
599 	msg_hdr->partid = XPC_PARTID(part);
600 	msg_hdr->act_state = part->act_state;
601 	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
602 
603 	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
604 	return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg,
605 				msg_size);
606 }
607 
608 static void
609 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
610 			      size_t msg_size, int msg_type)
611 {
612 	enum xp_retval ret;
613 
614 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
615 	if (unlikely(ret != xpSuccess))
616 		XPC_DEACTIVATE_PARTITION(part, ret);
617 }
618 
619 static void
620 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
621 			 void *msg, size_t msg_size, int msg_type)
622 {
623 	struct xpc_partition *part = &xpc_partitions[ch->number];
624 	enum xp_retval ret;
625 
626 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
627 	if (unlikely(ret != xpSuccess)) {
628 		if (irq_flags != NULL)
629 			spin_unlock_irqrestore(&ch->lock, *irq_flags);
630 
631 		XPC_DEACTIVATE_PARTITION(part, ret);
632 
633 		if (irq_flags != NULL)
634 			spin_lock_irqsave(&ch->lock, *irq_flags);
635 	}
636 }
637 
638 static void
639 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
640 {
641 	unsigned long irq_flags;
642 	struct xpc_partition_uv *part_uv = &part->sn.uv;
643 
644 	/*
645 	 * !!! Make our side think that the remote partition sent an activate
646 	 * !!! message our way by doing what the activate IRQ handler would
647 	 * !!! do had one really been sent.
648 	 */
649 
650 	spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
651 	if (part_uv->act_state_req == 0)
652 		xpc_activate_IRQ_rcvd++;
653 	part_uv->act_state_req = act_state_req;
654 	spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
655 
656 	wake_up_interruptible(&xpc_activate_IRQ_wq);
657 }
658 
659 static enum xp_retval
660 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
661 				  size_t *len)
662 {
663 	s64 status;
664 	enum xp_retval ret;
665 
666 #if defined CONFIG_X86_64
667 	status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
668 					  (u64 *)len);
669 	if (status == BIOS_STATUS_SUCCESS)
670 		ret = xpSuccess;
671 	else if (status == BIOS_STATUS_MORE_PASSES)
672 		ret = xpNeedMoreInfo;
673 	else
674 		ret = xpBiosError;
675 
676 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
677 	status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
678 	if (status == SALRET_OK)
679 		ret = xpSuccess;
680 	else if (status == SALRET_MORE_PASSES)
681 		ret = xpNeedMoreInfo;
682 	else
683 		ret = xpSalError;
684 
685 #else
686 	#error not a supported configuration
687 #endif
688 
689 	return ret;
690 }
691 
692 static int
693 xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
694 {
695 	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv->address);
696 	return 0;
697 }
698 
699 static void
700 xpc_send_heartbeat_uv(int msg_type)
701 {
702 	short partid;
703 	struct xpc_partition *part;
704 	struct xpc_activate_mq_msg_heartbeat_req_uv msg;
705 
706 	/*
707 	 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
708 	 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
709 	 * !!! seconds. This is an increase in numalink traffic.
710 	 * ??? Is this good?
711 	 */
712 
713 	msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
714 
715 	partid = find_first_bit(xpc_heartbeating_to_mask_uv,
716 				XP_MAX_NPARTITIONS_UV);
717 
718 	while (partid < XP_MAX_NPARTITIONS_UV) {
719 		part = &xpc_partitions[partid];
720 
721 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
722 					      msg_type);
723 
724 		partid = find_next_bit(xpc_heartbeating_to_mask_uv,
725 				       XP_MAX_NPARTITIONS_UV, partid + 1);
726 	}
727 }
728 
729 static void
730 xpc_increment_heartbeat_uv(void)
731 {
732 	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
733 }
734 
735 static void
736 xpc_offline_heartbeat_uv(void)
737 {
738 	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
739 }
740 
741 static void
742 xpc_online_heartbeat_uv(void)
743 {
744 	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
745 }
746 
747 static void
748 xpc_heartbeat_init_uv(void)
749 {
750 	atomic64_set(&xpc_heartbeat_uv, 0);
751 	bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
752 	xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
753 }
754 
755 static void
756 xpc_heartbeat_exit_uv(void)
757 {
758 	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
759 }
760 
761 static enum xp_retval
762 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
763 {
764 	struct xpc_partition_uv *part_uv = &part->sn.uv;
765 	enum xp_retval ret = xpNoHeartbeat;
766 
767 	if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
768 	    part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
769 
770 		if (part_uv->heartbeat != part->last_heartbeat ||
771 		    (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
772 
773 			part->last_heartbeat = part_uv->heartbeat;
774 			ret = xpSuccess;
775 		}
776 	}
777 	return ret;
778 }
779 
780 static void
781 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
782 				    unsigned long remote_rp_gpa, int nasid)
783 {
784 	short partid = remote_rp->SAL_partid;
785 	struct xpc_partition *part = &xpc_partitions[partid];
786 	struct xpc_activate_mq_msg_activate_req_uv msg;
787 
788 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
789 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
790 	part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
791 
792 	/*
793 	 * ??? Is it a good idea to make this conditional on what is
794 	 * ??? potentially stale state information?
795 	 */
796 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
797 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
798 		msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa;
799 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
800 					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
801 	}
802 
803 	if (part->act_state == XPC_P_AS_INACTIVE)
804 		xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
805 }
806 
807 static void
808 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
809 {
810 	xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
811 }
812 
813 static void
814 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
815 {
816 	struct xpc_activate_mq_msg_deactivate_req_uv msg;
817 
818 	/*
819 	 * ??? Is it a good idea to make this conditional on what is
820 	 * ??? potentially stale state information?
821 	 */
822 	if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
823 	    part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
824 
825 		msg.reason = part->reason;
826 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
827 					 XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
828 	}
829 }
830 
/* There is nothing to undo on uv; this intentionally does nothing. */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
}
837 
838 static void
839 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
840 {
841 	head->first = NULL;
842 	head->last = NULL;
843 	spin_lock_init(&head->lock);
844 	head->n_entries = 0;
845 }
846 
847 static void *
848 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
849 {
850 	unsigned long irq_flags;
851 	struct xpc_fifo_entry_uv *first;
852 
853 	spin_lock_irqsave(&head->lock, irq_flags);
854 	first = head->first;
855 	if (head->first != NULL) {
856 		head->first = first->next;
857 		if (head->first == NULL)
858 			head->last = NULL;
859 	}
860 	head->n_entries++;
861 	spin_unlock_irqrestore(&head->lock, irq_flags);
862 	first->next = NULL;
863 	return first;
864 }
865 
866 static void
867 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
868 		      struct xpc_fifo_entry_uv *last)
869 {
870 	unsigned long irq_flags;
871 
872 	last->next = NULL;
873 	spin_lock_irqsave(&head->lock, irq_flags);
874 	if (head->last != NULL)
875 		head->last->next = last;
876 	else
877 		head->first = last;
878 	head->last = last;
879 	head->n_entries--;
880 	BUG_ON(head->n_entries < 0);
881 	spin_unlock_irqrestore(&head->lock, irq_flags);
882 }
883 
884 static int
885 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
886 {
887 	return head->n_entries;
888 }
889 
890 /*
891  * Setup the channel structures that are uv specific.
892  */
893 static enum xp_retval
894 xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
895 {
896 	struct xpc_channel_uv *ch_uv;
897 	int ch_number;
898 
899 	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
900 		ch_uv = &part->channels[ch_number].sn.uv;
901 
902 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
903 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
904 	}
905 
906 	return xpSuccess;
907 }
908 
/*
 * Tear down the uv specific channel structures of the partition. Nothing
 * was allocated for them, hence there is nothing to be done.
 */
static void
xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
{
}
918 
919 static enum xp_retval
920 xpc_make_first_contact_uv(struct xpc_partition *part)
921 {
922 	struct xpc_activate_mq_msg_uv msg;
923 
924 	/*
925 	 * We send a sync msg to get the remote partition's remote_act_state
926 	 * updated to our current act_state which at this point should
927 	 * be XPC_P_AS_ACTIVATING.
928 	 */
929 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
930 				      XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
931 
932 	while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) {
933 
934 		dev_dbg(xpc_part, "waiting to make first contact with "
935 			"partition %d\n", XPC_PARTID(part));
936 
937 		/* wait a 1/4 of a second or so */
938 		(void)msleep_interruptible(250);
939 
940 		if (part->act_state == XPC_P_AS_DEACTIVATING)
941 			return part->reason;
942 	}
943 
944 	return xpSuccess;
945 }
946 
947 static u64
948 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
949 {
950 	unsigned long irq_flags;
951 	union xpc_channel_ctl_flags chctl;
952 
953 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
954 	chctl = part->chctl;
955 	if (chctl.all_flags != 0)
956 		part->chctl.all_flags = 0;
957 
958 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
959 	return chctl.all_flags;
960 }
961 
962 static enum xp_retval
963 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
964 {
965 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
966 	struct xpc_send_msg_slot_uv *msg_slot;
967 	unsigned long irq_flags;
968 	int nentries;
969 	int entry;
970 	size_t nbytes;
971 
972 	for (nentries = ch->local_nentries; nentries > 0; nentries--) {
973 		nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
974 		ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
975 		if (ch_uv->send_msg_slots == NULL)
976 			continue;
977 
978 		for (entry = 0; entry < nentries; entry++) {
979 			msg_slot = &ch_uv->send_msg_slots[entry];
980 
981 			msg_slot->msg_slot_number = entry;
982 			xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
983 					      &msg_slot->next);
984 		}
985 
986 		spin_lock_irqsave(&ch->lock, irq_flags);
987 		if (nentries < ch->local_nentries)
988 			ch->local_nentries = nentries;
989 		spin_unlock_irqrestore(&ch->lock, irq_flags);
990 		return xpSuccess;
991 	}
992 
993 	return xpNoMemory;
994 }
995 
996 static enum xp_retval
997 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
998 {
999 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1000 	struct xpc_notify_mq_msg_uv *msg_slot;
1001 	unsigned long irq_flags;
1002 	int nentries;
1003 	int entry;
1004 	size_t nbytes;
1005 
1006 	for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1007 		nbytes = nentries * ch->entry_size;
1008 		ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1009 		if (ch_uv->recv_msg_slots == NULL)
1010 			continue;
1011 
1012 		for (entry = 0; entry < nentries; entry++) {
1013 			msg_slot = ch_uv->recv_msg_slots +
1014 			    entry * ch->entry_size;
1015 
1016 			msg_slot->hdr.msg_slot_number = entry;
1017 		}
1018 
1019 		spin_lock_irqsave(&ch->lock, irq_flags);
1020 		if (nentries < ch->remote_nentries)
1021 			ch->remote_nentries = nentries;
1022 		spin_unlock_irqrestore(&ch->lock, irq_flags);
1023 		return xpSuccess;
1024 	}
1025 
1026 	return xpNoMemory;
1027 }
1028 
1029 /*
1030  * Allocate msg_slots associated with the channel.
1031  */
1032 static enum xp_retval
1033 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1034 {
1035 	static enum xp_retval ret;
1036 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1037 
1038 	DBUG_ON(ch->flags & XPC_C_SETUP);
1039 
1040 	ret = xpc_allocate_send_msg_slot_uv(ch);
1041 	if (ret == xpSuccess) {
1042 
1043 		ret = xpc_allocate_recv_msg_slot_uv(ch);
1044 		if (ret != xpSuccess) {
1045 			kfree(ch_uv->send_msg_slots);
1046 			xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1047 		}
1048 	}
1049 	return ret;
1050 }
1051 
1052 /*
1053  * Free up msg_slots and clear other stuff that were setup for the specified
1054  * channel.
1055  */
1056 static void
1057 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1058 {
1059 	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1060 
1061 	DBUG_ON(!spin_is_locked(&ch->lock));
1062 
1063 	ch_uv->remote_notify_mq_gpa = 0;
1064 
1065 	if (ch->flags & XPC_C_SETUP) {
1066 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1067 		kfree(ch_uv->send_msg_slots);
1068 		xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1069 		kfree(ch_uv->recv_msg_slots);
1070 	}
1071 }
1072 
1073 static void
1074 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1075 {
1076 	struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1077 
1078 	msg.ch_number = ch->number;
1079 	msg.reason = ch->reason;
1080 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1081 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1082 }
1083 
1084 static void
1085 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1086 {
1087 	struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1088 
1089 	msg.ch_number = ch->number;
1090 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1091 				    XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1092 }
1093 
1094 static void
1095 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1096 {
1097 	struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1098 
1099 	msg.ch_number = ch->number;
1100 	msg.entry_size = ch->entry_size;
1101 	msg.local_nentries = ch->local_nentries;
1102 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1103 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1104 }
1105 
1106 static void
1107 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1108 {
1109 	struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1110 
1111 	msg.ch_number = ch->number;
1112 	msg.local_nentries = ch->local_nentries;
1113 	msg.remote_nentries = ch->remote_nentries;
1114 	msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv);
1115 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1116 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1117 }
1118 
1119 static void
1120 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1121 {
1122 	unsigned long irq_flags;
1123 
1124 	spin_lock_irqsave(&part->chctl_lock, irq_flags);
1125 	part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1126 	spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1127 
1128 	xpc_wakeup_channel_mgr(part);
1129 }
1130 
1131 static void
1132 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1133 			       unsigned long msgqueue_pa)
1134 {
1135 	ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa;
1136 }
1137 
1138 static void
1139 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1140 {
1141 	struct xpc_activate_mq_msg_uv msg;
1142 
1143 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1144 				      XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1145 }
1146 
1147 static void
1148 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1149 {
1150 	struct xpc_activate_mq_msg_uv msg;
1151 
1152 	xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1153 				      XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1154 }
1155 
1156 static void
1157 xpc_assume_partition_disengaged_uv(short partid)
1158 {
1159 	struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1160 	unsigned long irq_flags;
1161 
1162 	spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1163 	part_uv->flags &= ~XPC_P_ENGAGED_UV;
1164 	spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1165 }
1166 
1167 static int
1168 xpc_partition_engaged_uv(short partid)
1169 {
1170 	return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1171 }
1172 
1173 static int
1174 xpc_any_partition_engaged_uv(void)
1175 {
1176 	struct xpc_partition_uv *part_uv;
1177 	short partid;
1178 
1179 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1180 		part_uv = &xpc_partitions[partid].sn.uv;
1181 		if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1182 			return 1;
1183 	}
1184 	return 0;
1185 }
1186 
1187 static enum xp_retval
1188 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1189 			 struct xpc_send_msg_slot_uv **address_of_msg_slot)
1190 {
1191 	enum xp_retval ret;
1192 	struct xpc_send_msg_slot_uv *msg_slot;
1193 	struct xpc_fifo_entry_uv *entry;
1194 
1195 	while (1) {
1196 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1197 		if (entry != NULL)
1198 			break;
1199 
1200 		if (flags & XPC_NOWAIT)
1201 			return xpNoWait;
1202 
1203 		ret = xpc_allocate_msg_wait(ch);
1204 		if (ret != xpInterrupted && ret != xpTimeout)
1205 			return ret;
1206 	}
1207 
1208 	msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1209 	*address_of_msg_slot = msg_slot;
1210 	return xpSuccess;
1211 }
1212 
1213 static void
1214 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1215 		     struct xpc_send_msg_slot_uv *msg_slot)
1216 {
1217 	xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1218 
1219 	/* wakeup anyone waiting for a free msg slot */
1220 	if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1221 		wake_up(&ch->msg_allocate_wq);
1222 }
1223 
1224 static void
1225 xpc_notify_sender_uv(struct xpc_channel *ch,
1226 		     struct xpc_send_msg_slot_uv *msg_slot,
1227 		     enum xp_retval reason)
1228 {
1229 	xpc_notify_func func = msg_slot->func;
1230 
1231 	if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1232 
1233 		atomic_dec(&ch->n_to_notify);
1234 
1235 		dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1236 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1237 			msg_slot->msg_slot_number, ch->partid, ch->number);
1238 
1239 		func(reason, ch->partid, ch->number, msg_slot->key);
1240 
1241 		dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1242 			"msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1243 			msg_slot->msg_slot_number, ch->partid, ch->number);
1244 	}
1245 }
1246 
1247 static void
1248 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1249 			    struct xpc_notify_mq_msg_uv *msg)
1250 {
1251 	struct xpc_send_msg_slot_uv *msg_slot;
1252 	int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1253 
1254 	msg_slot = &ch->sn.uv.send_msg_slots[entry];
1255 
1256 	BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1257 	msg_slot->msg_slot_number += ch->local_nentries;
1258 
1259 	if (msg_slot->func != NULL)
1260 		xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1261 
1262 	xpc_free_msg_slot_uv(ch, msg_slot);
1263 }
1264 
1265 static void
1266 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1267 			    struct xpc_notify_mq_msg_uv *msg)
1268 {
1269 	struct xpc_partition_uv *part_uv = &part->sn.uv;
1270 	struct xpc_channel *ch;
1271 	struct xpc_channel_uv *ch_uv;
1272 	struct xpc_notify_mq_msg_uv *msg_slot;
1273 	unsigned long irq_flags;
1274 	int ch_number = msg->hdr.ch_number;
1275 
1276 	if (unlikely(ch_number >= part->nchannels)) {
1277 		dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1278 			"channel number=0x%x in message from partid=%d\n",
1279 			ch_number, XPC_PARTID(part));
1280 
1281 		/* get hb checker to deactivate from the remote partition */
1282 		spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1283 		if (part_uv->act_state_req == 0)
1284 			xpc_activate_IRQ_rcvd++;
1285 		part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1286 		part_uv->reason = xpBadChannelNumber;
1287 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1288 
1289 		wake_up_interruptible(&xpc_activate_IRQ_wq);
1290 		return;
1291 	}
1292 
1293 	ch = &part->channels[ch_number];
1294 	xpc_msgqueue_ref(ch);
1295 
1296 	if (!(ch->flags & XPC_C_CONNECTED)) {
1297 		xpc_msgqueue_deref(ch);
1298 		return;
1299 	}
1300 
1301 	/* see if we're really dealing with an ACK for a previously sent msg */
1302 	if (msg->hdr.size == 0) {
1303 		xpc_handle_notify_mq_ack_uv(ch, msg);
1304 		xpc_msgqueue_deref(ch);
1305 		return;
1306 	}
1307 
1308 	/* we're dealing with a normal message sent via the notify_mq */
1309 	ch_uv = &ch->sn.uv;
1310 
1311 	msg_slot = ch_uv->recv_msg_slots +
1312 	    (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1313 
1314 	BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number);
1315 	BUG_ON(msg_slot->hdr.size != 0);
1316 
1317 	memcpy(msg_slot, msg, msg->hdr.size);
1318 
1319 	xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1320 
1321 	if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1322 		/*
1323 		 * If there is an existing idle kthread get it to deliver
1324 		 * the payload, otherwise we'll have to get the channel mgr
1325 		 * for this partition to create a kthread to do the delivery.
1326 		 */
1327 		if (atomic_read(&ch->kthreads_idle) > 0)
1328 			wake_up_nr(&ch->idle_wq, 1);
1329 		else
1330 			xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1331 	}
1332 	xpc_msgqueue_deref(ch);
1333 }
1334 
1335 static irqreturn_t
1336 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1337 {
1338 	struct xpc_notify_mq_msg_uv *msg;
1339 	short partid;
1340 	struct xpc_partition *part;
1341 
1342 	while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) {
1343 
1344 		partid = msg->hdr.partid;
1345 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1346 			dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1347 				"invalid partid=0x%x in message\n", partid);
1348 		} else {
1349 			part = &xpc_partitions[partid];
1350 
1351 			if (xpc_part_ref(part)) {
1352 				xpc_handle_notify_mq_msg_uv(part, msg);
1353 				xpc_part_deref(part);
1354 			}
1355 		}
1356 
1357 		gru_free_message(xpc_notify_mq_uv, msg);
1358 	}
1359 
1360 	return IRQ_HANDLED;
1361 }
1362 
1363 static int
1364 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1365 {
1366 	return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1367 }
1368 
1369 static void
1370 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1371 {
1372 	struct xpc_channel *ch = &part->channels[ch_number];
1373 	int ndeliverable_payloads;
1374 
1375 	xpc_msgqueue_ref(ch);
1376 
1377 	ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1378 
1379 	if (ndeliverable_payloads > 0 &&
1380 	    (ch->flags & XPC_C_CONNECTED) &&
1381 	    (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1382 
1383 		xpc_activate_kthreads(ch, ndeliverable_payloads);
1384 	}
1385 
1386 	xpc_msgqueue_deref(ch);
1387 }
1388 
/*
 * Send a payload to the remote side of the channel via its notify mq.
 *
 * The payload is prefixed with a xpc_notify_mq_msghdr_uv header and the
 * resulting message must fit within ch->entry_size. A send msg slot is
 * taken from the channel's free list for the message; if func is non-NULL
 * it is recorded in the slot (together with key) for a later callout.
 *
 * Returns:
 *   xpSuccess       - the message was handed to xpc_send_gru_msg(), or the
 *                     send failed but the disconnect path already took over
 *                     the callout (see the comment at out_2)
 *   xpPayloadTooBig - header plus payload exceeds ch->entry_size
 *   xpNotConnected  - the channel is not connected
 *   ch->reason      - the channel is disconnecting
 *   otherwise whatever xpc_allocate_msg_slot_uv() or xpc_send_gru_msg()
 *   returned; a failed send also requests deactivation of the partition.
 */
static enum xp_retval
xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
		    u16 payload_size, u8 notify_type, xpc_notify_func func,
		    void *key)
{
	enum xp_retval ret = xpSuccess;
	struct xpc_send_msg_slot_uv *msg_slot = NULL;
	struct xpc_notify_mq_msg_uv *msg;
	u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
	size_t msg_size;

	DBUG_ON(notify_type != XPC_N_CALL);

	msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
	if (msg_size > ch->entry_size)
		return xpPayloadTooBig;

	/* hold a msgqueue reference on the channel until we return */
	xpc_msgqueue_ref(ch);

	if (ch->flags & XPC_C_DISCONNECTING) {
		ret = ch->reason;
		goto out_1;
	}
	if (!(ch->flags & XPC_C_CONNECTED)) {
		ret = xpNotConnected;
		goto out_1;
	}

	ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
	if (ret != xpSuccess)
		goto out_1;

	if (func != NULL) {
		atomic_inc(&ch->n_to_notify);

		msg_slot->key = key;
		smp_wmb(); /* a non-NULL func must hit memory after the key */
		msg_slot->func = func;

		/* recheck now that func is visible to the disconnect path */
		if (ch->flags & XPC_C_DISCONNECTING) {
			ret = ch->reason;
			goto out_2;
		}
	}

	/* build the message (header followed by payload) in the local buffer */
	msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
	msg->hdr.partid = xp_partition_id;
	msg->hdr.ch_number = ch->number;
	msg->hdr.size = msg_size;
	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
	memcpy(&msg->payload, payload, payload_size);

	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size);
	if (ret == xpSuccess)
		goto out_1;

	/* the send failed: request deactivation, then undo the slot setup */
	XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
out_2:
	if (func != NULL) {
		/*
		 * Try to NULL the msg_slot's func field. If we fail, then
		 * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
		 * case we need to pretend we succeeded to send the message
		 * since the user will get a callout for the disconnect error
		 * by xpc_notify_senders_of_disconnect_uv(), and to also get an
		 * error returned here will confuse them. Additionally, since
		 * in this case the channel is being disconnected we don't need
		 * to put the msg_slot back on the free list.
		 */
		if (cmpxchg(&msg_slot->func, func, NULL) != func) {
			ret = xpSuccess;
			goto out_1;
		}

		msg_slot->key = NULL;
		atomic_dec(&ch->n_to_notify);
	}
	xpc_free_msg_slot_uv(ch, msg_slot);
out_1:
	xpc_msgqueue_deref(ch);
	return ret;
}
1471 
1472 /*
1473  * Tell the callers of xpc_send_notify() that the status of their payloads
1474  * is unknown because the channel is now disconnecting.
1475  *
1476  * We don't worry about putting these msg_slots on the free list since the
1477  * msg_slots themselves are about to be kfree'd.
1478  */
1479 static void
1480 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1481 {
1482 	struct xpc_send_msg_slot_uv *msg_slot;
1483 	int entry;
1484 
1485 	DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1486 
1487 	for (entry = 0; entry < ch->local_nentries; entry++) {
1488 
1489 		if (atomic_read(&ch->n_to_notify) == 0)
1490 			break;
1491 
1492 		msg_slot = &ch->sn.uv.send_msg_slots[entry];
1493 		if (msg_slot->func != NULL)
1494 			xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1495 	}
1496 }
1497 
1498 /*
1499  * Get the next deliverable message's payload.
1500  */
1501 static void *
1502 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1503 {
1504 	struct xpc_fifo_entry_uv *entry;
1505 	struct xpc_notify_mq_msg_uv *msg;
1506 	void *payload = NULL;
1507 
1508 	if (!(ch->flags & XPC_C_DISCONNECTING)) {
1509 		entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1510 		if (entry != NULL) {
1511 			msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1512 					   hdr.u.next);
1513 			payload = &msg->payload;
1514 		}
1515 	}
1516 	return payload;
1517 }
1518 
1519 static void
1520 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1521 {
1522 	struct xpc_notify_mq_msg_uv *msg;
1523 	enum xp_retval ret;
1524 
1525 	msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1526 
1527 	/* return an ACK to the sender of this message */
1528 
1529 	msg->hdr.partid = xp_partition_id;
1530 	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
1531 
1532 	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg,
1533 			       sizeof(struct xpc_notify_mq_msghdr_uv));
1534 	if (ret != xpSuccess)
1535 		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1536 
1537 	msg->hdr.msg_slot_number += ch->remote_nentries;
1538 }
1539 
1540 int
1541 xpc_init_uv(void)
1542 {
1543 	xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
1544 	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
1545 	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
1546 	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
1547 	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
1548 	xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
1549 	xpc_online_heartbeat = xpc_online_heartbeat_uv;
1550 	xpc_heartbeat_init = xpc_heartbeat_init_uv;
1551 	xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
1552 	xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
1553 
1554 	xpc_request_partition_activation = xpc_request_partition_activation_uv;
1555 	xpc_request_partition_reactivation =
1556 	    xpc_request_partition_reactivation_uv;
1557 	xpc_request_partition_deactivation =
1558 	    xpc_request_partition_deactivation_uv;
1559 	xpc_cancel_partition_deactivation_request =
1560 	    xpc_cancel_partition_deactivation_request_uv;
1561 
1562 	xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
1563 	xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
1564 
1565 	xpc_make_first_contact = xpc_make_first_contact_uv;
1566 
1567 	xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
1568 	xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
1569 	xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
1570 	xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
1571 	xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
1572 
1573 	xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
1574 
1575 	xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
1576 	xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
1577 
1578 	xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
1579 	xpc_indicate_partition_disengaged =
1580 	    xpc_indicate_partition_disengaged_uv;
1581 	xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
1582 	xpc_partition_engaged = xpc_partition_engaged_uv;
1583 	xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
1584 
1585 	xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
1586 	xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
1587 	xpc_send_payload = xpc_send_payload_uv;
1588 	xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
1589 	xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
1590 	xpc_received_payload = xpc_received_payload_uv;
1591 
1592 	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1593 		dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1594 			XPC_MSG_HDR_MAX_SIZE);
1595 		return -E2BIG;
1596 	}
1597 
1598 	xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
1599 						  XPC_ACTIVATE_IRQ_NAME,
1600 						  xpc_handle_activate_IRQ_uv);
1601 	if (IS_ERR(xpc_activate_mq_uv))
1602 		return PTR_ERR(xpc_activate_mq_uv);
1603 
1604 	xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
1605 						XPC_NOTIFY_IRQ_NAME,
1606 						xpc_handle_notify_IRQ_uv);
1607 	if (IS_ERR(xpc_notify_mq_uv)) {
1608 		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1609 		return PTR_ERR(xpc_notify_mq_uv);
1610 	}
1611 
1612 	return 0;
1613 }
1614 
1615 void
1616 xpc_exit_uv(void)
1617 {
1618 	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1619 	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1620 }
1621