1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8 
9 /*
10  * Cross Partition Communication (XPC) partition support.
11  *
12  *	This is the part of XPC that detects the presence/absence of
13  *	other partitions. It provides a heartbeat and monitors the
14  *	heartbeats of other partitions.
15  *
16  */
17 
18 #include <linux/device.h>
19 #include <linux/hardirq.h>
20 #include <linux/slab.h>
21 #include "xpc.h"
22 #include <asm/uv/uv_hub.h>
23 
/*
 * XPC is exiting flag. xpc_discovery() polls this and abandons its scan
 * as soon as it reads non-zero.
 */
int xpc_exiting;

/*
 * This partition's reserved page pointers. All three are established by
 * xpc_setup_rsvd_page(): xpc_rsvd_page is the local reserved page itself,
 * while the two nasid pointers refer to the partition and machine nasid
 * masks located inside that page.
 */
struct xpc_rsvd_page *xpc_rsvd_page;
static unsigned long *xpc_part_nasids;
unsigned long *xpc_mach_nasids;

/* sizes of a nasid mask, derived from the reserved page's SAL_nasids_size */
static int xpc_nasid_mask_nbytes;	/* #of bytes in nasid mask */
int xpc_nasid_mask_nlongs;	/* #of longs in nasid mask */

/* array of partition structures, indexed by partid */
struct xpc_partition *xpc_partitions;
36 
37 /*
38  * Guarantee that the kmalloc'd memory is cacheline aligned.
39  */
40 void *
41 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
42 {
43 	/* see if kmalloc will give us cachline aligned memory by default */
44 	*base = kmalloc(size, flags);
45 	if (*base == NULL)
46 		return NULL;
47 
48 	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
49 		return *base;
50 
51 	kfree(*base);
52 
53 	/* nope, we'll have to do it ourselves */
54 	*base = kmalloc(size + L1_CACHE_BYTES, flags);
55 	if (*base == NULL)
56 		return NULL;
57 
58 	return (void *)L1_CACHE_ALIGN((u64)*base);
59 }
60 
61 /*
62  * Given a nasid, get the physical address of the  partition's reserved page
63  * for that nasid. This function returns 0 on any error.
64  */
65 static unsigned long
66 xpc_get_rsvd_page_pa(int nasid)
67 {
68 	enum xp_retval ret;
69 	u64 cookie = 0;
70 	unsigned long rp_pa = nasid;	/* seed with nasid */
71 	size_t len = 0;
72 	size_t buf_len = 0;
73 	void *buf = buf;
74 	void *buf_base = NULL;
75 	enum xp_retval (*get_partition_rsvd_page_pa)
76 		(void *, u64 *, unsigned long *, size_t *) =
77 		xpc_arch_ops.get_partition_rsvd_page_pa;
78 
79 	while (1) {
80 
81 		/* !!! rp_pa will need to be _gpa on UV.
82 		 * ??? So do we save it into the architecture specific parts
83 		 * ??? of the xpc_partition structure? Do we rename this
84 		 * ??? function or have two versions? Rename rp_pa for UV to
85 		 * ??? rp_gpa?
86 		 */
87 		ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
88 
89 		dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
90 			"address=0x%016lx, len=0x%016lx\n", ret,
91 			(unsigned long)cookie, rp_pa, len);
92 
93 		if (ret != xpNeedMoreInfo)
94 			break;
95 
96 		/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
97 		if (is_shub())
98 			len = L1_CACHE_ALIGN(len);
99 
100 		if (len > buf_len) {
101 			if (buf_base != NULL)
102 				kfree(buf_base);
103 			buf_len = L1_CACHE_ALIGN(len);
104 			buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
105 							    &buf_base);
106 			if (buf_base == NULL) {
107 				dev_err(xpc_part, "unable to kmalloc "
108 					"len=0x%016lx\n", buf_len);
109 				ret = xpNoMemory;
110 				break;
111 			}
112 		}
113 
114 		ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
115 		if (ret != xpSuccess) {
116 			dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
117 			break;
118 		}
119 	}
120 
121 	kfree(buf_base);
122 
123 	if (ret != xpSuccess)
124 		rp_pa = 0;
125 
126 	dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
127 	return rp_pa;
128 }
129 
130 /*
131  * Fill the partition reserved page with the information needed by
132  * other partitions to discover we are alive and establish initial
133  * communications.
134  */
135 int
136 xpc_setup_rsvd_page(void)
137 {
138 	int ret;
139 	struct xpc_rsvd_page *rp;
140 	unsigned long rp_pa;
141 	unsigned long new_ts_jiffies;
142 
143 	/* get the local reserved page's address */
144 
145 	preempt_disable();
146 	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
147 	preempt_enable();
148 	if (rp_pa == 0) {
149 		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
150 		return -ESRCH;
151 	}
152 	rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
153 
154 	if (rp->SAL_version < 3) {
155 		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
156 		rp->SAL_partid &= 0xff;
157 	}
158 	BUG_ON(rp->SAL_partid != xp_partition_id);
159 
160 	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
161 		dev_err(xpc_part, "the reserved page's partid of %d is outside "
162 			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
163 			xp_max_npartitions);
164 		return -EINVAL;
165 	}
166 
167 	rp->version = XPC_RP_VERSION;
168 	rp->max_npartitions = xp_max_npartitions;
169 
170 	/* establish the actual sizes of the nasid masks */
171 	if (rp->SAL_version == 1) {
172 		/* SAL_version 1 didn't set the nasids_size field */
173 		rp->SAL_nasids_size = 128;
174 	}
175 	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
176 	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
177 					      BITS_PER_BYTE);
178 
179 	/* setup the pointers to the various items in the reserved page */
180 	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
181 	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
182 
183 	ret = xpc_arch_ops.setup_rsvd_page(rp);
184 	if (ret != 0)
185 		return ret;
186 
187 	/*
188 	 * Set timestamp of when reserved page was setup by XPC.
189 	 * This signifies to the remote partition that our reserved
190 	 * page is initialized.
191 	 */
192 	new_ts_jiffies = jiffies;
193 	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
194 		new_ts_jiffies++;
195 	rp->ts_jiffies = new_ts_jiffies;
196 
197 	xpc_rsvd_page = rp;
198 	return 0;
199 }
200 
201 void
202 xpc_teardown_rsvd_page(void)
203 {
204 	/* a zero timestamp indicates our rsvd page is not initialized */
205 	xpc_rsvd_page->ts_jiffies = 0;
206 }
207 
208 /*
209  * Get a copy of a portion of the remote partition's rsvd page.
210  *
211  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
212  * is large enough to contain a copy of their reserved page header and
213  * part_nasids mask.
214  */
215 enum xp_retval
216 xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
217 		  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
218 {
219 	int l;
220 	enum xp_retval ret;
221 
222 	/* get the reserved page's physical address */
223 
224 	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
225 	if (*remote_rp_pa == 0)
226 		return xpNoRsvdPageAddr;
227 
228 	/* pull over the reserved page header and part_nasids mask */
229 	ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
230 			       XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
231 	if (ret != xpSuccess)
232 		return ret;
233 
234 	if (discovered_nasids != NULL) {
235 		unsigned long *remote_part_nasids =
236 		    XPC_RP_PART_NASIDS(remote_rp);
237 
238 		for (l = 0; l < xpc_nasid_mask_nlongs; l++)
239 			discovered_nasids[l] |= remote_part_nasids[l];
240 	}
241 
242 	/* zero timestamp indicates the reserved page has not been setup */
243 	if (remote_rp->ts_jiffies == 0)
244 		return xpRsvdPageNotSet;
245 
246 	if (XPC_VERSION_MAJOR(remote_rp->version) !=
247 	    XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
248 		return xpBadVersion;
249 	}
250 
251 	/* check that both remote and local partids are valid for each side */
252 	if (remote_rp->SAL_partid < 0 ||
253 	    remote_rp->SAL_partid >= xp_max_npartitions ||
254 	    remote_rp->max_npartitions <= xp_partition_id) {
255 		return xpInvalidPartid;
256 	}
257 
258 	if (remote_rp->SAL_partid == xp_partition_id)
259 		return xpLocalPartid;
260 
261 	return xpSuccess;
262 }
263 
264 /*
265  * See if the other side has responded to a partition deactivate request
266  * from us. Though we requested the remote partition to deactivate with regard
267  * to us, we really only need to wait for the other side to disengage from us.
268  */
269 int
270 xpc_partition_disengaged(struct xpc_partition *part)
271 {
272 	short partid = XPC_PARTID(part);
273 	int disengaged;
274 
275 	disengaged = !xpc_arch_ops.partition_engaged(partid);
276 	if (part->disengage_timeout) {
277 		if (!disengaged) {
278 			if (time_is_after_jiffies(part->disengage_timeout)) {
279 				/* timelimit hasn't been reached yet */
280 				return 0;
281 			}
282 
283 			/*
284 			 * Other side hasn't responded to our deactivate
285 			 * request in a timely fashion, so assume it's dead.
286 			 */
287 
288 			dev_info(xpc_part, "deactivate request to remote "
289 				 "partition %d timed out\n", partid);
290 			xpc_disengage_timedout = 1;
291 			xpc_arch_ops.assume_partition_disengaged(partid);
292 			disengaged = 1;
293 		}
294 		part->disengage_timeout = 0;
295 
296 		/* cancel the timer function, provided it's not us */
297 		if (!in_interrupt())
298 			del_singleshot_timer_sync(&part->disengage_timer);
299 
300 		DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
301 			part->act_state != XPC_P_AS_INACTIVE);
302 		if (part->act_state != XPC_P_AS_INACTIVE)
303 			xpc_wakeup_channel_mgr(part);
304 
305 		xpc_arch_ops.cancel_partition_deactivation_request(part);
306 	}
307 	return disengaged;
308 }
309 
310 /*
311  * Mark specified partition as active.
312  */
313 enum xp_retval
314 xpc_mark_partition_active(struct xpc_partition *part)
315 {
316 	unsigned long irq_flags;
317 	enum xp_retval ret;
318 
319 	dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
320 
321 	spin_lock_irqsave(&part->act_lock, irq_flags);
322 	if (part->act_state == XPC_P_AS_ACTIVATING) {
323 		part->act_state = XPC_P_AS_ACTIVE;
324 		ret = xpSuccess;
325 	} else {
326 		DBUG_ON(part->reason == xpSuccess);
327 		ret = part->reason;
328 	}
329 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
330 
331 	return ret;
332 }
333 
334 /*
335  * Start the process of deactivating the specified partition.
336  */
337 void
338 xpc_deactivate_partition(const int line, struct xpc_partition *part,
339 			 enum xp_retval reason)
340 {
341 	unsigned long irq_flags;
342 
343 	spin_lock_irqsave(&part->act_lock, irq_flags);
344 
345 	if (part->act_state == XPC_P_AS_INACTIVE) {
346 		XPC_SET_REASON(part, reason, line);
347 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
348 		if (reason == xpReactivating) {
349 			/* we interrupt ourselves to reactivate partition */
350 			xpc_arch_ops.request_partition_reactivation(part);
351 		}
352 		return;
353 	}
354 	if (part->act_state == XPC_P_AS_DEACTIVATING) {
355 		if ((part->reason == xpUnloading && reason != xpUnloading) ||
356 		    reason == xpReactivating) {
357 			XPC_SET_REASON(part, reason, line);
358 		}
359 		spin_unlock_irqrestore(&part->act_lock, irq_flags);
360 		return;
361 	}
362 
363 	part->act_state = XPC_P_AS_DEACTIVATING;
364 	XPC_SET_REASON(part, reason, line);
365 
366 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
367 
368 	/* ask remote partition to deactivate with regard to us */
369 	xpc_arch_ops.request_partition_deactivation(part);
370 
371 	/* set a timelimit on the disengage phase of the deactivation request */
372 	part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
373 	part->disengage_timer.expires = part->disengage_timeout;
374 	add_timer(&part->disengage_timer);
375 
376 	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
377 		XPC_PARTID(part), reason);
378 
379 	xpc_partition_going_down(part, reason);
380 }
381 
382 /*
383  * Mark specified partition as inactive.
384  */
385 void
386 xpc_mark_partition_inactive(struct xpc_partition *part)
387 {
388 	unsigned long irq_flags;
389 
390 	dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
391 		XPC_PARTID(part));
392 
393 	spin_lock_irqsave(&part->act_lock, irq_flags);
394 	part->act_state = XPC_P_AS_INACTIVE;
395 	spin_unlock_irqrestore(&part->act_lock, irq_flags);
396 	part->remote_rp_pa = 0;
397 }
398 
399 /*
400  * SAL has provided a partition and machine mask.  The partition mask
401  * contains a bit for each even nasid in our partition.  The machine
402  * mask contains a bit for each even nasid in the entire machine.
403  *
404  * Using those two bit arrays, we can determine which nasids are
405  * known in the machine.  Each should also have a reserved page
406  * initialized if they are available for partitioning.
407  */
408 void
409 xpc_discovery(void)
410 {
411 	void *remote_rp_base;
412 	struct xpc_rsvd_page *remote_rp;
413 	unsigned long remote_rp_pa;
414 	int region;
415 	int region_size;
416 	int max_regions;
417 	int nasid;
418 	struct xpc_rsvd_page *rp;
419 	unsigned long *discovered_nasids;
420 	enum xp_retval ret;
421 
422 	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
423 						  xpc_nasid_mask_nbytes,
424 						  GFP_KERNEL, &remote_rp_base);
425 	if (remote_rp == NULL)
426 		return;
427 
428 	discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
429 				    GFP_KERNEL);
430 	if (discovered_nasids == NULL) {
431 		kfree(remote_rp_base);
432 		return;
433 	}
434 
435 	rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
436 
437 	/*
438 	 * The term 'region' in this context refers to the minimum number of
439 	 * nodes that can comprise an access protection grouping. The access
440 	 * protection is in regards to memory, IOI and IPI.
441 	 */
442 	region_size = xp_region_size;
443 
444 	if (is_uv())
445 		max_regions = 256;
446 	else {
447 		max_regions = 64;
448 
449 		switch (region_size) {
450 		case 128:
451 			max_regions *= 2;
452 		case 64:
453 			max_regions *= 2;
454 		case 32:
455 			max_regions *= 2;
456 			region_size = 16;
457 			DBUG_ON(!is_shub2());
458 		}
459 	}
460 
461 	for (region = 0; region < max_regions; region++) {
462 
463 		if (xpc_exiting)
464 			break;
465 
466 		dev_dbg(xpc_part, "searching region %d\n", region);
467 
468 		for (nasid = (region * region_size * 2);
469 		     nasid < ((region + 1) * region_size * 2); nasid += 2) {
470 
471 			if (xpc_exiting)
472 				break;
473 
474 			dev_dbg(xpc_part, "checking nasid %d\n", nasid);
475 
476 			if (test_bit(nasid / 2, xpc_part_nasids)) {
477 				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
478 					"part of the local partition; skipping "
479 					"region\n", nasid);
480 				break;
481 			}
482 
483 			if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
484 				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
485 					"not on Numa-Link network at reset\n",
486 					nasid);
487 				continue;
488 			}
489 
490 			if (test_bit(nasid / 2, discovered_nasids)) {
491 				dev_dbg(xpc_part, "Nasid %d is part of a "
492 					"partition which was previously "
493 					"discovered\n", nasid);
494 				continue;
495 			}
496 
497 			/* pull over the rsvd page header & part_nasids mask */
498 
499 			ret = xpc_get_remote_rp(nasid, discovered_nasids,
500 						remote_rp, &remote_rp_pa);
501 			if (ret != xpSuccess) {
502 				dev_dbg(xpc_part, "unable to get reserved page "
503 					"from nasid %d, reason=%d\n", nasid,
504 					ret);
505 
506 				if (ret == xpLocalPartid)
507 					break;
508 
509 				continue;
510 			}
511 
512 			xpc_arch_ops.request_partition_activation(remote_rp,
513 							 remote_rp_pa, nasid);
514 		}
515 	}
516 
517 	kfree(discovered_nasids);
518 	kfree(remote_rp_base);
519 }
520 
521 /*
522  * Given a partid, get the nasids owned by that partition from the
523  * remote partition's reserved page.
524  */
525 enum xp_retval
526 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
527 {
528 	struct xpc_partition *part;
529 	unsigned long part_nasid_pa;
530 
531 	part = &xpc_partitions[partid];
532 	if (part->remote_rp_pa == 0)
533 		return xpPartitionDown;
534 
535 	memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
536 
537 	part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
538 
539 	return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
540 				xpc_nasid_mask_nbytes);
541 }
542