1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 7 */ 8 9 /* 10 * Cross Partition Communication (XPC) partition support. 11 * 12 * This is the part of XPC that detects the presence/absence of 13 * other partitions. It provides a heartbeat and monitors the 14 * heartbeats of other partitions. 15 * 16 */ 17 18 #include <linux/device.h> 19 #include <linux/hardirq.h> 20 #include "xpc.h" 21 22 /* XPC is exiting flag */ 23 int xpc_exiting; 24 25 /* this partition's reserved page pointers */ 26 struct xpc_rsvd_page *xpc_rsvd_page; 27 static unsigned long *xpc_part_nasids; 28 unsigned long *xpc_mach_nasids; 29 30 static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ 31 int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ 32 33 struct xpc_partition *xpc_partitions; 34 35 /* 36 * Guarantee that the kmalloc'd memory is cacheline aligned. 37 */ 38 void * 39 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) 40 { 41 /* see if kmalloc will give us cachline aligned memory by default */ 42 *base = kmalloc(size, flags); 43 if (*base == NULL) 44 return NULL; 45 46 if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) 47 return *base; 48 49 kfree(*base); 50 51 /* nope, we'll have to do it ourselves */ 52 *base = kmalloc(size + L1_CACHE_BYTES, flags); 53 if (*base == NULL) 54 return NULL; 55 56 return (void *)L1_CACHE_ALIGN((u64)*base); 57 } 58 59 /* 60 * Given a nasid, get the physical address of the partition's reserved page 61 * for that nasid. This function returns 0 on any error. 62 */ 63 static unsigned long 64 xpc_get_rsvd_page_pa(int nasid) 65 { 66 enum xp_retval ret; 67 u64 cookie = 0; 68 unsigned long rp_pa = nasid; /* seed with nasid */ 69 size_t len = 0; 70 size_t buf_len = 0; 71 void *buf = buf; 72 void *buf_base = NULL; 73 74 while (1) { 75 76 /* !!! rp_pa will need to be _gpa on UV. 77 * ??? So do we save it into the architecture specific parts 78 * ??? of the xpc_partition structure? Do we rename this 79 * ??? function or have two versions? Rename rp_pa for UV to 80 * ??? rp_gpa? 81 */ 82 ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, 83 &len); 84 85 dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " 86 "address=0x%016lx, len=0x%016lx\n", ret, 87 (unsigned long)cookie, rp_pa, len); 88 89 if (ret != xpNeedMoreInfo) 90 break; 91 92 /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ 93 if (L1_CACHE_ALIGN(len) > buf_len) { 94 kfree(buf_base); 95 buf_len = L1_CACHE_ALIGN(len); 96 buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, 97 &buf_base); 98 if (buf_base == NULL) { 99 dev_err(xpc_part, "unable to kmalloc " 100 "len=0x%016lx\n", buf_len); 101 ret = xpNoMemory; 102 break; 103 } 104 } 105 106 ret = xp_remote_memcpy(xp_pa(buf), rp_pa, buf_len); 107 if (ret != xpSuccess) { 108 dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); 109 break; 110 } 111 } 112 113 kfree(buf_base); 114 115 if (ret != xpSuccess) 116 rp_pa = 0; 117 118 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); 119 return rp_pa; 120 } 121 122 /* 123 * Fill the partition reserved page with the information needed by 124 * other partitions to discover we are alive and establish initial 125 * communications. 126 */ 127 int 128 xpc_setup_rsvd_page(void) 129 { 130 int ret; 131 struct xpc_rsvd_page *rp; 132 unsigned long rp_pa; 133 unsigned long new_ts_jiffies; 134 135 /* get the local reserved page's address */ 136 137 preempt_disable(); 138 rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); 139 preempt_enable(); 140 if (rp_pa == 0) { 141 dev_err(xpc_part, "SAL failed to locate the reserved page\n"); 142 return -ESRCH; 143 } 144 rp = (struct xpc_rsvd_page *)__va(rp_pa); 145 146 if (rp->SAL_version < 3) { 147 /* SAL_versions < 3 had a SAL_partid defined as a u8 */ 148 rp->SAL_partid &= 0xff; 149 } 150 BUG_ON(rp->SAL_partid != xp_partition_id); 151 152 if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { 153 dev_err(xpc_part, "the reserved page's partid of %d is outside " 154 "supported range (< 0 || >= %d)\n", rp->SAL_partid, 155 xp_max_npartitions); 156 return -EINVAL; 157 } 158 159 rp->version = XPC_RP_VERSION; 160 rp->max_npartitions = xp_max_npartitions; 161 162 /* establish the actual sizes of the nasid masks */ 163 if (rp->SAL_version == 1) { 164 /* SAL_version 1 didn't set the nasids_size field */ 165 rp->SAL_nasids_size = 128; 166 } 167 xpc_nasid_mask_nbytes = rp->SAL_nasids_size; 168 xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * 169 BITS_PER_BYTE); 170 171 /* setup the pointers to the various items in the reserved page */ 172 xpc_part_nasids = XPC_RP_PART_NASIDS(rp); 173 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); 174 175 ret = xpc_setup_rsvd_page_sn(rp); 176 if (ret != 0) 177 return ret; 178 179 /* 180 * Set timestamp of when reserved page was setup by XPC. 181 * This signifies to the remote partition that our reserved 182 * page is initialized. 183 */ 184 new_ts_jiffies = jiffies; 185 if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) 186 new_ts_jiffies++; 187 rp->ts_jiffies = new_ts_jiffies; 188 189 xpc_rsvd_page = rp; 190 return 0; 191 } 192 193 void 194 xpc_teardown_rsvd_page(void) 195 { 196 /* a zero timestamp indicates our rsvd page is not initialized */ 197 xpc_rsvd_page->ts_jiffies = 0; 198 } 199 200 /* 201 * Get a copy of a portion of the remote partition's rsvd page. 202 * 203 * remote_rp points to a buffer that is cacheline aligned for BTE copies and 204 * is large enough to contain a copy of their reserved page header and 205 * part_nasids mask. 206 */ 207 enum xp_retval 208 xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, 209 struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) 210 { 211 int l; 212 enum xp_retval ret; 213 214 /* get the reserved page's physical address */ 215 216 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); 217 if (*remote_rp_pa == 0) 218 return xpNoRsvdPageAddr; 219 220 /* pull over the reserved page header and part_nasids mask */ 221 ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, 222 XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); 223 if (ret != xpSuccess) 224 return ret; 225 226 if (discovered_nasids != NULL) { 227 unsigned long *remote_part_nasids = 228 XPC_RP_PART_NASIDS(remote_rp); 229 230 for (l = 0; l < xpc_nasid_mask_nlongs; l++) 231 discovered_nasids[l] |= remote_part_nasids[l]; 232 } 233 234 /* zero timestamp indicates the reserved page has not been setup */ 235 if (remote_rp->ts_jiffies == 0) 236 return xpRsvdPageNotSet; 237 238 if (XPC_VERSION_MAJOR(remote_rp->version) != 239 XPC_VERSION_MAJOR(XPC_RP_VERSION)) { 240 return xpBadVersion; 241 } 242 243 /* check that both remote and local partids are valid for each side */ 244 if (remote_rp->SAL_partid < 0 || 245 remote_rp->SAL_partid >= xp_max_npartitions || 246 remote_rp->max_npartitions <= xp_partition_id) { 247 return xpInvalidPartid; 248 } 249 250 if (remote_rp->SAL_partid == xp_partition_id) 251 return xpLocalPartid; 252 253 return xpSuccess; 254 } 255 256 /* 257 * See if the other side has responded to a partition deactivate request 258 * from us. Though we requested the remote partition to deactivate with regard 259 * to us, we really only need to wait for the other side to disengage from us. 260 */ 261 int 262 xpc_partition_disengaged(struct xpc_partition *part) 263 { 264 short partid = XPC_PARTID(part); 265 int disengaged; 266 267 disengaged = !xpc_partition_engaged(partid); 268 if (part->disengage_timeout) { 269 if (!disengaged) { 270 if (time_is_after_jiffies(part->disengage_timeout)) { 271 /* timelimit hasn't been reached yet */ 272 return 0; 273 } 274 275 /* 276 * Other side hasn't responded to our deactivate 277 * request in a timely fashion, so assume it's dead. 278 */ 279 280 dev_info(xpc_part, "deactivate request to remote " 281 "partition %d timed out\n", partid); 282 xpc_disengage_timedout = 1; 283 xpc_assume_partition_disengaged(partid); 284 disengaged = 1; 285 } 286 part->disengage_timeout = 0; 287 288 /* cancel the timer function, provided it's not us */ 289 if (!in_interrupt()) 290 del_singleshot_timer_sync(&part->disengage_timer); 291 292 DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && 293 part->act_state != XPC_P_AS_INACTIVE); 294 if (part->act_state != XPC_P_AS_INACTIVE) 295 xpc_wakeup_channel_mgr(part); 296 297 xpc_cancel_partition_deactivation_request(part); 298 } 299 return disengaged; 300 } 301 302 /* 303 * Mark specified partition as active. 304 */ 305 enum xp_retval 306 xpc_mark_partition_active(struct xpc_partition *part) 307 { 308 unsigned long irq_flags; 309 enum xp_retval ret; 310 311 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); 312 313 spin_lock_irqsave(&part->act_lock, irq_flags); 314 if (part->act_state == XPC_P_AS_ACTIVATING) { 315 part->act_state = XPC_P_AS_ACTIVE; 316 ret = xpSuccess; 317 } else { 318 DBUG_ON(part->reason == xpSuccess); 319 ret = part->reason; 320 } 321 spin_unlock_irqrestore(&part->act_lock, irq_flags); 322 323 return ret; 324 } 325 326 /* 327 * Start the process of deactivating the specified partition. 328 */ 329 void 330 xpc_deactivate_partition(const int line, struct xpc_partition *part, 331 enum xp_retval reason) 332 { 333 unsigned long irq_flags; 334 335 spin_lock_irqsave(&part->act_lock, irq_flags); 336 337 if (part->act_state == XPC_P_AS_INACTIVE) { 338 XPC_SET_REASON(part, reason, line); 339 spin_unlock_irqrestore(&part->act_lock, irq_flags); 340 if (reason == xpReactivating) { 341 /* we interrupt ourselves to reactivate partition */ 342 xpc_request_partition_reactivation(part); 343 } 344 return; 345 } 346 if (part->act_state == XPC_P_AS_DEACTIVATING) { 347 if ((part->reason == xpUnloading && reason != xpUnloading) || 348 reason == xpReactivating) { 349 XPC_SET_REASON(part, reason, line); 350 } 351 spin_unlock_irqrestore(&part->act_lock, irq_flags); 352 return; 353 } 354 355 part->act_state = XPC_P_AS_DEACTIVATING; 356 XPC_SET_REASON(part, reason, line); 357 358 spin_unlock_irqrestore(&part->act_lock, irq_flags); 359 360 /* ask remote partition to deactivate with regard to us */ 361 xpc_request_partition_deactivation(part); 362 363 /* set a timelimit on the disengage phase of the deactivation request */ 364 part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); 365 part->disengage_timer.expires = part->disengage_timeout; 366 add_timer(&part->disengage_timer); 367 368 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", 369 XPC_PARTID(part), reason); 370 371 xpc_partition_going_down(part, reason); 372 } 373 374 /* 375 * Mark specified partition as inactive. 376 */ 377 void 378 xpc_mark_partition_inactive(struct xpc_partition *part) 379 { 380 unsigned long irq_flags; 381 382 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", 383 XPC_PARTID(part)); 384 385 spin_lock_irqsave(&part->act_lock, irq_flags); 386 part->act_state = XPC_P_AS_INACTIVE; 387 spin_unlock_irqrestore(&part->act_lock, irq_flags); 388 part->remote_rp_pa = 0; 389 } 390 391 /* 392 * SAL has provided a partition and machine mask. The partition mask 393 * contains a bit for each even nasid in our partition. The machine 394 * mask contains a bit for each even nasid in the entire machine. 395 * 396 * Using those two bit arrays, we can determine which nasids are 397 * known in the machine. Each should also have a reserved page 398 * initialized if they are available for partitioning. 399 */ 400 void 401 xpc_discovery(void) 402 { 403 void *remote_rp_base; 404 struct xpc_rsvd_page *remote_rp; 405 unsigned long remote_rp_pa; 406 int region; 407 int region_size; 408 int max_regions; 409 int nasid; 410 struct xpc_rsvd_page *rp; 411 unsigned long *discovered_nasids; 412 enum xp_retval ret; 413 414 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + 415 xpc_nasid_mask_nbytes, 416 GFP_KERNEL, &remote_rp_base); 417 if (remote_rp == NULL) 418 return; 419 420 discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs, 421 GFP_KERNEL); 422 if (discovered_nasids == NULL) { 423 kfree(remote_rp_base); 424 return; 425 } 426 427 rp = (struct xpc_rsvd_page *)xpc_rsvd_page; 428 429 /* 430 * The term 'region' in this context refers to the minimum number of 431 * nodes that can comprise an access protection grouping. The access 432 * protection is in regards to memory, IOI and IPI. 433 */ 434 max_regions = 64; 435 region_size = xp_region_size; 436 437 switch (region_size) { 438 case 128: 439 max_regions *= 2; 440 case 64: 441 max_regions *= 2; 442 case 32: 443 max_regions *= 2; 444 region_size = 16; 445 DBUG_ON(!is_shub2()); 446 } 447 448 for (region = 0; region < max_regions; region++) { 449 450 if (xpc_exiting) 451 break; 452 453 dev_dbg(xpc_part, "searching region %d\n", region); 454 455 for (nasid = (region * region_size * 2); 456 nasid < ((region + 1) * region_size * 2); nasid += 2) { 457 458 if (xpc_exiting) 459 break; 460 461 dev_dbg(xpc_part, "checking nasid %d\n", nasid); 462 463 if (test_bit(nasid / 2, xpc_part_nasids)) { 464 dev_dbg(xpc_part, "PROM indicates Nasid %d is " 465 "part of the local partition; skipping " 466 "region\n", nasid); 467 break; 468 } 469 470 if (!(test_bit(nasid / 2, xpc_mach_nasids))) { 471 dev_dbg(xpc_part, "PROM indicates Nasid %d was " 472 "not on Numa-Link network at reset\n", 473 nasid); 474 continue; 475 } 476 477 if (test_bit(nasid / 2, discovered_nasids)) { 478 dev_dbg(xpc_part, "Nasid %d is part of a " 479 "partition which was previously " 480 "discovered\n", nasid); 481 continue; 482 } 483 484 /* pull over the rsvd page header & part_nasids mask */ 485 486 ret = xpc_get_remote_rp(nasid, discovered_nasids, 487 remote_rp, &remote_rp_pa); 488 if (ret != xpSuccess) { 489 dev_dbg(xpc_part, "unable to get reserved page " 490 "from nasid %d, reason=%d\n", nasid, 491 ret); 492 493 if (ret == xpLocalPartid) 494 break; 495 496 continue; 497 } 498 499 xpc_request_partition_activation(remote_rp, 500 remote_rp_pa, nasid); 501 } 502 } 503 504 kfree(discovered_nasids); 505 kfree(remote_rp_base); 506 } 507 508 /* 509 * Given a partid, get the nasids owned by that partition from the 510 * remote partition's reserved page. 511 */ 512 enum xp_retval 513 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) 514 { 515 struct xpc_partition *part; 516 unsigned long part_nasid_pa; 517 518 part = &xpc_partitions[partid]; 519 if (part->remote_rp_pa == 0) 520 return xpPartitionDown; 521 522 memset(nasid_mask, 0, xpc_nasid_mask_nbytes); 523 524 part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); 525 526 return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, 527 xpc_nasid_mask_nbytes); 528 } 529