1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 7 */ 8 9 /* 10 * Cross Partition Communication (XPC) partition support. 11 * 12 * This is the part of XPC that detects the presence/absence of 13 * other partitions. It provides a heartbeat and monitors the 14 * heartbeats of other partitions. 15 * 16 */ 17 18 #include <linux/kernel.h> 19 #include <linux/sysctl.h> 20 #include <linux/cache.h> 21 #include <linux/mmzone.h> 22 #include <linux/nodemask.h> 23 #include <asm/uncached.h> 24 #include <asm/sn/bte.h> 25 #include <asm/sn/intr.h> 26 #include <asm/sn/sn_sal.h> 27 #include <asm/sn/nodepda.h> 28 #include <asm/sn/addrs.h> 29 #include "xpc.h" 30 31 /* XPC is exiting flag */ 32 int xpc_exiting; 33 34 /* SH_IPI_ACCESS shub register value on startup */ 35 static u64 xpc_sh1_IPI_access; 36 static u64 xpc_sh2_IPI_access0; 37 static u64 xpc_sh2_IPI_access1; 38 static u64 xpc_sh2_IPI_access2; 39 static u64 xpc_sh2_IPI_access3; 40 41 /* original protection values for each node */ 42 u64 xpc_prot_vec[MAX_NUMNODES]; 43 44 /* this partition's reserved page pointers */ 45 struct xpc_rsvd_page *xpc_rsvd_page; 46 static u64 *xpc_part_nasids; 47 static u64 *xpc_mach_nasids; 48 struct xpc_vars *xpc_vars; 49 struct xpc_vars_part *xpc_vars_part; 50 51 static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */ 52 static int xp_nasid_mask_words; /* actual size in words of nasid mask */ 53 54 /* 55 * For performance reasons, each entry of xpc_partitions[] is cacheline 56 * aligned. And xpc_partitions[] is padded with an additional entry at the 57 * end so that the last legitimate entry doesn't share its cacheline with 58 * another variable. 59 */ 60 struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1]; 61 62 /* 63 * Generic buffer used to store a local copy of portions of a remote 64 * partition's reserved page (either its header and part_nasids mask, 65 * or its vars). 66 */ 67 char *xpc_remote_copy_buffer; 68 void *xpc_remote_copy_buffer_base; 69 70 /* 71 * Guarantee that the kmalloc'd memory is cacheline aligned. 72 */ 73 void * 74 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) 75 { 76 /* see if kmalloc will give us cachline aligned memory by default */ 77 *base = kmalloc(size, flags); 78 if (*base == NULL) 79 return NULL; 80 81 if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) 82 return *base; 83 84 kfree(*base); 85 86 /* nope, we'll have to do it ourselves */ 87 *base = kmalloc(size + L1_CACHE_BYTES, flags); 88 if (*base == NULL) 89 return NULL; 90 91 return (void *)L1_CACHE_ALIGN((u64)*base); 92 } 93 94 /* 95 * Given a nasid, get the physical address of the partition's reserved page 96 * for that nasid. This function returns 0 on any error. 97 */ 98 static u64 99 xpc_get_rsvd_page_pa(int nasid) 100 { 101 bte_result_t bte_res; 102 s64 status; 103 u64 cookie = 0; 104 u64 rp_pa = nasid; /* seed with nasid */ 105 u64 len = 0; 106 u64 buf = buf; 107 u64 buf_len = 0; 108 void *buf_base = NULL; 109 110 while (1) { 111 112 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, 113 &len); 114 115 dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" 116 "0x%016lx, address=0x%016lx, len=0x%016lx\n", 117 status, cookie, rp_pa, len); 118 119 if (status != SALRET_MORE_PASSES) 120 break; 121 122 if (L1_CACHE_ALIGN(len) > buf_len) { 123 kfree(buf_base); 124 buf_len = L1_CACHE_ALIGN(len); 125 buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len, 126 GFP_KERNEL, 127 &buf_base); 128 if (buf_base == NULL) { 129 dev_err(xpc_part, "unable to kmalloc " 130 "len=0x%016lx\n", buf_len); 131 status = SALRET_ERROR; 132 break; 133 } 134 } 135 136 bte_res = xp_bte_copy(rp_pa, buf, buf_len, 137 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 138 if (bte_res != BTE_SUCCESS) { 139 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); 140 status = SALRET_ERROR; 141 break; 142 } 143 } 144 145 kfree(buf_base); 146 147 if (status != SALRET_OK) 148 rp_pa = 0; 149 150 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); 151 return rp_pa; 152 } 153 154 /* 155 * Fill the partition reserved page with the information needed by 156 * other partitions to discover we are alive and establish initial 157 * communications. 158 */ 159 struct xpc_rsvd_page * 160 xpc_rsvd_page_init(void) 161 { 162 struct xpc_rsvd_page *rp; 163 AMO_t *amos_page; 164 u64 rp_pa, nasid_array = 0; 165 int i, ret; 166 167 /* get the local reserved page's address */ 168 169 preempt_disable(); 170 rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); 171 preempt_enable(); 172 if (rp_pa == 0) { 173 dev_err(xpc_part, "SAL failed to locate the reserved page\n"); 174 return NULL; 175 } 176 rp = (struct xpc_rsvd_page *)__va(rp_pa); 177 178 if (rp->partid != sn_partition_id) { 179 dev_err(xpc_part, "the reserved page's partid of %d should be " 180 "%d\n", rp->partid, sn_partition_id); 181 return NULL; 182 } 183 184 rp->version = XPC_RP_VERSION; 185 186 /* establish the actual sizes of the nasid masks */ 187 if (rp->SAL_version == 1) { 188 /* SAL_version 1 didn't set the nasids_size field */ 189 rp->nasids_size = 128; 190 } 191 xp_nasid_mask_bytes = rp->nasids_size; 192 xp_nasid_mask_words = xp_nasid_mask_bytes / 8; 193 194 /* setup the pointers to the various items in the reserved page */ 195 xpc_part_nasids = XPC_RP_PART_NASIDS(rp); 196 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); 197 xpc_vars = XPC_RP_VARS(rp); 198 xpc_vars_part = XPC_RP_VARS_PART(rp); 199 200 /* 201 * Before clearing xpc_vars, see if a page of AMOs had been previously 202 * allocated. If not we'll need to allocate one and set permissions 203 * so that cross-partition AMOs are allowed. 204 * 205 * The allocated AMO page needs MCA reporting to remain disabled after 206 * XPC has unloaded. To make this work, we keep a copy of the pointer 207 * to this page (i.e., amos_page) in the struct xpc_vars structure, 208 * which is pointed to by the reserved page, and re-use that saved copy 209 * on subsequent loads of XPC. This AMO page is never freed, and its 210 * memory protections are never restricted. 211 */ 212 amos_page = xpc_vars->amos_page; 213 if (amos_page == NULL) { 214 amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1)); 215 if (amos_page == NULL) { 216 dev_err(xpc_part, "can't allocate page of AMOs\n"); 217 return NULL; 218 } 219 220 /* 221 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems 222 * when xpc_allow_IPI_ops() is called via xpc_hb_init(). 223 */ 224 if (!enable_shub_wars_1_1()) { 225 ret = sn_change_memprotect(ia64_tpa((u64)amos_page), 226 PAGE_SIZE, 227 SN_MEMPROT_ACCESS_CLASS_1, 228 &nasid_array); 229 if (ret != 0) { 230 dev_err(xpc_part, "can't change memory " 231 "protections\n"); 232 uncached_free_page(__IA64_UNCACHED_OFFSET | 233 TO_PHYS((u64)amos_page), 1); 234 return NULL; 235 } 236 } 237 } else if (!IS_AMO_ADDRESS((u64)amos_page)) { 238 /* 239 * EFI's XPBOOT can also set amos_page in the reserved page, 240 * but it happens to leave it as an uncached physical address 241 * and we need it to be an uncached virtual, so we'll have to 242 * convert it. 243 */ 244 if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) { 245 dev_err(xpc_part, "previously used amos_page address " 246 "is bad = 0x%p\n", (void *)amos_page); 247 return NULL; 248 } 249 amos_page = (AMO_t *)TO_AMO((u64)amos_page); 250 } 251 252 /* clear xpc_vars */ 253 memset(xpc_vars, 0, sizeof(struct xpc_vars)); 254 255 xpc_vars->version = XPC_V_VERSION; 256 xpc_vars->act_nasid = cpuid_to_nasid(0); 257 xpc_vars->act_phys_cpuid = cpu_physical_id(0); 258 xpc_vars->vars_part_pa = __pa(xpc_vars_part); 259 xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page); 260 xpc_vars->amos_page = amos_page; /* save for next load of XPC */ 261 262 /* clear xpc_vars_part */ 263 memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) * 264 XP_MAX_PARTITIONS); 265 266 /* initialize the activate IRQ related AMO variables */ 267 for (i = 0; i < xp_nasid_mask_words; i++) 268 (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); 269 270 /* initialize the engaged remote partitions related AMO variables */ 271 (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); 272 (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); 273 274 /* timestamp of when reserved page was setup by XPC */ 275 rp->stamp = CURRENT_TIME; 276 277 /* 278 * This signifies to the remote partition that our reserved 279 * page is initialized. 280 */ 281 rp->vars_pa = __pa(xpc_vars); 282 283 return rp; 284 } 285 286 /* 287 * Change protections to allow IPI operations (and AMO operations on 288 * Shub 1.1 systems). 289 */ 290 void 291 xpc_allow_IPI_ops(void) 292 { 293 int node; 294 int nasid; 295 296 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */ 297 298 if (is_shub2()) { 299 xpc_sh2_IPI_access0 = 300 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); 301 xpc_sh2_IPI_access1 = 302 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); 303 xpc_sh2_IPI_access2 = 304 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); 305 xpc_sh2_IPI_access3 = 306 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); 307 308 for_each_online_node(node) { 309 nasid = cnodeid_to_nasid(node); 310 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), 311 -1UL); 312 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), 313 -1UL); 314 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), 315 -1UL); 316 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), 317 -1UL); 318 } 319 320 } else { 321 xpc_sh1_IPI_access = 322 (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); 323 324 for_each_online_node(node) { 325 nasid = cnodeid_to_nasid(node); 326 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), 327 -1UL); 328 329 /* 330 * Since the BIST collides with memory operations on 331 * SHUB 1.1 sn_change_memprotect() cannot be used. 332 */ 333 if (enable_shub_wars_1_1()) { 334 /* open up everything */ 335 xpc_prot_vec[node] = (u64)HUB_L((u64 *) 336 GLOBAL_MMR_ADDR 337 (nasid, 338 SH1_MD_DQLP_MMR_DIR_PRIVEC0)); 339 HUB_S((u64 *) 340 GLOBAL_MMR_ADDR(nasid, 341 SH1_MD_DQLP_MMR_DIR_PRIVEC0), 342 -1UL); 343 HUB_S((u64 *) 344 GLOBAL_MMR_ADDR(nasid, 345 SH1_MD_DQRP_MMR_DIR_PRIVEC0), 346 -1UL); 347 } 348 } 349 } 350 } 351 352 /* 353 * Restrict protections to disallow IPI operations (and AMO operations on 354 * Shub 1.1 systems). 355 */ 356 void 357 xpc_restrict_IPI_ops(void) 358 { 359 int node; 360 int nasid; 361 362 /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */ 363 364 if (is_shub2()) { 365 366 for_each_online_node(node) { 367 nasid = cnodeid_to_nasid(node); 368 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), 369 xpc_sh2_IPI_access0); 370 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), 371 xpc_sh2_IPI_access1); 372 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), 373 xpc_sh2_IPI_access2); 374 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), 375 xpc_sh2_IPI_access3); 376 } 377 378 } else { 379 380 for_each_online_node(node) { 381 nasid = cnodeid_to_nasid(node); 382 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), 383 xpc_sh1_IPI_access); 384 385 if (enable_shub_wars_1_1()) { 386 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, 387 SH1_MD_DQLP_MMR_DIR_PRIVEC0), 388 xpc_prot_vec[node]); 389 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, 390 SH1_MD_DQRP_MMR_DIR_PRIVEC0), 391 xpc_prot_vec[node]); 392 } 393 } 394 } 395 } 396 397 /* 398 * At periodic intervals, scan through all active partitions and ensure 399 * their heartbeat is still active. If not, the partition is deactivated. 400 */ 401 void 402 xpc_check_remote_hb(void) 403 { 404 struct xpc_vars *remote_vars; 405 struct xpc_partition *part; 406 short partid; 407 bte_result_t bres; 408 409 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer; 410 411 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 412 413 if (xpc_exiting) 414 break; 415 416 if (partid == sn_partition_id) 417 continue; 418 419 part = &xpc_partitions[partid]; 420 421 if (part->act_state == XPC_P_INACTIVE || 422 part->act_state == XPC_P_DEACTIVATING) { 423 continue; 424 } 425 426 /* pull the remote_hb cache line */ 427 bres = xp_bte_copy(part->remote_vars_pa, 428 (u64)remote_vars, 429 XPC_RP_VARS_SIZE, 430 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 431 if (bres != BTE_SUCCESS) { 432 XPC_DEACTIVATE_PARTITION(part, 433 xpc_map_bte_errors(bres)); 434 continue; 435 } 436 437 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" 438 " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", 439 partid, remote_vars->heartbeat, part->last_heartbeat, 440 remote_vars->heartbeat_offline, 441 remote_vars->heartbeating_to_mask); 442 443 if (((remote_vars->heartbeat == part->last_heartbeat) && 444 (remote_vars->heartbeat_offline == 0)) || 445 !xpc_hb_allowed(sn_partition_id, remote_vars)) { 446 447 XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat); 448 continue; 449 } 450 451 part->last_heartbeat = remote_vars->heartbeat; 452 } 453 } 454 455 /* 456 * Get a copy of a portion of the remote partition's rsvd page. 457 * 458 * remote_rp points to a buffer that is cacheline aligned for BTE copies and 459 * is large enough to contain a copy of their reserved page header and 460 * part_nasids mask. 461 */ 462 static enum xp_retval 463 xpc_get_remote_rp(int nasid, u64 *discovered_nasids, 464 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa) 465 { 466 int bres, i; 467 468 /* get the reserved page's physical address */ 469 470 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); 471 if (*remote_rp_pa == 0) 472 return xpNoRsvdPageAddr; 473 474 /* pull over the reserved page header and part_nasids mask */ 475 bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp, 476 XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes, 477 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 478 if (bres != BTE_SUCCESS) 479 return xpc_map_bte_errors(bres); 480 481 if (discovered_nasids != NULL) { 482 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp); 483 484 for (i = 0; i < xp_nasid_mask_words; i++) 485 discovered_nasids[i] |= remote_part_nasids[i]; 486 } 487 488 /* check that the partid is for another partition */ 489 490 if (remote_rp->partid < 1 || 491 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { 492 return xpInvalidPartid; 493 } 494 495 if (remote_rp->partid == sn_partition_id) 496 return xpLocalPartid; 497 498 if (XPC_VERSION_MAJOR(remote_rp->version) != 499 XPC_VERSION_MAJOR(XPC_RP_VERSION)) { 500 return xpBadVersion; 501 } 502 503 return xpSuccess; 504 } 505 506 /* 507 * Get a copy of the remote partition's XPC variables from the reserved page. 508 * 509 * remote_vars points to a buffer that is cacheline aligned for BTE copies and 510 * assumed to be of size XPC_RP_VARS_SIZE. 511 */ 512 static enum xp_retval 513 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) 514 { 515 int bres; 516 517 if (remote_vars_pa == 0) 518 return xpVarsNotSet; 519 520 /* pull over the cross partition variables */ 521 bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE, 522 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 523 if (bres != BTE_SUCCESS) 524 return xpc_map_bte_errors(bres); 525 526 if (XPC_VERSION_MAJOR(remote_vars->version) != 527 XPC_VERSION_MAJOR(XPC_V_VERSION)) { 528 return xpBadVersion; 529 } 530 531 return xpSuccess; 532 } 533 534 /* 535 * Update the remote partition's info. 536 */ 537 static void 538 xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, 539 struct timespec *remote_rp_stamp, u64 remote_rp_pa, 540 u64 remote_vars_pa, struct xpc_vars *remote_vars) 541 { 542 part->remote_rp_version = remote_rp_version; 543 dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", 544 part->remote_rp_version); 545 546 part->remote_rp_stamp = *remote_rp_stamp; 547 dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n", 548 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec); 549 550 part->remote_rp_pa = remote_rp_pa; 551 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); 552 553 part->remote_vars_pa = remote_vars_pa; 554 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", 555 part->remote_vars_pa); 556 557 part->last_heartbeat = remote_vars->heartbeat; 558 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", 559 part->last_heartbeat); 560 561 part->remote_vars_part_pa = remote_vars->vars_part_pa; 562 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", 563 part->remote_vars_part_pa); 564 565 part->remote_act_nasid = remote_vars->act_nasid; 566 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", 567 part->remote_act_nasid); 568 569 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; 570 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", 571 part->remote_act_phys_cpuid); 572 573 part->remote_amos_page_pa = remote_vars->amos_page_pa; 574 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", 575 part->remote_amos_page_pa); 576 577 part->remote_vars_version = remote_vars->version; 578 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", 579 part->remote_vars_version); 580 } 581 582 /* 583 * Prior code has determined the nasid which generated an IPI. Inspect 584 * that nasid to determine if its partition needs to be activated or 585 * deactivated. 586 * 587 * A partition is consider "awaiting activation" if our partition 588 * flags indicate it is not active and it has a heartbeat. A 589 * partition is considered "awaiting deactivation" if our partition 590 * flags indicate it is active but it has no heartbeat or it is not 591 * sending its heartbeat to us. 592 * 593 * To determine the heartbeat, the remote nasid must have a properly 594 * initialized reserved page. 595 */ 596 static void 597 xpc_identify_act_IRQ_req(int nasid) 598 { 599 struct xpc_rsvd_page *remote_rp; 600 struct xpc_vars *remote_vars; 601 u64 remote_rp_pa; 602 u64 remote_vars_pa; 603 int remote_rp_version; 604 int reactivate = 0; 605 int stamp_diff; 606 struct timespec remote_rp_stamp = { 0, 0 }; 607 short partid; 608 struct xpc_partition *part; 609 enum xp_retval ret; 610 611 /* pull over the reserved page structure */ 612 613 remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer; 614 615 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); 616 if (ret != xpSuccess) { 617 dev_warn(xpc_part, "unable to get reserved page from nasid %d, " 618 "which sent interrupt, reason=%d\n", nasid, ret); 619 return; 620 } 621 622 remote_vars_pa = remote_rp->vars_pa; 623 remote_rp_version = remote_rp->version; 624 if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) 625 remote_rp_stamp = remote_rp->stamp; 626 627 partid = remote_rp->partid; 628 part = &xpc_partitions[partid]; 629 630 /* pull over the cross partition variables */ 631 632 remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer; 633 634 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); 635 if (ret != xpSuccess) { 636 637 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " 638 "which sent interrupt, reason=%d\n", nasid, ret); 639 640 XPC_DEACTIVATE_PARTITION(part, ret); 641 return; 642 } 643 644 part->act_IRQ_rcvd++; 645 646 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " 647 "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd, 648 remote_vars->heartbeat, remote_vars->heartbeating_to_mask); 649 650 if (xpc_partition_disengaged(part) && 651 part->act_state == XPC_P_INACTIVE) { 652 653 xpc_update_partition_info(part, remote_rp_version, 654 &remote_rp_stamp, remote_rp_pa, 655 remote_vars_pa, remote_vars); 656 657 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { 658 if (xpc_partition_disengage_requested(1UL << partid)) { 659 /* 660 * Other side is waiting on us to disengage, 661 * even though we already have. 662 */ 663 return; 664 } 665 } else { 666 /* other side doesn't support disengage requests */ 667 xpc_clear_partition_disengage_request(1UL << partid); 668 } 669 670 xpc_activate_partition(part); 671 return; 672 } 673 674 DBUG_ON(part->remote_rp_version == 0); 675 DBUG_ON(part->remote_vars_version == 0); 676 677 if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) { 678 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part-> 679 remote_vars_version)); 680 681 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { 682 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> 683 version)); 684 /* see if the other side rebooted */ 685 if (part->remote_amos_page_pa == 686 remote_vars->amos_page_pa && 687 xpc_hb_allowed(sn_partition_id, remote_vars)) { 688 /* doesn't look that way, so ignore the IPI */ 689 return; 690 } 691 } 692 693 /* 694 * Other side rebooted and previous XPC didn't support the 695 * disengage request, so we don't need to do anything special. 696 */ 697 698 xpc_update_partition_info(part, remote_rp_version, 699 &remote_rp_stamp, remote_rp_pa, 700 remote_vars_pa, remote_vars); 701 part->reactivate_nasid = nasid; 702 XPC_DEACTIVATE_PARTITION(part, xpReactivating); 703 return; 704 } 705 706 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)); 707 708 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) { 709 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); 710 711 /* 712 * Other side rebooted and previous XPC did support the 713 * disengage request, but the new one doesn't. 714 */ 715 716 xpc_clear_partition_engaged(1UL << partid); 717 xpc_clear_partition_disengage_request(1UL << partid); 718 719 xpc_update_partition_info(part, remote_rp_version, 720 &remote_rp_stamp, remote_rp_pa, 721 remote_vars_pa, remote_vars); 722 reactivate = 1; 723 724 } else { 725 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version)); 726 727 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp, 728 &remote_rp_stamp); 729 if (stamp_diff != 0) { 730 DBUG_ON(stamp_diff >= 0); 731 732 /* 733 * Other side rebooted and the previous XPC did support 734 * the disengage request, as does the new one. 735 */ 736 737 DBUG_ON(xpc_partition_engaged(1UL << partid)); 738 DBUG_ON(xpc_partition_disengage_requested(1UL << 739 partid)); 740 741 xpc_update_partition_info(part, remote_rp_version, 742 &remote_rp_stamp, 743 remote_rp_pa, remote_vars_pa, 744 remote_vars); 745 reactivate = 1; 746 } 747 } 748 749 if (part->disengage_request_timeout > 0 && 750 !xpc_partition_disengaged(part)) { 751 /* still waiting on other side to disengage from us */ 752 return; 753 } 754 755 if (reactivate) { 756 part->reactivate_nasid = nasid; 757 XPC_DEACTIVATE_PARTITION(part, xpReactivating); 758 759 } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) && 760 xpc_partition_disengage_requested(1UL << partid)) { 761 XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); 762 } 763 } 764 765 /* 766 * Loop through the activation AMO variables and process any bits 767 * which are set. Each bit indicates a nasid sending a partition 768 * activation or deactivation request. 769 * 770 * Return #of IRQs detected. 771 */ 772 int 773 xpc_identify_act_IRQ_sender(void) 774 { 775 int word, bit; 776 u64 nasid_mask; 777 u64 nasid; /* remote nasid */ 778 int n_IRQs_detected = 0; 779 AMO_t *act_amos; 780 781 act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS; 782 783 /* scan through act AMO variable looking for non-zero entries */ 784 for (word = 0; word < xp_nasid_mask_words; word++) { 785 786 if (xpc_exiting) 787 break; 788 789 nasid_mask = xpc_IPI_receive(&act_amos[word]); 790 if (nasid_mask == 0) { 791 /* no IRQs from nasids in this variable */ 792 continue; 793 } 794 795 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, 796 nasid_mask); 797 798 /* 799 * If this nasid has been added to the machine since 800 * our partition was reset, this will retain the 801 * remote nasid in our reserved pages machine mask. 802 * This is used in the event of module reload. 803 */ 804 xpc_mach_nasids[word] |= nasid_mask; 805 806 /* locate the nasid(s) which sent interrupts */ 807 808 for (bit = 0; bit < (8 * sizeof(u64)); bit++) { 809 if (nasid_mask & (1UL << bit)) { 810 n_IRQs_detected++; 811 nasid = XPC_NASID_FROM_W_B(word, bit); 812 dev_dbg(xpc_part, "interrupt from nasid %ld\n", 813 nasid); 814 xpc_identify_act_IRQ_req(nasid); 815 } 816 } 817 } 818 return n_IRQs_detected; 819 } 820 821 /* 822 * See if the other side has responded to a partition disengage request 823 * from us. 824 */ 825 int 826 xpc_partition_disengaged(struct xpc_partition *part) 827 { 828 short partid = XPC_PARTID(part); 829 int disengaged; 830 831 disengaged = (xpc_partition_engaged(1UL << partid) == 0); 832 if (part->disengage_request_timeout) { 833 if (!disengaged) { 834 if (time_before(jiffies, 835 part->disengage_request_timeout)) { 836 /* timelimit hasn't been reached yet */ 837 return 0; 838 } 839 840 /* 841 * Other side hasn't responded to our disengage 842 * request in a timely fashion, so assume it's dead. 843 */ 844 845 dev_info(xpc_part, "disengage from remote partition %d " 846 "timed out\n", partid); 847 xpc_disengage_request_timedout = 1; 848 xpc_clear_partition_engaged(1UL << partid); 849 disengaged = 1; 850 } 851 part->disengage_request_timeout = 0; 852 853 /* cancel the timer function, provided it's not us */ 854 if (!in_interrupt()) { 855 del_singleshot_timer_sync(&part-> 856 disengage_request_timer); 857 } 858 859 DBUG_ON(part->act_state != XPC_P_DEACTIVATING && 860 part->act_state != XPC_P_INACTIVE); 861 if (part->act_state != XPC_P_INACTIVE) 862 xpc_wakeup_channel_mgr(part); 863 864 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) 865 xpc_cancel_partition_disengage_request(part); 866 } 867 return disengaged; 868 } 869 870 /* 871 * Mark specified partition as active. 872 */ 873 enum xp_retval 874 xpc_mark_partition_active(struct xpc_partition *part) 875 { 876 unsigned long irq_flags; 877 enum xp_retval ret; 878 879 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); 880 881 spin_lock_irqsave(&part->act_lock, irq_flags); 882 if (part->act_state == XPC_P_ACTIVATING) { 883 part->act_state = XPC_P_ACTIVE; 884 ret = xpSuccess; 885 } else { 886 DBUG_ON(part->reason == xpSuccess); 887 ret = part->reason; 888 } 889 spin_unlock_irqrestore(&part->act_lock, irq_flags); 890 891 return ret; 892 } 893 894 /* 895 * Notify XPC that the partition is down. 896 */ 897 void 898 xpc_deactivate_partition(const int line, struct xpc_partition *part, 899 enum xp_retval reason) 900 { 901 unsigned long irq_flags; 902 903 spin_lock_irqsave(&part->act_lock, irq_flags); 904 905 if (part->act_state == XPC_P_INACTIVE) { 906 XPC_SET_REASON(part, reason, line); 907 spin_unlock_irqrestore(&part->act_lock, irq_flags); 908 if (reason == xpReactivating) { 909 /* we interrupt ourselves to reactivate partition */ 910 xpc_IPI_send_reactivate(part); 911 } 912 return; 913 } 914 if (part->act_state == XPC_P_DEACTIVATING) { 915 if ((part->reason == xpUnloading && reason != xpUnloading) || 916 reason == xpReactivating) { 917 XPC_SET_REASON(part, reason, line); 918 } 919 spin_unlock_irqrestore(&part->act_lock, irq_flags); 920 return; 921 } 922 923 part->act_state = XPC_P_DEACTIVATING; 924 XPC_SET_REASON(part, reason, line); 925 926 spin_unlock_irqrestore(&part->act_lock, irq_flags); 927 928 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) { 929 xpc_request_partition_disengage(part); 930 xpc_IPI_send_disengage(part); 931 932 /* set a timelimit on the disengage request */ 933 part->disengage_request_timeout = jiffies + 934 (xpc_disengage_request_timelimit * HZ); 935 part->disengage_request_timer.expires = 936 part->disengage_request_timeout; 937 add_timer(&part->disengage_request_timer); 938 } 939 940 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", 941 XPC_PARTID(part), reason); 942 943 xpc_partition_going_down(part, reason); 944 } 945 946 /* 947 * Mark specified partition as inactive. 948 */ 949 void 950 xpc_mark_partition_inactive(struct xpc_partition *part) 951 { 952 unsigned long irq_flags; 953 954 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", 955 XPC_PARTID(part)); 956 957 spin_lock_irqsave(&part->act_lock, irq_flags); 958 part->act_state = XPC_P_INACTIVE; 959 spin_unlock_irqrestore(&part->act_lock, irq_flags); 960 part->remote_rp_pa = 0; 961 } 962 963 /* 964 * SAL has provided a partition and machine mask. The partition mask 965 * contains a bit for each even nasid in our partition. The machine 966 * mask contains a bit for each even nasid in the entire machine. 967 * 968 * Using those two bit arrays, we can determine which nasids are 969 * known in the machine. Each should also have a reserved page 970 * initialized if they are available for partitioning. 971 */ 972 void 973 xpc_discovery(void) 974 { 975 void *remote_rp_base; 976 struct xpc_rsvd_page *remote_rp; 977 struct xpc_vars *remote_vars; 978 u64 remote_rp_pa; 979 u64 remote_vars_pa; 980 int region; 981 int region_size; 982 int max_regions; 983 int nasid; 984 struct xpc_rsvd_page *rp; 985 short partid; 986 struct xpc_partition *part; 987 u64 *discovered_nasids; 988 enum xp_retval ret; 989 990 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + 991 xp_nasid_mask_bytes, 992 GFP_KERNEL, &remote_rp_base); 993 if (remote_rp == NULL) 994 return; 995 996 remote_vars = (struct xpc_vars *)remote_rp; 997 998 discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words, 999 GFP_KERNEL); 1000 if (discovered_nasids == NULL) { 1001 kfree(remote_rp_base); 1002 return; 1003 } 1004 1005 rp = (struct xpc_rsvd_page *)xpc_rsvd_page; 1006 1007 /* 1008 * The term 'region' in this context refers to the minimum number of 1009 * nodes that can comprise an access protection grouping. The access 1010 * protection is in regards to memory, IOI and IPI. 1011 */ 1012 max_regions = 64; 1013 region_size = sn_region_size; 1014 1015 switch (region_size) { 1016 case 128: 1017 max_regions *= 2; 1018 case 64: 1019 max_regions *= 2; 1020 case 32: 1021 max_regions *= 2; 1022 region_size = 16; 1023 DBUG_ON(!is_shub2()); 1024 } 1025 1026 for (region = 0; region < max_regions; region++) { 1027 1028 if (xpc_exiting) 1029 break; 1030 1031 dev_dbg(xpc_part, "searching region %d\n", region); 1032 1033 for (nasid = (region * region_size * 2); 1034 nasid < ((region + 1) * region_size * 2); nasid += 2) { 1035 1036 if (xpc_exiting) 1037 break; 1038 1039 dev_dbg(xpc_part, "checking nasid %d\n", nasid); 1040 1041 if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) { 1042 dev_dbg(xpc_part, "PROM indicates Nasid %d is " 1043 "part of the local partition; skipping " 1044 "region\n", nasid); 1045 break; 1046 } 1047 1048 if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) { 1049 dev_dbg(xpc_part, "PROM indicates Nasid %d was " 1050 "not on Numa-Link network at reset\n", 1051 nasid); 1052 continue; 1053 } 1054 1055 if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { 1056 dev_dbg(xpc_part, "Nasid %d is part of a " 1057 "partition which was previously " 1058 "discovered\n", nasid); 1059 continue; 1060 } 1061 1062 /* pull over the reserved page structure */ 1063 1064 ret = xpc_get_remote_rp(nasid, discovered_nasids, 1065 remote_rp, &remote_rp_pa); 1066 if (ret != xpSuccess) { 1067 dev_dbg(xpc_part, "unable to get reserved page " 1068 "from nasid %d, reason=%d\n", nasid, 1069 ret); 1070 1071 if (ret == xpLocalPartid) 1072 break; 1073 1074 continue; 1075 } 1076 1077 remote_vars_pa = remote_rp->vars_pa; 1078 1079 partid = remote_rp->partid; 1080 part = &xpc_partitions[partid]; 1081 1082 /* pull over the cross partition variables */ 1083 1084 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); 1085 if (ret != xpSuccess) { 1086 dev_dbg(xpc_part, "unable to get XPC variables " 1087 "from nasid %d, reason=%d\n", nasid, 1088 ret); 1089 1090 XPC_DEACTIVATE_PARTITION(part, ret); 1091 continue; 1092 } 1093 1094 if (part->act_state != XPC_P_INACTIVE) { 1095 dev_dbg(xpc_part, "partition %d on nasid %d is " 1096 "already activating\n", partid, nasid); 1097 break; 1098 } 1099 1100 /* 1101 * Register the remote partition's AMOs with SAL so it 1102 * can handle and cleanup errors within that address 1103 * range should the remote partition go down. We don't 1104 * unregister this range because it is difficult to 1105 * tell when outstanding writes to the remote partition 1106 * are finished and thus when it is thus safe to 1107 * unregister. This should not result in wasted space 1108 * in the SAL xp_addr_region table because we should 1109 * get the same page for remote_act_amos_pa after 1110 * module reloads and system reboots. 1111 */ 1112 if (sn_register_xp_addr_region 1113 (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) { 1114 dev_dbg(xpc_part, 1115 "partition %d failed to " 1116 "register xp_addr region 0x%016lx\n", 1117 partid, remote_vars->amos_page_pa); 1118 1119 XPC_SET_REASON(part, xpPhysAddrRegFailed, 1120 __LINE__); 1121 break; 1122 } 1123 1124 /* 1125 * The remote nasid is valid and available. 1126 * Send an interrupt to that nasid to notify 1127 * it that we are ready to begin activation. 1128 */ 1129 dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " 1130 "nasid %d, phys_cpuid 0x%x\n", 1131 remote_vars->amos_page_pa, 1132 remote_vars->act_nasid, 1133 remote_vars->act_phys_cpuid); 1134 1135 if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars-> 1136 version)) { 1137 part->remote_amos_page_pa = 1138 remote_vars->amos_page_pa; 1139 xpc_mark_partition_disengaged(part); 1140 xpc_cancel_partition_disengage_request(part); 1141 } 1142 xpc_IPI_send_activate(remote_vars); 1143 } 1144 } 1145 1146 kfree(discovered_nasids); 1147 kfree(remote_rp_base); 1148 } 1149 1150 /* 1151 * Given a partid, get the nasids owned by that partition from the 1152 * remote partition's reserved page. 1153 */ 1154 enum xp_retval 1155 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) 1156 { 1157 struct xpc_partition *part; 1158 u64 part_nasid_pa; 1159 int bte_res; 1160 1161 part = &xpc_partitions[partid]; 1162 if (part->remote_rp_pa == 0) 1163 return xpPartitionDown; 1164 1165 memset(nasid_mask, 0, XP_NASID_MASK_BYTES); 1166 1167 part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa); 1168 1169 bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask, 1170 xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), 1171 NULL); 1172 1173 return xpc_map_bte_errors(bte_res); 1174 } 1175