1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vas.h> 21 #include "vas.h" 22 23 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 24 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 25 /* The hypervisor allows one credit per window right now */ 26 #define DEF_WIN_CREDS 1 27 28 static struct vas_all_caps caps_all; 29 static bool copypaste_feat; 30 static struct hv_vas_cop_feat_caps hv_cop_caps; 31 32 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 33 static DEFINE_MUTEX(vas_pseries_mutex); 34 static bool migration_in_progress; 35 36 static long hcall_return_busy_check(long rc) 37 { 38 /* Check if we are stalled for some time */ 39 if (H_IS_LONG_BUSY(rc)) { 40 msleep(get_longbusy_msecs(rc)); 41 rc = H_BUSY; 42 } else if (rc == H_BUSY) { 43 cond_resched(); 44 } 45 46 return rc; 47 } 48 49 /* 50 * Allocate VAS window hcall 51 */ 52 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 53 u8 wintype, u16 credits) 54 { 55 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 56 long rc; 57 58 do { 59 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 60 credits, domain[0], domain[1], domain[2], 61 domain[3], domain[4], domain[5]); 62 63 rc = hcall_return_busy_check(rc); 64 } while (rc == H_BUSY); 65 66 if (rc == H_SUCCESS) { 67 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 68 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 69 return -ENOTSUPP; 70 } 71 win->vas_win.winid = retbuf[0]; 72 win->win_addr = retbuf[1]; 73 win->complete_irq = retbuf[2]; 74 win->fault_irq = retbuf[3]; 75 return 0; 76 } 77 78 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 79 rc, wintype, credits); 80 81 return -EIO; 82 } 83 84 /* 85 * Deallocate VAS window hcall. 86 */ 87 static int h_deallocate_vas_window(u64 winid) 88 { 89 long rc; 90 91 do { 92 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 93 94 rc = hcall_return_busy_check(rc); 95 } while (rc == H_BUSY); 96 97 if (rc == H_SUCCESS) 98 return 0; 99 100 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 101 rc, winid); 102 return -EIO; 103 } 104 105 /* 106 * Modify VAS window. 107 * After the window is opened with allocate window hcall, configure it 108 * with flags and LPAR PID before using. 109 */ 110 static int h_modify_vas_window(struct pseries_vas_window *win) 111 { 112 long rc; 113 114 /* 115 * AMR value is not supported in Linux VAS implementation. 116 * The hypervisor ignores it if 0 is passed. 117 */ 118 do { 119 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 120 win->vas_win.winid, win->pid, 0, 121 VAS_MOD_WIN_FLAGS, 0); 122 123 rc = hcall_return_busy_check(rc); 124 } while (rc == H_BUSY); 125 126 if (rc == H_SUCCESS) 127 return 0; 128 129 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 130 rc, win->vas_win.winid, win->pid); 131 return -EIO; 132 } 133 134 /* 135 * This hcall is used to determine the capabilities from the hypervisor. 136 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 137 * @query_type: If 0 is passed, the hypervisor returns the overall 138 * capabilities which provides all feature(s) that are 139 * available. Then query the hypervisor to get the 140 * corresponding capabilities for the specific feature. 141 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 142 * and VAS GZIP Default capabilities. 143 * H_QUERY_NX_CAPABILITIES provides NX GZIP 144 * capabilities. 145 * @result: Return buffer to save capabilities. 146 */ 147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 148 { 149 long rc; 150 151 rc = plpar_hcall_norets(hcall, query_type, result); 152 153 if (rc == H_SUCCESS) 154 return 0; 155 156 /* H_FUNCTION means HV does not support VAS so don't print an error */ 157 if (rc != H_FUNCTION) { 158 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 159 (hcall == H_QUERY_VAS_CAPABILITIES) ? 160 "H_QUERY_VAS_CAPABILITIES" : 161 "H_QUERY_NX_CAPABILITIES", 162 rc, query_type, result); 163 } 164 165 return -EIO; 166 } 167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 168 169 /* 170 * hcall to get fault CRB from the hypervisor. 171 */ 172 static int h_get_nx_fault(u32 winid, u64 buffer) 173 { 174 long rc; 175 176 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 177 178 if (rc == H_SUCCESS) 179 return 0; 180 181 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 182 rc, winid, buffer); 183 return -EIO; 184 185 } 186 187 /* 188 * Handle the fault interrupt. 189 * When the fault interrupt is received for each window, query the 190 * hypervisor to get the fault CRB on the specific fault. Then 191 * process the CRB by updating CSB or send signal if the user space 192 * CSB is invalid. 193 * Note: The hypervisor forwards an interrupt for each fault request. 194 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 195 */ 196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 197 { 198 struct pseries_vas_window *txwin = data; 199 struct coprocessor_request_block crb; 200 struct vas_user_win_ref *tsk_ref; 201 int rc; 202 203 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 204 if (!rc) { 205 tsk_ref = &txwin->vas_win.task_ref; 206 vas_dump_crb(&crb); 207 vas_update_csb(&crb, tsk_ref); 208 } 209 210 return IRQ_HANDLED; 211 } 212 213 /* 214 * Allocate window and setup IRQ mapping. 215 */ 216 static int allocate_setup_window(struct pseries_vas_window *txwin, 217 u64 *domain, u8 wintype) 218 { 219 int rc; 220 221 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 222 if (rc) 223 return rc; 224 /* 225 * On PowerVM, the hypervisor setup and forwards the fault 226 * interrupt per window. So the IRQ setup and fault handling 227 * will be done for each open window separately. 228 */ 229 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 230 if (!txwin->fault_virq) { 231 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 232 rc = -EINVAL; 233 goto out_win; 234 } 235 236 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 237 txwin->vas_win.winid); 238 if (!txwin->name) { 239 rc = -ENOMEM; 240 goto out_irq; 241 } 242 243 rc = request_threaded_irq(txwin->fault_virq, NULL, 244 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 245 txwin->name, txwin); 246 if (rc) { 247 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 248 txwin->vas_win.winid, txwin->fault_virq, rc); 249 goto out_free; 250 } 251 252 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 253 254 return 0; 255 out_free: 256 kfree(txwin->name); 257 out_irq: 258 irq_dispose_mapping(txwin->fault_virq); 259 out_win: 260 h_deallocate_vas_window(txwin->vas_win.winid); 261 return rc; 262 } 263 264 static inline void free_irq_setup(struct pseries_vas_window *txwin) 265 { 266 free_irq(txwin->fault_virq, txwin); 267 kfree(txwin->name); 268 irq_dispose_mapping(txwin->fault_virq); 269 } 270 271 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 272 enum vas_cop_type cop_type) 273 { 274 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 275 struct vas_cop_feat_caps *cop_feat_caps; 276 struct vas_caps *caps; 277 struct pseries_vas_window *txwin; 278 int rc; 279 280 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 281 if (!txwin) 282 return ERR_PTR(-ENOMEM); 283 284 /* 285 * A VAS window can have many credits which means that many 286 * requests can be issued simultaneously. But the hypervisor 287 * restricts one credit per window. 288 * The hypervisor introduces 2 different types of credits: 289 * Default credit type (Uses normal priority FIFO): 290 * A limited number of credits are assigned to partitions 291 * based on processor entitlement. But these credits may be 292 * over-committed on a system depends on whether the CPUs 293 * are in shared or dedicated modes - that is, more requests 294 * may be issued across the system than NX can service at 295 * once which can result in paste command failure (RMA_busy). 296 * Then the process has to resend requests or fall-back to 297 * SW compression. 298 * Quality of Service (QoS) credit type (Uses high priority FIFO): 299 * To avoid NX HW contention, the system admins can assign 300 * QoS credits for each LPAR so that this partition is 301 * guaranteed access to NX resources. These credits are 302 * assigned to partitions via the HMC. 303 * Refer PAPR for more information. 304 * 305 * Allocate window with QoS credits if user requested. Otherwise 306 * default credits are used. 307 */ 308 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 309 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 310 else 311 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 312 313 cop_feat_caps = &caps->caps; 314 315 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 316 atomic_read(&cop_feat_caps->nr_total_credits)) { 317 pr_err("Credits are not available to allocate window\n"); 318 rc = -EINVAL; 319 goto out; 320 } 321 322 if (vas_id == -1) { 323 /* 324 * The user space is requesting to allocate a window on 325 * a VAS instance where the process is executing. 326 * On PowerVM, domain values are passed to the hypervisor 327 * to select VAS instance. Useful if the process is 328 * affinity to NUMA node. 329 * The hypervisor selects VAS instance if 330 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 331 * The h_allocate_vas_window hcall is defined to take a 332 * domain values as specified by h_home_node_associativity, 333 * So no unpacking needs to be done. 334 */ 335 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 336 VPHN_FLAG_VCPU, smp_processor_id()); 337 if (rc != H_SUCCESS) { 338 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 339 goto out; 340 } 341 } 342 343 txwin->pid = mfspr(SPRN_PID); 344 345 /* 346 * Allocate / Deallocate window hcalls and setup / free IRQs 347 * have to be protected with mutex. 348 * Open VAS window: Allocate window hcall and setup IRQ 349 * Close VAS window: Deallocate window hcall and free IRQ 350 * The hypervisor waits until all NX requests are 351 * completed before closing the window. So expects OS 352 * to handle NX faults, means IRQ can be freed only 353 * after the deallocate window hcall is returned. 354 * So once the window is closed with deallocate hcall before 355 * the IRQ is freed, it can be assigned to new allocate 356 * hcall with the same fault IRQ by the hypervisor. It can 357 * result in setup IRQ fail for the new window since the 358 * same fault IRQ is not freed by the OS before. 359 */ 360 mutex_lock(&vas_pseries_mutex); 361 if (migration_in_progress) 362 rc = -EBUSY; 363 else 364 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 365 cop_feat_caps->win_type); 366 mutex_unlock(&vas_pseries_mutex); 367 if (rc) 368 goto out; 369 370 /* 371 * Modify window and it is ready to use. 372 */ 373 rc = h_modify_vas_window(txwin); 374 if (!rc) 375 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 376 if (rc) 377 goto out_free; 378 379 txwin->win_type = cop_feat_caps->win_type; 380 mutex_lock(&vas_pseries_mutex); 381 /* 382 * Possible to lose the acquired credit with DLPAR core 383 * removal after the window is opened. So if there are any 384 * closed windows (means with lost credits), do not give new 385 * window to user space. New windows will be opened only 386 * after the existing windows are reopened when credits are 387 * available. 388 */ 389 if (!caps->nr_close_wins) { 390 list_add(&txwin->win_list, &caps->list); 391 caps->nr_open_windows++; 392 mutex_unlock(&vas_pseries_mutex); 393 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 394 return &txwin->vas_win; 395 } 396 mutex_unlock(&vas_pseries_mutex); 397 398 put_vas_user_win_ref(&txwin->vas_win.task_ref); 399 rc = -EBUSY; 400 pr_err("No credit is available to allocate window\n"); 401 402 out_free: 403 /* 404 * Window is not operational. Free IRQ before closing 405 * window so that do not have to hold mutex. 406 */ 407 free_irq_setup(txwin); 408 h_deallocate_vas_window(txwin->vas_win.winid); 409 out: 410 atomic_dec(&cop_feat_caps->nr_used_credits); 411 kfree(txwin); 412 return ERR_PTR(rc); 413 } 414 415 static u64 vas_paste_address(struct vas_window *vwin) 416 { 417 struct pseries_vas_window *win; 418 419 win = container_of(vwin, struct pseries_vas_window, vas_win); 420 return win->win_addr; 421 } 422 423 static int deallocate_free_window(struct pseries_vas_window *win) 424 { 425 int rc = 0; 426 427 /* 428 * The hypervisor waits for all requests including faults 429 * are processed before closing the window - Means all 430 * credits have to be returned. In the case of fault 431 * request, a credit is returned after OS issues 432 * H_GET_NX_FAULT hcall. 433 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 434 * hcall. 435 */ 436 rc = h_deallocate_vas_window(win->vas_win.winid); 437 if (!rc) 438 free_irq_setup(win); 439 440 return rc; 441 } 442 443 static int vas_deallocate_window(struct vas_window *vwin) 444 { 445 struct pseries_vas_window *win; 446 struct vas_cop_feat_caps *caps; 447 int rc = 0; 448 449 if (!vwin) 450 return -EINVAL; 451 452 win = container_of(vwin, struct pseries_vas_window, vas_win); 453 454 /* Should not happen */ 455 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 456 pr_err("Window (%u): Invalid window type %u\n", 457 vwin->winid, win->win_type); 458 return -EINVAL; 459 } 460 461 caps = &vascaps[win->win_type].caps; 462 mutex_lock(&vas_pseries_mutex); 463 /* 464 * VAS window is already closed in the hypervisor when 465 * lost the credit or with migration. So just remove the entry 466 * from the list, remove task references and free vas_window 467 * struct. 468 */ 469 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 470 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 471 rc = deallocate_free_window(win); 472 if (rc) { 473 mutex_unlock(&vas_pseries_mutex); 474 return rc; 475 } 476 } else 477 vascaps[win->win_type].nr_close_wins--; 478 479 list_del(&win->win_list); 480 atomic_dec(&caps->nr_used_credits); 481 vascaps[win->win_type].nr_open_windows--; 482 mutex_unlock(&vas_pseries_mutex); 483 484 put_vas_user_win_ref(&vwin->task_ref); 485 mm_context_remove_vas_window(vwin->task_ref.mm); 486 487 kfree(win); 488 return 0; 489 } 490 491 static const struct vas_user_win_ops vops_pseries = { 492 .open_win = vas_allocate_window, /* Open and configure window */ 493 .paste_addr = vas_paste_address, /* To do copy/paste */ 494 .close_win = vas_deallocate_window, /* Close window */ 495 }; 496 497 /* 498 * Supporting only nx-gzip coprocessor type now, but this API code 499 * extended to other coprocessor types later. 500 */ 501 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 502 const char *name) 503 { 504 int rc; 505 506 if (!copypaste_feat) 507 return -ENOTSUPP; 508 509 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 510 511 return rc; 512 } 513 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 514 515 void vas_unregister_api_pseries(void) 516 { 517 vas_unregister_coproc_api(); 518 } 519 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 520 521 /* 522 * Get the specific capabilities based on the feature type. 523 * Right now supports GZIP default and GZIP QoS capabilities. 524 */ 525 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 526 struct hv_vas_cop_feat_caps *hv_caps) 527 { 528 struct vas_cop_feat_caps *caps; 529 struct vas_caps *vcaps; 530 int rc = 0; 531 532 vcaps = &vascaps[type]; 533 memset(vcaps, 0, sizeof(*vcaps)); 534 INIT_LIST_HEAD(&vcaps->list); 535 536 vcaps->feat = feat; 537 caps = &vcaps->caps; 538 539 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 540 (u64)virt_to_phys(hv_caps)); 541 if (rc) 542 return rc; 543 544 caps->user_mode = hv_caps->user_mode; 545 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 546 pr_err("User space COPY/PASTE is not supported\n"); 547 return -ENOTSUPP; 548 } 549 550 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 551 caps->win_type = hv_caps->win_type; 552 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 553 pr_err("Unsupported window type %u\n", caps->win_type); 554 return -EINVAL; 555 } 556 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 557 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 558 atomic_set(&caps->nr_total_credits, 559 be16_to_cpu(hv_caps->target_lpar_creds)); 560 if (feat == VAS_GZIP_DEF_FEAT) { 561 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 562 563 if (caps->max_win_creds < DEF_WIN_CREDS) { 564 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 565 DEF_WIN_CREDS, caps->max_win_creds); 566 return -EINVAL; 567 } 568 } 569 570 rc = sysfs_add_vas_caps(caps); 571 if (rc) 572 return rc; 573 574 copypaste_feat = true; 575 576 return 0; 577 } 578 579 /* 580 * VAS windows can be closed due to lost credits when the core is 581 * removed. So reopen them if credits are available due to DLPAR 582 * core add and set the window active status. When NX sees the page 583 * fault on the unmapped paste address, the kernel handles the fault 584 * by setting the remapping to new paste address if the window is 585 * active. 586 */ 587 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 588 bool migrate) 589 { 590 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 591 struct vas_cop_feat_caps *caps = &vcaps->caps; 592 struct pseries_vas_window *win = NULL, *tmp; 593 int rc, mv_ents = 0; 594 int flag; 595 596 /* 597 * Nothing to do if there are no closed windows. 598 */ 599 if (!vcaps->nr_close_wins) 600 return 0; 601 602 /* 603 * For the core removal, the hypervisor reduces the credits 604 * assigned to the LPAR and the kernel closes VAS windows 605 * in the hypervisor depends on reduced credits. The kernel 606 * uses LIFO (the last windows that are opened will be closed 607 * first) and expects to open in the same order when credits 608 * are available. 609 * For example, 40 windows are closed when the LPAR lost 2 cores 610 * (dedicated). If 1 core is added, this LPAR can have 20 more 611 * credits. It means the kernel can reopen 20 windows. So move 612 * 20 entries in the VAS windows lost and reopen next 20 windows. 613 * For partition migration, reopen all windows that are closed 614 * during resume. 615 */ 616 if ((vcaps->nr_close_wins > creds) && !migrate) 617 mv_ents = vcaps->nr_close_wins - creds; 618 619 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 620 if (!mv_ents) 621 break; 622 623 mv_ents--; 624 } 625 626 /* 627 * Open windows if they are closed only with migration or 628 * DLPAR (lost credit) before. 629 */ 630 if (migrate) 631 flag = VAS_WIN_MIGRATE_CLOSE; 632 else 633 flag = VAS_WIN_NO_CRED_CLOSE; 634 635 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 636 /* 637 * This window is closed with DLPAR and migration events. 638 * So reopen the window with the last event. 639 * The user space is not suspended with the current 640 * migration notifier. So the user space can issue DLPAR 641 * CPU hotplug while migration in progress. In this case 642 * this window will be opened with the last event. 643 */ 644 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 645 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 646 win->vas_win.status &= ~flag; 647 continue; 648 } 649 650 /* 651 * Nothing to do on this window if it is not closed 652 * with this flag 653 */ 654 if (!(win->vas_win.status & flag)) 655 continue; 656 657 rc = allocate_setup_window(win, (u64 *)&domain[0], 658 caps->win_type); 659 if (rc) 660 return rc; 661 662 rc = h_modify_vas_window(win); 663 if (rc) 664 goto out; 665 666 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 667 /* 668 * Set window status to active 669 */ 670 win->vas_win.status &= ~flag; 671 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 672 win->win_type = caps->win_type; 673 if (!--vcaps->nr_close_wins) 674 break; 675 } 676 677 return 0; 678 out: 679 /* 680 * Window modify HCALL failed. So close the window to the 681 * hypervisor and return. 682 */ 683 free_irq_setup(win); 684 h_deallocate_vas_window(win->vas_win.winid); 685 return rc; 686 } 687 688 /* 689 * The hypervisor reduces the available credits if the LPAR lost core. It 690 * means the excessive windows should not be active and the user space 691 * should not be using these windows to send compression requests to NX. 692 * So the kernel closes the excessive windows and unmap the paste address 693 * such that the user space receives paste instruction failure. Then up to 694 * the user space to fall back to SW compression and manage with the 695 * existing windows. 696 */ 697 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 698 bool migrate) 699 { 700 struct pseries_vas_window *win, *tmp; 701 struct vas_user_win_ref *task_ref; 702 struct vm_area_struct *vma; 703 int rc = 0, flag; 704 705 if (migrate) 706 flag = VAS_WIN_MIGRATE_CLOSE; 707 else 708 flag = VAS_WIN_NO_CRED_CLOSE; 709 710 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 711 /* 712 * This window is already closed due to lost credit 713 * or for migration before. Go for next window. 714 * For migration, nothing to do since this window 715 * closed for DLPAR and will be reopened even on 716 * the destination system with other DLPAR operation. 717 */ 718 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 719 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 720 win->vas_win.status |= flag; 721 continue; 722 } 723 724 task_ref = &win->vas_win.task_ref; 725 mutex_lock(&task_ref->mmap_mutex); 726 vma = task_ref->vma; 727 /* 728 * Number of available credits are reduced, So select 729 * and close windows. 730 */ 731 win->vas_win.status |= flag; 732 733 mmap_write_lock(task_ref->mm); 734 /* 735 * vma is set in the original mapping. But this mapping 736 * is done with mmap() after the window is opened with ioctl. 737 * so we may not see the original mapping if the core remove 738 * is done before the original mmap() and after the ioctl. 739 */ 740 if (vma) 741 zap_page_range(vma, vma->vm_start, 742 vma->vm_end - vma->vm_start); 743 744 mmap_write_unlock(task_ref->mm); 745 mutex_unlock(&task_ref->mmap_mutex); 746 /* 747 * Close VAS window in the hypervisor, but do not 748 * free vas_window struct since it may be reused 749 * when the credit is available later (DLPAR with 750 * adding cores). This struct will be used 751 * later when the process issued with close(FD). 752 */ 753 rc = deallocate_free_window(win); 754 /* 755 * This failure is from the hypervisor. 756 * No way to stop migration for these failures. 757 * So ignore error and continue closing other windows. 758 */ 759 if (rc && !migrate) 760 return rc; 761 762 vcap->nr_close_wins++; 763 764 /* 765 * For migration, do not depend on lpar_creds in case if 766 * mismatch with the hypervisor value (should not happen). 767 * So close all active windows in the list and will be 768 * reopened windows based on the new lpar_creds on the 769 * destination system during resume. 770 */ 771 if (!migrate && !--excess_creds) 772 break; 773 } 774 775 return 0; 776 } 777 778 /* 779 * Get new VAS capabilities when the core add/removal configuration 780 * changes. Reconfig window configurations based on the credits 781 * availability from this new capabilities. 782 */ 783 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 784 { 785 struct vas_cop_feat_caps *caps; 786 int old_nr_creds; 787 struct vas_caps *vcaps; 788 int rc = 0, nr_active_wins; 789 790 if (type >= VAS_MAX_FEAT_TYPE) { 791 pr_err("Invalid credit type %d\n", type); 792 return -EINVAL; 793 } 794 795 vcaps = &vascaps[type]; 796 caps = &vcaps->caps; 797 798 mutex_lock(&vas_pseries_mutex); 799 800 old_nr_creds = atomic_read(&caps->nr_total_credits); 801 802 atomic_set(&caps->nr_total_credits, new_nr_creds); 803 /* 804 * The total number of available credits may be decreased or 805 * increased with DLPAR operation. Means some windows have to be 806 * closed / reopened. Hold the vas_pseries_mutex so that the 807 * user space can not open new windows. 808 */ 809 if (old_nr_creds < new_nr_creds) { 810 /* 811 * If the existing target credits is less than the new 812 * target, reopen windows if they are closed due to 813 * the previous DLPAR (core removal). 814 */ 815 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 816 false); 817 } else { 818 /* 819 * # active windows is more than new LPAR available 820 * credits. So close the excessive windows. 821 * On pseries, each window will have 1 credit. 822 */ 823 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 824 if (nr_active_wins > new_nr_creds) 825 rc = reconfig_close_windows(vcaps, 826 nr_active_wins - new_nr_creds, 827 false); 828 } 829 830 mutex_unlock(&vas_pseries_mutex); 831 return rc; 832 } 833 /* 834 * Total number of default credits available (target_credits) 835 * in LPAR depends on number of cores configured. It varies based on 836 * whether processors are in shared mode or dedicated mode. 837 * Get the notifier when CPU configuration is changed with DLPAR 838 * operation so that get the new target_credits (vas default capabilities) 839 * and then update the existing windows usage if needed. 840 */ 841 static int pseries_vas_notifier(struct notifier_block *nb, 842 unsigned long action, void *data) 843 { 844 struct of_reconfig_data *rd = data; 845 struct device_node *dn = rd->dn; 846 const __be32 *intserv = NULL; 847 int new_nr_creds, len, rc = 0; 848 849 if ((action == OF_RECONFIG_ATTACH_NODE) || 850 (action == OF_RECONFIG_DETACH_NODE)) 851 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 852 &len); 853 /* 854 * Processor config is not changed 855 */ 856 if (!intserv) 857 return NOTIFY_OK; 858 859 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 860 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 861 (u64)virt_to_phys(&hv_cop_caps)); 862 if (!rc) { 863 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 864 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, 865 new_nr_creds); 866 } 867 868 if (rc) 869 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 870 871 return rc; 872 } 873 874 static struct notifier_block pseries_vas_nb = { 875 .notifier_call = pseries_vas_notifier, 876 }; 877 878 /* 879 * For LPM, all windows have to be closed on the source partition 880 * before migration and reopen them on the destination partition 881 * after migration. So closing windows during suspend and 882 * reopen them during resume. 883 */ 884 int vas_migration_handler(int action) 885 { 886 struct vas_cop_feat_caps *caps; 887 int old_nr_creds, new_nr_creds = 0; 888 struct vas_caps *vcaps; 889 int i, rc = 0; 890 891 /* 892 * NX-GZIP is not enabled. Nothing to do for migration. 893 */ 894 if (!copypaste_feat) 895 return rc; 896 897 mutex_lock(&vas_pseries_mutex); 898 899 if (action == VAS_SUSPEND) 900 migration_in_progress = true; 901 else 902 migration_in_progress = false; 903 904 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 905 vcaps = &vascaps[i]; 906 caps = &vcaps->caps; 907 old_nr_creds = atomic_read(&caps->nr_total_credits); 908 909 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 910 vcaps->feat, 911 (u64)virt_to_phys(&hv_cop_caps)); 912 if (!rc) { 913 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 914 /* 915 * Should not happen. But incase print messages, close 916 * all windows in the list during suspend and reopen 917 * windows based on new lpar_creds on the destination 918 * system. 919 */ 920 if (old_nr_creds != new_nr_creds) { 921 pr_err("Target credits mismatch with the hypervisor\n"); 922 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 923 action, old_nr_creds, new_nr_creds); 924 pr_err("Used creds: %d, Active creds: %d\n", 925 atomic_read(&caps->nr_used_credits), 926 vcaps->nr_open_windows - vcaps->nr_close_wins); 927 } 928 } else { 929 pr_err("state(%d): Get VAS capabilities failed with %d\n", 930 action, rc); 931 /* 932 * We can not stop migration with the current lpm 933 * implementation. So continue closing all windows in 934 * the list (during suspend) and return without 935 * opening windows (during resume) if VAS capabilities 936 * HCALL failed. 937 */ 938 if (action == VAS_RESUME) 939 goto out; 940 } 941 942 switch (action) { 943 case VAS_SUSPEND: 944 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 945 true); 946 break; 947 case VAS_RESUME: 948 atomic_set(&caps->nr_total_credits, new_nr_creds); 949 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 950 break; 951 default: 952 /* should not happen */ 953 pr_err("Invalid migration action %d\n", action); 954 rc = -EINVAL; 955 goto out; 956 } 957 958 /* 959 * Ignore errors during suspend and return for resume. 960 */ 961 if (rc && (action == VAS_RESUME)) 962 goto out; 963 } 964 965 out: 966 mutex_unlock(&vas_pseries_mutex); 967 return rc; 968 } 969 970 static int __init pseries_vas_init(void) 971 { 972 struct hv_vas_all_caps *hv_caps; 973 int rc = 0; 974 975 /* 976 * Linux supports user space COPY/PASTE only with Radix 977 */ 978 if (!radix_enabled()) { 979 pr_err("API is supported only with radix page tables\n"); 980 return -ENOTSUPP; 981 } 982 983 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 984 if (!hv_caps) 985 return -ENOMEM; 986 /* 987 * Get VAS overall capabilities by passing 0 to feature type. 988 */ 989 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 990 (u64)virt_to_phys(hv_caps)); 991 if (rc) 992 goto out; 993 994 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 995 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 996 997 sysfs_pseries_vas_init(&caps_all); 998 999 /* 1000 * QOS capabilities available 1001 */ 1002 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1003 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1004 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1005 1006 if (rc) 1007 goto out; 1008 } 1009 /* 1010 * Default capabilities available 1011 */ 1012 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1013 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1014 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1015 1016 if (!rc && copypaste_feat) { 1017 if (firmware_has_feature(FW_FEATURE_LPAR)) 1018 of_reconfig_notifier_register(&pseries_vas_nb); 1019 1020 pr_info("GZIP feature is available\n"); 1021 } else { 1022 /* 1023 * Should not happen, but only when get default 1024 * capabilities HCALL failed. So disable copy paste 1025 * feature. 1026 */ 1027 copypaste_feat = false; 1028 } 1029 1030 out: 1031 kfree(hv_caps); 1032 return rc; 1033 } 1034 machine_device_initcall(pseries, pseries_vas_init); 1035