1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vas.h> 21 #include "vas.h" 22 23 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 24 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 25 /* The hypervisor allows one credit per window right now */ 26 #define DEF_WIN_CREDS 1 27 28 static struct vas_all_caps caps_all; 29 static bool copypaste_feat; 30 static struct hv_vas_cop_feat_caps hv_cop_caps; 31 32 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 33 static DEFINE_MUTEX(vas_pseries_mutex); 34 static bool migration_in_progress; 35 36 static long hcall_return_busy_check(long rc) 37 { 38 /* Check if we are stalled for some time */ 39 if (H_IS_LONG_BUSY(rc)) { 40 msleep(get_longbusy_msecs(rc)); 41 rc = H_BUSY; 42 } else if (rc == H_BUSY) { 43 cond_resched(); 44 } 45 46 return rc; 47 } 48 49 /* 50 * Allocate VAS window hcall 51 */ 52 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 53 u8 wintype, u16 credits) 54 { 55 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 56 long rc; 57 58 do { 59 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 60 credits, domain[0], domain[1], domain[2], 61 domain[3], domain[4], domain[5]); 62 63 rc = hcall_return_busy_check(rc); 64 } while (rc == H_BUSY); 65 66 if (rc == H_SUCCESS) { 67 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 68 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 69 return -ENOTSUPP; 70 } 71 win->vas_win.winid = retbuf[0]; 72 win->win_addr = retbuf[1]; 73 win->complete_irq = retbuf[2]; 74 win->fault_irq = retbuf[3]; 75 return 0; 76 } 77 78 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 79 rc, wintype, credits); 80 81 return -EIO; 82 } 83 84 /* 85 * Deallocate VAS window hcall. 86 */ 87 static int h_deallocate_vas_window(u64 winid) 88 { 89 long rc; 90 91 do { 92 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 93 94 rc = hcall_return_busy_check(rc); 95 } while (rc == H_BUSY); 96 97 if (rc == H_SUCCESS) 98 return 0; 99 100 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 101 rc, winid); 102 return -EIO; 103 } 104 105 /* 106 * Modify VAS window. 107 * After the window is opened with allocate window hcall, configure it 108 * with flags and LPAR PID before using. 109 */ 110 static int h_modify_vas_window(struct pseries_vas_window *win) 111 { 112 long rc; 113 114 /* 115 * AMR value is not supported in Linux VAS implementation. 116 * The hypervisor ignores it if 0 is passed. 117 */ 118 do { 119 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 120 win->vas_win.winid, win->pid, 0, 121 VAS_MOD_WIN_FLAGS, 0); 122 123 rc = hcall_return_busy_check(rc); 124 } while (rc == H_BUSY); 125 126 if (rc == H_SUCCESS) 127 return 0; 128 129 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 130 rc, win->vas_win.winid, win->pid); 131 return -EIO; 132 } 133 134 /* 135 * This hcall is used to determine the capabilities from the hypervisor. 136 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 137 * @query_type: If 0 is passed, the hypervisor returns the overall 138 * capabilities which provides all feature(s) that are 139 * available. Then query the hypervisor to get the 140 * corresponding capabilities for the specific feature. 141 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 142 * and VAS GZIP Default capabilities. 143 * H_QUERY_NX_CAPABILITIES provides NX GZIP 144 * capabilities. 145 * @result: Return buffer to save capabilities. 146 */ 147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 148 { 149 long rc; 150 151 rc = plpar_hcall_norets(hcall, query_type, result); 152 153 if (rc == H_SUCCESS) 154 return 0; 155 156 /* H_FUNCTION means HV does not support VAS so don't print an error */ 157 if (rc != H_FUNCTION) { 158 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 159 (hcall == H_QUERY_VAS_CAPABILITIES) ? 160 "H_QUERY_VAS_CAPABILITIES" : 161 "H_QUERY_NX_CAPABILITIES", 162 rc, query_type, result); 163 } 164 165 return -EIO; 166 } 167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 168 169 /* 170 * hcall to get fault CRB from the hypervisor. 171 */ 172 static int h_get_nx_fault(u32 winid, u64 buffer) 173 { 174 long rc; 175 176 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 177 178 if (rc == H_SUCCESS) 179 return 0; 180 181 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 182 rc, winid, buffer); 183 return -EIO; 184 185 } 186 187 /* 188 * Handle the fault interrupt. 189 * When the fault interrupt is received for each window, query the 190 * hypervisor to get the fault CRB on the specific fault. Then 191 * process the CRB by updating CSB or send signal if the user space 192 * CSB is invalid. 193 * Note: The hypervisor forwards an interrupt for each fault request. 194 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 195 */ 196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 197 { 198 struct pseries_vas_window *txwin = data; 199 struct coprocessor_request_block crb; 200 struct vas_user_win_ref *tsk_ref; 201 int rc; 202 203 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 204 if (!rc) { 205 tsk_ref = &txwin->vas_win.task_ref; 206 vas_dump_crb(&crb); 207 vas_update_csb(&crb, tsk_ref); 208 } 209 210 return IRQ_HANDLED; 211 } 212 213 /* 214 * Allocate window and setup IRQ mapping. 215 */ 216 static int allocate_setup_window(struct pseries_vas_window *txwin, 217 u64 *domain, u8 wintype) 218 { 219 int rc; 220 221 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 222 if (rc) 223 return rc; 224 /* 225 * On PowerVM, the hypervisor setup and forwards the fault 226 * interrupt per window. So the IRQ setup and fault handling 227 * will be done for each open window separately. 228 */ 229 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 230 if (!txwin->fault_virq) { 231 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 232 rc = -EINVAL; 233 goto out_win; 234 } 235 236 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 237 txwin->vas_win.winid); 238 if (!txwin->name) { 239 rc = -ENOMEM; 240 goto out_irq; 241 } 242 243 rc = request_threaded_irq(txwin->fault_virq, NULL, 244 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 245 txwin->name, txwin); 246 if (rc) { 247 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 248 txwin->vas_win.winid, txwin->fault_virq, rc); 249 goto out_free; 250 } 251 252 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 253 254 return 0; 255 out_free: 256 kfree(txwin->name); 257 out_irq: 258 irq_dispose_mapping(txwin->fault_virq); 259 out_win: 260 h_deallocate_vas_window(txwin->vas_win.winid); 261 return rc; 262 } 263 264 static inline void free_irq_setup(struct pseries_vas_window *txwin) 265 { 266 free_irq(txwin->fault_virq, txwin); 267 kfree(txwin->name); 268 irq_dispose_mapping(txwin->fault_virq); 269 } 270 271 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 272 enum vas_cop_type cop_type) 273 { 274 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 275 struct vas_cop_feat_caps *cop_feat_caps; 276 struct vas_caps *caps; 277 struct pseries_vas_window *txwin; 278 int rc; 279 280 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 281 if (!txwin) 282 return ERR_PTR(-ENOMEM); 283 284 /* 285 * A VAS window can have many credits which means that many 286 * requests can be issued simultaneously. But the hypervisor 287 * restricts one credit per window. 288 * The hypervisor introduces 2 different types of credits: 289 * Default credit type (Uses normal priority FIFO): 290 * A limited number of credits are assigned to partitions 291 * based on processor entitlement. But these credits may be 292 * over-committed on a system depends on whether the CPUs 293 * are in shared or dedicated modes - that is, more requests 294 * may be issued across the system than NX can service at 295 * once which can result in paste command failure (RMA_busy). 296 * Then the process has to resend requests or fall-back to 297 * SW compression. 298 * Quality of Service (QoS) credit type (Uses high priority FIFO): 299 * To avoid NX HW contention, the system admins can assign 300 * QoS credits for each LPAR so that this partition is 301 * guaranteed access to NX resources. These credits are 302 * assigned to partitions via the HMC. 303 * Refer PAPR for more information. 304 * 305 * Allocate window with QoS credits if user requested. Otherwise 306 * default credits are used. 307 */ 308 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 309 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 310 else 311 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 312 313 cop_feat_caps = &caps->caps; 314 315 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 316 atomic_read(&cop_feat_caps->nr_total_credits)) { 317 pr_err("Credits are not available to allocate window\n"); 318 rc = -EINVAL; 319 goto out; 320 } 321 322 if (vas_id == -1) { 323 /* 324 * The user space is requesting to allocate a window on 325 * a VAS instance where the process is executing. 326 * On PowerVM, domain values are passed to the hypervisor 327 * to select VAS instance. Useful if the process is 328 * affinity to NUMA node. 329 * The hypervisor selects VAS instance if 330 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 331 * The h_allocate_vas_window hcall is defined to take a 332 * domain values as specified by h_home_node_associativity, 333 * So no unpacking needs to be done. 334 */ 335 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 336 VPHN_FLAG_VCPU, hard_smp_processor_id()); 337 if (rc != H_SUCCESS) { 338 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 339 goto out; 340 } 341 } 342 343 txwin->pid = mfspr(SPRN_PID); 344 345 /* 346 * Allocate / Deallocate window hcalls and setup / free IRQs 347 * have to be protected with mutex. 348 * Open VAS window: Allocate window hcall and setup IRQ 349 * Close VAS window: Deallocate window hcall and free IRQ 350 * The hypervisor waits until all NX requests are 351 * completed before closing the window. So expects OS 352 * to handle NX faults, means IRQ can be freed only 353 * after the deallocate window hcall is returned. 354 * So once the window is closed with deallocate hcall before 355 * the IRQ is freed, it can be assigned to new allocate 356 * hcall with the same fault IRQ by the hypervisor. It can 357 * result in setup IRQ fail for the new window since the 358 * same fault IRQ is not freed by the OS before. 359 */ 360 mutex_lock(&vas_pseries_mutex); 361 if (migration_in_progress) 362 rc = -EBUSY; 363 else 364 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 365 cop_feat_caps->win_type); 366 mutex_unlock(&vas_pseries_mutex); 367 if (rc) 368 goto out; 369 370 /* 371 * Modify window and it is ready to use. 372 */ 373 rc = h_modify_vas_window(txwin); 374 if (!rc) 375 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 376 if (rc) 377 goto out_free; 378 379 txwin->win_type = cop_feat_caps->win_type; 380 mutex_lock(&vas_pseries_mutex); 381 /* 382 * Possible to lose the acquired credit with DLPAR core 383 * removal after the window is opened. So if there are any 384 * closed windows (means with lost credits), do not give new 385 * window to user space. New windows will be opened only 386 * after the existing windows are reopened when credits are 387 * available. 388 */ 389 if (!caps->nr_close_wins) { 390 list_add(&txwin->win_list, &caps->list); 391 caps->nr_open_windows++; 392 mutex_unlock(&vas_pseries_mutex); 393 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 394 return &txwin->vas_win; 395 } 396 mutex_unlock(&vas_pseries_mutex); 397 398 put_vas_user_win_ref(&txwin->vas_win.task_ref); 399 rc = -EBUSY; 400 pr_err("No credit is available to allocate window\n"); 401 402 out_free: 403 /* 404 * Window is not operational. Free IRQ before closing 405 * window so that do not have to hold mutex. 406 */ 407 free_irq_setup(txwin); 408 h_deallocate_vas_window(txwin->vas_win.winid); 409 out: 410 atomic_dec(&cop_feat_caps->nr_used_credits); 411 kfree(txwin); 412 return ERR_PTR(rc); 413 } 414 415 static u64 vas_paste_address(struct vas_window *vwin) 416 { 417 struct pseries_vas_window *win; 418 419 win = container_of(vwin, struct pseries_vas_window, vas_win); 420 return win->win_addr; 421 } 422 423 static int deallocate_free_window(struct pseries_vas_window *win) 424 { 425 int rc = 0; 426 427 /* 428 * The hypervisor waits for all requests including faults 429 * are processed before closing the window - Means all 430 * credits have to be returned. In the case of fault 431 * request, a credit is returned after OS issues 432 * H_GET_NX_FAULT hcall. 433 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 434 * hcall. 435 */ 436 rc = h_deallocate_vas_window(win->vas_win.winid); 437 if (!rc) 438 free_irq_setup(win); 439 440 return rc; 441 } 442 443 static int vas_deallocate_window(struct vas_window *vwin) 444 { 445 struct pseries_vas_window *win; 446 struct vas_cop_feat_caps *caps; 447 int rc = 0; 448 449 if (!vwin) 450 return -EINVAL; 451 452 win = container_of(vwin, struct pseries_vas_window, vas_win); 453 454 /* Should not happen */ 455 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 456 pr_err("Window (%u): Invalid window type %u\n", 457 vwin->winid, win->win_type); 458 return -EINVAL; 459 } 460 461 caps = &vascaps[win->win_type].caps; 462 mutex_lock(&vas_pseries_mutex); 463 /* 464 * VAS window is already closed in the hypervisor when 465 * lost the credit or with migration. So just remove the entry 466 * from the list, remove task references and free vas_window 467 * struct. 468 */ 469 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 470 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 471 rc = deallocate_free_window(win); 472 if (rc) { 473 mutex_unlock(&vas_pseries_mutex); 474 return rc; 475 } 476 } else 477 vascaps[win->win_type].nr_close_wins--; 478 479 list_del(&win->win_list); 480 atomic_dec(&caps->nr_used_credits); 481 vascaps[win->win_type].nr_open_windows--; 482 mutex_unlock(&vas_pseries_mutex); 483 484 put_vas_user_win_ref(&vwin->task_ref); 485 mm_context_remove_vas_window(vwin->task_ref.mm); 486 487 kfree(win); 488 return 0; 489 } 490 491 static const struct vas_user_win_ops vops_pseries = { 492 .open_win = vas_allocate_window, /* Open and configure window */ 493 .paste_addr = vas_paste_address, /* To do copy/paste */ 494 .close_win = vas_deallocate_window, /* Close window */ 495 }; 496 497 /* 498 * Supporting only nx-gzip coprocessor type now, but this API code 499 * extended to other coprocessor types later. 500 */ 501 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 502 const char *name) 503 { 504 if (!copypaste_feat) 505 return -ENOTSUPP; 506 507 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 508 } 509 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 510 511 void vas_unregister_api_pseries(void) 512 { 513 vas_unregister_coproc_api(); 514 } 515 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 516 517 /* 518 * Get the specific capabilities based on the feature type. 519 * Right now supports GZIP default and GZIP QoS capabilities. 520 */ 521 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 522 struct hv_vas_cop_feat_caps *hv_caps) 523 { 524 struct vas_cop_feat_caps *caps; 525 struct vas_caps *vcaps; 526 int rc = 0; 527 528 vcaps = &vascaps[type]; 529 memset(vcaps, 0, sizeof(*vcaps)); 530 INIT_LIST_HEAD(&vcaps->list); 531 532 vcaps->feat = feat; 533 caps = &vcaps->caps; 534 535 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 536 (u64)virt_to_phys(hv_caps)); 537 if (rc) 538 return rc; 539 540 caps->user_mode = hv_caps->user_mode; 541 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 542 pr_err("User space COPY/PASTE is not supported\n"); 543 return -ENOTSUPP; 544 } 545 546 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 547 caps->win_type = hv_caps->win_type; 548 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 549 pr_err("Unsupported window type %u\n", caps->win_type); 550 return -EINVAL; 551 } 552 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 553 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 554 atomic_set(&caps->nr_total_credits, 555 be16_to_cpu(hv_caps->target_lpar_creds)); 556 if (feat == VAS_GZIP_DEF_FEAT) { 557 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 558 559 if (caps->max_win_creds < DEF_WIN_CREDS) { 560 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 561 DEF_WIN_CREDS, caps->max_win_creds); 562 return -EINVAL; 563 } 564 } 565 566 rc = sysfs_add_vas_caps(caps); 567 if (rc) 568 return rc; 569 570 copypaste_feat = true; 571 572 return 0; 573 } 574 575 /* 576 * VAS windows can be closed due to lost credits when the core is 577 * removed. So reopen them if credits are available due to DLPAR 578 * core add and set the window active status. When NX sees the page 579 * fault on the unmapped paste address, the kernel handles the fault 580 * by setting the remapping to new paste address if the window is 581 * active. 582 */ 583 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 584 bool migrate) 585 { 586 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 587 struct vas_cop_feat_caps *caps = &vcaps->caps; 588 struct pseries_vas_window *win = NULL, *tmp; 589 int rc, mv_ents = 0; 590 int flag; 591 592 /* 593 * Nothing to do if there are no closed windows. 594 */ 595 if (!vcaps->nr_close_wins) 596 return 0; 597 598 /* 599 * For the core removal, the hypervisor reduces the credits 600 * assigned to the LPAR and the kernel closes VAS windows 601 * in the hypervisor depends on reduced credits. The kernel 602 * uses LIFO (the last windows that are opened will be closed 603 * first) and expects to open in the same order when credits 604 * are available. 605 * For example, 40 windows are closed when the LPAR lost 2 cores 606 * (dedicated). If 1 core is added, this LPAR can have 20 more 607 * credits. It means the kernel can reopen 20 windows. So move 608 * 20 entries in the VAS windows lost and reopen next 20 windows. 609 * For partition migration, reopen all windows that are closed 610 * during resume. 611 */ 612 if ((vcaps->nr_close_wins > creds) && !migrate) 613 mv_ents = vcaps->nr_close_wins - creds; 614 615 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 616 if (!mv_ents) 617 break; 618 619 mv_ents--; 620 } 621 622 /* 623 * Open windows if they are closed only with migration or 624 * DLPAR (lost credit) before. 625 */ 626 if (migrate) 627 flag = VAS_WIN_MIGRATE_CLOSE; 628 else 629 flag = VAS_WIN_NO_CRED_CLOSE; 630 631 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 632 /* 633 * This window is closed with DLPAR and migration events. 634 * So reopen the window with the last event. 635 * The user space is not suspended with the current 636 * migration notifier. So the user space can issue DLPAR 637 * CPU hotplug while migration in progress. In this case 638 * this window will be opened with the last event. 639 */ 640 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 641 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 642 win->vas_win.status &= ~flag; 643 continue; 644 } 645 646 /* 647 * Nothing to do on this window if it is not closed 648 * with this flag 649 */ 650 if (!(win->vas_win.status & flag)) 651 continue; 652 653 rc = allocate_setup_window(win, (u64 *)&domain[0], 654 caps->win_type); 655 if (rc) 656 return rc; 657 658 rc = h_modify_vas_window(win); 659 if (rc) 660 goto out; 661 662 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 663 /* 664 * Set window status to active 665 */ 666 win->vas_win.status &= ~flag; 667 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 668 win->win_type = caps->win_type; 669 if (!--vcaps->nr_close_wins) 670 break; 671 } 672 673 return 0; 674 out: 675 /* 676 * Window modify HCALL failed. So close the window to the 677 * hypervisor and return. 678 */ 679 free_irq_setup(win); 680 h_deallocate_vas_window(win->vas_win.winid); 681 return rc; 682 } 683 684 /* 685 * The hypervisor reduces the available credits if the LPAR lost core. It 686 * means the excessive windows should not be active and the user space 687 * should not be using these windows to send compression requests to NX. 688 * So the kernel closes the excessive windows and unmap the paste address 689 * such that the user space receives paste instruction failure. Then up to 690 * the user space to fall back to SW compression and manage with the 691 * existing windows. 692 */ 693 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 694 bool migrate) 695 { 696 struct pseries_vas_window *win, *tmp; 697 struct vas_user_win_ref *task_ref; 698 struct vm_area_struct *vma; 699 int rc = 0, flag; 700 701 if (migrate) 702 flag = VAS_WIN_MIGRATE_CLOSE; 703 else 704 flag = VAS_WIN_NO_CRED_CLOSE; 705 706 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 707 /* 708 * This window is already closed due to lost credit 709 * or for migration before. Go for next window. 710 * For migration, nothing to do since this window 711 * closed for DLPAR and will be reopened even on 712 * the destination system with other DLPAR operation. 713 */ 714 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 715 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 716 win->vas_win.status |= flag; 717 continue; 718 } 719 720 task_ref = &win->vas_win.task_ref; 721 mutex_lock(&task_ref->mmap_mutex); 722 vma = task_ref->vma; 723 /* 724 * Number of available credits are reduced, So select 725 * and close windows. 726 */ 727 win->vas_win.status |= flag; 728 729 mmap_write_lock(task_ref->mm); 730 /* 731 * vma is set in the original mapping. But this mapping 732 * is done with mmap() after the window is opened with ioctl. 733 * so we may not see the original mapping if the core remove 734 * is done before the original mmap() and after the ioctl. 735 */ 736 if (vma) 737 zap_page_range(vma, vma->vm_start, 738 vma->vm_end - vma->vm_start); 739 740 mmap_write_unlock(task_ref->mm); 741 mutex_unlock(&task_ref->mmap_mutex); 742 /* 743 * Close VAS window in the hypervisor, but do not 744 * free vas_window struct since it may be reused 745 * when the credit is available later (DLPAR with 746 * adding cores). This struct will be used 747 * later when the process issued with close(FD). 748 */ 749 rc = deallocate_free_window(win); 750 /* 751 * This failure is from the hypervisor. 752 * No way to stop migration for these failures. 753 * So ignore error and continue closing other windows. 754 */ 755 if (rc && !migrate) 756 return rc; 757 758 vcap->nr_close_wins++; 759 760 /* 761 * For migration, do not depend on lpar_creds in case if 762 * mismatch with the hypervisor value (should not happen). 763 * So close all active windows in the list and will be 764 * reopened windows based on the new lpar_creds on the 765 * destination system during resume. 766 */ 767 if (!migrate && !--excess_creds) 768 break; 769 } 770 771 return 0; 772 } 773 774 /* 775 * Get new VAS capabilities when the core add/removal configuration 776 * changes. Reconfig window configurations based on the credits 777 * availability from this new capabilities. 778 */ 779 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 780 { 781 struct vas_cop_feat_caps *caps; 782 int old_nr_creds; 783 struct vas_caps *vcaps; 784 int rc = 0, nr_active_wins; 785 786 if (type >= VAS_MAX_FEAT_TYPE) { 787 pr_err("Invalid credit type %d\n", type); 788 return -EINVAL; 789 } 790 791 vcaps = &vascaps[type]; 792 caps = &vcaps->caps; 793 794 mutex_lock(&vas_pseries_mutex); 795 796 old_nr_creds = atomic_read(&caps->nr_total_credits); 797 798 atomic_set(&caps->nr_total_credits, new_nr_creds); 799 /* 800 * The total number of available credits may be decreased or 801 * increased with DLPAR operation. Means some windows have to be 802 * closed / reopened. Hold the vas_pseries_mutex so that the 803 * user space can not open new windows. 804 */ 805 if (old_nr_creds < new_nr_creds) { 806 /* 807 * If the existing target credits is less than the new 808 * target, reopen windows if they are closed due to 809 * the previous DLPAR (core removal). 810 */ 811 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 812 false); 813 } else { 814 /* 815 * # active windows is more than new LPAR available 816 * credits. So close the excessive windows. 817 * On pseries, each window will have 1 credit. 818 */ 819 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 820 if (nr_active_wins > new_nr_creds) 821 rc = reconfig_close_windows(vcaps, 822 nr_active_wins - new_nr_creds, 823 false); 824 } 825 826 mutex_unlock(&vas_pseries_mutex); 827 return rc; 828 } 829 /* 830 * Total number of default credits available (target_credits) 831 * in LPAR depends on number of cores configured. It varies based on 832 * whether processors are in shared mode or dedicated mode. 833 * Get the notifier when CPU configuration is changed with DLPAR 834 * operation so that get the new target_credits (vas default capabilities) 835 * and then update the existing windows usage if needed. 836 */ 837 static int pseries_vas_notifier(struct notifier_block *nb, 838 unsigned long action, void *data) 839 { 840 struct of_reconfig_data *rd = data; 841 struct device_node *dn = rd->dn; 842 const __be32 *intserv = NULL; 843 int new_nr_creds, len, rc = 0; 844 845 if ((action == OF_RECONFIG_ATTACH_NODE) || 846 (action == OF_RECONFIG_DETACH_NODE)) 847 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 848 &len); 849 /* 850 * Processor config is not changed 851 */ 852 if (!intserv) 853 return NOTIFY_OK; 854 855 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 856 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 857 (u64)virt_to_phys(&hv_cop_caps)); 858 if (!rc) { 859 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 860 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, 861 new_nr_creds); 862 } 863 864 if (rc) 865 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 866 867 return rc; 868 } 869 870 static struct notifier_block pseries_vas_nb = { 871 .notifier_call = pseries_vas_notifier, 872 }; 873 874 /* 875 * For LPM, all windows have to be closed on the source partition 876 * before migration and reopen them on the destination partition 877 * after migration. So closing windows during suspend and 878 * reopen them during resume. 879 */ 880 int vas_migration_handler(int action) 881 { 882 struct vas_cop_feat_caps *caps; 883 int old_nr_creds, new_nr_creds = 0; 884 struct vas_caps *vcaps; 885 int i, rc = 0; 886 887 /* 888 * NX-GZIP is not enabled. Nothing to do for migration. 889 */ 890 if (!copypaste_feat) 891 return rc; 892 893 mutex_lock(&vas_pseries_mutex); 894 895 if (action == VAS_SUSPEND) 896 migration_in_progress = true; 897 else 898 migration_in_progress = false; 899 900 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 901 vcaps = &vascaps[i]; 902 caps = &vcaps->caps; 903 old_nr_creds = atomic_read(&caps->nr_total_credits); 904 905 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 906 vcaps->feat, 907 (u64)virt_to_phys(&hv_cop_caps)); 908 if (!rc) { 909 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 910 /* 911 * Should not happen. But incase print messages, close 912 * all windows in the list during suspend and reopen 913 * windows based on new lpar_creds on the destination 914 * system. 915 */ 916 if (old_nr_creds != new_nr_creds) { 917 pr_err("Target credits mismatch with the hypervisor\n"); 918 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 919 action, old_nr_creds, new_nr_creds); 920 pr_err("Used creds: %d, Active creds: %d\n", 921 atomic_read(&caps->nr_used_credits), 922 vcaps->nr_open_windows - vcaps->nr_close_wins); 923 } 924 } else { 925 pr_err("state(%d): Get VAS capabilities failed with %d\n", 926 action, rc); 927 /* 928 * We can not stop migration with the current lpm 929 * implementation. So continue closing all windows in 930 * the list (during suspend) and return without 931 * opening windows (during resume) if VAS capabilities 932 * HCALL failed. 933 */ 934 if (action == VAS_RESUME) 935 goto out; 936 } 937 938 switch (action) { 939 case VAS_SUSPEND: 940 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 941 true); 942 break; 943 case VAS_RESUME: 944 atomic_set(&caps->nr_total_credits, new_nr_creds); 945 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 946 break; 947 default: 948 /* should not happen */ 949 pr_err("Invalid migration action %d\n", action); 950 rc = -EINVAL; 951 goto out; 952 } 953 954 /* 955 * Ignore errors during suspend and return for resume. 956 */ 957 if (rc && (action == VAS_RESUME)) 958 goto out; 959 } 960 961 out: 962 mutex_unlock(&vas_pseries_mutex); 963 return rc; 964 } 965 966 static int __init pseries_vas_init(void) 967 { 968 struct hv_vas_all_caps *hv_caps; 969 int rc = 0; 970 971 /* 972 * Linux supports user space COPY/PASTE only with Radix 973 */ 974 if (!radix_enabled()) { 975 pr_err("API is supported only with radix page tables\n"); 976 return -ENOTSUPP; 977 } 978 979 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 980 if (!hv_caps) 981 return -ENOMEM; 982 /* 983 * Get VAS overall capabilities by passing 0 to feature type. 984 */ 985 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 986 (u64)virt_to_phys(hv_caps)); 987 if (rc) 988 goto out; 989 990 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 991 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 992 993 sysfs_pseries_vas_init(&caps_all); 994 995 /* 996 * QOS capabilities available 997 */ 998 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 999 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1000 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1001 1002 if (rc) 1003 goto out; 1004 } 1005 /* 1006 * Default capabilities available 1007 */ 1008 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1009 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1010 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1011 1012 if (!rc && copypaste_feat) { 1013 if (firmware_has_feature(FW_FEATURE_LPAR)) 1014 of_reconfig_notifier_register(&pseries_vas_nb); 1015 1016 pr_info("GZIP feature is available\n"); 1017 } else { 1018 /* 1019 * Should not happen, but only when get default 1020 * capabilities HCALL failed. So disable copy paste 1021 * feature. 1022 */ 1023 copypaste_feat = false; 1024 } 1025 1026 out: 1027 kfree(hv_caps); 1028 return rc; 1029 } 1030 machine_device_initcall(pseries, pseries_vas_init); 1031