1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/vas.h> 20 #include "vas.h" 21 22 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 23 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 24 /* The hypervisor allows one credit per window right now */ 25 #define DEF_WIN_CREDS 1 26 27 static struct vas_all_caps caps_all; 28 static bool copypaste_feat; 29 static struct hv_vas_cop_feat_caps hv_cop_caps; 30 31 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 32 static DEFINE_MUTEX(vas_pseries_mutex); 33 static bool migration_in_progress; 34 35 static long hcall_return_busy_check(long rc) 36 { 37 /* Check if we are stalled for some time */ 38 if (H_IS_LONG_BUSY(rc)) { 39 msleep(get_longbusy_msecs(rc)); 40 rc = H_BUSY; 41 } else if (rc == H_BUSY) { 42 cond_resched(); 43 } 44 45 return rc; 46 } 47 48 /* 49 * Allocate VAS window hcall 50 */ 51 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 52 u8 wintype, u16 credits) 53 { 54 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 55 long rc; 56 57 do { 58 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 59 credits, domain[0], domain[1], domain[2], 60 domain[3], domain[4], domain[5]); 61 62 rc = hcall_return_busy_check(rc); 63 } while (rc == H_BUSY); 64 65 if (rc == H_SUCCESS) { 66 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 67 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 68 return -ENOTSUPP; 69 } 70 win->vas_win.winid = retbuf[0]; 71 win->win_addr = retbuf[1]; 72 win->complete_irq = retbuf[2]; 73 win->fault_irq = retbuf[3]; 74 return 0; 75 } 76 77 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 78 rc, wintype, credits); 79 80 return -EIO; 81 } 82 83 /* 84 * Deallocate VAS window hcall. 85 */ 86 static int h_deallocate_vas_window(u64 winid) 87 { 88 long rc; 89 90 do { 91 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 92 93 rc = hcall_return_busy_check(rc); 94 } while (rc == H_BUSY); 95 96 if (rc == H_SUCCESS) 97 return 0; 98 99 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 100 rc, winid); 101 return -EIO; 102 } 103 104 /* 105 * Modify VAS window. 106 * After the window is opened with allocate window hcall, configure it 107 * with flags and LPAR PID before using. 108 */ 109 static int h_modify_vas_window(struct pseries_vas_window *win) 110 { 111 long rc; 112 113 /* 114 * AMR value is not supported in Linux VAS implementation. 115 * The hypervisor ignores it if 0 is passed. 116 */ 117 do { 118 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 119 win->vas_win.winid, win->pid, 0, 120 VAS_MOD_WIN_FLAGS, 0); 121 122 rc = hcall_return_busy_check(rc); 123 } while (rc == H_BUSY); 124 125 if (rc == H_SUCCESS) 126 return 0; 127 128 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 129 rc, win->vas_win.winid, win->pid); 130 return -EIO; 131 } 132 133 /* 134 * This hcall is used to determine the capabilities from the hypervisor. 135 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 136 * @query_type: If 0 is passed, the hypervisor returns the overall 137 * capabilities which provides all feature(s) that are 138 * available. Then query the hypervisor to get the 139 * corresponding capabilities for the specific feature. 140 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 141 * and VAS GZIP Default capabilities. 142 * H_QUERY_NX_CAPABILITIES provides NX GZIP 143 * capabilities. 144 * @result: Return buffer to save capabilities. 145 */ 146 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 147 { 148 long rc; 149 150 rc = plpar_hcall_norets(hcall, query_type, result); 151 152 if (rc == H_SUCCESS) 153 return 0; 154 155 /* H_FUNCTION means HV does not support VAS so don't print an error */ 156 if (rc != H_FUNCTION) { 157 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 158 (hcall == H_QUERY_VAS_CAPABILITIES) ? 159 "H_QUERY_VAS_CAPABILITIES" : 160 "H_QUERY_NX_CAPABILITIES", 161 rc, query_type, result); 162 } 163 164 return -EIO; 165 } 166 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 167 168 /* 169 * hcall to get fault CRB from the hypervisor. 170 */ 171 static int h_get_nx_fault(u32 winid, u64 buffer) 172 { 173 long rc; 174 175 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 176 177 if (rc == H_SUCCESS) 178 return 0; 179 180 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 181 rc, winid, buffer); 182 return -EIO; 183 184 } 185 186 /* 187 * Handle the fault interrupt. 188 * When the fault interrupt is received for each window, query the 189 * hypervisor to get the fault CRB on the specific fault. Then 190 * process the CRB by updating CSB or send signal if the user space 191 * CSB is invalid. 192 * Note: The hypervisor forwards an interrupt for each fault request. 193 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 194 */ 195 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 196 { 197 struct pseries_vas_window *txwin = data; 198 struct coprocessor_request_block crb; 199 struct vas_user_win_ref *tsk_ref; 200 int rc; 201 202 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 203 if (!rc) { 204 tsk_ref = &txwin->vas_win.task_ref; 205 vas_dump_crb(&crb); 206 vas_update_csb(&crb, tsk_ref); 207 } 208 209 return IRQ_HANDLED; 210 } 211 212 /* 213 * Allocate window and setup IRQ mapping. 214 */ 215 static int allocate_setup_window(struct pseries_vas_window *txwin, 216 u64 *domain, u8 wintype) 217 { 218 int rc; 219 220 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 221 if (rc) 222 return rc; 223 /* 224 * On PowerVM, the hypervisor setup and forwards the fault 225 * interrupt per window. So the IRQ setup and fault handling 226 * will be done for each open window separately. 227 */ 228 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 229 if (!txwin->fault_virq) { 230 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 231 rc = -EINVAL; 232 goto out_win; 233 } 234 235 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 236 txwin->vas_win.winid); 237 if (!txwin->name) { 238 rc = -ENOMEM; 239 goto out_irq; 240 } 241 242 rc = request_threaded_irq(txwin->fault_virq, NULL, 243 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 244 txwin->name, txwin); 245 if (rc) { 246 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 247 txwin->vas_win.winid, txwin->fault_virq, rc); 248 goto out_free; 249 } 250 251 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 252 253 return 0; 254 out_free: 255 kfree(txwin->name); 256 out_irq: 257 irq_dispose_mapping(txwin->fault_virq); 258 out_win: 259 h_deallocate_vas_window(txwin->vas_win.winid); 260 return rc; 261 } 262 263 static inline void free_irq_setup(struct pseries_vas_window *txwin) 264 { 265 free_irq(txwin->fault_virq, txwin); 266 kfree(txwin->name); 267 irq_dispose_mapping(txwin->fault_virq); 268 } 269 270 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 271 enum vas_cop_type cop_type) 272 { 273 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 274 struct vas_cop_feat_caps *cop_feat_caps; 275 struct vas_caps *caps; 276 struct pseries_vas_window *txwin; 277 int rc; 278 279 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 280 if (!txwin) 281 return ERR_PTR(-ENOMEM); 282 283 /* 284 * A VAS window can have many credits which means that many 285 * requests can be issued simultaneously. But the hypervisor 286 * restricts one credit per window. 287 * The hypervisor introduces 2 different types of credits: 288 * Default credit type (Uses normal priority FIFO): 289 * A limited number of credits are assigned to partitions 290 * based on processor entitlement. But these credits may be 291 * over-committed on a system depends on whether the CPUs 292 * are in shared or dedicated modes - that is, more requests 293 * may be issued across the system than NX can service at 294 * once which can result in paste command failure (RMA_busy). 295 * Then the process has to resend requests or fall-back to 296 * SW compression. 297 * Quality of Service (QoS) credit type (Uses high priority FIFO): 298 * To avoid NX HW contention, the system admins can assign 299 * QoS credits for each LPAR so that this partition is 300 * guaranteed access to NX resources. These credits are 301 * assigned to partitions via the HMC. 302 * Refer PAPR for more information. 303 * 304 * Allocate window with QoS credits if user requested. Otherwise 305 * default credits are used. 306 */ 307 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 308 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 309 else 310 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 311 312 cop_feat_caps = &caps->caps; 313 314 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 315 atomic_read(&cop_feat_caps->nr_total_credits)) { 316 pr_err("Credits are not available to allocate window\n"); 317 rc = -EINVAL; 318 goto out; 319 } 320 321 if (vas_id == -1) { 322 /* 323 * The user space is requesting to allocate a window on 324 * a VAS instance where the process is executing. 325 * On PowerVM, domain values are passed to the hypervisor 326 * to select VAS instance. Useful if the process is 327 * affinity to NUMA node. 328 * The hypervisor selects VAS instance if 329 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 330 * The h_allocate_vas_window hcall is defined to take a 331 * domain values as specified by h_home_node_associativity, 332 * So no unpacking needs to be done. 333 */ 334 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 335 VPHN_FLAG_VCPU, smp_processor_id()); 336 if (rc != H_SUCCESS) { 337 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 338 goto out; 339 } 340 } 341 342 txwin->pid = mfspr(SPRN_PID); 343 344 /* 345 * Allocate / Deallocate window hcalls and setup / free IRQs 346 * have to be protected with mutex. 347 * Open VAS window: Allocate window hcall and setup IRQ 348 * Close VAS window: Deallocate window hcall and free IRQ 349 * The hypervisor waits until all NX requests are 350 * completed before closing the window. So expects OS 351 * to handle NX faults, means IRQ can be freed only 352 * after the deallocate window hcall is returned. 353 * So once the window is closed with deallocate hcall before 354 * the IRQ is freed, it can be assigned to new allocate 355 * hcall with the same fault IRQ by the hypervisor. It can 356 * result in setup IRQ fail for the new window since the 357 * same fault IRQ is not freed by the OS before. 358 */ 359 mutex_lock(&vas_pseries_mutex); 360 if (migration_in_progress) 361 rc = -EBUSY; 362 else 363 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 364 cop_feat_caps->win_type); 365 mutex_unlock(&vas_pseries_mutex); 366 if (rc) 367 goto out; 368 369 /* 370 * Modify window and it is ready to use. 371 */ 372 rc = h_modify_vas_window(txwin); 373 if (!rc) 374 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 375 if (rc) 376 goto out_free; 377 378 txwin->win_type = cop_feat_caps->win_type; 379 mutex_lock(&vas_pseries_mutex); 380 /* 381 * Possible to lose the acquired credit with DLPAR core 382 * removal after the window is opened. So if there are any 383 * closed windows (means with lost credits), do not give new 384 * window to user space. New windows will be opened only 385 * after the existing windows are reopened when credits are 386 * available. 387 */ 388 if (!caps->nr_close_wins) { 389 list_add(&txwin->win_list, &caps->list); 390 caps->nr_open_windows++; 391 mutex_unlock(&vas_pseries_mutex); 392 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 393 return &txwin->vas_win; 394 } 395 mutex_unlock(&vas_pseries_mutex); 396 397 put_vas_user_win_ref(&txwin->vas_win.task_ref); 398 rc = -EBUSY; 399 pr_err("No credit is available to allocate window\n"); 400 401 out_free: 402 /* 403 * Window is not operational. Free IRQ before closing 404 * window so that do not have to hold mutex. 405 */ 406 free_irq_setup(txwin); 407 h_deallocate_vas_window(txwin->vas_win.winid); 408 out: 409 atomic_dec(&cop_feat_caps->nr_used_credits); 410 kfree(txwin); 411 return ERR_PTR(rc); 412 } 413 414 static u64 vas_paste_address(struct vas_window *vwin) 415 { 416 struct pseries_vas_window *win; 417 418 win = container_of(vwin, struct pseries_vas_window, vas_win); 419 return win->win_addr; 420 } 421 422 static int deallocate_free_window(struct pseries_vas_window *win) 423 { 424 int rc = 0; 425 426 /* 427 * The hypervisor waits for all requests including faults 428 * are processed before closing the window - Means all 429 * credits have to be returned. In the case of fault 430 * request, a credit is returned after OS issues 431 * H_GET_NX_FAULT hcall. 432 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 433 * hcall. 434 */ 435 rc = h_deallocate_vas_window(win->vas_win.winid); 436 if (!rc) 437 free_irq_setup(win); 438 439 return rc; 440 } 441 442 static int vas_deallocate_window(struct vas_window *vwin) 443 { 444 struct pseries_vas_window *win; 445 struct vas_cop_feat_caps *caps; 446 int rc = 0; 447 448 if (!vwin) 449 return -EINVAL; 450 451 win = container_of(vwin, struct pseries_vas_window, vas_win); 452 453 /* Should not happen */ 454 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 455 pr_err("Window (%u): Invalid window type %u\n", 456 vwin->winid, win->win_type); 457 return -EINVAL; 458 } 459 460 caps = &vascaps[win->win_type].caps; 461 mutex_lock(&vas_pseries_mutex); 462 /* 463 * VAS window is already closed in the hypervisor when 464 * lost the credit or with migration. So just remove the entry 465 * from the list, remove task references and free vas_window 466 * struct. 467 */ 468 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 469 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 470 rc = deallocate_free_window(win); 471 if (rc) { 472 mutex_unlock(&vas_pseries_mutex); 473 return rc; 474 } 475 } else 476 vascaps[win->win_type].nr_close_wins--; 477 478 list_del(&win->win_list); 479 atomic_dec(&caps->nr_used_credits); 480 vascaps[win->win_type].nr_open_windows--; 481 mutex_unlock(&vas_pseries_mutex); 482 483 put_vas_user_win_ref(&vwin->task_ref); 484 mm_context_remove_vas_window(vwin->task_ref.mm); 485 486 kfree(win); 487 return 0; 488 } 489 490 static const struct vas_user_win_ops vops_pseries = { 491 .open_win = vas_allocate_window, /* Open and configure window */ 492 .paste_addr = vas_paste_address, /* To do copy/paste */ 493 .close_win = vas_deallocate_window, /* Close window */ 494 }; 495 496 /* 497 * Supporting only nx-gzip coprocessor type now, but this API code 498 * extended to other coprocessor types later. 499 */ 500 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 501 const char *name) 502 { 503 int rc; 504 505 if (!copypaste_feat) 506 return -ENOTSUPP; 507 508 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 509 510 return rc; 511 } 512 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 513 514 void vas_unregister_api_pseries(void) 515 { 516 vas_unregister_coproc_api(); 517 } 518 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 519 520 /* 521 * Get the specific capabilities based on the feature type. 522 * Right now supports GZIP default and GZIP QoS capabilities. 523 */ 524 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 525 struct hv_vas_cop_feat_caps *hv_caps) 526 { 527 struct vas_cop_feat_caps *caps; 528 struct vas_caps *vcaps; 529 int rc = 0; 530 531 vcaps = &vascaps[type]; 532 memset(vcaps, 0, sizeof(*vcaps)); 533 INIT_LIST_HEAD(&vcaps->list); 534 535 vcaps->feat = feat; 536 caps = &vcaps->caps; 537 538 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 539 (u64)virt_to_phys(hv_caps)); 540 if (rc) 541 return rc; 542 543 caps->user_mode = hv_caps->user_mode; 544 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 545 pr_err("User space COPY/PASTE is not supported\n"); 546 return -ENOTSUPP; 547 } 548 549 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 550 caps->win_type = hv_caps->win_type; 551 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 552 pr_err("Unsupported window type %u\n", caps->win_type); 553 return -EINVAL; 554 } 555 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 556 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 557 atomic_set(&caps->nr_total_credits, 558 be16_to_cpu(hv_caps->target_lpar_creds)); 559 if (feat == VAS_GZIP_DEF_FEAT) { 560 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 561 562 if (caps->max_win_creds < DEF_WIN_CREDS) { 563 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 564 DEF_WIN_CREDS, caps->max_win_creds); 565 return -EINVAL; 566 } 567 } 568 569 rc = sysfs_add_vas_caps(caps); 570 if (rc) 571 return rc; 572 573 copypaste_feat = true; 574 575 return 0; 576 } 577 578 /* 579 * VAS windows can be closed due to lost credits when the core is 580 * removed. So reopen them if credits are available due to DLPAR 581 * core add and set the window active status. When NX sees the page 582 * fault on the unmapped paste address, the kernel handles the fault 583 * by setting the remapping to new paste address if the window is 584 * active. 585 */ 586 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 587 bool migrate) 588 { 589 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 590 struct vas_cop_feat_caps *caps = &vcaps->caps; 591 struct pseries_vas_window *win = NULL, *tmp; 592 int rc, mv_ents = 0; 593 int flag; 594 595 /* 596 * Nothing to do if there are no closed windows. 597 */ 598 if (!vcaps->nr_close_wins) 599 return 0; 600 601 /* 602 * For the core removal, the hypervisor reduces the credits 603 * assigned to the LPAR and the kernel closes VAS windows 604 * in the hypervisor depends on reduced credits. The kernel 605 * uses LIFO (the last windows that are opened will be closed 606 * first) and expects to open in the same order when credits 607 * are available. 608 * For example, 40 windows are closed when the LPAR lost 2 cores 609 * (dedicated). If 1 core is added, this LPAR can have 20 more 610 * credits. It means the kernel can reopen 20 windows. So move 611 * 20 entries in the VAS windows lost and reopen next 20 windows. 612 * For partition migration, reopen all windows that are closed 613 * during resume. 614 */ 615 if ((vcaps->nr_close_wins > creds) && !migrate) 616 mv_ents = vcaps->nr_close_wins - creds; 617 618 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 619 if (!mv_ents) 620 break; 621 622 mv_ents--; 623 } 624 625 /* 626 * Open windows if they are closed only with migration or 627 * DLPAR (lost credit) before. 628 */ 629 if (migrate) 630 flag = VAS_WIN_MIGRATE_CLOSE; 631 else 632 flag = VAS_WIN_NO_CRED_CLOSE; 633 634 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 635 /* 636 * This window is closed with DLPAR and migration events. 637 * So reopen the window with the last event. 638 * The user space is not suspended with the current 639 * migration notifier. So the user space can issue DLPAR 640 * CPU hotplug while migration in progress. In this case 641 * this window will be opened with the last event. 642 */ 643 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 644 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 645 win->vas_win.status &= ~flag; 646 continue; 647 } 648 649 /* 650 * Nothing to do on this window if it is not closed 651 * with this flag 652 */ 653 if (!(win->vas_win.status & flag)) 654 continue; 655 656 rc = allocate_setup_window(win, (u64 *)&domain[0], 657 caps->win_type); 658 if (rc) 659 return rc; 660 661 rc = h_modify_vas_window(win); 662 if (rc) 663 goto out; 664 665 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 666 /* 667 * Set window status to active 668 */ 669 win->vas_win.status &= ~flag; 670 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 671 win->win_type = caps->win_type; 672 if (!--vcaps->nr_close_wins) 673 break; 674 } 675 676 return 0; 677 out: 678 /* 679 * Window modify HCALL failed. So close the window to the 680 * hypervisor and return. 681 */ 682 free_irq_setup(win); 683 h_deallocate_vas_window(win->vas_win.winid); 684 return rc; 685 } 686 687 /* 688 * The hypervisor reduces the available credits if the LPAR lost core. It 689 * means the excessive windows should not be active and the user space 690 * should not be using these windows to send compression requests to NX. 691 * So the kernel closes the excessive windows and unmap the paste address 692 * such that the user space receives paste instruction failure. Then up to 693 * the user space to fall back to SW compression and manage with the 694 * existing windows. 695 */ 696 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 697 bool migrate) 698 { 699 struct pseries_vas_window *win, *tmp; 700 struct vas_user_win_ref *task_ref; 701 struct vm_area_struct *vma; 702 int rc = 0, flag; 703 704 if (migrate) 705 flag = VAS_WIN_MIGRATE_CLOSE; 706 else 707 flag = VAS_WIN_NO_CRED_CLOSE; 708 709 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 710 /* 711 * This window is already closed due to lost credit 712 * or for migration before. Go for next window. 713 * For migration, nothing to do since this window 714 * closed for DLPAR and will be reopened even on 715 * the destination system with other DLPAR operation. 716 */ 717 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 718 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 719 win->vas_win.status |= flag; 720 continue; 721 } 722 723 task_ref = &win->vas_win.task_ref; 724 mutex_lock(&task_ref->mmap_mutex); 725 vma = task_ref->vma; 726 /* 727 * Number of available credits are reduced, So select 728 * and close windows. 729 */ 730 win->vas_win.status |= flag; 731 732 mmap_write_lock(task_ref->mm); 733 /* 734 * vma is set in the original mapping. But this mapping 735 * is done with mmap() after the window is opened with ioctl. 736 * so we may not see the original mapping if the core remove 737 * is done before the original mmap() and after the ioctl. 738 */ 739 if (vma) 740 zap_page_range(vma, vma->vm_start, 741 vma->vm_end - vma->vm_start); 742 743 mmap_write_unlock(task_ref->mm); 744 mutex_unlock(&task_ref->mmap_mutex); 745 /* 746 * Close VAS window in the hypervisor, but do not 747 * free vas_window struct since it may be reused 748 * when the credit is available later (DLPAR with 749 * adding cores). This struct will be used 750 * later when the process issued with close(FD). 751 */ 752 rc = deallocate_free_window(win); 753 /* 754 * This failure is from the hypervisor. 755 * No way to stop migration for these failures. 756 * So ignore error and continue closing other windows. 757 */ 758 if (rc && !migrate) 759 return rc; 760 761 vcap->nr_close_wins++; 762 763 /* 764 * For migration, do not depend on lpar_creds in case if 765 * mismatch with the hypervisor value (should not happen). 766 * So close all active windows in the list and will be 767 * reopened windows based on the new lpar_creds on the 768 * destination system during resume. 769 */ 770 if (!migrate && !--excess_creds) 771 break; 772 } 773 774 return 0; 775 } 776 777 /* 778 * Get new VAS capabilities when the core add/removal configuration 779 * changes. Reconfig window configurations based on the credits 780 * availability from this new capabilities. 781 */ 782 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 783 { 784 struct vas_cop_feat_caps *caps; 785 int old_nr_creds; 786 struct vas_caps *vcaps; 787 int rc = 0, nr_active_wins; 788 789 if (type >= VAS_MAX_FEAT_TYPE) { 790 pr_err("Invalid credit type %d\n", type); 791 return -EINVAL; 792 } 793 794 vcaps = &vascaps[type]; 795 caps = &vcaps->caps; 796 797 mutex_lock(&vas_pseries_mutex); 798 799 old_nr_creds = atomic_read(&caps->nr_total_credits); 800 801 atomic_set(&caps->nr_total_credits, new_nr_creds); 802 /* 803 * The total number of available credits may be decreased or 804 * increased with DLPAR operation. Means some windows have to be 805 * closed / reopened. Hold the vas_pseries_mutex so that the 806 * the user space can not open new windows. 807 */ 808 if (old_nr_creds < new_nr_creds) { 809 /* 810 * If the existing target credits is less than the new 811 * target, reopen windows if they are closed due to 812 * the previous DLPAR (core removal). 813 */ 814 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 815 false); 816 } else { 817 /* 818 * # active windows is more than new LPAR available 819 * credits. So close the excessive windows. 820 * On pseries, each window will have 1 credit. 821 */ 822 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 823 if (nr_active_wins > new_nr_creds) 824 rc = reconfig_close_windows(vcaps, 825 nr_active_wins - new_nr_creds, 826 false); 827 } 828 829 mutex_unlock(&vas_pseries_mutex); 830 return rc; 831 } 832 /* 833 * Total number of default credits available (target_credits) 834 * in LPAR depends on number of cores configured. It varies based on 835 * whether processors are in shared mode or dedicated mode. 836 * Get the notifier when CPU configuration is changed with DLPAR 837 * operation so that get the new target_credits (vas default capabilities) 838 * and then update the existing windows usage if needed. 839 */ 840 static int pseries_vas_notifier(struct notifier_block *nb, 841 unsigned long action, void *data) 842 { 843 struct of_reconfig_data *rd = data; 844 struct device_node *dn = rd->dn; 845 const __be32 *intserv = NULL; 846 int new_nr_creds, len, rc = 0; 847 848 if ((action == OF_RECONFIG_ATTACH_NODE) || 849 (action == OF_RECONFIG_DETACH_NODE)) 850 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 851 &len); 852 /* 853 * Processor config is not changed 854 */ 855 if (!intserv) 856 return NOTIFY_OK; 857 858 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 859 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 860 (u64)virt_to_phys(&hv_cop_caps)); 861 if (!rc) { 862 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 863 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, 864 new_nr_creds); 865 } 866 867 if (rc) 868 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 869 870 return rc; 871 } 872 873 static struct notifier_block pseries_vas_nb = { 874 .notifier_call = pseries_vas_notifier, 875 }; 876 877 /* 878 * For LPM, all windows have to be closed on the source partition 879 * before migration and reopen them on the destination partition 880 * after migration. So closing windows during suspend and 881 * reopen them during resume. 882 */ 883 int vas_migration_handler(int action) 884 { 885 struct vas_cop_feat_caps *caps; 886 int old_nr_creds, new_nr_creds = 0; 887 struct vas_caps *vcaps; 888 int i, rc = 0; 889 890 /* 891 * NX-GZIP is not enabled. Nothing to do for migration. 892 */ 893 if (!copypaste_feat) 894 return rc; 895 896 mutex_lock(&vas_pseries_mutex); 897 898 if (action == VAS_SUSPEND) 899 migration_in_progress = true; 900 else 901 migration_in_progress = false; 902 903 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 904 vcaps = &vascaps[i]; 905 caps = &vcaps->caps; 906 old_nr_creds = atomic_read(&caps->nr_total_credits); 907 908 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 909 vcaps->feat, 910 (u64)virt_to_phys(&hv_cop_caps)); 911 if (!rc) { 912 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 913 /* 914 * Should not happen. But incase print messages, close 915 * all windows in the list during suspend and reopen 916 * windows based on new lpar_creds on the destination 917 * system. 918 */ 919 if (old_nr_creds != new_nr_creds) { 920 pr_err("Target credits mismatch with the hypervisor\n"); 921 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 922 action, old_nr_creds, new_nr_creds); 923 pr_err("Used creds: %d, Active creds: %d\n", 924 atomic_read(&caps->nr_used_credits), 925 vcaps->nr_open_windows - vcaps->nr_close_wins); 926 } 927 } else { 928 pr_err("state(%d): Get VAS capabilities failed with %d\n", 929 action, rc); 930 /* 931 * We can not stop migration with the current lpm 932 * implementation. So continue closing all windows in 933 * the list (during suspend) and return without 934 * opening windows (during resume) if VAS capabilities 935 * HCALL failed. 936 */ 937 if (action == VAS_RESUME) 938 goto out; 939 } 940 941 switch (action) { 942 case VAS_SUSPEND: 943 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 944 true); 945 break; 946 case VAS_RESUME: 947 atomic_set(&caps->nr_total_credits, new_nr_creds); 948 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 949 break; 950 default: 951 /* should not happen */ 952 pr_err("Invalid migration action %d\n", action); 953 rc = -EINVAL; 954 goto out; 955 } 956 957 /* 958 * Ignore errors during suspend and return for resume. 959 */ 960 if (rc && (action == VAS_RESUME)) 961 goto out; 962 } 963 964 out: 965 mutex_unlock(&vas_pseries_mutex); 966 return rc; 967 } 968 969 static int __init pseries_vas_init(void) 970 { 971 struct hv_vas_all_caps *hv_caps; 972 int rc = 0; 973 974 /* 975 * Linux supports user space COPY/PASTE only with Radix 976 */ 977 if (!radix_enabled()) { 978 pr_err("API is supported only with radix page tables\n"); 979 return -ENOTSUPP; 980 } 981 982 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 983 if (!hv_caps) 984 return -ENOMEM; 985 /* 986 * Get VAS overall capabilities by passing 0 to feature type. 987 */ 988 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 989 (u64)virt_to_phys(hv_caps)); 990 if (rc) 991 goto out; 992 993 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 994 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 995 996 sysfs_pseries_vas_init(&caps_all); 997 998 /* 999 * QOS capabilities available 1000 */ 1001 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1002 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1003 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1004 1005 if (rc) 1006 goto out; 1007 } 1008 /* 1009 * Default capabilities available 1010 */ 1011 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1012 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1013 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1014 1015 if (!rc && copypaste_feat) { 1016 if (firmware_has_feature(FW_FEATURE_LPAR)) 1017 of_reconfig_notifier_register(&pseries_vas_nb); 1018 1019 pr_info("GZIP feature is available\n"); 1020 } else { 1021 /* 1022 * Should not happen, but only when get default 1023 * capabilities HCALL failed. So disable copy paste 1024 * feature. 1025 */ 1026 copypaste_feat = false; 1027 } 1028 1029 out: 1030 kfree(hv_caps); 1031 return rc; 1032 } 1033 machine_device_initcall(pseries, pseries_vas_init); 1034