1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vphn.h> 21 #include <asm/vas.h> 22 #include "vas.h" 23 24 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 25 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 26 /* The hypervisor allows one credit per window right now */ 27 #define DEF_WIN_CREDS 1 28 29 static struct vas_all_caps caps_all; 30 static bool copypaste_feat; 31 static struct hv_vas_cop_feat_caps hv_cop_caps; 32 33 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 34 static DEFINE_MUTEX(vas_pseries_mutex); 35 static bool migration_in_progress; 36 37 static long hcall_return_busy_check(long rc) 38 { 39 /* Check if we are stalled for some time */ 40 if (H_IS_LONG_BUSY(rc)) { 41 msleep(get_longbusy_msecs(rc)); 42 rc = H_BUSY; 43 } else if (rc == H_BUSY) { 44 cond_resched(); 45 } 46 47 return rc; 48 } 49 50 /* 51 * Allocate VAS window hcall 52 */ 53 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 54 u8 wintype, u16 credits) 55 { 56 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 57 long rc; 58 59 do { 60 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 61 credits, domain[0], domain[1], domain[2], 62 domain[3], domain[4], domain[5]); 63 64 rc = hcall_return_busy_check(rc); 65 } while (rc == H_BUSY); 66 67 if (rc == H_SUCCESS) { 68 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 69 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 70 return -ENOTSUPP; 71 } 72 win->vas_win.winid = retbuf[0]; 73 win->win_addr = retbuf[1]; 74 win->complete_irq = retbuf[2]; 75 win->fault_irq = retbuf[3]; 76 return 0; 77 } 78 79 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 80 rc, wintype, credits); 81 82 return -EIO; 83 } 84 85 /* 86 * Deallocate VAS window hcall. 87 */ 88 static int h_deallocate_vas_window(u64 winid) 89 { 90 long rc; 91 92 do { 93 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 94 95 rc = hcall_return_busy_check(rc); 96 } while (rc == H_BUSY); 97 98 if (rc == H_SUCCESS) 99 return 0; 100 101 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 102 rc, winid); 103 return -EIO; 104 } 105 106 /* 107 * Modify VAS window. 108 * After the window is opened with allocate window hcall, configure it 109 * with flags and LPAR PID before using. 110 */ 111 static int h_modify_vas_window(struct pseries_vas_window *win) 112 { 113 long rc; 114 115 /* 116 * AMR value is not supported in Linux VAS implementation. 117 * The hypervisor ignores it if 0 is passed. 118 */ 119 do { 120 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 121 win->vas_win.winid, win->pid, 0, 122 VAS_MOD_WIN_FLAGS, 0); 123 124 rc = hcall_return_busy_check(rc); 125 } while (rc == H_BUSY); 126 127 if (rc == H_SUCCESS) 128 return 0; 129 130 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 131 rc, win->vas_win.winid, win->pid); 132 return -EIO; 133 } 134 135 /* 136 * This hcall is used to determine the capabilities from the hypervisor. 137 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 138 * @query_type: If 0 is passed, the hypervisor returns the overall 139 * capabilities which provides all feature(s) that are 140 * available. Then query the hypervisor to get the 141 * corresponding capabilities for the specific feature. 142 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 143 * and VAS GZIP Default capabilities. 144 * H_QUERY_NX_CAPABILITIES provides NX GZIP 145 * capabilities. 146 * @result: Return buffer to save capabilities. 147 */ 148 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 149 { 150 long rc; 151 152 rc = plpar_hcall_norets(hcall, query_type, result); 153 154 if (rc == H_SUCCESS) 155 return 0; 156 157 /* H_FUNCTION means HV does not support VAS so don't print an error */ 158 if (rc != H_FUNCTION) { 159 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 160 (hcall == H_QUERY_VAS_CAPABILITIES) ? 161 "H_QUERY_VAS_CAPABILITIES" : 162 "H_QUERY_NX_CAPABILITIES", 163 rc, query_type, result); 164 } 165 166 return -EIO; 167 } 168 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 169 170 /* 171 * hcall to get fault CRB from the hypervisor. 172 */ 173 static int h_get_nx_fault(u32 winid, u64 buffer) 174 { 175 long rc; 176 177 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 178 179 if (rc == H_SUCCESS) 180 return 0; 181 182 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 183 rc, winid, buffer); 184 return -EIO; 185 186 } 187 188 /* 189 * Handle the fault interrupt. 190 * When the fault interrupt is received for each window, query the 191 * hypervisor to get the fault CRB on the specific fault. Then 192 * process the CRB by updating CSB or send signal if the user space 193 * CSB is invalid. 194 * Note: The hypervisor forwards an interrupt for each fault request. 195 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 196 */ 197 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 198 { 199 struct pseries_vas_window *txwin = data; 200 struct coprocessor_request_block crb; 201 struct vas_user_win_ref *tsk_ref; 202 int rc; 203 204 while (atomic_read(&txwin->pending_faults)) { 205 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 206 if (!rc) { 207 tsk_ref = &txwin->vas_win.task_ref; 208 vas_dump_crb(&crb); 209 vas_update_csb(&crb, tsk_ref); 210 } 211 atomic_dec(&txwin->pending_faults); 212 } 213 214 return IRQ_HANDLED; 215 } 216 217 /* 218 * irq_default_primary_handler() can be used only with IRQF_ONESHOT 219 * which disables IRQ before executing the thread handler and enables 220 * it after. But this disabling interrupt sets the VAS IRQ OFF 221 * state in the hypervisor. If the NX generates fault interrupt 222 * during this window, the hypervisor will not deliver this 223 * interrupt to the LPAR. So use VAS specific IRQ handler instead 224 * of calling the default primary handler. 225 */ 226 static irqreturn_t pseries_vas_irq_handler(int irq, void *data) 227 { 228 struct pseries_vas_window *txwin = data; 229 230 /* 231 * The thread hanlder will process this interrupt if it is 232 * already running. 233 */ 234 atomic_inc(&txwin->pending_faults); 235 236 return IRQ_WAKE_THREAD; 237 } 238 239 /* 240 * Allocate window and setup IRQ mapping. 241 */ 242 static int allocate_setup_window(struct pseries_vas_window *txwin, 243 u64 *domain, u8 wintype) 244 { 245 int rc; 246 247 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 248 if (rc) 249 return rc; 250 /* 251 * On PowerVM, the hypervisor setup and forwards the fault 252 * interrupt per window. So the IRQ setup and fault handling 253 * will be done for each open window separately. 254 */ 255 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 256 if (!txwin->fault_virq) { 257 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 258 rc = -EINVAL; 259 goto out_win; 260 } 261 262 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 263 txwin->vas_win.winid); 264 if (!txwin->name) { 265 rc = -ENOMEM; 266 goto out_irq; 267 } 268 269 rc = request_threaded_irq(txwin->fault_virq, 270 pseries_vas_irq_handler, 271 pseries_vas_fault_thread_fn, 0, 272 txwin->name, txwin); 273 if (rc) { 274 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 275 txwin->vas_win.winid, txwin->fault_virq, rc); 276 goto out_free; 277 } 278 279 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 280 281 return 0; 282 out_free: 283 kfree(txwin->name); 284 out_irq: 285 irq_dispose_mapping(txwin->fault_virq); 286 out_win: 287 h_deallocate_vas_window(txwin->vas_win.winid); 288 return rc; 289 } 290 291 static inline void free_irq_setup(struct pseries_vas_window *txwin) 292 { 293 free_irq(txwin->fault_virq, txwin); 294 kfree(txwin->name); 295 irq_dispose_mapping(txwin->fault_virq); 296 } 297 298 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 299 enum vas_cop_type cop_type) 300 { 301 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 302 struct vas_cop_feat_caps *cop_feat_caps; 303 struct vas_caps *caps; 304 struct pseries_vas_window *txwin; 305 int rc; 306 307 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 308 if (!txwin) 309 return ERR_PTR(-ENOMEM); 310 311 /* 312 * A VAS window can have many credits which means that many 313 * requests can be issued simultaneously. But the hypervisor 314 * restricts one credit per window. 315 * The hypervisor introduces 2 different types of credits: 316 * Default credit type (Uses normal priority FIFO): 317 * A limited number of credits are assigned to partitions 318 * based on processor entitlement. But these credits may be 319 * over-committed on a system depends on whether the CPUs 320 * are in shared or dedicated modes - that is, more requests 321 * may be issued across the system than NX can service at 322 * once which can result in paste command failure (RMA_busy). 323 * Then the process has to resend requests or fall-back to 324 * SW compression. 325 * Quality of Service (QoS) credit type (Uses high priority FIFO): 326 * To avoid NX HW contention, the system admins can assign 327 * QoS credits for each LPAR so that this partition is 328 * guaranteed access to NX resources. These credits are 329 * assigned to partitions via the HMC. 330 * Refer PAPR for more information. 331 * 332 * Allocate window with QoS credits if user requested. Otherwise 333 * default credits are used. 334 */ 335 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 336 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 337 else 338 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 339 340 cop_feat_caps = &caps->caps; 341 342 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 343 atomic_read(&cop_feat_caps->nr_total_credits)) { 344 pr_err_ratelimited("Credits are not available to allocate window\n"); 345 rc = -EINVAL; 346 goto out; 347 } 348 349 if (vas_id == -1) { 350 /* 351 * The user space is requesting to allocate a window on 352 * a VAS instance where the process is executing. 353 * On PowerVM, domain values are passed to the hypervisor 354 * to select VAS instance. Useful if the process is 355 * affinity to NUMA node. 356 * The hypervisor selects VAS instance if 357 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 358 * The h_allocate_vas_window hcall is defined to take a 359 * domain values as specified by h_home_node_associativity, 360 * So no unpacking needs to be done. 361 */ 362 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 363 VPHN_FLAG_VCPU, hard_smp_processor_id()); 364 if (rc != H_SUCCESS) { 365 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 366 goto out; 367 } 368 } 369 370 txwin->pid = mfspr(SPRN_PID); 371 372 /* 373 * Allocate / Deallocate window hcalls and setup / free IRQs 374 * have to be protected with mutex. 375 * Open VAS window: Allocate window hcall and setup IRQ 376 * Close VAS window: Deallocate window hcall and free IRQ 377 * The hypervisor waits until all NX requests are 378 * completed before closing the window. So expects OS 379 * to handle NX faults, means IRQ can be freed only 380 * after the deallocate window hcall is returned. 381 * So once the window is closed with deallocate hcall before 382 * the IRQ is freed, it can be assigned to new allocate 383 * hcall with the same fault IRQ by the hypervisor. It can 384 * result in setup IRQ fail for the new window since the 385 * same fault IRQ is not freed by the OS before. 386 */ 387 mutex_lock(&vas_pseries_mutex); 388 if (migration_in_progress) 389 rc = -EBUSY; 390 else 391 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 392 cop_feat_caps->win_type); 393 mutex_unlock(&vas_pseries_mutex); 394 if (rc) 395 goto out; 396 397 /* 398 * Modify window and it is ready to use. 399 */ 400 rc = h_modify_vas_window(txwin); 401 if (!rc) 402 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 403 if (rc) 404 goto out_free; 405 406 txwin->win_type = cop_feat_caps->win_type; 407 mutex_lock(&vas_pseries_mutex); 408 /* 409 * Possible to lose the acquired credit with DLPAR core 410 * removal after the window is opened. So if there are any 411 * closed windows (means with lost credits), do not give new 412 * window to user space. New windows will be opened only 413 * after the existing windows are reopened when credits are 414 * available. 415 */ 416 if (!caps->nr_close_wins) { 417 list_add(&txwin->win_list, &caps->list); 418 caps->nr_open_windows++; 419 mutex_unlock(&vas_pseries_mutex); 420 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 421 return &txwin->vas_win; 422 } 423 mutex_unlock(&vas_pseries_mutex); 424 425 put_vas_user_win_ref(&txwin->vas_win.task_ref); 426 rc = -EBUSY; 427 pr_err_ratelimited("No credit is available to allocate window\n"); 428 429 out_free: 430 /* 431 * Window is not operational. Free IRQ before closing 432 * window so that do not have to hold mutex. 433 */ 434 free_irq_setup(txwin); 435 h_deallocate_vas_window(txwin->vas_win.winid); 436 out: 437 atomic_dec(&cop_feat_caps->nr_used_credits); 438 kfree(txwin); 439 return ERR_PTR(rc); 440 } 441 442 static u64 vas_paste_address(struct vas_window *vwin) 443 { 444 struct pseries_vas_window *win; 445 446 win = container_of(vwin, struct pseries_vas_window, vas_win); 447 return win->win_addr; 448 } 449 450 static int deallocate_free_window(struct pseries_vas_window *win) 451 { 452 int rc = 0; 453 454 /* 455 * The hypervisor waits for all requests including faults 456 * are processed before closing the window - Means all 457 * credits have to be returned. In the case of fault 458 * request, a credit is returned after OS issues 459 * H_GET_NX_FAULT hcall. 460 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 461 * hcall. 462 */ 463 rc = h_deallocate_vas_window(win->vas_win.winid); 464 if (!rc) 465 free_irq_setup(win); 466 467 return rc; 468 } 469 470 static int vas_deallocate_window(struct vas_window *vwin) 471 { 472 struct pseries_vas_window *win; 473 struct vas_cop_feat_caps *caps; 474 int rc = 0; 475 476 if (!vwin) 477 return -EINVAL; 478 479 win = container_of(vwin, struct pseries_vas_window, vas_win); 480 481 /* Should not happen */ 482 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 483 pr_err("Window (%u): Invalid window type %u\n", 484 vwin->winid, win->win_type); 485 return -EINVAL; 486 } 487 488 caps = &vascaps[win->win_type].caps; 489 mutex_lock(&vas_pseries_mutex); 490 /* 491 * VAS window is already closed in the hypervisor when 492 * lost the credit or with migration. So just remove the entry 493 * from the list, remove task references and free vas_window 494 * struct. 495 */ 496 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 497 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 498 rc = deallocate_free_window(win); 499 if (rc) { 500 mutex_unlock(&vas_pseries_mutex); 501 return rc; 502 } 503 } else 504 vascaps[win->win_type].nr_close_wins--; 505 506 list_del(&win->win_list); 507 atomic_dec(&caps->nr_used_credits); 508 vascaps[win->win_type].nr_open_windows--; 509 mutex_unlock(&vas_pseries_mutex); 510 511 mm_context_remove_vas_window(vwin->task_ref.mm); 512 put_vas_user_win_ref(&vwin->task_ref); 513 514 kfree(win); 515 return 0; 516 } 517 518 static const struct vas_user_win_ops vops_pseries = { 519 .open_win = vas_allocate_window, /* Open and configure window */ 520 .paste_addr = vas_paste_address, /* To do copy/paste */ 521 .close_win = vas_deallocate_window, /* Close window */ 522 }; 523 524 /* 525 * Supporting only nx-gzip coprocessor type now, but this API code 526 * extended to other coprocessor types later. 527 */ 528 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 529 const char *name) 530 { 531 if (!copypaste_feat) 532 return -ENOTSUPP; 533 534 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 535 } 536 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 537 538 void vas_unregister_api_pseries(void) 539 { 540 vas_unregister_coproc_api(); 541 } 542 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 543 544 /* 545 * Get the specific capabilities based on the feature type. 546 * Right now supports GZIP default and GZIP QoS capabilities. 547 */ 548 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 549 struct hv_vas_cop_feat_caps *hv_caps) 550 { 551 struct vas_cop_feat_caps *caps; 552 struct vas_caps *vcaps; 553 int rc = 0; 554 555 vcaps = &vascaps[type]; 556 memset(vcaps, 0, sizeof(*vcaps)); 557 INIT_LIST_HEAD(&vcaps->list); 558 559 vcaps->feat = feat; 560 caps = &vcaps->caps; 561 562 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 563 (u64)virt_to_phys(hv_caps)); 564 if (rc) 565 return rc; 566 567 caps->user_mode = hv_caps->user_mode; 568 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 569 pr_err("User space COPY/PASTE is not supported\n"); 570 return -ENOTSUPP; 571 } 572 573 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 574 caps->win_type = hv_caps->win_type; 575 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 576 pr_err("Unsupported window type %u\n", caps->win_type); 577 return -EINVAL; 578 } 579 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 580 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 581 atomic_set(&caps->nr_total_credits, 582 be16_to_cpu(hv_caps->target_lpar_creds)); 583 if (feat == VAS_GZIP_DEF_FEAT) { 584 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 585 586 if (caps->max_win_creds < DEF_WIN_CREDS) { 587 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 588 DEF_WIN_CREDS, caps->max_win_creds); 589 return -EINVAL; 590 } 591 } 592 593 rc = sysfs_add_vas_caps(caps); 594 if (rc) 595 return rc; 596 597 copypaste_feat = true; 598 599 return 0; 600 } 601 602 /* 603 * VAS windows can be closed due to lost credits when the core is 604 * removed. So reopen them if credits are available due to DLPAR 605 * core add and set the window active status. When NX sees the page 606 * fault on the unmapped paste address, the kernel handles the fault 607 * by setting the remapping to new paste address if the window is 608 * active. 609 */ 610 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 611 bool migrate) 612 { 613 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 614 struct vas_cop_feat_caps *caps = &vcaps->caps; 615 struct pseries_vas_window *win = NULL, *tmp; 616 int rc, mv_ents = 0; 617 int flag; 618 619 /* 620 * Nothing to do if there are no closed windows. 621 */ 622 if (!vcaps->nr_close_wins) 623 return 0; 624 625 /* 626 * For the core removal, the hypervisor reduces the credits 627 * assigned to the LPAR and the kernel closes VAS windows 628 * in the hypervisor depends on reduced credits. The kernel 629 * uses LIFO (the last windows that are opened will be closed 630 * first) and expects to open in the same order when credits 631 * are available. 632 * For example, 40 windows are closed when the LPAR lost 2 cores 633 * (dedicated). If 1 core is added, this LPAR can have 20 more 634 * credits. It means the kernel can reopen 20 windows. So move 635 * 20 entries in the VAS windows lost and reopen next 20 windows. 636 * For partition migration, reopen all windows that are closed 637 * during resume. 638 */ 639 if ((vcaps->nr_close_wins > creds) && !migrate) 640 mv_ents = vcaps->nr_close_wins - creds; 641 642 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 643 if (!mv_ents) 644 break; 645 646 mv_ents--; 647 } 648 649 /* 650 * Open windows if they are closed only with migration or 651 * DLPAR (lost credit) before. 652 */ 653 if (migrate) 654 flag = VAS_WIN_MIGRATE_CLOSE; 655 else 656 flag = VAS_WIN_NO_CRED_CLOSE; 657 658 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 659 /* 660 * This window is closed with DLPAR and migration events. 661 * So reopen the window with the last event. 662 * The user space is not suspended with the current 663 * migration notifier. So the user space can issue DLPAR 664 * CPU hotplug while migration in progress. In this case 665 * this window will be opened with the last event. 666 */ 667 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 668 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 669 win->vas_win.status &= ~flag; 670 continue; 671 } 672 673 /* 674 * Nothing to do on this window if it is not closed 675 * with this flag 676 */ 677 if (!(win->vas_win.status & flag)) 678 continue; 679 680 rc = allocate_setup_window(win, (u64 *)&domain[0], 681 caps->win_type); 682 if (rc) 683 return rc; 684 685 rc = h_modify_vas_window(win); 686 if (rc) 687 goto out; 688 689 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 690 /* 691 * Set window status to active 692 */ 693 win->vas_win.status &= ~flag; 694 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 695 win->win_type = caps->win_type; 696 if (!--vcaps->nr_close_wins) 697 break; 698 } 699 700 return 0; 701 out: 702 /* 703 * Window modify HCALL failed. So close the window to the 704 * hypervisor and return. 705 */ 706 free_irq_setup(win); 707 h_deallocate_vas_window(win->vas_win.winid); 708 return rc; 709 } 710 711 /* 712 * The hypervisor reduces the available credits if the LPAR lost core. It 713 * means the excessive windows should not be active and the user space 714 * should not be using these windows to send compression requests to NX. 715 * So the kernel closes the excessive windows and unmap the paste address 716 * such that the user space receives paste instruction failure. Then up to 717 * the user space to fall back to SW compression and manage with the 718 * existing windows. 719 */ 720 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 721 bool migrate) 722 { 723 struct pseries_vas_window *win, *tmp; 724 struct vas_user_win_ref *task_ref; 725 struct vm_area_struct *vma; 726 int rc = 0, flag; 727 728 if (migrate) 729 flag = VAS_WIN_MIGRATE_CLOSE; 730 else 731 flag = VAS_WIN_NO_CRED_CLOSE; 732 733 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 734 /* 735 * This window is already closed due to lost credit 736 * or for migration before. Go for next window. 737 * For migration, nothing to do since this window 738 * closed for DLPAR and will be reopened even on 739 * the destination system with other DLPAR operation. 740 */ 741 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 742 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 743 win->vas_win.status |= flag; 744 continue; 745 } 746 747 task_ref = &win->vas_win.task_ref; 748 /* 749 * VAS mmap (coproc_mmap()) and its fault handler 750 * (vas_mmap_fault()) are called after holding mmap lock. 751 * So hold mmap mutex after mmap_lock to avoid deadlock. 752 */ 753 mmap_write_lock(task_ref->mm); 754 mutex_lock(&task_ref->mmap_mutex); 755 vma = task_ref->vma; 756 /* 757 * Number of available credits are reduced, So select 758 * and close windows. 759 */ 760 win->vas_win.status |= flag; 761 762 /* 763 * vma is set in the original mapping. But this mapping 764 * is done with mmap() after the window is opened with ioctl. 765 * so we may not see the original mapping if the core remove 766 * is done before the original mmap() and after the ioctl. 767 */ 768 if (vma) 769 zap_vma_pages(vma); 770 771 mutex_unlock(&task_ref->mmap_mutex); 772 mmap_write_unlock(task_ref->mm); 773 /* 774 * Close VAS window in the hypervisor, but do not 775 * free vas_window struct since it may be reused 776 * when the credit is available later (DLPAR with 777 * adding cores). This struct will be used 778 * later when the process issued with close(FD). 779 */ 780 rc = deallocate_free_window(win); 781 /* 782 * This failure is from the hypervisor. 783 * No way to stop migration for these failures. 784 * So ignore error and continue closing other windows. 785 */ 786 if (rc && !migrate) 787 return rc; 788 789 vcap->nr_close_wins++; 790 791 /* 792 * For migration, do not depend on lpar_creds in case if 793 * mismatch with the hypervisor value (should not happen). 794 * So close all active windows in the list and will be 795 * reopened windows based on the new lpar_creds on the 796 * destination system during resume. 797 */ 798 if (!migrate && !--excess_creds) 799 break; 800 } 801 802 return 0; 803 } 804 805 /* 806 * Get new VAS capabilities when the core add/removal configuration 807 * changes. Reconfig window configurations based on the credits 808 * availability from this new capabilities. 809 */ 810 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 811 { 812 struct vas_cop_feat_caps *caps; 813 int old_nr_creds; 814 struct vas_caps *vcaps; 815 int rc = 0, nr_active_wins; 816 817 if (type >= VAS_MAX_FEAT_TYPE) { 818 pr_err("Invalid credit type %d\n", type); 819 return -EINVAL; 820 } 821 822 vcaps = &vascaps[type]; 823 caps = &vcaps->caps; 824 825 mutex_lock(&vas_pseries_mutex); 826 827 old_nr_creds = atomic_read(&caps->nr_total_credits); 828 829 atomic_set(&caps->nr_total_credits, new_nr_creds); 830 /* 831 * The total number of available credits may be decreased or 832 * increased with DLPAR operation. Means some windows have to be 833 * closed / reopened. Hold the vas_pseries_mutex so that the 834 * user space can not open new windows. 835 */ 836 if (old_nr_creds < new_nr_creds) { 837 /* 838 * If the existing target credits is less than the new 839 * target, reopen windows if they are closed due to 840 * the previous DLPAR (core removal). 841 */ 842 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 843 false); 844 } else { 845 /* 846 * # active windows is more than new LPAR available 847 * credits. So close the excessive windows. 848 * On pseries, each window will have 1 credit. 849 */ 850 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 851 if (nr_active_wins > new_nr_creds) 852 rc = reconfig_close_windows(vcaps, 853 nr_active_wins - new_nr_creds, 854 false); 855 } 856 857 mutex_unlock(&vas_pseries_mutex); 858 return rc; 859 } 860 861 int pseries_vas_dlpar_cpu(void) 862 { 863 int new_nr_creds, rc; 864 865 /* 866 * NX-GZIP is not enabled. Nothing to do for DLPAR event 867 */ 868 if (!copypaste_feat) 869 return 0; 870 871 872 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 873 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 874 (u64)virt_to_phys(&hv_cop_caps)); 875 if (!rc) { 876 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 877 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds); 878 } 879 880 if (rc) 881 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 882 883 return rc; 884 } 885 886 /* 887 * Total number of default credits available (target_credits) 888 * in LPAR depends on number of cores configured. It varies based on 889 * whether processors are in shared mode or dedicated mode. 890 * Get the notifier when CPU configuration is changed with DLPAR 891 * operation so that get the new target_credits (vas default capabilities) 892 * and then update the existing windows usage if needed. 893 */ 894 static int pseries_vas_notifier(struct notifier_block *nb, 895 unsigned long action, void *data) 896 { 897 struct of_reconfig_data *rd = data; 898 struct device_node *dn = rd->dn; 899 const __be32 *intserv = NULL; 900 int len; 901 902 /* 903 * For shared CPU partition, the hypervisor assigns total credits 904 * based on entitled core capacity. So updating VAS windows will 905 * be called from lparcfg_write(). 906 */ 907 if (is_shared_processor()) 908 return NOTIFY_OK; 909 910 if ((action == OF_RECONFIG_ATTACH_NODE) || 911 (action == OF_RECONFIG_DETACH_NODE)) 912 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 913 &len); 914 /* 915 * Processor config is not changed 916 */ 917 if (!intserv) 918 return NOTIFY_OK; 919 920 return pseries_vas_dlpar_cpu(); 921 } 922 923 static struct notifier_block pseries_vas_nb = { 924 .notifier_call = pseries_vas_notifier, 925 }; 926 927 /* 928 * For LPM, all windows have to be closed on the source partition 929 * before migration and reopen them on the destination partition 930 * after migration. So closing windows during suspend and 931 * reopen them during resume. 932 */ 933 int vas_migration_handler(int action) 934 { 935 struct vas_cop_feat_caps *caps; 936 int old_nr_creds, new_nr_creds = 0; 937 struct vas_caps *vcaps; 938 int i, rc = 0; 939 940 /* 941 * NX-GZIP is not enabled. Nothing to do for migration. 942 */ 943 if (!copypaste_feat) 944 return rc; 945 946 mutex_lock(&vas_pseries_mutex); 947 948 if (action == VAS_SUSPEND) 949 migration_in_progress = true; 950 else 951 migration_in_progress = false; 952 953 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 954 vcaps = &vascaps[i]; 955 caps = &vcaps->caps; 956 old_nr_creds = atomic_read(&caps->nr_total_credits); 957 958 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 959 vcaps->feat, 960 (u64)virt_to_phys(&hv_cop_caps)); 961 if (!rc) { 962 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 963 /* 964 * Should not happen. But incase print messages, close 965 * all windows in the list during suspend and reopen 966 * windows based on new lpar_creds on the destination 967 * system. 968 */ 969 if (old_nr_creds != new_nr_creds) { 970 pr_err("Target credits mismatch with the hypervisor\n"); 971 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 972 action, old_nr_creds, new_nr_creds); 973 pr_err("Used creds: %d, Active creds: %d\n", 974 atomic_read(&caps->nr_used_credits), 975 vcaps->nr_open_windows - vcaps->nr_close_wins); 976 } 977 } else { 978 pr_err("state(%d): Get VAS capabilities failed with %d\n", 979 action, rc); 980 /* 981 * We can not stop migration with the current lpm 982 * implementation. So continue closing all windows in 983 * the list (during suspend) and return without 984 * opening windows (during resume) if VAS capabilities 985 * HCALL failed. 986 */ 987 if (action == VAS_RESUME) 988 goto out; 989 } 990 991 switch (action) { 992 case VAS_SUSPEND: 993 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 994 true); 995 break; 996 case VAS_RESUME: 997 atomic_set(&caps->nr_total_credits, new_nr_creds); 998 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 999 break; 1000 default: 1001 /* should not happen */ 1002 pr_err("Invalid migration action %d\n", action); 1003 rc = -EINVAL; 1004 goto out; 1005 } 1006 1007 /* 1008 * Ignore errors during suspend and return for resume. 1009 */ 1010 if (rc && (action == VAS_RESUME)) 1011 goto out; 1012 } 1013 1014 out: 1015 mutex_unlock(&vas_pseries_mutex); 1016 return rc; 1017 } 1018 1019 static int __init pseries_vas_init(void) 1020 { 1021 struct hv_vas_all_caps *hv_caps; 1022 int rc = 0; 1023 1024 /* 1025 * Linux supports user space COPY/PASTE only with Radix 1026 */ 1027 if (!radix_enabled()) { 1028 copypaste_feat = false; 1029 pr_err("API is supported only with radix page tables\n"); 1030 return -ENOTSUPP; 1031 } 1032 1033 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 1034 if (!hv_caps) 1035 return -ENOMEM; 1036 /* 1037 * Get VAS overall capabilities by passing 0 to feature type. 1038 */ 1039 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 1040 (u64)virt_to_phys(hv_caps)); 1041 if (rc) 1042 goto out; 1043 1044 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 1045 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 1046 1047 sysfs_pseries_vas_init(&caps_all); 1048 1049 /* 1050 * QOS capabilities available 1051 */ 1052 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1053 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1054 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1055 1056 if (rc) 1057 goto out; 1058 } 1059 /* 1060 * Default capabilities available 1061 */ 1062 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1063 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1064 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1065 1066 if (!rc && copypaste_feat) { 1067 if (firmware_has_feature(FW_FEATURE_LPAR)) 1068 of_reconfig_notifier_register(&pseries_vas_nb); 1069 1070 pr_info("GZIP feature is available\n"); 1071 } else { 1072 /* 1073 * Should not happen, but only when get default 1074 * capabilities HCALL failed. So disable copy paste 1075 * feature. 1076 */ 1077 copypaste_feat = false; 1078 } 1079 1080 out: 1081 kfree(hv_caps); 1082 return rc; 1083 } 1084 machine_device_initcall(pseries, pseries_vas_init); 1085