1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/firmware.h> 20 #include <asm/vas.h> 21 #include "vas.h" 22 23 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 24 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 25 /* The hypervisor allows one credit per window right now */ 26 #define DEF_WIN_CREDS 1 27 28 static struct vas_all_caps caps_all; 29 static bool copypaste_feat; 30 static struct hv_vas_cop_feat_caps hv_cop_caps; 31 32 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 33 static DEFINE_MUTEX(vas_pseries_mutex); 34 static bool migration_in_progress; 35 36 static long hcall_return_busy_check(long rc) 37 { 38 /* Check if we are stalled for some time */ 39 if (H_IS_LONG_BUSY(rc)) { 40 msleep(get_longbusy_msecs(rc)); 41 rc = H_BUSY; 42 } else if (rc == H_BUSY) { 43 cond_resched(); 44 } 45 46 return rc; 47 } 48 49 /* 50 * Allocate VAS window hcall 51 */ 52 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 53 u8 wintype, u16 credits) 54 { 55 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 56 long rc; 57 58 do { 59 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 60 credits, domain[0], domain[1], domain[2], 61 domain[3], domain[4], domain[5]); 62 63 rc = hcall_return_busy_check(rc); 64 } while (rc == H_BUSY); 65 66 if (rc == H_SUCCESS) { 67 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 68 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 69 return -ENOTSUPP; 70 } 71 win->vas_win.winid = retbuf[0]; 72 win->win_addr = retbuf[1]; 73 win->complete_irq = retbuf[2]; 74 win->fault_irq = retbuf[3]; 75 return 0; 76 } 77 78 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 79 rc, wintype, credits); 80 81 return -EIO; 82 } 83 84 /* 85 * Deallocate VAS window hcall. 86 */ 87 static int h_deallocate_vas_window(u64 winid) 88 { 89 long rc; 90 91 do { 92 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 93 94 rc = hcall_return_busy_check(rc); 95 } while (rc == H_BUSY); 96 97 if (rc == H_SUCCESS) 98 return 0; 99 100 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 101 rc, winid); 102 return -EIO; 103 } 104 105 /* 106 * Modify VAS window. 107 * After the window is opened with allocate window hcall, configure it 108 * with flags and LPAR PID before using. 109 */ 110 static int h_modify_vas_window(struct pseries_vas_window *win) 111 { 112 long rc; 113 114 /* 115 * AMR value is not supported in Linux VAS implementation. 116 * The hypervisor ignores it if 0 is passed. 117 */ 118 do { 119 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 120 win->vas_win.winid, win->pid, 0, 121 VAS_MOD_WIN_FLAGS, 0); 122 123 rc = hcall_return_busy_check(rc); 124 } while (rc == H_BUSY); 125 126 if (rc == H_SUCCESS) 127 return 0; 128 129 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 130 rc, win->vas_win.winid, win->pid); 131 return -EIO; 132 } 133 134 /* 135 * This hcall is used to determine the capabilities from the hypervisor. 136 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 137 * @query_type: If 0 is passed, the hypervisor returns the overall 138 * capabilities which provides all feature(s) that are 139 * available. Then query the hypervisor to get the 140 * corresponding capabilities for the specific feature. 141 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 142 * and VAS GZIP Default capabilities. 143 * H_QUERY_NX_CAPABILITIES provides NX GZIP 144 * capabilities. 145 * @result: Return buffer to save capabilities. 146 */ 147 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 148 { 149 long rc; 150 151 rc = plpar_hcall_norets(hcall, query_type, result); 152 153 if (rc == H_SUCCESS) 154 return 0; 155 156 /* H_FUNCTION means HV does not support VAS so don't print an error */ 157 if (rc != H_FUNCTION) { 158 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 159 (hcall == H_QUERY_VAS_CAPABILITIES) ? 160 "H_QUERY_VAS_CAPABILITIES" : 161 "H_QUERY_NX_CAPABILITIES", 162 rc, query_type, result); 163 } 164 165 return -EIO; 166 } 167 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 168 169 /* 170 * hcall to get fault CRB from the hypervisor. 171 */ 172 static int h_get_nx_fault(u32 winid, u64 buffer) 173 { 174 long rc; 175 176 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 177 178 if (rc == H_SUCCESS) 179 return 0; 180 181 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 182 rc, winid, buffer); 183 return -EIO; 184 185 } 186 187 /* 188 * Handle the fault interrupt. 189 * When the fault interrupt is received for each window, query the 190 * hypervisor to get the fault CRB on the specific fault. Then 191 * process the CRB by updating CSB or send signal if the user space 192 * CSB is invalid. 193 * Note: The hypervisor forwards an interrupt for each fault request. 194 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 195 */ 196 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 197 { 198 struct pseries_vas_window *txwin = data; 199 struct coprocessor_request_block crb; 200 struct vas_user_win_ref *tsk_ref; 201 int rc; 202 203 while (atomic_read(&txwin->pending_faults)) { 204 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 205 if (!rc) { 206 tsk_ref = &txwin->vas_win.task_ref; 207 vas_dump_crb(&crb); 208 vas_update_csb(&crb, tsk_ref); 209 } 210 atomic_dec(&txwin->pending_faults); 211 } 212 213 return IRQ_HANDLED; 214 } 215 216 /* 217 * irq_default_primary_handler() can be used only with IRQF_ONESHOT 218 * which disables IRQ before executing the thread handler and enables 219 * it after. But this disabling interrupt sets the VAS IRQ OFF 220 * state in the hypervisor. If the NX generates fault interrupt 221 * during this window, the hypervisor will not deliver this 222 * interrupt to the LPAR. So use VAS specific IRQ handler instead 223 * of calling the default primary handler. 224 */ 225 static irqreturn_t pseries_vas_irq_handler(int irq, void *data) 226 { 227 struct pseries_vas_window *txwin = data; 228 229 /* 230 * The thread hanlder will process this interrupt if it is 231 * already running. 232 */ 233 atomic_inc(&txwin->pending_faults); 234 235 return IRQ_WAKE_THREAD; 236 } 237 238 /* 239 * Allocate window and setup IRQ mapping. 240 */ 241 static int allocate_setup_window(struct pseries_vas_window *txwin, 242 u64 *domain, u8 wintype) 243 { 244 int rc; 245 246 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 247 if (rc) 248 return rc; 249 /* 250 * On PowerVM, the hypervisor setup and forwards the fault 251 * interrupt per window. So the IRQ setup and fault handling 252 * will be done for each open window separately. 253 */ 254 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 255 if (!txwin->fault_virq) { 256 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 257 rc = -EINVAL; 258 goto out_win; 259 } 260 261 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 262 txwin->vas_win.winid); 263 if (!txwin->name) { 264 rc = -ENOMEM; 265 goto out_irq; 266 } 267 268 rc = request_threaded_irq(txwin->fault_virq, 269 pseries_vas_irq_handler, 270 pseries_vas_fault_thread_fn, 0, 271 txwin->name, txwin); 272 if (rc) { 273 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 274 txwin->vas_win.winid, txwin->fault_virq, rc); 275 goto out_free; 276 } 277 278 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 279 280 return 0; 281 out_free: 282 kfree(txwin->name); 283 out_irq: 284 irq_dispose_mapping(txwin->fault_virq); 285 out_win: 286 h_deallocate_vas_window(txwin->vas_win.winid); 287 return rc; 288 } 289 290 static inline void free_irq_setup(struct pseries_vas_window *txwin) 291 { 292 free_irq(txwin->fault_virq, txwin); 293 kfree(txwin->name); 294 irq_dispose_mapping(txwin->fault_virq); 295 } 296 297 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 298 enum vas_cop_type cop_type) 299 { 300 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 301 struct vas_cop_feat_caps *cop_feat_caps; 302 struct vas_caps *caps; 303 struct pseries_vas_window *txwin; 304 int rc; 305 306 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 307 if (!txwin) 308 return ERR_PTR(-ENOMEM); 309 310 /* 311 * A VAS window can have many credits which means that many 312 * requests can be issued simultaneously. But the hypervisor 313 * restricts one credit per window. 314 * The hypervisor introduces 2 different types of credits: 315 * Default credit type (Uses normal priority FIFO): 316 * A limited number of credits are assigned to partitions 317 * based on processor entitlement. But these credits may be 318 * over-committed on a system depends on whether the CPUs 319 * are in shared or dedicated modes - that is, more requests 320 * may be issued across the system than NX can service at 321 * once which can result in paste command failure (RMA_busy). 322 * Then the process has to resend requests or fall-back to 323 * SW compression. 324 * Quality of Service (QoS) credit type (Uses high priority FIFO): 325 * To avoid NX HW contention, the system admins can assign 326 * QoS credits for each LPAR so that this partition is 327 * guaranteed access to NX resources. These credits are 328 * assigned to partitions via the HMC. 329 * Refer PAPR for more information. 330 * 331 * Allocate window with QoS credits if user requested. Otherwise 332 * default credits are used. 333 */ 334 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 335 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 336 else 337 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 338 339 cop_feat_caps = &caps->caps; 340 341 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 342 atomic_read(&cop_feat_caps->nr_total_credits)) { 343 pr_err("Credits are not available to allocate window\n"); 344 rc = -EINVAL; 345 goto out; 346 } 347 348 if (vas_id == -1) { 349 /* 350 * The user space is requesting to allocate a window on 351 * a VAS instance where the process is executing. 352 * On PowerVM, domain values are passed to the hypervisor 353 * to select VAS instance. Useful if the process is 354 * affinity to NUMA node. 355 * The hypervisor selects VAS instance if 356 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 357 * The h_allocate_vas_window hcall is defined to take a 358 * domain values as specified by h_home_node_associativity, 359 * So no unpacking needs to be done. 360 */ 361 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 362 VPHN_FLAG_VCPU, hard_smp_processor_id()); 363 if (rc != H_SUCCESS) { 364 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 365 goto out; 366 } 367 } 368 369 txwin->pid = mfspr(SPRN_PID); 370 371 /* 372 * Allocate / Deallocate window hcalls and setup / free IRQs 373 * have to be protected with mutex. 374 * Open VAS window: Allocate window hcall and setup IRQ 375 * Close VAS window: Deallocate window hcall and free IRQ 376 * The hypervisor waits until all NX requests are 377 * completed before closing the window. So expects OS 378 * to handle NX faults, means IRQ can be freed only 379 * after the deallocate window hcall is returned. 380 * So once the window is closed with deallocate hcall before 381 * the IRQ is freed, it can be assigned to new allocate 382 * hcall with the same fault IRQ by the hypervisor. It can 383 * result in setup IRQ fail for the new window since the 384 * same fault IRQ is not freed by the OS before. 385 */ 386 mutex_lock(&vas_pseries_mutex); 387 if (migration_in_progress) 388 rc = -EBUSY; 389 else 390 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 391 cop_feat_caps->win_type); 392 mutex_unlock(&vas_pseries_mutex); 393 if (rc) 394 goto out; 395 396 /* 397 * Modify window and it is ready to use. 398 */ 399 rc = h_modify_vas_window(txwin); 400 if (!rc) 401 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 402 if (rc) 403 goto out_free; 404 405 txwin->win_type = cop_feat_caps->win_type; 406 mutex_lock(&vas_pseries_mutex); 407 /* 408 * Possible to lose the acquired credit with DLPAR core 409 * removal after the window is opened. So if there are any 410 * closed windows (means with lost credits), do not give new 411 * window to user space. New windows will be opened only 412 * after the existing windows are reopened when credits are 413 * available. 414 */ 415 if (!caps->nr_close_wins) { 416 list_add(&txwin->win_list, &caps->list); 417 caps->nr_open_windows++; 418 mutex_unlock(&vas_pseries_mutex); 419 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 420 return &txwin->vas_win; 421 } 422 mutex_unlock(&vas_pseries_mutex); 423 424 put_vas_user_win_ref(&txwin->vas_win.task_ref); 425 rc = -EBUSY; 426 pr_err("No credit is available to allocate window\n"); 427 428 out_free: 429 /* 430 * Window is not operational. Free IRQ before closing 431 * window so that do not have to hold mutex. 432 */ 433 free_irq_setup(txwin); 434 h_deallocate_vas_window(txwin->vas_win.winid); 435 out: 436 atomic_dec(&cop_feat_caps->nr_used_credits); 437 kfree(txwin); 438 return ERR_PTR(rc); 439 } 440 441 static u64 vas_paste_address(struct vas_window *vwin) 442 { 443 struct pseries_vas_window *win; 444 445 win = container_of(vwin, struct pseries_vas_window, vas_win); 446 return win->win_addr; 447 } 448 449 static int deallocate_free_window(struct pseries_vas_window *win) 450 { 451 int rc = 0; 452 453 /* 454 * The hypervisor waits for all requests including faults 455 * are processed before closing the window - Means all 456 * credits have to be returned. In the case of fault 457 * request, a credit is returned after OS issues 458 * H_GET_NX_FAULT hcall. 459 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 460 * hcall. 461 */ 462 rc = h_deallocate_vas_window(win->vas_win.winid); 463 if (!rc) 464 free_irq_setup(win); 465 466 return rc; 467 } 468 469 static int vas_deallocate_window(struct vas_window *vwin) 470 { 471 struct pseries_vas_window *win; 472 struct vas_cop_feat_caps *caps; 473 int rc = 0; 474 475 if (!vwin) 476 return -EINVAL; 477 478 win = container_of(vwin, struct pseries_vas_window, vas_win); 479 480 /* Should not happen */ 481 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 482 pr_err("Window (%u): Invalid window type %u\n", 483 vwin->winid, win->win_type); 484 return -EINVAL; 485 } 486 487 caps = &vascaps[win->win_type].caps; 488 mutex_lock(&vas_pseries_mutex); 489 /* 490 * VAS window is already closed in the hypervisor when 491 * lost the credit or with migration. So just remove the entry 492 * from the list, remove task references and free vas_window 493 * struct. 494 */ 495 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 496 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 497 rc = deallocate_free_window(win); 498 if (rc) { 499 mutex_unlock(&vas_pseries_mutex); 500 return rc; 501 } 502 } else 503 vascaps[win->win_type].nr_close_wins--; 504 505 list_del(&win->win_list); 506 atomic_dec(&caps->nr_used_credits); 507 vascaps[win->win_type].nr_open_windows--; 508 mutex_unlock(&vas_pseries_mutex); 509 510 mm_context_remove_vas_window(vwin->task_ref.mm); 511 put_vas_user_win_ref(&vwin->task_ref); 512 513 kfree(win); 514 return 0; 515 } 516 517 static const struct vas_user_win_ops vops_pseries = { 518 .open_win = vas_allocate_window, /* Open and configure window */ 519 .paste_addr = vas_paste_address, /* To do copy/paste */ 520 .close_win = vas_deallocate_window, /* Close window */ 521 }; 522 523 /* 524 * Supporting only nx-gzip coprocessor type now, but this API code 525 * extended to other coprocessor types later. 526 */ 527 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 528 const char *name) 529 { 530 if (!copypaste_feat) 531 return -ENOTSUPP; 532 533 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 534 } 535 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 536 537 void vas_unregister_api_pseries(void) 538 { 539 vas_unregister_coproc_api(); 540 } 541 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 542 543 /* 544 * Get the specific capabilities based on the feature type. 545 * Right now supports GZIP default and GZIP QoS capabilities. 546 */ 547 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 548 struct hv_vas_cop_feat_caps *hv_caps) 549 { 550 struct vas_cop_feat_caps *caps; 551 struct vas_caps *vcaps; 552 int rc = 0; 553 554 vcaps = &vascaps[type]; 555 memset(vcaps, 0, sizeof(*vcaps)); 556 INIT_LIST_HEAD(&vcaps->list); 557 558 vcaps->feat = feat; 559 caps = &vcaps->caps; 560 561 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 562 (u64)virt_to_phys(hv_caps)); 563 if (rc) 564 return rc; 565 566 caps->user_mode = hv_caps->user_mode; 567 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 568 pr_err("User space COPY/PASTE is not supported\n"); 569 return -ENOTSUPP; 570 } 571 572 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 573 caps->win_type = hv_caps->win_type; 574 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 575 pr_err("Unsupported window type %u\n", caps->win_type); 576 return -EINVAL; 577 } 578 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 579 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 580 atomic_set(&caps->nr_total_credits, 581 be16_to_cpu(hv_caps->target_lpar_creds)); 582 if (feat == VAS_GZIP_DEF_FEAT) { 583 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 584 585 if (caps->max_win_creds < DEF_WIN_CREDS) { 586 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 587 DEF_WIN_CREDS, caps->max_win_creds); 588 return -EINVAL; 589 } 590 } 591 592 rc = sysfs_add_vas_caps(caps); 593 if (rc) 594 return rc; 595 596 copypaste_feat = true; 597 598 return 0; 599 } 600 601 /* 602 * VAS windows can be closed due to lost credits when the core is 603 * removed. So reopen them if credits are available due to DLPAR 604 * core add and set the window active status. When NX sees the page 605 * fault on the unmapped paste address, the kernel handles the fault 606 * by setting the remapping to new paste address if the window is 607 * active. 608 */ 609 static int reconfig_open_windows(struct vas_caps *vcaps, int creds, 610 bool migrate) 611 { 612 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 613 struct vas_cop_feat_caps *caps = &vcaps->caps; 614 struct pseries_vas_window *win = NULL, *tmp; 615 int rc, mv_ents = 0; 616 int flag; 617 618 /* 619 * Nothing to do if there are no closed windows. 620 */ 621 if (!vcaps->nr_close_wins) 622 return 0; 623 624 /* 625 * For the core removal, the hypervisor reduces the credits 626 * assigned to the LPAR and the kernel closes VAS windows 627 * in the hypervisor depends on reduced credits. The kernel 628 * uses LIFO (the last windows that are opened will be closed 629 * first) and expects to open in the same order when credits 630 * are available. 631 * For example, 40 windows are closed when the LPAR lost 2 cores 632 * (dedicated). If 1 core is added, this LPAR can have 20 more 633 * credits. It means the kernel can reopen 20 windows. So move 634 * 20 entries in the VAS windows lost and reopen next 20 windows. 635 * For partition migration, reopen all windows that are closed 636 * during resume. 637 */ 638 if ((vcaps->nr_close_wins > creds) && !migrate) 639 mv_ents = vcaps->nr_close_wins - creds; 640 641 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { 642 if (!mv_ents) 643 break; 644 645 mv_ents--; 646 } 647 648 /* 649 * Open windows if they are closed only with migration or 650 * DLPAR (lost credit) before. 651 */ 652 if (migrate) 653 flag = VAS_WIN_MIGRATE_CLOSE; 654 else 655 flag = VAS_WIN_NO_CRED_CLOSE; 656 657 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { 658 /* 659 * This window is closed with DLPAR and migration events. 660 * So reopen the window with the last event. 661 * The user space is not suspended with the current 662 * migration notifier. So the user space can issue DLPAR 663 * CPU hotplug while migration in progress. In this case 664 * this window will be opened with the last event. 665 */ 666 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && 667 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { 668 win->vas_win.status &= ~flag; 669 continue; 670 } 671 672 /* 673 * Nothing to do on this window if it is not closed 674 * with this flag 675 */ 676 if (!(win->vas_win.status & flag)) 677 continue; 678 679 rc = allocate_setup_window(win, (u64 *)&domain[0], 680 caps->win_type); 681 if (rc) 682 return rc; 683 684 rc = h_modify_vas_window(win); 685 if (rc) 686 goto out; 687 688 mutex_lock(&win->vas_win.task_ref.mmap_mutex); 689 /* 690 * Set window status to active 691 */ 692 win->vas_win.status &= ~flag; 693 mutex_unlock(&win->vas_win.task_ref.mmap_mutex); 694 win->win_type = caps->win_type; 695 if (!--vcaps->nr_close_wins) 696 break; 697 } 698 699 return 0; 700 out: 701 /* 702 * Window modify HCALL failed. So close the window to the 703 * hypervisor and return. 704 */ 705 free_irq_setup(win); 706 h_deallocate_vas_window(win->vas_win.winid); 707 return rc; 708 } 709 710 /* 711 * The hypervisor reduces the available credits if the LPAR lost core. It 712 * means the excessive windows should not be active and the user space 713 * should not be using these windows to send compression requests to NX. 714 * So the kernel closes the excessive windows and unmap the paste address 715 * such that the user space receives paste instruction failure. Then up to 716 * the user space to fall back to SW compression and manage with the 717 * existing windows. 718 */ 719 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, 720 bool migrate) 721 { 722 struct pseries_vas_window *win, *tmp; 723 struct vas_user_win_ref *task_ref; 724 struct vm_area_struct *vma; 725 int rc = 0, flag; 726 727 if (migrate) 728 flag = VAS_WIN_MIGRATE_CLOSE; 729 else 730 flag = VAS_WIN_NO_CRED_CLOSE; 731 732 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { 733 /* 734 * This window is already closed due to lost credit 735 * or for migration before. Go for next window. 736 * For migration, nothing to do since this window 737 * closed for DLPAR and will be reopened even on 738 * the destination system with other DLPAR operation. 739 */ 740 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || 741 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { 742 win->vas_win.status |= flag; 743 continue; 744 } 745 746 task_ref = &win->vas_win.task_ref; 747 /* 748 * VAS mmap (coproc_mmap()) and its fault handler 749 * (vas_mmap_fault()) are called after holding mmap lock. 750 * So hold mmap mutex after mmap_lock to avoid deadlock. 751 */ 752 mmap_write_lock(task_ref->mm); 753 mutex_lock(&task_ref->mmap_mutex); 754 vma = task_ref->vma; 755 /* 756 * Number of available credits are reduced, So select 757 * and close windows. 758 */ 759 win->vas_win.status |= flag; 760 761 /* 762 * vma is set in the original mapping. But this mapping 763 * is done with mmap() after the window is opened with ioctl. 764 * so we may not see the original mapping if the core remove 765 * is done before the original mmap() and after the ioctl. 766 */ 767 if (vma) 768 zap_vma_pages(vma); 769 770 mutex_unlock(&task_ref->mmap_mutex); 771 mmap_write_unlock(task_ref->mm); 772 /* 773 * Close VAS window in the hypervisor, but do not 774 * free vas_window struct since it may be reused 775 * when the credit is available later (DLPAR with 776 * adding cores). This struct will be used 777 * later when the process issued with close(FD). 778 */ 779 rc = deallocate_free_window(win); 780 /* 781 * This failure is from the hypervisor. 782 * No way to stop migration for these failures. 783 * So ignore error and continue closing other windows. 784 */ 785 if (rc && !migrate) 786 return rc; 787 788 vcap->nr_close_wins++; 789 790 /* 791 * For migration, do not depend on lpar_creds in case if 792 * mismatch with the hypervisor value (should not happen). 793 * So close all active windows in the list and will be 794 * reopened windows based on the new lpar_creds on the 795 * destination system during resume. 796 */ 797 if (!migrate && !--excess_creds) 798 break; 799 } 800 801 return 0; 802 } 803 804 /* 805 * Get new VAS capabilities when the core add/removal configuration 806 * changes. Reconfig window configurations based on the credits 807 * availability from this new capabilities. 808 */ 809 int vas_reconfig_capabilties(u8 type, int new_nr_creds) 810 { 811 struct vas_cop_feat_caps *caps; 812 int old_nr_creds; 813 struct vas_caps *vcaps; 814 int rc = 0, nr_active_wins; 815 816 if (type >= VAS_MAX_FEAT_TYPE) { 817 pr_err("Invalid credit type %d\n", type); 818 return -EINVAL; 819 } 820 821 vcaps = &vascaps[type]; 822 caps = &vcaps->caps; 823 824 mutex_lock(&vas_pseries_mutex); 825 826 old_nr_creds = atomic_read(&caps->nr_total_credits); 827 828 atomic_set(&caps->nr_total_credits, new_nr_creds); 829 /* 830 * The total number of available credits may be decreased or 831 * increased with DLPAR operation. Means some windows have to be 832 * closed / reopened. Hold the vas_pseries_mutex so that the 833 * user space can not open new windows. 834 */ 835 if (old_nr_creds < new_nr_creds) { 836 /* 837 * If the existing target credits is less than the new 838 * target, reopen windows if they are closed due to 839 * the previous DLPAR (core removal). 840 */ 841 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, 842 false); 843 } else { 844 /* 845 * # active windows is more than new LPAR available 846 * credits. So close the excessive windows. 847 * On pseries, each window will have 1 credit. 848 */ 849 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; 850 if (nr_active_wins > new_nr_creds) 851 rc = reconfig_close_windows(vcaps, 852 nr_active_wins - new_nr_creds, 853 false); 854 } 855 856 mutex_unlock(&vas_pseries_mutex); 857 return rc; 858 } 859 860 int pseries_vas_dlpar_cpu(void) 861 { 862 int new_nr_creds, rc; 863 864 /* 865 * NX-GZIP is not enabled. Nothing to do for DLPAR event 866 */ 867 if (!copypaste_feat) 868 return 0; 869 870 871 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 872 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, 873 (u64)virt_to_phys(&hv_cop_caps)); 874 if (!rc) { 875 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 876 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds); 877 } 878 879 if (rc) 880 pr_err("Failed reconfig VAS capabilities with DLPAR\n"); 881 882 return rc; 883 } 884 885 /* 886 * Total number of default credits available (target_credits) 887 * in LPAR depends on number of cores configured. It varies based on 888 * whether processors are in shared mode or dedicated mode. 889 * Get the notifier when CPU configuration is changed with DLPAR 890 * operation so that get the new target_credits (vas default capabilities) 891 * and then update the existing windows usage if needed. 892 */ 893 static int pseries_vas_notifier(struct notifier_block *nb, 894 unsigned long action, void *data) 895 { 896 struct of_reconfig_data *rd = data; 897 struct device_node *dn = rd->dn; 898 const __be32 *intserv = NULL; 899 int len; 900 901 /* 902 * For shared CPU partition, the hypervisor assigns total credits 903 * based on entitled core capacity. So updating VAS windows will 904 * be called from lparcfg_write(). 905 */ 906 if (is_shared_processor()) 907 return NOTIFY_OK; 908 909 if ((action == OF_RECONFIG_ATTACH_NODE) || 910 (action == OF_RECONFIG_DETACH_NODE)) 911 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", 912 &len); 913 /* 914 * Processor config is not changed 915 */ 916 if (!intserv) 917 return NOTIFY_OK; 918 919 return pseries_vas_dlpar_cpu(); 920 } 921 922 static struct notifier_block pseries_vas_nb = { 923 .notifier_call = pseries_vas_notifier, 924 }; 925 926 /* 927 * For LPM, all windows have to be closed on the source partition 928 * before migration and reopen them on the destination partition 929 * after migration. So closing windows during suspend and 930 * reopen them during resume. 931 */ 932 int vas_migration_handler(int action) 933 { 934 struct vas_cop_feat_caps *caps; 935 int old_nr_creds, new_nr_creds = 0; 936 struct vas_caps *vcaps; 937 int i, rc = 0; 938 939 /* 940 * NX-GZIP is not enabled. Nothing to do for migration. 941 */ 942 if (!copypaste_feat) 943 return rc; 944 945 mutex_lock(&vas_pseries_mutex); 946 947 if (action == VAS_SUSPEND) 948 migration_in_progress = true; 949 else 950 migration_in_progress = false; 951 952 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { 953 vcaps = &vascaps[i]; 954 caps = &vcaps->caps; 955 old_nr_creds = atomic_read(&caps->nr_total_credits); 956 957 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 958 vcaps->feat, 959 (u64)virt_to_phys(&hv_cop_caps)); 960 if (!rc) { 961 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); 962 /* 963 * Should not happen. But incase print messages, close 964 * all windows in the list during suspend and reopen 965 * windows based on new lpar_creds on the destination 966 * system. 967 */ 968 if (old_nr_creds != new_nr_creds) { 969 pr_err("Target credits mismatch with the hypervisor\n"); 970 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", 971 action, old_nr_creds, new_nr_creds); 972 pr_err("Used creds: %d, Active creds: %d\n", 973 atomic_read(&caps->nr_used_credits), 974 vcaps->nr_open_windows - vcaps->nr_close_wins); 975 } 976 } else { 977 pr_err("state(%d): Get VAS capabilities failed with %d\n", 978 action, rc); 979 /* 980 * We can not stop migration with the current lpm 981 * implementation. So continue closing all windows in 982 * the list (during suspend) and return without 983 * opening windows (during resume) if VAS capabilities 984 * HCALL failed. 985 */ 986 if (action == VAS_RESUME) 987 goto out; 988 } 989 990 switch (action) { 991 case VAS_SUSPEND: 992 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, 993 true); 994 break; 995 case VAS_RESUME: 996 atomic_set(&caps->nr_total_credits, new_nr_creds); 997 rc = reconfig_open_windows(vcaps, new_nr_creds, true); 998 break; 999 default: 1000 /* should not happen */ 1001 pr_err("Invalid migration action %d\n", action); 1002 rc = -EINVAL; 1003 goto out; 1004 } 1005 1006 /* 1007 * Ignore errors during suspend and return for resume. 1008 */ 1009 if (rc && (action == VAS_RESUME)) 1010 goto out; 1011 } 1012 1013 out: 1014 mutex_unlock(&vas_pseries_mutex); 1015 return rc; 1016 } 1017 1018 static int __init pseries_vas_init(void) 1019 { 1020 struct hv_vas_all_caps *hv_caps; 1021 int rc = 0; 1022 1023 /* 1024 * Linux supports user space COPY/PASTE only with Radix 1025 */ 1026 if (!radix_enabled()) { 1027 copypaste_feat = false; 1028 pr_err("API is supported only with radix page tables\n"); 1029 return -ENOTSUPP; 1030 } 1031 1032 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 1033 if (!hv_caps) 1034 return -ENOMEM; 1035 /* 1036 * Get VAS overall capabilities by passing 0 to feature type. 1037 */ 1038 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 1039 (u64)virt_to_phys(hv_caps)); 1040 if (rc) 1041 goto out; 1042 1043 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 1044 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 1045 1046 sysfs_pseries_vas_init(&caps_all); 1047 1048 /* 1049 * QOS capabilities available 1050 */ 1051 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 1052 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 1053 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); 1054 1055 if (rc) 1056 goto out; 1057 } 1058 /* 1059 * Default capabilities available 1060 */ 1061 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) 1062 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 1063 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); 1064 1065 if (!rc && copypaste_feat) { 1066 if (firmware_has_feature(FW_FEATURE_LPAR)) 1067 of_reconfig_notifier_register(&pseries_vas_nb); 1068 1069 pr_info("GZIP feature is available\n"); 1070 } else { 1071 /* 1072 * Should not happen, but only when get default 1073 * capabilities HCALL failed. So disable copy paste 1074 * feature. 1075 */ 1076 copypaste_feat = false; 1077 } 1078 1079 out: 1080 kfree(hv_caps); 1081 return rc; 1082 } 1083 machine_device_initcall(pseries, pseries_vas_init); 1084