1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/vas.h> 20 #include "vas.h" 21 22 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 23 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 24 /* The hypervisor allows one credit per window right now */ 25 #define DEF_WIN_CREDS 1 26 27 static struct vas_all_caps caps_all; 28 static bool copypaste_feat; 29 30 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 31 static DEFINE_MUTEX(vas_pseries_mutex); 32 33 static long hcall_return_busy_check(long rc) 34 { 35 /* Check if we are stalled for some time */ 36 if (H_IS_LONG_BUSY(rc)) { 37 msleep(get_longbusy_msecs(rc)); 38 rc = H_BUSY; 39 } else if (rc == H_BUSY) { 40 cond_resched(); 41 } 42 43 return rc; 44 } 45 46 /* 47 * Allocate VAS window hcall 48 */ 49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 50 u8 wintype, u16 credits) 51 { 52 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 53 long rc; 54 55 do { 56 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 57 credits, domain[0], domain[1], domain[2], 58 domain[3], domain[4], domain[5]); 59 60 rc = hcall_return_busy_check(rc); 61 } while (rc == H_BUSY); 62 63 if (rc == H_SUCCESS) { 64 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 65 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 66 return -ENOTSUPP; 67 } 68 win->vas_win.winid = retbuf[0]; 69 win->win_addr = retbuf[1]; 70 win->complete_irq = retbuf[2]; 71 win->fault_irq = retbuf[3]; 72 return 0; 73 } 74 75 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 76 rc, wintype, credits); 77 78 return -EIO; 79 } 80 81 /* 82 * Deallocate VAS window hcall. 83 */ 84 static int h_deallocate_vas_window(u64 winid) 85 { 86 long rc; 87 88 do { 89 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 90 91 rc = hcall_return_busy_check(rc); 92 } while (rc == H_BUSY); 93 94 if (rc == H_SUCCESS) 95 return 0; 96 97 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 98 rc, winid); 99 return -EIO; 100 } 101 102 /* 103 * Modify VAS window. 104 * After the window is opened with allocate window hcall, configure it 105 * with flags and LPAR PID before using. 106 */ 107 static int h_modify_vas_window(struct pseries_vas_window *win) 108 { 109 long rc; 110 111 /* 112 * AMR value is not supported in Linux VAS implementation. 113 * The hypervisor ignores it if 0 is passed. 114 */ 115 do { 116 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 117 win->vas_win.winid, win->pid, 0, 118 VAS_MOD_WIN_FLAGS, 0); 119 120 rc = hcall_return_busy_check(rc); 121 } while (rc == H_BUSY); 122 123 if (rc == H_SUCCESS) 124 return 0; 125 126 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", 127 rc, win->vas_win.winid, win->pid); 128 return -EIO; 129 } 130 131 /* 132 * This hcall is used to determine the capabilities from the hypervisor. 133 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 134 * @query_type: If 0 is passed, the hypervisor returns the overall 135 * capabilities which provides all feature(s) that are 136 * available. Then query the hypervisor to get the 137 * corresponding capabilities for the specific feature. 138 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 139 * and VAS GZIP Default capabilities. 140 * H_QUERY_NX_CAPABILITIES provides NX GZIP 141 * capabilities. 142 * @result: Return buffer to save capabilities. 143 */ 144 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 145 { 146 long rc; 147 148 rc = plpar_hcall_norets(hcall, query_type, result); 149 150 if (rc == H_SUCCESS) 151 return 0; 152 153 /* H_FUNCTION means HV does not support VAS so don't print an error */ 154 if (rc != H_FUNCTION) { 155 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 156 (hcall == H_QUERY_VAS_CAPABILITIES) ? 157 "H_QUERY_VAS_CAPABILITIES" : 158 "H_QUERY_NX_CAPABILITIES", 159 rc, query_type, result); 160 } 161 162 return -EIO; 163 } 164 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 165 166 /* 167 * hcall to get fault CRB from the hypervisor. 168 */ 169 static int h_get_nx_fault(u32 winid, u64 buffer) 170 { 171 long rc; 172 173 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 174 175 if (rc == H_SUCCESS) 176 return 0; 177 178 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 179 rc, winid, buffer); 180 return -EIO; 181 182 } 183 184 /* 185 * Handle the fault interrupt. 186 * When the fault interrupt is received for each window, query the 187 * hypervisor to get the fault CRB on the specific fault. Then 188 * process the CRB by updating CSB or send signal if the user space 189 * CSB is invalid. 190 * Note: The hypervisor forwards an interrupt for each fault request. 191 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 192 */ 193 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 194 { 195 struct pseries_vas_window *txwin = data; 196 struct coprocessor_request_block crb; 197 struct vas_user_win_ref *tsk_ref; 198 int rc; 199 200 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 201 if (!rc) { 202 tsk_ref = &txwin->vas_win.task_ref; 203 vas_dump_crb(&crb); 204 vas_update_csb(&crb, tsk_ref); 205 } 206 207 return IRQ_HANDLED; 208 } 209 210 /* 211 * Allocate window and setup IRQ mapping. 212 */ 213 static int allocate_setup_window(struct pseries_vas_window *txwin, 214 u64 *domain, u8 wintype) 215 { 216 int rc; 217 218 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 219 if (rc) 220 return rc; 221 /* 222 * On PowerVM, the hypervisor setup and forwards the fault 223 * interrupt per window. So the IRQ setup and fault handling 224 * will be done for each open window separately. 225 */ 226 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 227 if (!txwin->fault_virq) { 228 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 229 rc = -EINVAL; 230 goto out_win; 231 } 232 233 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 234 txwin->vas_win.winid); 235 if (!txwin->name) { 236 rc = -ENOMEM; 237 goto out_irq; 238 } 239 240 rc = request_threaded_irq(txwin->fault_virq, NULL, 241 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 242 txwin->name, txwin); 243 if (rc) { 244 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 245 txwin->vas_win.winid, txwin->fault_virq, rc); 246 goto out_free; 247 } 248 249 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 250 251 return 0; 252 out_free: 253 kfree(txwin->name); 254 out_irq: 255 irq_dispose_mapping(txwin->fault_virq); 256 out_win: 257 h_deallocate_vas_window(txwin->vas_win.winid); 258 return rc; 259 } 260 261 static inline void free_irq_setup(struct pseries_vas_window *txwin) 262 { 263 free_irq(txwin->fault_virq, txwin); 264 kfree(txwin->name); 265 irq_dispose_mapping(txwin->fault_virq); 266 } 267 268 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 269 enum vas_cop_type cop_type) 270 { 271 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 272 struct vas_cop_feat_caps *cop_feat_caps; 273 struct vas_caps *caps; 274 struct pseries_vas_window *txwin; 275 int rc; 276 277 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 278 if (!txwin) 279 return ERR_PTR(-ENOMEM); 280 281 /* 282 * A VAS window can have many credits which means that many 283 * requests can be issued simultaneously. But the hypervisor 284 * restricts one credit per window. 285 * The hypervisor introduces 2 different types of credits: 286 * Default credit type (Uses normal priority FIFO): 287 * A limited number of credits are assigned to partitions 288 * based on processor entitlement. But these credits may be 289 * over-committed on a system depends on whether the CPUs 290 * are in shared or dedicated modes - that is, more requests 291 * may be issued across the system than NX can service at 292 * once which can result in paste command failure (RMA_busy). 293 * Then the process has to resend requests or fall-back to 294 * SW compression. 295 * Quality of Service (QoS) credit type (Uses high priority FIFO): 296 * To avoid NX HW contention, the system admins can assign 297 * QoS credits for each LPAR so that this partition is 298 * guaranteed access to NX resources. These credits are 299 * assigned to partitions via the HMC. 300 * Refer PAPR for more information. 301 * 302 * Allocate window with QoS credits if user requested. Otherwise 303 * default credits are used. 304 */ 305 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 306 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 307 else 308 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 309 310 cop_feat_caps = &caps->caps; 311 312 if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > 313 atomic_read(&cop_feat_caps->nr_total_credits)) { 314 pr_err("Credits are not available to allocate window\n"); 315 rc = -EINVAL; 316 goto out; 317 } 318 319 if (vas_id == -1) { 320 /* 321 * The user space is requesting to allocate a window on 322 * a VAS instance where the process is executing. 323 * On PowerVM, domain values are passed to the hypervisor 324 * to select VAS instance. Useful if the process is 325 * affinity to NUMA node. 326 * The hypervisor selects VAS instance if 327 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 328 * The h_allocate_vas_window hcall is defined to take a 329 * domain values as specified by h_home_node_associativity, 330 * So no unpacking needs to be done. 331 */ 332 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 333 VPHN_FLAG_VCPU, smp_processor_id()); 334 if (rc != H_SUCCESS) { 335 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 336 goto out; 337 } 338 } 339 340 txwin->pid = mfspr(SPRN_PID); 341 342 /* 343 * Allocate / Deallocate window hcalls and setup / free IRQs 344 * have to be protected with mutex. 345 * Open VAS window: Allocate window hcall and setup IRQ 346 * Close VAS window: Deallocate window hcall and free IRQ 347 * The hypervisor waits until all NX requests are 348 * completed before closing the window. So expects OS 349 * to handle NX faults, means IRQ can be freed only 350 * after the deallocate window hcall is returned. 351 * So once the window is closed with deallocate hcall before 352 * the IRQ is freed, it can be assigned to new allocate 353 * hcall with the same fault IRQ by the hypervisor. It can 354 * result in setup IRQ fail for the new window since the 355 * same fault IRQ is not freed by the OS before. 356 */ 357 mutex_lock(&vas_pseries_mutex); 358 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 359 cop_feat_caps->win_type); 360 mutex_unlock(&vas_pseries_mutex); 361 if (rc) 362 goto out; 363 364 /* 365 * Modify window and it is ready to use. 366 */ 367 rc = h_modify_vas_window(txwin); 368 if (!rc) 369 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 370 if (rc) 371 goto out_free; 372 373 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 374 txwin->win_type = cop_feat_caps->win_type; 375 mutex_lock(&vas_pseries_mutex); 376 list_add(&txwin->win_list, &caps->list); 377 mutex_unlock(&vas_pseries_mutex); 378 379 return &txwin->vas_win; 380 381 out_free: 382 /* 383 * Window is not operational. Free IRQ before closing 384 * window so that do not have to hold mutex. 385 */ 386 free_irq_setup(txwin); 387 h_deallocate_vas_window(txwin->vas_win.winid); 388 out: 389 atomic_dec(&cop_feat_caps->nr_used_credits); 390 kfree(txwin); 391 return ERR_PTR(rc); 392 } 393 394 static u64 vas_paste_address(struct vas_window *vwin) 395 { 396 struct pseries_vas_window *win; 397 398 win = container_of(vwin, struct pseries_vas_window, vas_win); 399 return win->win_addr; 400 } 401 402 static int deallocate_free_window(struct pseries_vas_window *win) 403 { 404 int rc = 0; 405 406 /* 407 * The hypervisor waits for all requests including faults 408 * are processed before closing the window - Means all 409 * credits have to be returned. In the case of fault 410 * request, a credit is returned after OS issues 411 * H_GET_NX_FAULT hcall. 412 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 413 * hcall. 414 */ 415 rc = h_deallocate_vas_window(win->vas_win.winid); 416 if (!rc) 417 free_irq_setup(win); 418 419 return rc; 420 } 421 422 static int vas_deallocate_window(struct vas_window *vwin) 423 { 424 struct pseries_vas_window *win; 425 struct vas_cop_feat_caps *caps; 426 int rc = 0; 427 428 if (!vwin) 429 return -EINVAL; 430 431 win = container_of(vwin, struct pseries_vas_window, vas_win); 432 433 /* Should not happen */ 434 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 435 pr_err("Window (%u): Invalid window type %u\n", 436 vwin->winid, win->win_type); 437 return -EINVAL; 438 } 439 440 caps = &vascaps[win->win_type].caps; 441 mutex_lock(&vas_pseries_mutex); 442 rc = deallocate_free_window(win); 443 if (rc) { 444 mutex_unlock(&vas_pseries_mutex); 445 return rc; 446 } 447 448 list_del(&win->win_list); 449 atomic_dec(&caps->nr_used_credits); 450 mutex_unlock(&vas_pseries_mutex); 451 452 put_vas_user_win_ref(&vwin->task_ref); 453 mm_context_remove_vas_window(vwin->task_ref.mm); 454 455 kfree(win); 456 return 0; 457 } 458 459 static const struct vas_user_win_ops vops_pseries = { 460 .open_win = vas_allocate_window, /* Open and configure window */ 461 .paste_addr = vas_paste_address, /* To do copy/paste */ 462 .close_win = vas_deallocate_window, /* Close window */ 463 }; 464 465 /* 466 * Supporting only nx-gzip coprocessor type now, but this API code 467 * extended to other coprocessor types later. 468 */ 469 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 470 const char *name) 471 { 472 int rc; 473 474 if (!copypaste_feat) 475 return -ENOTSUPP; 476 477 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 478 479 return rc; 480 } 481 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 482 483 void vas_unregister_api_pseries(void) 484 { 485 vas_unregister_coproc_api(); 486 } 487 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 488 489 /* 490 * Get the specific capabilities based on the feature type. 491 * Right now supports GZIP default and GZIP QoS capabilities. 492 */ 493 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 494 struct hv_vas_cop_feat_caps *hv_caps) 495 { 496 struct vas_cop_feat_caps *caps; 497 struct vas_caps *vcaps; 498 int rc = 0; 499 500 vcaps = &vascaps[type]; 501 memset(vcaps, 0, sizeof(*vcaps)); 502 INIT_LIST_HEAD(&vcaps->list); 503 504 caps = &vcaps->caps; 505 506 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 507 (u64)virt_to_phys(hv_caps)); 508 if (rc) 509 return rc; 510 511 caps->user_mode = hv_caps->user_mode; 512 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 513 pr_err("User space COPY/PASTE is not supported\n"); 514 return -ENOTSUPP; 515 } 516 517 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 518 caps->win_type = hv_caps->win_type; 519 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 520 pr_err("Unsupported window type %u\n", caps->win_type); 521 return -EINVAL; 522 } 523 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 524 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 525 atomic_set(&caps->nr_total_credits, 526 be16_to_cpu(hv_caps->target_lpar_creds)); 527 if (feat == VAS_GZIP_DEF_FEAT) { 528 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 529 530 if (caps->max_win_creds < DEF_WIN_CREDS) { 531 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 532 DEF_WIN_CREDS, caps->max_win_creds); 533 return -EINVAL; 534 } 535 } 536 537 copypaste_feat = true; 538 539 return 0; 540 } 541 542 static int __init pseries_vas_init(void) 543 { 544 struct hv_vas_cop_feat_caps *hv_cop_caps; 545 struct hv_vas_all_caps *hv_caps; 546 int rc; 547 548 /* 549 * Linux supports user space COPY/PASTE only with Radix 550 */ 551 if (!radix_enabled()) { 552 pr_err("API is supported only with radix page tables\n"); 553 return -ENOTSUPP; 554 } 555 556 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 557 if (!hv_caps) 558 return -ENOMEM; 559 /* 560 * Get VAS overall capabilities by passing 0 to feature type. 561 */ 562 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 563 (u64)virt_to_phys(hv_caps)); 564 if (rc) 565 goto out; 566 567 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 568 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 569 570 hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); 571 if (!hv_cop_caps) { 572 rc = -ENOMEM; 573 goto out; 574 } 575 /* 576 * QOS capabilities available 577 */ 578 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 579 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 580 VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); 581 582 if (rc) 583 goto out_cop; 584 } 585 /* 586 * Default capabilities available 587 */ 588 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { 589 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 590 VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); 591 if (rc) 592 goto out_cop; 593 } 594 595 pr_info("GZIP feature is available\n"); 596 597 out_cop: 598 kfree(hv_cop_caps); 599 out: 600 kfree(hv_caps); 601 return rc; 602 } 603 machine_device_initcall(pseries, pseries_vas_init); 604