1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/vas.h> 20 #include "vas.h" 21 22 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 23 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 24 /* The hypervisor allows one credit per window right now */ 25 #define DEF_WIN_CREDS 1 26 27 static struct vas_all_caps caps_all; 28 static bool copypaste_feat; 29 30 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 31 static DEFINE_MUTEX(vas_pseries_mutex); 32 33 static long hcall_return_busy_check(long rc) 34 { 35 /* Check if we are stalled for some time */ 36 if (H_IS_LONG_BUSY(rc)) { 37 msleep(get_longbusy_msecs(rc)); 38 rc = H_BUSY; 39 } else if (rc == H_BUSY) { 40 cond_resched(); 41 } 42 43 return rc; 44 } 45 46 /* 47 * Allocate VAS window hcall 48 */ 49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 50 u8 wintype, u16 credits) 51 { 52 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 53 long rc; 54 55 do { 56 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 57 credits, domain[0], domain[1], domain[2], 58 domain[3], domain[4], domain[5]); 59 60 rc = hcall_return_busy_check(rc); 61 } while (rc == H_BUSY); 62 63 if (rc == H_SUCCESS) { 64 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 65 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 66 return -ENOTSUPP; 67 } 68 win->vas_win.winid = retbuf[0]; 69 win->win_addr = retbuf[1]; 70 win->complete_irq = retbuf[2]; 71 win->fault_irq = retbuf[3]; 72 return 0; 73 } 74 75 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 76 rc, wintype, credits); 77 78 return -EIO; 79 } 80 81 /* 82 * Deallocate VAS window hcall. 83 */ 84 static int h_deallocate_vas_window(u64 winid) 85 { 86 long rc; 87 88 do { 89 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 90 91 rc = hcall_return_busy_check(rc); 92 } while (rc == H_BUSY); 93 94 if (rc == H_SUCCESS) 95 return 0; 96 97 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 98 rc, winid); 99 return -EIO; 100 } 101 102 /* 103 * Modify VAS window. 104 * After the window is opened with allocate window hcall, configure it 105 * with flags and LPAR PID before using. 106 */ 107 static int h_modify_vas_window(struct pseries_vas_window *win) 108 { 109 long rc; 110 u32 lpid = mfspr(SPRN_PID); 111 112 /* 113 * AMR value is not supported in Linux VAS implementation. 114 * The hypervisor ignores it if 0 is passed. 115 */ 116 do { 117 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 118 win->vas_win.winid, lpid, 0, 119 VAS_MOD_WIN_FLAGS, 0); 120 121 rc = hcall_return_busy_check(rc); 122 } while (rc == H_BUSY); 123 124 if (rc == H_SUCCESS) 125 return 0; 126 127 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", 128 rc, win->vas_win.winid, lpid); 129 return -EIO; 130 } 131 132 /* 133 * This hcall is used to determine the capabilities from the hypervisor. 134 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 135 * @query_type: If 0 is passed, the hypervisor returns the overall 136 * capabilities which provides all feature(s) that are 137 * available. Then query the hypervisor to get the 138 * corresponding capabilities for the specific feature. 139 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 140 * and VAS GZIP Default capabilities. 141 * H_QUERY_NX_CAPABILITIES provides NX GZIP 142 * capabilities. 143 * @result: Return buffer to save capabilities. 144 */ 145 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 146 { 147 long rc; 148 149 rc = plpar_hcall_norets(hcall, query_type, result); 150 151 if (rc == H_SUCCESS) 152 return 0; 153 154 /* H_FUNCTION means HV does not support VAS so don't print an error */ 155 if (rc != H_FUNCTION) { 156 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 157 (hcall == H_QUERY_VAS_CAPABILITIES) ? 158 "H_QUERY_VAS_CAPABILITIES" : 159 "H_QUERY_NX_CAPABILITIES", 160 rc, query_type, result); 161 } 162 163 return -EIO; 164 } 165 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 166 167 /* 168 * hcall to get fault CRB from the hypervisor. 169 */ 170 static int h_get_nx_fault(u32 winid, u64 buffer) 171 { 172 long rc; 173 174 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 175 176 if (rc == H_SUCCESS) 177 return 0; 178 179 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 180 rc, winid, buffer); 181 return -EIO; 182 183 } 184 185 /* 186 * Handle the fault interrupt. 187 * When the fault interrupt is received for each window, query the 188 * hypervisor to get the fault CRB on the specific fault. Then 189 * process the CRB by updating CSB or send signal if the user space 190 * CSB is invalid. 191 * Note: The hypervisor forwards an interrupt for each fault request. 192 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 193 */ 194 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 195 { 196 struct pseries_vas_window *txwin = data; 197 struct coprocessor_request_block crb; 198 struct vas_user_win_ref *tsk_ref; 199 int rc; 200 201 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 202 if (!rc) { 203 tsk_ref = &txwin->vas_win.task_ref; 204 vas_dump_crb(&crb); 205 vas_update_csb(&crb, tsk_ref); 206 } 207 208 return IRQ_HANDLED; 209 } 210 211 /* 212 * Allocate window and setup IRQ mapping. 213 */ 214 static int allocate_setup_window(struct pseries_vas_window *txwin, 215 u64 *domain, u8 wintype) 216 { 217 int rc; 218 219 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 220 if (rc) 221 return rc; 222 /* 223 * On PowerVM, the hypervisor setup and forwards the fault 224 * interrupt per window. So the IRQ setup and fault handling 225 * will be done for each open window separately. 226 */ 227 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 228 if (!txwin->fault_virq) { 229 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 230 rc = -EINVAL; 231 goto out_win; 232 } 233 234 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 235 txwin->vas_win.winid); 236 if (!txwin->name) { 237 rc = -ENOMEM; 238 goto out_irq; 239 } 240 241 rc = request_threaded_irq(txwin->fault_virq, NULL, 242 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 243 txwin->name, txwin); 244 if (rc) { 245 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 246 txwin->vas_win.winid, txwin->fault_virq, rc); 247 goto out_free; 248 } 249 250 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 251 252 return 0; 253 out_free: 254 kfree(txwin->name); 255 out_irq: 256 irq_dispose_mapping(txwin->fault_virq); 257 out_win: 258 h_deallocate_vas_window(txwin->vas_win.winid); 259 return rc; 260 } 261 262 static inline void free_irq_setup(struct pseries_vas_window *txwin) 263 { 264 free_irq(txwin->fault_virq, txwin); 265 kfree(txwin->name); 266 irq_dispose_mapping(txwin->fault_virq); 267 } 268 269 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 270 enum vas_cop_type cop_type) 271 { 272 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 273 struct vas_cop_feat_caps *cop_feat_caps; 274 struct vas_caps *caps; 275 struct pseries_vas_window *txwin; 276 int rc; 277 278 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 279 if (!txwin) 280 return ERR_PTR(-ENOMEM); 281 282 /* 283 * A VAS window can have many credits which means that many 284 * requests can be issued simultaneously. But the hypervisor 285 * restricts one credit per window. 286 * The hypervisor introduces 2 different types of credits: 287 * Default credit type (Uses normal priority FIFO): 288 * A limited number of credits are assigned to partitions 289 * based on processor entitlement. But these credits may be 290 * over-committed on a system depends on whether the CPUs 291 * are in shared or dedicated modes - that is, more requests 292 * may be issued across the system than NX can service at 293 * once which can result in paste command failure (RMA_busy). 294 * Then the process has to resend requests or fall-back to 295 * SW compression. 296 * Quality of Service (QoS) credit type (Uses high priority FIFO): 297 * To avoid NX HW contention, the system admins can assign 298 * QoS credits for each LPAR so that this partition is 299 * guaranteed access to NX resources. These credits are 300 * assigned to partitions via the HMC. 301 * Refer PAPR for more information. 302 * 303 * Allocate window with QoS credits if user requested. Otherwise 304 * default credits are used. 305 */ 306 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 307 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 308 else 309 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 310 311 cop_feat_caps = &caps->caps; 312 313 if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > 314 atomic_read(&cop_feat_caps->target_lpar_creds)) { 315 pr_err("Credits are not available to allocate window\n"); 316 rc = -EINVAL; 317 goto out; 318 } 319 320 if (vas_id == -1) { 321 /* 322 * The user space is requesting to allocate a window on 323 * a VAS instance where the process is executing. 324 * On PowerVM, domain values are passed to the hypervisor 325 * to select VAS instance. Useful if the process is 326 * affinity to NUMA node. 327 * The hypervisor selects VAS instance if 328 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 329 * The h_allocate_vas_window hcall is defined to take a 330 * domain values as specified by h_home_node_associativity, 331 * So no unpacking needs to be done. 332 */ 333 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 334 VPHN_FLAG_VCPU, smp_processor_id()); 335 if (rc != H_SUCCESS) { 336 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 337 goto out; 338 } 339 } 340 341 /* 342 * Allocate / Deallocate window hcalls and setup / free IRQs 343 * have to be protected with mutex. 344 * Open VAS window: Allocate window hcall and setup IRQ 345 * Close VAS window: Deallocate window hcall and free IRQ 346 * The hypervisor waits until all NX requests are 347 * completed before closing the window. So expects OS 348 * to handle NX faults, means IRQ can be freed only 349 * after the deallocate window hcall is returned. 350 * So once the window is closed with deallocate hcall before 351 * the IRQ is freed, it can be assigned to new allocate 352 * hcall with the same fault IRQ by the hypervisor. It can 353 * result in setup IRQ fail for the new window since the 354 * same fault IRQ is not freed by the OS before. 355 */ 356 mutex_lock(&vas_pseries_mutex); 357 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 358 cop_feat_caps->win_type); 359 mutex_unlock(&vas_pseries_mutex); 360 if (rc) 361 goto out; 362 363 /* 364 * Modify window and it is ready to use. 365 */ 366 rc = h_modify_vas_window(txwin); 367 if (!rc) 368 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 369 if (rc) 370 goto out_free; 371 372 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 373 txwin->win_type = cop_feat_caps->win_type; 374 mutex_lock(&vas_pseries_mutex); 375 list_add(&txwin->win_list, &caps->list); 376 mutex_unlock(&vas_pseries_mutex); 377 378 return &txwin->vas_win; 379 380 out_free: 381 /* 382 * Window is not operational. Free IRQ before closing 383 * window so that do not have to hold mutex. 384 */ 385 free_irq_setup(txwin); 386 h_deallocate_vas_window(txwin->vas_win.winid); 387 out: 388 atomic_dec(&cop_feat_caps->used_lpar_creds); 389 kfree(txwin); 390 return ERR_PTR(rc); 391 } 392 393 static u64 vas_paste_address(struct vas_window *vwin) 394 { 395 struct pseries_vas_window *win; 396 397 win = container_of(vwin, struct pseries_vas_window, vas_win); 398 return win->win_addr; 399 } 400 401 static int deallocate_free_window(struct pseries_vas_window *win) 402 { 403 int rc = 0; 404 405 /* 406 * The hypervisor waits for all requests including faults 407 * are processed before closing the window - Means all 408 * credits have to be returned. In the case of fault 409 * request, a credit is returned after OS issues 410 * H_GET_NX_FAULT hcall. 411 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 412 * hcall. 413 */ 414 rc = h_deallocate_vas_window(win->vas_win.winid); 415 if (!rc) 416 free_irq_setup(win); 417 418 return rc; 419 } 420 421 static int vas_deallocate_window(struct vas_window *vwin) 422 { 423 struct pseries_vas_window *win; 424 struct vas_cop_feat_caps *caps; 425 int rc = 0; 426 427 if (!vwin) 428 return -EINVAL; 429 430 win = container_of(vwin, struct pseries_vas_window, vas_win); 431 432 /* Should not happen */ 433 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 434 pr_err("Window (%u): Invalid window type %u\n", 435 vwin->winid, win->win_type); 436 return -EINVAL; 437 } 438 439 caps = &vascaps[win->win_type].caps; 440 mutex_lock(&vas_pseries_mutex); 441 rc = deallocate_free_window(win); 442 if (rc) { 443 mutex_unlock(&vas_pseries_mutex); 444 return rc; 445 } 446 447 list_del(&win->win_list); 448 atomic_dec(&caps->used_lpar_creds); 449 mutex_unlock(&vas_pseries_mutex); 450 451 put_vas_user_win_ref(&vwin->task_ref); 452 mm_context_remove_vas_window(vwin->task_ref.mm); 453 454 kfree(win); 455 return 0; 456 } 457 458 static const struct vas_user_win_ops vops_pseries = { 459 .open_win = vas_allocate_window, /* Open and configure window */ 460 .paste_addr = vas_paste_address, /* To do copy/paste */ 461 .close_win = vas_deallocate_window, /* Close window */ 462 }; 463 464 /* 465 * Supporting only nx-gzip coprocessor type now, but this API code 466 * extended to other coprocessor types later. 467 */ 468 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 469 const char *name) 470 { 471 int rc; 472 473 if (!copypaste_feat) 474 return -ENOTSUPP; 475 476 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 477 478 return rc; 479 } 480 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 481 482 void vas_unregister_api_pseries(void) 483 { 484 vas_unregister_coproc_api(); 485 } 486 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 487 488 /* 489 * Get the specific capabilities based on the feature type. 490 * Right now supports GZIP default and GZIP QoS capabilities. 491 */ 492 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 493 struct hv_vas_cop_feat_caps *hv_caps) 494 { 495 struct vas_cop_feat_caps *caps; 496 struct vas_caps *vcaps; 497 int rc = 0; 498 499 vcaps = &vascaps[type]; 500 memset(vcaps, 0, sizeof(*vcaps)); 501 INIT_LIST_HEAD(&vcaps->list); 502 503 caps = &vcaps->caps; 504 505 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 506 (u64)virt_to_phys(hv_caps)); 507 if (rc) 508 return rc; 509 510 caps->user_mode = hv_caps->user_mode; 511 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 512 pr_err("User space COPY/PASTE is not supported\n"); 513 return -ENOTSUPP; 514 } 515 516 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 517 caps->win_type = hv_caps->win_type; 518 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 519 pr_err("Unsupported window type %u\n", caps->win_type); 520 return -EINVAL; 521 } 522 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 523 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 524 atomic_set(&caps->target_lpar_creds, 525 be16_to_cpu(hv_caps->target_lpar_creds)); 526 if (feat == VAS_GZIP_DEF_FEAT) { 527 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 528 529 if (caps->max_win_creds < DEF_WIN_CREDS) { 530 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 531 DEF_WIN_CREDS, caps->max_win_creds); 532 return -EINVAL; 533 } 534 } 535 536 copypaste_feat = true; 537 538 return 0; 539 } 540 541 static int __init pseries_vas_init(void) 542 { 543 struct hv_vas_cop_feat_caps *hv_cop_caps; 544 struct hv_vas_all_caps *hv_caps; 545 int rc; 546 547 /* 548 * Linux supports user space COPY/PASTE only with Radix 549 */ 550 if (!radix_enabled()) { 551 pr_err("API is supported only with radix page tables\n"); 552 return -ENOTSUPP; 553 } 554 555 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 556 if (!hv_caps) 557 return -ENOMEM; 558 /* 559 * Get VAS overall capabilities by passing 0 to feature type. 560 */ 561 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 562 (u64)virt_to_phys(hv_caps)); 563 if (rc) 564 goto out; 565 566 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 567 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 568 569 hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); 570 if (!hv_cop_caps) { 571 rc = -ENOMEM; 572 goto out; 573 } 574 /* 575 * QOS capabilities available 576 */ 577 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 578 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 579 VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); 580 581 if (rc) 582 goto out_cop; 583 } 584 /* 585 * Default capabilities available 586 */ 587 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { 588 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 589 VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); 590 if (rc) 591 goto out_cop; 592 } 593 594 pr_info("GZIP feature is available\n"); 595 596 out_cop: 597 kfree(hv_cop_caps); 598 out: 599 kfree(hv_caps); 600 return rc; 601 } 602 machine_device_initcall(pseries, pseries_vas_init); 603