1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2020-21 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/module.h> 9 #include <linux/kernel.h> 10 #include <linux/export.h> 11 #include <linux/types.h> 12 #include <linux/delay.h> 13 #include <linux/slab.h> 14 #include <linux/interrupt.h> 15 #include <linux/irqdomain.h> 16 #include <asm/machdep.h> 17 #include <asm/hvcall.h> 18 #include <asm/plpar_wrappers.h> 19 #include <asm/vas.h> 20 #include "vas.h" 21 22 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul 23 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul 24 /* The hypervisor allows one credit per window right now */ 25 #define DEF_WIN_CREDS 1 26 27 static struct vas_all_caps caps_all; 28 static bool copypaste_feat; 29 30 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; 31 static DEFINE_MUTEX(vas_pseries_mutex); 32 33 static long hcall_return_busy_check(long rc) 34 { 35 /* Check if we are stalled for some time */ 36 if (H_IS_LONG_BUSY(rc)) { 37 msleep(get_longbusy_msecs(rc)); 38 rc = H_BUSY; 39 } else if (rc == H_BUSY) { 40 cond_resched(); 41 } 42 43 return rc; 44 } 45 46 /* 47 * Allocate VAS window hcall 48 */ 49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, 50 u8 wintype, u16 credits) 51 { 52 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; 53 long rc; 54 55 do { 56 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, 57 credits, domain[0], domain[1], domain[2], 58 domain[3], domain[4], domain[5]); 59 60 rc = hcall_return_busy_check(rc); 61 } while (rc == H_BUSY); 62 63 if (rc == H_SUCCESS) { 64 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { 65 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); 66 return -ENOTSUPP; 67 } 68 win->vas_win.winid = retbuf[0]; 69 win->win_addr = retbuf[1]; 70 win->complete_irq = retbuf[2]; 71 win->fault_irq = retbuf[3]; 72 return 0; 73 } 74 75 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", 76 rc, wintype, credits); 77 78 return -EIO; 79 } 80 81 /* 82 * Deallocate VAS window hcall. 83 */ 84 static int h_deallocate_vas_window(u64 winid) 85 { 86 long rc; 87 88 do { 89 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); 90 91 rc = hcall_return_busy_check(rc); 92 } while (rc == H_BUSY); 93 94 if (rc == H_SUCCESS) 95 return 0; 96 97 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", 98 rc, winid); 99 return -EIO; 100 } 101 102 /* 103 * Modify VAS window. 104 * After the window is opened with allocate window hcall, configure it 105 * with flags and LPAR PID before using. 106 */ 107 static int h_modify_vas_window(struct pseries_vas_window *win) 108 { 109 long rc; 110 u32 lpid = mfspr(SPRN_PID); 111 112 /* 113 * AMR value is not supported in Linux VAS implementation. 114 * The hypervisor ignores it if 0 is passed. 115 */ 116 do { 117 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, 118 win->vas_win.winid, lpid, 0, 119 VAS_MOD_WIN_FLAGS, 0); 120 121 rc = hcall_return_busy_check(rc); 122 } while (rc == H_BUSY); 123 124 if (rc == H_SUCCESS) 125 return 0; 126 127 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", 128 rc, win->vas_win.winid, lpid); 129 return -EIO; 130 } 131 132 /* 133 * This hcall is used to determine the capabilities from the hypervisor. 134 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES 135 * @query_type: If 0 is passed, the hypervisor returns the overall 136 * capabilities which provides all feature(s) that are 137 * available. Then query the hypervisor to get the 138 * corresponding capabilities for the specific feature. 139 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS 140 * and VAS GZIP Default capabilities. 141 * H_QUERY_NX_CAPABILITIES provides NX GZIP 142 * capabilities. 143 * @result: Return buffer to save capabilities. 144 */ 145 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) 146 { 147 long rc; 148 149 rc = plpar_hcall_norets(hcall, query_type, result); 150 151 if (rc == H_SUCCESS) 152 return 0; 153 154 pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", 155 hcall, rc, query_type, result); 156 return -EIO; 157 } 158 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); 159 160 /* 161 * hcall to get fault CRB from the hypervisor. 162 */ 163 static int h_get_nx_fault(u32 winid, u64 buffer) 164 { 165 long rc; 166 167 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); 168 169 if (rc == H_SUCCESS) 170 return 0; 171 172 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", 173 rc, winid, buffer); 174 return -EIO; 175 176 } 177 178 /* 179 * Handle the fault interrupt. 180 * When the fault interrupt is received for each window, query the 181 * hypervisor to get the fault CRB on the specific fault. Then 182 * process the CRB by updating CSB or send signal if the user space 183 * CSB is invalid. 184 * Note: The hypervisor forwards an interrupt for each fault request. 185 * So one fault CRB to process for each H_GET_NX_FAULT hcall. 186 */ 187 irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) 188 { 189 struct pseries_vas_window *txwin = data; 190 struct coprocessor_request_block crb; 191 struct vas_user_win_ref *tsk_ref; 192 int rc; 193 194 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); 195 if (!rc) { 196 tsk_ref = &txwin->vas_win.task_ref; 197 vas_dump_crb(&crb); 198 vas_update_csb(&crb, tsk_ref); 199 } 200 201 return IRQ_HANDLED; 202 } 203 204 /* 205 * Allocate window and setup IRQ mapping. 206 */ 207 static int allocate_setup_window(struct pseries_vas_window *txwin, 208 u64 *domain, u8 wintype) 209 { 210 int rc; 211 212 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); 213 if (rc) 214 return rc; 215 /* 216 * On PowerVM, the hypervisor setup and forwards the fault 217 * interrupt per window. So the IRQ setup and fault handling 218 * will be done for each open window separately. 219 */ 220 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); 221 if (!txwin->fault_virq) { 222 pr_err("Failed irq mapping %d\n", txwin->fault_irq); 223 rc = -EINVAL; 224 goto out_win; 225 } 226 227 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", 228 txwin->vas_win.winid); 229 if (!txwin->name) { 230 rc = -ENOMEM; 231 goto out_irq; 232 } 233 234 rc = request_threaded_irq(txwin->fault_virq, NULL, 235 pseries_vas_fault_thread_fn, IRQF_ONESHOT, 236 txwin->name, txwin); 237 if (rc) { 238 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", 239 txwin->vas_win.winid, txwin->fault_virq, rc); 240 goto out_free; 241 } 242 243 txwin->vas_win.wcreds_max = DEF_WIN_CREDS; 244 245 return 0; 246 out_free: 247 kfree(txwin->name); 248 out_irq: 249 irq_dispose_mapping(txwin->fault_virq); 250 out_win: 251 h_deallocate_vas_window(txwin->vas_win.winid); 252 return rc; 253 } 254 255 static inline void free_irq_setup(struct pseries_vas_window *txwin) 256 { 257 free_irq(txwin->fault_virq, txwin); 258 kfree(txwin->name); 259 irq_dispose_mapping(txwin->fault_virq); 260 } 261 262 static struct vas_window *vas_allocate_window(int vas_id, u64 flags, 263 enum vas_cop_type cop_type) 264 { 265 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; 266 struct vas_cop_feat_caps *cop_feat_caps; 267 struct vas_caps *caps; 268 struct pseries_vas_window *txwin; 269 int rc; 270 271 txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); 272 if (!txwin) 273 return ERR_PTR(-ENOMEM); 274 275 /* 276 * A VAS window can have many credits which means that many 277 * requests can be issued simultaneously. But the hypervisor 278 * restricts one credit per window. 279 * The hypervisor introduces 2 different types of credits: 280 * Default credit type (Uses normal priority FIFO): 281 * A limited number of credits are assigned to partitions 282 * based on processor entitlement. But these credits may be 283 * over-committed on a system depends on whether the CPUs 284 * are in shared or dedicated modes - that is, more requests 285 * may be issued across the system than NX can service at 286 * once which can result in paste command failure (RMA_busy). 287 * Then the process has to resend requests or fall-back to 288 * SW compression. 289 * Quality of Service (QoS) credit type (Uses high priority FIFO): 290 * To avoid NX HW contention, the system admins can assign 291 * QoS credits for each LPAR so that this partition is 292 * guaranteed access to NX resources. These credits are 293 * assigned to partitions via the HMC. 294 * Refer PAPR for more information. 295 * 296 * Allocate window with QoS credits if user requested. Otherwise 297 * default credits are used. 298 */ 299 if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) 300 caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; 301 else 302 caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; 303 304 cop_feat_caps = &caps->caps; 305 306 if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > 307 atomic_read(&cop_feat_caps->target_lpar_creds)) { 308 pr_err("Credits are not available to allocate window\n"); 309 rc = -EINVAL; 310 goto out; 311 } 312 313 if (vas_id == -1) { 314 /* 315 * The user space is requesting to allocate a window on 316 * a VAS instance where the process is executing. 317 * On PowerVM, domain values are passed to the hypervisor 318 * to select VAS instance. Useful if the process is 319 * affinity to NUMA node. 320 * The hypervisor selects VAS instance if 321 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. 322 * The h_allocate_vas_window hcall is defined to take a 323 * domain values as specified by h_home_node_associativity, 324 * So no unpacking needs to be done. 325 */ 326 rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, 327 VPHN_FLAG_VCPU, smp_processor_id()); 328 if (rc != H_SUCCESS) { 329 pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); 330 goto out; 331 } 332 } 333 334 /* 335 * Allocate / Deallocate window hcalls and setup / free IRQs 336 * have to be protected with mutex. 337 * Open VAS window: Allocate window hcall and setup IRQ 338 * Close VAS window: Deallocate window hcall and free IRQ 339 * The hypervisor waits until all NX requests are 340 * completed before closing the window. So expects OS 341 * to handle NX faults, means IRQ can be freed only 342 * after the deallocate window hcall is returned. 343 * So once the window is closed with deallocate hcall before 344 * the IRQ is freed, it can be assigned to new allocate 345 * hcall with the same fault IRQ by the hypervisor. It can 346 * result in setup IRQ fail for the new window since the 347 * same fault IRQ is not freed by the OS before. 348 */ 349 mutex_lock(&vas_pseries_mutex); 350 rc = allocate_setup_window(txwin, (u64 *)&domain[0], 351 cop_feat_caps->win_type); 352 mutex_unlock(&vas_pseries_mutex); 353 if (rc) 354 goto out; 355 356 /* 357 * Modify window and it is ready to use. 358 */ 359 rc = h_modify_vas_window(txwin); 360 if (!rc) 361 rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); 362 if (rc) 363 goto out_free; 364 365 vas_user_win_add_mm_context(&txwin->vas_win.task_ref); 366 txwin->win_type = cop_feat_caps->win_type; 367 mutex_lock(&vas_pseries_mutex); 368 list_add(&txwin->win_list, &caps->list); 369 mutex_unlock(&vas_pseries_mutex); 370 371 return &txwin->vas_win; 372 373 out_free: 374 /* 375 * Window is not operational. Free IRQ before closing 376 * window so that do not have to hold mutex. 377 */ 378 free_irq_setup(txwin); 379 h_deallocate_vas_window(txwin->vas_win.winid); 380 out: 381 atomic_dec(&cop_feat_caps->used_lpar_creds); 382 kfree(txwin); 383 return ERR_PTR(rc); 384 } 385 386 static u64 vas_paste_address(struct vas_window *vwin) 387 { 388 struct pseries_vas_window *win; 389 390 win = container_of(vwin, struct pseries_vas_window, vas_win); 391 return win->win_addr; 392 } 393 394 static int deallocate_free_window(struct pseries_vas_window *win) 395 { 396 int rc = 0; 397 398 /* 399 * The hypervisor waits for all requests including faults 400 * are processed before closing the window - Means all 401 * credits have to be returned. In the case of fault 402 * request, a credit is returned after OS issues 403 * H_GET_NX_FAULT hcall. 404 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW 405 * hcall. 406 */ 407 rc = h_deallocate_vas_window(win->vas_win.winid); 408 if (!rc) 409 free_irq_setup(win); 410 411 return rc; 412 } 413 414 static int vas_deallocate_window(struct vas_window *vwin) 415 { 416 struct pseries_vas_window *win; 417 struct vas_cop_feat_caps *caps; 418 int rc = 0; 419 420 if (!vwin) 421 return -EINVAL; 422 423 win = container_of(vwin, struct pseries_vas_window, vas_win); 424 425 /* Should not happen */ 426 if (win->win_type >= VAS_MAX_FEAT_TYPE) { 427 pr_err("Window (%u): Invalid window type %u\n", 428 vwin->winid, win->win_type); 429 return -EINVAL; 430 } 431 432 caps = &vascaps[win->win_type].caps; 433 mutex_lock(&vas_pseries_mutex); 434 rc = deallocate_free_window(win); 435 if (rc) { 436 mutex_unlock(&vas_pseries_mutex); 437 return rc; 438 } 439 440 list_del(&win->win_list); 441 atomic_dec(&caps->used_lpar_creds); 442 mutex_unlock(&vas_pseries_mutex); 443 444 put_vas_user_win_ref(&vwin->task_ref); 445 mm_context_remove_vas_window(vwin->task_ref.mm); 446 447 kfree(win); 448 return 0; 449 } 450 451 static const struct vas_user_win_ops vops_pseries = { 452 .open_win = vas_allocate_window, /* Open and configure window */ 453 .paste_addr = vas_paste_address, /* To do copy/paste */ 454 .close_win = vas_deallocate_window, /* Close window */ 455 }; 456 457 /* 458 * Supporting only nx-gzip coprocessor type now, but this API code 459 * extended to other coprocessor types later. 460 */ 461 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, 462 const char *name) 463 { 464 int rc; 465 466 if (!copypaste_feat) 467 return -ENOTSUPP; 468 469 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); 470 471 return rc; 472 } 473 EXPORT_SYMBOL_GPL(vas_register_api_pseries); 474 475 void vas_unregister_api_pseries(void) 476 { 477 vas_unregister_coproc_api(); 478 } 479 EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); 480 481 /* 482 * Get the specific capabilities based on the feature type. 483 * Right now supports GZIP default and GZIP QoS capabilities. 484 */ 485 static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 486 struct hv_vas_cop_feat_caps *hv_caps) 487 { 488 struct vas_cop_feat_caps *caps; 489 struct vas_caps *vcaps; 490 int rc = 0; 491 492 vcaps = &vascaps[type]; 493 memset(vcaps, 0, sizeof(*vcaps)); 494 INIT_LIST_HEAD(&vcaps->list); 495 496 caps = &vcaps->caps; 497 498 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, 499 (u64)virt_to_phys(hv_caps)); 500 if (rc) 501 return rc; 502 503 caps->user_mode = hv_caps->user_mode; 504 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { 505 pr_err("User space COPY/PASTE is not supported\n"); 506 return -ENOTSUPP; 507 } 508 509 caps->descriptor = be64_to_cpu(hv_caps->descriptor); 510 caps->win_type = hv_caps->win_type; 511 if (caps->win_type >= VAS_MAX_FEAT_TYPE) { 512 pr_err("Unsupported window type %u\n", caps->win_type); 513 return -EINVAL; 514 } 515 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); 516 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); 517 atomic_set(&caps->target_lpar_creds, 518 be16_to_cpu(hv_caps->target_lpar_creds)); 519 if (feat == VAS_GZIP_DEF_FEAT) { 520 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); 521 522 if (caps->max_win_creds < DEF_WIN_CREDS) { 523 pr_err("Window creds(%u) > max allowed window creds(%u)\n", 524 DEF_WIN_CREDS, caps->max_win_creds); 525 return -EINVAL; 526 } 527 } 528 529 copypaste_feat = true; 530 531 return 0; 532 } 533 534 static int __init pseries_vas_init(void) 535 { 536 struct hv_vas_cop_feat_caps *hv_cop_caps; 537 struct hv_vas_all_caps *hv_caps; 538 int rc; 539 540 /* 541 * Linux supports user space COPY/PASTE only with Radix 542 */ 543 if (!radix_enabled()) { 544 pr_err("API is supported only with radix page tables\n"); 545 return -ENOTSUPP; 546 } 547 548 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); 549 if (!hv_caps) 550 return -ENOMEM; 551 /* 552 * Get VAS overall capabilities by passing 0 to feature type. 553 */ 554 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, 555 (u64)virt_to_phys(hv_caps)); 556 if (rc) 557 goto out; 558 559 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); 560 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); 561 562 hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); 563 if (!hv_cop_caps) { 564 rc = -ENOMEM; 565 goto out; 566 } 567 /* 568 * QOS capabilities available 569 */ 570 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { 571 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, 572 VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); 573 574 if (rc) 575 goto out_cop; 576 } 577 /* 578 * Default capabilities available 579 */ 580 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { 581 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, 582 VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); 583 if (rc) 584 goto out_cop; 585 } 586 587 pr_info("GZIP feature is available\n"); 588 589 out_cop: 590 kfree(hv_cop_caps); 591 out: 592 kfree(hv_caps); 593 return rc; 594 } 595 machine_device_initcall(pseries, pseries_vas_init); 596