1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2016-17 IBM Corp. 4 */ 5 6 #define pr_fmt(fmt) "vas: " fmt 7 8 #include <linux/types.h> 9 #include <linux/mutex.h> 10 #include <linux/slab.h> 11 #include <linux/io.h> 12 #include <linux/log2.h> 13 #include <linux/rcupdate.h> 14 #include <linux/cred.h> 15 #include <asm/switch_to.h> 16 #include <asm/ppc-opcode.h> 17 #include "vas.h" 18 #include "copy-paste.h" 19 20 #define CREATE_TRACE_POINTS 21 #include "vas-trace.h" 22 23 /* 24 * Compute the paste address region for the window @window using the 25 * ->paste_base_addr and ->paste_win_id_shift we got from device tree. 26 */ 27 static void compute_paste_address(struct vas_window *window, u64 *addr, int *len) 28 { 29 int winid; 30 u64 base, shift; 31 32 base = window->vinst->paste_base_addr; 33 shift = window->vinst->paste_win_id_shift; 34 winid = window->winid; 35 36 *addr = base + (winid << shift); 37 if (len) 38 *len = PAGE_SIZE; 39 40 pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); 41 } 42 43 static inline void get_hvwc_mmio_bar(struct vas_window *window, 44 u64 *start, int *len) 45 { 46 u64 pbaddr; 47 48 pbaddr = window->vinst->hvwc_bar_start; 49 *start = pbaddr + window->winid * VAS_HVWC_SIZE; 50 *len = VAS_HVWC_SIZE; 51 } 52 53 static inline void get_uwc_mmio_bar(struct vas_window *window, 54 u64 *start, int *len) 55 { 56 u64 pbaddr; 57 58 pbaddr = window->vinst->uwc_bar_start; 59 *start = pbaddr + window->winid * VAS_UWC_SIZE; 60 *len = VAS_UWC_SIZE; 61 } 62 63 /* 64 * Map the paste bus address of the given send window into kernel address 65 * space. Unlike MMIO regions (map_mmio_region() below), paste region must 66 * be mapped cache-able and is only applicable to send windows. 
67 */ 68 static void *map_paste_region(struct vas_window *txwin) 69 { 70 int len; 71 void *map; 72 char *name; 73 u64 start; 74 75 name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, 76 txwin->winid); 77 if (!name) 78 goto free_name; 79 80 txwin->paste_addr_name = name; 81 compute_paste_address(txwin, &start, &len); 82 83 if (!request_mem_region(start, len, name)) { 84 pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", 85 __func__, start, len); 86 goto free_name; 87 } 88 89 map = ioremap_cache(start, len); 90 if (!map) { 91 pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__, 92 start, len); 93 goto free_name; 94 } 95 96 pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map); 97 return map; 98 99 free_name: 100 kfree(name); 101 return ERR_PTR(-ENOMEM); 102 } 103 104 static void *map_mmio_region(char *name, u64 start, int len) 105 { 106 void *map; 107 108 if (!request_mem_region(start, len, name)) { 109 pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", 110 __func__, start, len); 111 return NULL; 112 } 113 114 map = ioremap(start, len); 115 if (!map) { 116 pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start, 117 len); 118 return NULL; 119 } 120 121 return map; 122 } 123 124 static void unmap_region(void *addr, u64 start, int len) 125 { 126 iounmap(addr); 127 release_mem_region((phys_addr_t)start, len); 128 } 129 130 /* 131 * Unmap the paste address region for a window. 132 */ 133 static void unmap_paste_region(struct vas_window *window) 134 { 135 int len; 136 u64 busaddr_start; 137 138 if (window->paste_kaddr) { 139 compute_paste_address(window, &busaddr_start, &len); 140 unmap_region(window->paste_kaddr, busaddr_start, len); 141 window->paste_kaddr = NULL; 142 kfree(window->paste_addr_name); 143 window->paste_addr_name = NULL; 144 } 145 } 146 147 /* 148 * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't 149 * unmap when the window's debugfs dir is in use. 
This serializes close 150 * of a window even on another VAS instance but since its not a critical 151 * path, just minimize the time we hold the mutex for now. We can add 152 * a per-instance mutex later if necessary. 153 */ 154 static void unmap_winctx_mmio_bars(struct vas_window *window) 155 { 156 int len; 157 void *uwc_map; 158 void *hvwc_map; 159 u64 busaddr_start; 160 161 mutex_lock(&vas_mutex); 162 163 hvwc_map = window->hvwc_map; 164 window->hvwc_map = NULL; 165 166 uwc_map = window->uwc_map; 167 window->uwc_map = NULL; 168 169 mutex_unlock(&vas_mutex); 170 171 if (hvwc_map) { 172 get_hvwc_mmio_bar(window, &busaddr_start, &len); 173 unmap_region(hvwc_map, busaddr_start, len); 174 } 175 176 if (uwc_map) { 177 get_uwc_mmio_bar(window, &busaddr_start, &len); 178 unmap_region(uwc_map, busaddr_start, len); 179 } 180 } 181 182 /* 183 * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the 184 * OS/User Window Context (UWC) MMIO Base Address Region for the given window. 185 * Map these bus addresses and save the mapped kernel addresses in @window. 186 */ 187 int map_winctx_mmio_bars(struct vas_window *window) 188 { 189 int len; 190 u64 start; 191 192 get_hvwc_mmio_bar(window, &start, &len); 193 window->hvwc_map = map_mmio_region("HVWCM_Window", start, len); 194 195 get_uwc_mmio_bar(window, &start, &len); 196 window->uwc_map = map_mmio_region("UWCM_Window", start, len); 197 198 if (!window->hvwc_map || !window->uwc_map) { 199 unmap_winctx_mmio_bars(window); 200 return -1; 201 } 202 203 return 0; 204 } 205 206 /* 207 * Reset all valid registers in the HV and OS/User Window Contexts for 208 * the window identified by @window. 209 * 210 * NOTE: We cannot really use a for loop to reset window context. Not all 211 * offsets in a window context are valid registers and the valid 212 * registers are not sequential. And, we can only write to offsets 213 * with valid registers. 
/*
 * Reset all valid registers in the HV and OS/User Window Contexts for
 * the window identified by @window.
 *
 * NOTE: We cannot really use a for loop to reset window context. Not all
 *	 offsets in a window context are valid registers and the valid
 *	 registers are not sequential. And, we can only write to offsets
 *	 with valid registers.
 */
void reset_window_regs(struct vas_window *window)
{
	write_hvwc_reg(window, VREG(LPID), 0ULL);
	write_hvwc_reg(window, VREG(PID), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
	write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
	write_hvwc_reg(window, VREG(AMR), 0ULL);
	write_hvwc_reg(window, VREG(SEIDR), 0ULL);
	write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
	write_hvwc_reg(window, VREG(PSWID), 0ULL);
	write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
	write_hvwc_reg(window, VREG(WINCTL), 0ULL);
	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
	write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);

	/* Skip read-only registers: NX_UTIL and NX_UTIL_SE */

	/*
	 * The send and receive window credit adder registers are also
	 * accessible from HVWC and have been initialized above. We don't
	 * need to initialize from the OS/User Window Context, so skip
	 * following calls:
	 *
	 *	write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
	 *	write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	 */
}

/*
 * Initialize window context registers related to Address Translation.
 * These registers are common to send/receive windows although they
 * differ for user/kernel windows. As we resolve the TODOs we may
 * want to add fields to vas_winctx and move the initialization to
 * init_vas_winctx_regs().
 */
static void init_xlate_regs(struct vas_window *window, bool user_win)
{
	u64 lpcr, val;

	/*
	 * MSR_TA, MSR_US are false for both kernel and user.
	 * MSR_DR and MSR_PR are false for kernel.
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
	val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
	if (user_win) {
		val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
		val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
	}
	write_hvwc_reg(window, VREG(XLATE_MSR), val);

	lpcr = mfspr(SPRN_LPCR);
	val = 0ULL;
	/*
	 * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
	 *	 Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
	 *
	 * NOTE: From Section 1.3.1, Address Translation Context of the
	 *	 Nest MMU Workbook, LPCR_SC should be 0 for Power9.
	 */
	val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
	/* Mirror the host LPCR ISL/TC bits into the window context */
	val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
	val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
	val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
	write_hvwc_reg(window, VREG(XLATE_LPCR), val);

	/*
	 * Section 1.3.1 (Address translation Context) of NMMU workbook.
	 *	0b00	Hashed Page Table mode
	 *	0b01	Reserved
	 *	0b10	Radix on HPT
	 *	0b11	Radix on Radix
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
	write_hvwc_reg(window, VREG(XLATE_CTL), val);

	/*
	 * TODO: Can we mfspr(AMR) even for user windows?
	 */
	val = 0ULL;
	val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
	write_hvwc_reg(window, VREG(AMR), val);

	val = 0ULL;
	val = SET_FIELD(VAS_SEIDR, val, 0);
	write_hvwc_reg(window, VREG(SEIDR), val);
}

/*
 * Initialize Reserved Send Buffer Count for the send window. It involves
 * writing to the register, reading it back to confirm that the hardware
 * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
 *
 * Since we can only make a best-effort attempt to fulfill the request,
 * we don't return any errors if we cannot.
 *
 * TODO: Reserved (aka dedicated) send buffers are not supported yet.
 */
static void init_rsvd_tx_buf_count(struct vas_window *txwin,
				struct vas_winctx *winctx)
{
	write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
}
/*
 * init_winctx_regs()
 *	Initialize window context registers for a receive window.
 *	Except for caching control and marking window open, the registers
 *	are initialized in the order listed in Section 3.1.4 (Window Context
 *	Cache Register Details) of the VAS workbook although they don't need
 *	to be.
 *
 * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
 *	(so that it can get a large contiguous area) and passes that buffer
 *	to kernel via device tree. We now write that buffer address to the
 *	FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
 *	write the per-chip RX FIFO addresses to the windows during boot-up
 *	as a one-time task? That could work for NX but what about other
 *	receivers?  Let the receivers tell us the rx-fifo buffers for now.
 *
 * Always returns 0 (the register writes cannot fail).
 */
int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
{
	u64 val;
	int fifo_size;

	/* Start from a clean slate before programming the context. */
	reset_window_regs(window);

	val = 0ULL;
	val = SET_FIELD(VAS_LPID, val, winctx->lpid);
	write_hvwc_reg(window, VREG(LPID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
	write_hvwc_reg(window, VREG(PID), val);

	init_xlate_regs(window, winctx->user_win);

	val = 0ULL;
	val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
	write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);

	/* In PowerNV, interrupts go to HV. */
	write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);

	val = 0ULL;
	val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
	write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);

	val = 0ULL;
	val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
	write_hvwc_reg(window, VREG(PSWID), val);

	write_hvwc_reg(window, VREG(SPARE1), 0ULL);
	write_hvwc_reg(window, VREG(SPARE2), 0ULL);
	write_hvwc_reg(window, VREG(SPARE3), 0ULL);

	/*
	 * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
	 *	 register as is - do NOT shift the address into VAS_LFIFO_BAR
	 *	 bit fields! Ok to set the page migration select fields -
	 *	 VAS ignores the lower 10+ bits in the address anyway, because
	 *	 the minimum FIFO size is 1K?
	 *
	 * See also: Design note in function header.
	 */
	val = __pa(winctx->rx_fifo);
	val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
	write_hvwc_reg(window, VREG(LFIFO_BAR), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
	write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
	val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
	write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);

	write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
	write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
	write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);

	/* Both send and receive credit limits come from wcreds_max. */
	val = 0ULL;
	val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
	write_hvwc_reg(window, VREG(LRX_WCRED), val);

	val = 0ULL;
	val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
	write_hvwc_reg(window, VREG(TX_WCRED), val);

	write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);

	/* LFIFO_SIZE register takes log2 of the FIFO size in KB. */
	fifo_size = winctx->rx_fifo_size / 1024;

	val = 0ULL;
	val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
	write_hvwc_reg(window, VREG(LFIFO_SIZE), val);

	/* Update window control and caching control registers last so
	 * we mark the window open only after fully initializing it and
	 * pushing context to cache.
	 */

	write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);

	init_rsvd_tx_buf_count(window, winctx);

	/* for a send window, point to the matching receive window */
	val = 0ULL;
	val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
	write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);

	write_hvwc_reg(window, VREG(SPARE4), 0ULL);

	val = 0ULL;
	val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
	val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
	val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
	val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
	write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
	write_hvwc_reg(window, VREG(LNOTIFY_PID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
	write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
	write_hvwc_reg(window, VREG(LNOTIFY_TID), val);

	val = 0ULL;
	val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
	val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
	write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);

	/* Skip read-only registers NX_UTIL and NX_UTIL_SE */

	write_hvwc_reg(window, VREG(SPARE5), 0ULL);
	write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
	write_hvwc_reg(window, VREG(SPARE6), 0ULL);

	/* Finally, push window context to memory and... */
	val = 0ULL;
	val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
	write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);

	/* ... mark the window open for business */
	val = 0ULL;
	val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
	val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
	val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
	val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
	val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
	val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
	val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
	val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
	val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
	write_hvwc_reg(window, VREG(WINCTL), val);

	return 0;
}

/* Return window id @winid to the instance's IDA allocator. */
static void vas_release_window_id(struct ida *ida, int winid)
{
	ida_free(ida, winid);
}

/*
 * Allocate a window id from the instance's IDA. Returns the id on
 * success. -ENOSPC (all ids in use) is reported to callers as -EAGAIN;
 * other ida_alloc_max() errors are passed through unchanged.
 */
static int vas_assign_window_id(struct ida *ida)
{
	int winid = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL);

	if (winid == -ENOSPC) {
		pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP);
		return -EAGAIN;
	}

	return winid;
}

/*
 * Tear down @window: unmap its MMIO BARs, remove its debugfs dir, free
 * the structure and release its window id. Caller must have already
 * removed the window from the instance tables (clear_vinst_win()).
 */
static void vas_window_free(struct vas_window *window)
{
	int winid = window->winid;
	struct vas_instance *vinst = window->vinst;

	unmap_winctx_mmio_bars(window);

	vas_window_free_dbgdir(window);

	kfree(window);

	vas_release_window_id(&vinst->ida, winid);
}

/*
 * Allocate a new window on instance @vinst: assign a window id, allocate
 * the structure, map the window's MMIO BARs and create its debugfs dir.
 * Returns the window or an ERR_PTR on failure.
 */
static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
{
	int winid;
	struct vas_window *window;

	winid = vas_assign_window_id(&vinst->ida);
	if (winid < 0)
		return ERR_PTR(winid);

	window = kzalloc(sizeof(*window), GFP_KERNEL);
	if (!window)
		goto out_free;

	window->vinst = vinst;
	window->winid = winid;

	if (map_winctx_mmio_bars(window))
		goto out_free;

	vas_window_init_dbgdir(window);

	return window;

out_free:
	/* kfree(NULL) is a no-op if the allocation itself failed */
	kfree(window);
	vas_release_window_id(&vinst->ida, winid);
	return ERR_PTR(-ENOMEM);
}

/* Drop a send window's reference on its paired receive window. */
static void put_rx_win(struct vas_window *rxwin)
{
	/* Better not be a send window! */
	WARN_ON_ONCE(rxwin->tx_win);

	atomic_dec(&rxwin->num_txwins);
}

/*
 * Find the user space receive window given the @pswid.
 *      - We must have a valid vasid and it must belong to this instance.
 *        (so both send and receive windows are on the same VAS instance)
 *      - The window must refer to an OPEN, FTW, RECEIVE window.
 *
 * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
 *
 * Returns the window or ERR_PTR(-EINVAL).
 */
static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
{
	int vasid, winid;
	struct vas_window *rxwin;

	decode_pswid(pswid, &vasid, &winid);

	if (vinst->vas_id != vasid)
		return ERR_PTR(-EINVAL);

	rxwin = vinst->windows[winid];

	if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
		return ERR_PTR(-EINVAL);

	return rxwin;
}

/*
 * Get the VAS receive window associated with NX engine identified
 * by @cop and if applicable, @pswid.
 *
 * See also function header of set_vinst_win().
 *
 * On success the receive window's num_txwins count is incremented;
 * the caller must balance it with put_rx_win().
 */
static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
			enum vas_cop_type cop, u32 pswid)
{
	struct vas_window *rxwin;

	mutex_lock(&vinst->mutex);

	if (cop == VAS_COP_TYPE_FTW)
		rxwin = get_user_rxwin(vinst, pswid);
	else
		rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);

	if (!IS_ERR(rxwin))
		atomic_inc(&rxwin->num_txwins);

	mutex_unlock(&vinst->mutex);

	return rxwin;
}
It is used to look up send windows 628 * during fault handling and receive windows when pairing user space 629 * send/receive windows. 630 * 631 * The second table, ->rxwin[], contains receive windows that are 632 * associated with NX engines. This table has VAS_COP_TYPE_MAX 633 * entries and is used to look up a receive window by its 634 * coprocessor type. 635 * 636 * Here, we save @window in the ->windows[] table. If it is a receive 637 * window, we also save the window in the ->rxwin[] table. 638 */ 639 static void set_vinst_win(struct vas_instance *vinst, 640 struct vas_window *window) 641 { 642 int id = window->winid; 643 644 mutex_lock(&vinst->mutex); 645 646 /* 647 * There should only be one receive window for a coprocessor type 648 * unless its a user (FTW) window. 649 */ 650 if (!window->user_win && !window->tx_win) { 651 WARN_ON_ONCE(vinst->rxwin[window->cop]); 652 vinst->rxwin[window->cop] = window; 653 } 654 655 WARN_ON_ONCE(vinst->windows[id] != NULL); 656 vinst->windows[id] = window; 657 658 mutex_unlock(&vinst->mutex); 659 } 660 661 /* 662 * Clear this window from the table(s) of windows for this VAS instance. 663 * See also function header of set_vinst_win(). 664 */ 665 static void clear_vinst_win(struct vas_window *window) 666 { 667 int id = window->winid; 668 struct vas_instance *vinst = window->vinst; 669 670 mutex_lock(&vinst->mutex); 671 672 if (!window->user_win && !window->tx_win) { 673 WARN_ON_ONCE(!vinst->rxwin[window->cop]); 674 vinst->rxwin[window->cop] = NULL; 675 } 676 677 WARN_ON_ONCE(vinst->windows[id] != window); 678 vinst->windows[id] = NULL; 679 680 mutex_unlock(&vinst->mutex); 681 } 682 683 static void init_winctx_for_rxwin(struct vas_window *rxwin, 684 struct vas_rx_win_attr *rxattr, 685 struct vas_winctx *winctx) 686 { 687 /* 688 * We first zero (memset()) all fields and only set non-zero fields. 
689 * Following fields are 0/false but maybe deserve a comment: 690 * 691 * ->notify_os_intr_reg In powerNV, send intrs to HV 692 * ->notify_disable False for NX windows 693 * ->intr_disable False for Fault Windows 694 * ->xtra_write False for NX windows 695 * ->notify_early NA for NX windows 696 * ->rsvd_txbuf_count NA for Rx windows 697 * ->lpid, ->pid, ->tid NA for Rx windows 698 */ 699 700 memset(winctx, 0, sizeof(struct vas_winctx)); 701 702 winctx->rx_fifo = rxattr->rx_fifo; 703 winctx->rx_fifo_size = rxattr->rx_fifo_size; 704 winctx->wcreds_max = rxwin->wcreds_max; 705 winctx->pin_win = rxattr->pin_win; 706 707 winctx->nx_win = rxattr->nx_win; 708 winctx->fault_win = rxattr->fault_win; 709 winctx->user_win = rxattr->user_win; 710 winctx->rej_no_credit = rxattr->rej_no_credit; 711 winctx->rx_word_mode = rxattr->rx_win_ord_mode; 712 winctx->tx_word_mode = rxattr->tx_win_ord_mode; 713 winctx->rx_wcred_mode = rxattr->rx_wcred_mode; 714 winctx->tx_wcred_mode = rxattr->tx_wcred_mode; 715 winctx->notify_early = rxattr->notify_early; 716 717 if (winctx->nx_win) { 718 winctx->data_stamp = true; 719 winctx->intr_disable = true; 720 winctx->pin_win = true; 721 722 WARN_ON_ONCE(winctx->fault_win); 723 WARN_ON_ONCE(!winctx->rx_word_mode); 724 WARN_ON_ONCE(!winctx->tx_word_mode); 725 WARN_ON_ONCE(winctx->notify_after_count); 726 } else if (winctx->fault_win) { 727 winctx->notify_disable = true; 728 } else if (winctx->user_win) { 729 /* 730 * Section 1.8.1 Low Latency Core-Core Wake up of 731 * the VAS workbook: 732 * 733 * - disable credit checks ([tr]x_wcred_mode = false) 734 * - disable FIFO writes 735 * - enable ASB_Notify, disable interrupt 736 */ 737 winctx->fifo_disable = true; 738 winctx->intr_disable = true; 739 winctx->rx_fifo = NULL; 740 } 741 742 winctx->lnotify_lpid = rxattr->lnotify_lpid; 743 winctx->lnotify_pid = rxattr->lnotify_pid; 744 winctx->lnotify_tid = rxattr->lnotify_tid; 745 winctx->pswid = rxattr->pswid; 746 winctx->dma_type = VAS_DMA_TYPE_INJECT; 
747 winctx->tc_mode = rxattr->tc_mode; 748 749 winctx->min_scope = VAS_SCOPE_LOCAL; 750 winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; 751 } 752 753 static bool rx_win_args_valid(enum vas_cop_type cop, 754 struct vas_rx_win_attr *attr) 755 { 756 pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n", 757 attr->fault_win, attr->notify_disable, 758 attr->intr_disable, attr->notify_early, 759 attr->rx_fifo_size); 760 761 if (cop >= VAS_COP_TYPE_MAX) 762 return false; 763 764 if (cop != VAS_COP_TYPE_FTW && 765 attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN) 766 return false; 767 768 if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) 769 return false; 770 771 if (attr->wcreds_max > VAS_RX_WCREDS_MAX) 772 return false; 773 774 if (attr->nx_win) { 775 /* cannot be fault or user window if it is nx */ 776 if (attr->fault_win || attr->user_win) 777 return false; 778 /* 779 * Section 3.1.4.32: NX Windows must not disable notification, 780 * and must not enable interrupts or early notification. 781 */ 782 if (attr->notify_disable || !attr->intr_disable || 783 attr->notify_early) 784 return false; 785 } else if (attr->fault_win) { 786 /* cannot be both fault and user window */ 787 if (attr->user_win) 788 return false; 789 790 /* 791 * Section 3.1.4.32: Fault windows must disable notification 792 * but not interrupts. 793 */ 794 if (!attr->notify_disable || attr->intr_disable) 795 return false; 796 797 } else if (attr->user_win) { 798 /* 799 * User receive windows are only for fast-thread-wakeup 800 * (FTW). They don't need a FIFO and must disable interrupts 801 */ 802 if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable) 803 return false; 804 } else { 805 /* Rx window must be one of NX or Fault or User window. 
*/ 806 return false; 807 } 808 809 return true; 810 } 811 812 void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) 813 { 814 memset(rxattr, 0, sizeof(*rxattr)); 815 816 if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { 817 rxattr->pin_win = true; 818 rxattr->nx_win = true; 819 rxattr->fault_win = false; 820 rxattr->intr_disable = true; 821 rxattr->rx_wcred_mode = true; 822 rxattr->tx_wcred_mode = true; 823 rxattr->rx_win_ord_mode = true; 824 rxattr->tx_win_ord_mode = true; 825 } else if (cop == VAS_COP_TYPE_FAULT) { 826 rxattr->pin_win = true; 827 rxattr->fault_win = true; 828 rxattr->notify_disable = true; 829 rxattr->rx_wcred_mode = true; 830 rxattr->tx_wcred_mode = true; 831 rxattr->rx_win_ord_mode = true; 832 rxattr->tx_win_ord_mode = true; 833 } else if (cop == VAS_COP_TYPE_FTW) { 834 rxattr->user_win = true; 835 rxattr->intr_disable = true; 836 837 /* 838 * As noted in the VAS Workbook we disable credit checks. 839 * If we enable credit checks in the future, we must also 840 * implement a mechanism to return the user credits or new 841 * paste operations will fail. 
842 */ 843 } 844 } 845 EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); 846 847 struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, 848 struct vas_rx_win_attr *rxattr) 849 { 850 struct vas_window *rxwin; 851 struct vas_winctx winctx; 852 struct vas_instance *vinst; 853 854 trace_vas_rx_win_open(current, vasid, cop, rxattr); 855 856 if (!rx_win_args_valid(cop, rxattr)) 857 return ERR_PTR(-EINVAL); 858 859 vinst = find_vas_instance(vasid); 860 if (!vinst) { 861 pr_devel("vasid %d not found!\n", vasid); 862 return ERR_PTR(-EINVAL); 863 } 864 pr_devel("Found instance %d\n", vasid); 865 866 rxwin = vas_window_alloc(vinst); 867 if (IS_ERR(rxwin)) { 868 pr_devel("Unable to allocate memory for Rx window\n"); 869 return rxwin; 870 } 871 872 rxwin->tx_win = false; 873 rxwin->nx_win = rxattr->nx_win; 874 rxwin->user_win = rxattr->user_win; 875 rxwin->cop = cop; 876 rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT; 877 if (rxattr->user_win) 878 rxwin->pid = task_pid_vnr(current); 879 880 init_winctx_for_rxwin(rxwin, rxattr, &winctx); 881 init_winctx_regs(rxwin, &winctx); 882 883 set_vinst_win(vinst, rxwin); 884 885 return rxwin; 886 } 887 EXPORT_SYMBOL_GPL(vas_rx_win_open); 888 889 void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) 890 { 891 memset(txattr, 0, sizeof(*txattr)); 892 893 if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) { 894 txattr->rej_no_credit = false; 895 txattr->rx_wcred_mode = true; 896 txattr->tx_wcred_mode = true; 897 txattr->rx_win_ord_mode = true; 898 txattr->tx_win_ord_mode = true; 899 } else if (cop == VAS_COP_TYPE_FTW) { 900 txattr->user_win = true; 901 } 902 } 903 EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); 904 905 static void init_winctx_for_txwin(struct vas_window *txwin, 906 struct vas_tx_win_attr *txattr, 907 struct vas_winctx *winctx) 908 { 909 /* 910 * We first zero all fields and only set non-zero ones. 
	 * Following are some fields set to 0/false for the stated reason:
	 *
	 *	->notify_os_intr_reg	In powernv, send intrs to HV
	 *	->rsvd_txbuf_count	Not supported yet.
	 *	->notify_disable	False for NX windows
	 *	->xtra_write		False for NX windows
	 *	->notify_early		NA for NX windows
	 *	->lnotify_lpid		NA for Tx windows
	 *	->lnotify_pid		NA for Tx windows
	 *	->lnotify_tid		NA for Tx windows
	 *	->tx_win_cred_mode	Ignore for now for NX windows
	 *	->rx_win_cred_mode	Ignore for now for NX windows
	 */
	memset(winctx, 0, sizeof(struct vas_winctx));

	winctx->wcreds_max = txwin->wcreds_max;

	winctx->user_win = txattr->user_win;
	/* NX-ness is inherited from the paired receive window. */
	winctx->nx_win = txwin->rxwin->nx_win;
	winctx->pin_win = txattr->pin_win;
	winctx->rej_no_credit = txattr->rej_no_credit;
	winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;

	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
	winctx->rx_word_mode = txattr->rx_win_ord_mode;
	winctx->tx_word_mode = txattr->tx_win_ord_mode;
	winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;

	/*
	 * Interrupts are always disabled for send windows here (on powernv
	 * they would go to the hypervisor); data stamping is enabled only
	 * for NX windows.
	 */
	winctx->intr_disable = true;
	if (winctx->nx_win)
		winctx->data_stamp = true;

	winctx->lpid = txattr->lpid;
	winctx->pidr = txattr->pidr;
	winctx->rx_win_id = txwin->rxwin->winid;

	winctx->dma_type = VAS_DMA_TYPE_INJECT;
	winctx->tc_mode = txattr->tc_mode;
	winctx->min_scope = VAS_SCOPE_LOCAL;
	winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;

	/* Already zero from the memset(); kept explicit for clarity. */
	winctx->pswid = 0;
}

/*
 * Validate the caller-supplied attributes @attr for a send window of
 * coprocessor type @cop. Returns true iff the window may be opened:
 *
 *	- threshold-control mode must be disabled (only mode handled now)
 *	- @cop must be a known coprocessor type
 *	- requested max credits must not exceed the send-window limit
 *	- user-space windows are allowed only for fast thread-wakeup (FTW)
 *	  and may not reserve transmit buffers
 */
static bool tx_win_args_valid(enum vas_cop_type cop,
			struct vas_tx_win_attr *attr)
{
	if (attr->tc_mode != VAS_THRESH_DISABLED)
		return false;

	if (cop > VAS_COP_TYPE_MAX)
		return false;

	if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
		return false;

	if (attr->user_win &&
			(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
		return false;

	return true;
}

/*
 * Open a send window on VAS instance @vasid for coprocessor type @cop,
 * pairing it with a matching receive window. Returns the new send
 * window, or an ERR_PTR() on failure. On success the caller owns the
 * window and must release it with vas_win_close().
 */
struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
			struct vas_tx_win_attr *attr)
{
	int rc;
	struct vas_window *txwin;
	struct vas_window *rxwin;
	struct vas_winctx winctx;
	struct vas_instance *vinst;

	trace_vas_tx_win_open(current, vasid, cop, attr);

	if (!tx_win_args_valid(cop, attr))
		return ERR_PTR(-EINVAL);

	/*
	 * If caller did not specify a vasid but specified the PSWID of a
	 * receive window (applicable only to FTW windows), use the vasid
	 * from that receive window.
	 */
	if (vasid == -1 && attr->pswid)
		decode_pswid(attr->pswid, &vasid, NULL);

	vinst = find_vas_instance(vasid);
	if (!vinst) {
		pr_devel("vasid %d not found!\n", vasid);
		return ERR_PTR(-EINVAL);
	}

	/* Takes a reference on the receive window; dropped on error below. */
	rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
	if (IS_ERR(rxwin)) {
		pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
		return rxwin;
	}

	txwin = vas_window_alloc(vinst);
	if (IS_ERR(txwin)) {
		rc = PTR_ERR(txwin);
		goto put_rxwin;
	}

	txwin->cop = cop;
	txwin->tx_win = 1;
	txwin->rxwin = rxwin;
	txwin->nx_win = txwin->rxwin->nx_win;
	txwin->pid = attr->pid;
	txwin->user_win = attr->user_win;
	/* wcreds_max of 0 means "use the default credit count". */
	txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;

	init_winctx_for_txwin(txwin, attr, &winctx);

	init_winctx_regs(txwin, &winctx);

	/*
	 * If its a kernel send window, map the window address into the
	 * kernel's address space. For user windows, user must issue an
	 * mmap() to map the window into their address space.
	 *
	 * NOTE: If kernel ever resubmits a user CRB after handling a page
	 *	 fault, we will need to map this into kernel as well.
	 */
	if (!txwin->user_win) {
		txwin->paste_kaddr = map_paste_region(txwin);
		if (IS_ERR(txwin->paste_kaddr)) {
			rc = PTR_ERR(txwin->paste_kaddr);
			goto free_window;
		}
	} else {
		/*
		 * A user mapping must ensure that context switch issues
		 * CP_ABORT for this thread.
		 */
		rc = set_thread_uses_vas();
		if (rc)
			goto free_window;
	}

	set_vinst_win(vinst, txwin);

	return txwin;

free_window:
	vas_window_free(txwin);

put_rxwin:
	put_rx_win(rxwin);
	return ERR_PTR(rc);

}
EXPORT_SYMBOL_GPL(vas_tx_win_open);

/*
 * Copy the CRB at @crb into the thread's copy buffer using the COPY
 * instruction wrapper; @offset is passed through to vas_copy().
 */
int vas_copy_crb(void *crb, int offset)
{
	return vas_copy(crb, offset);
}
EXPORT_SYMBOL_GPL(vas_copy_crb);

#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
/*
 * Paste a previously copied CRB to the send window @txwin. If @re is
 * set, paste to the report-enable variant of the paste address (the
 * 1K-offset alias). Returns 0 if the hardware accepted the paste,
 * -EINVAL otherwise.
 */
int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
{
	int rc;
	void *addr;
	uint64_t val;

	trace_vas_paste_crb(current, txwin);

	/*
	 * Only NX windows are supported for now and hardware assumes
	 * report-enable flag is set for NX windows. Ensure software
	 * complies too.
	 */
	WARN_ON_ONCE(txwin->nx_win && !re);

	addr = txwin->paste_kaddr;
	if (re) {
		/*
		 * Set the REPORT_ENABLE bit (equivalent to writing
		 * to 1K offset of the paste address)
		 */
		val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
		addr += val;
	}

	/*
	 * Map the raw CR value from vas_paste() to an error code (there
	 * is just pass or fail for now though). A CR value of 2 is the
	 * only "accepted" result; everything else is treated as failure.
	 */
	rc = vas_paste(addr, offset);
	if (rc == 2)
		rc = 0;
	else
		rc = -EINVAL;

	pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
			read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));

	return rc;
}
EXPORT_SYMBOL_GPL(vas_paste_crb);

/*
 * If credit checking is enabled for this window, poll for the return
 * of window credits (i.e for NX engines to process any outstanding CRBs).
 * Since NX-842 waits for the CRBs to be processed before closing the
 * window, we should not have to wait for too long.
 *
 * TODO: We retry in 10ms intervals now. We could/should probably peek at
 *	the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
 *	CRBs on the FIFO and compute the delay dynamically on each retry.
 *	But that is not really needed until we support NX-GZIP access from
 *	user space. (NX-842 driver waits for CSB and Fast thread-wakeup
 *	doesn't use credit checking).
 */
static void poll_window_credits(struct vas_window *window)
{
	u64 val;
	int creds, mode;

	/* Credit checking may be disabled per-window; nothing to do then. */
	val = read_hvwc_reg(window, VREG(WINCTL));
	if (window->tx_win)
		mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
	else
		mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);

	if (!mode)
		return;
retry:
	if (window->tx_win) {
		val = read_hvwc_reg(window, VREG(TX_WCRED));
		creds = GET_FIELD(VAS_TX_WCRED, val);
	} else {
		val = read_hvwc_reg(window, VREG(LRX_WCRED));
		creds = GET_FIELD(VAS_LRX_WCRED, val);
	}

	/* Sleep 10ms between polls until all credits have returned. */
	if (creds < window->wcreds_max) {
		val = 0;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(msecs_to_jiffies(10));
		goto retry;
	}
}

/*
 * Wait for the window to go to "not-busy" state. It should only take a
 * short time to queue a CRB, so window should not be busy for too long.
 * Trying 5ms intervals.
1161 */ 1162 static void poll_window_busy_state(struct vas_window *window) 1163 { 1164 int busy; 1165 u64 val; 1166 1167 retry: 1168 val = read_hvwc_reg(window, VREG(WIN_STATUS)); 1169 busy = GET_FIELD(VAS_WIN_BUSY, val); 1170 if (busy) { 1171 val = 0; 1172 set_current_state(TASK_UNINTERRUPTIBLE); 1173 schedule_timeout(msecs_to_jiffies(5)); 1174 goto retry; 1175 } 1176 } 1177 1178 /* 1179 * Have the hardware cast a window out of cache and wait for it to 1180 * be completed. 1181 * 1182 * NOTE: It can take a relatively long time to cast the window context 1183 * out of the cache. It is not strictly necessary to cast out if: 1184 * 1185 * - we clear the "Pin Window" bit (so hardware is free to evict) 1186 * 1187 * - we re-initialize the window context when it is reassigned. 1188 * 1189 * We do the former in vas_win_close() and latter in vas_win_open(). 1190 * So, ignoring the cast-out for now. We can add it as needed. If 1191 * casting out becomes necessary we should consider offloading the 1192 * job to a worker thread, so the window close can proceed quickly. 1193 */ 1194 static void poll_window_castout(struct vas_window *window) 1195 { 1196 /* stub for now */ 1197 } 1198 1199 /* 1200 * Unpin and close a window so no new requests are accepted and the 1201 * hardware can evict this window from cache if necessary. 1202 */ 1203 static void unpin_close_window(struct vas_window *window) 1204 { 1205 u64 val; 1206 1207 val = read_hvwc_reg(window, VREG(WINCTL)); 1208 val = SET_FIELD(VAS_WINCTL_PIN, val, 0); 1209 val = SET_FIELD(VAS_WINCTL_OPEN, val, 0); 1210 write_hvwc_reg(window, VREG(WINCTL), val); 1211 } 1212 1213 /* 1214 * Close a window. 1215 * 1216 * See Section 1.12.1 of VAS workbook v1.05 for details on closing window: 1217 * - Disable new paste operations (unmap paste address) 1218 * - Poll for the "Window Busy" bit to be cleared 1219 * - Clear the Open/Enable bit for the Window. 1220 * - Poll for return of window Credits (implies FIFO empty for Rx win?) 
1221 * - Unpin and cast window context out of cache 1222 * 1223 * Besides the hardware, kernel has some bookkeeping of course. 1224 */ 1225 int vas_win_close(struct vas_window *window) 1226 { 1227 if (!window) 1228 return 0; 1229 1230 if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { 1231 pr_devel("Attempting to close an active Rx window!\n"); 1232 WARN_ON_ONCE(1); 1233 return -EBUSY; 1234 } 1235 1236 unmap_paste_region(window); 1237 1238 clear_vinst_win(window); 1239 1240 poll_window_busy_state(window); 1241 1242 unpin_close_window(window); 1243 1244 poll_window_credits(window); 1245 1246 poll_window_castout(window); 1247 1248 /* if send window, drop reference to matching receive window */ 1249 if (window->tx_win) 1250 put_rx_win(window->rxwin); 1251 1252 vas_window_free(window); 1253 1254 return 0; 1255 } 1256 EXPORT_SYMBOL_GPL(vas_win_close); 1257