/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#define NVKM_VMM_LEVELS_MAX 5
#include "vmm.h"

#include <subdev/fb.h>

static void
nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt)
{
	struct nvkm_vmm_pt *pgt = *ppgt;
	if (pgt) {
		kvfree(pgt->pde);
		kfree(pgt);
		*ppgt = NULL;
	}
}


static struct nvkm_vmm_pt *
nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
		const struct nvkm_vmm_page *page)
{
	const u32 pten = 1 << desc->bits;
	struct nvkm_vmm_pt *pgt;
	u32 lpte = 0;

	if (desc->type > PGT) {
		if (desc->type == SPT) {
			const struct nvkm_vmm_desc *pair = page[-1].desc;
			lpte = pten >> (desc->bits - pair->bits);
		} else {
			lpte = pten;
		}
	}

	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
		return NULL;
	pgt->page = page ? page->shift : 0;
	pgt->sparse = sparse;

	if (desc->type == PGD) {
		pgt->pde = kvcalloc(pten, sizeof(*pgt->pde), GFP_KERNEL);
		if (!pgt->pde) {
			kfree(pgt);
			return NULL;
		}
	}

	return pgt;
}

struct nvkm_vmm_iter {
	const struct nvkm_vmm_page *page;
	const struct nvkm_vmm_desc *desc;
	struct nvkm_vmm *vmm;
	u64 cnt;
	u16 max, lvl;
	u32 pte[NVKM_VMM_LEVELS_MAX];
	struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
	int flush;
};

#ifdef CONFIG_NOUVEAU_DEBUG_MMU
static const char *
nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
{
	switch (desc->type) {
	case PGD: return "PGD";
	case PGT: return "PGT";
	case SPT: return "SPT";
	case LPT: return "LPT";
	default:
		return "UNKNOWN";
	}
}

static void
nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
{
	int lvl;
	for (lvl = it->max; lvl >= 0; lvl--) {
		if (lvl >= it->lvl)
			buf += sprintf(buf, "%05x:", it->pte[lvl]);
		else
			buf += sprintf(buf, "xxxxx:");
	}
}

#define TRA(i,f,a...) do {                                                     \
	char _buf[NVKM_VMM_LEVELS_MAX * 7];                                    \
	struct nvkm_vmm_iter *_it = (i);                                       \
	nvkm_vmm_trace(_it, _buf);                                             \
	VMM_TRACE(_it->vmm, "%s "f, _buf, ##a);                                \
} while(0)
#else
#define TRA(i,f,a...)
#endif

static inline void
nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
{
	it->flush = min(it->flush, it->max - it->lvl);
}

static inline void
nvkm_vmm_flush(struct nvkm_vmm_iter *it)
{
	if (it->flush != NVKM_VMM_LEVELS_MAX) {
		if (it->vmm->func->flush) {
			TRA(it, "flush: %d", it->flush);
			it->vmm->func->flush(it->vmm, it->flush);
		}
		it->flush = NVKM_VMM_LEVELS_MAX;
	}
}

static void
nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc[it->lvl].type == SPT;
	struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
	struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
	struct nvkm_mmu_pt *pt = pgt->pt[type];
	struct nvkm_vmm *vmm = it->vmm;
	u32 pdei = it->pte[it->lvl + 1];

	/* Recurse up the tree, unreferencing/destroying unneeded PDs. */
	it->lvl++;
	if (--pgd->refs[0]) {
		const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
		/* PD has other valid PDEs, so we need a proper update. */
		TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
		pgt->pt[type] = NULL;
		if (!pgt->refs[!type]) {
			/* PDE no longer required. */
			if (pgd->pt[0]) {
				if (pgt->sparse) {
					func->sparse(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
				} else {
					func->unmap(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NULL;
				}
			} else {
				/* Special handling for Tesla-class GPUs,
				 * where there's no central PD, but each
				 * instance has its own embedded PD.
				 */
				func->pde(vmm, pgd, pdei);
				pgd->pde[pdei] = NULL;
			}
		} else {
			/* PDE was pointing at dual-PTs and we're removing
			 * one of them, leaving the other in place.
			 */
			func->pde(vmm, pgd, pdei);
		}

		/* GPU may have cached the PTs, flush before freeing. */
		nvkm_vmm_flush_mark(it);
		nvkm_vmm_flush(it);
	} else {
		/* PD has no valid PDEs left, so we can just destroy it. */
		nvkm_vmm_unref_pdes(it);
	}

	/* Destroy PD/PT. */
	TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
	nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
	if (!pgt->refs[!type])
		nvkm_vmm_pt_del(&pgt);
	it->lvl--;
}

static void
nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		     const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and drop reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] -= pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that still have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
					break;
			}
			continue;
		}

		/* As there are no more non-UNMAPPED SPTEs left in the range
		 * covered by a number of LPTEs, the LPTEs once again take
		 * control over their address range.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
				break;
			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* If the MMU supports it, restore the LPTE to the
			 * INVALID state to tell the MMU there is no point
			 * trying to fetch the corresponding SPTEs.
			 */
			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}

static bool
nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];

	/* Drop PTE references. */
	pgt->refs[type] -= ptes;

	/* Dual-PTs need special handling, unless the PDE is becoming invalid. */
	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);

	/* PT no longer needed?  Destroy it. */
	if (!pgt->refs[type]) {
		it->lvl++;
		TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false; /* PTE writes for unmap() not necessary. */
	}

	return true;
}

static void
nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		   const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and increase reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] += pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that already have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
					break;
			}
			continue;
		}

		/* As there are now non-UNMAPPED SPTEs in the range covered
		 * by a number of LPTEs, we need to transfer control of the
		 * address range to the SPTEs.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
				break;
			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			const u32 spti = pteb * sptn;
			const u32 sptc = ptes * sptn;
			/* The entire LPTE is marked as sparse, we need
			 * to make sure that the SPTEs are too.
			 */
			TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
			desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
			/* Sparse LPTEs prevent SPTEs from being accessed. */
			TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* MMU supports blocking SPTEs by marking an LPTE
			 * as INVALID.  We need to reverse that here.
			 */
			TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}

static bool
nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];

	/* Take PTE references. */
	pgt->refs[type] += ptes;

	/* Dual-PTs need special handling. */
	if (desc->type == SPT)
		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);

	return true;
}

static void
nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
		     struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
{
	if (desc->type == PGD) {
		while (ptes--)
			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
	} else
	if (desc->type == LPT) {
		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
	}
}

static bool
nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	struct nvkm_vmm_pt *pt = it->pt[0];
	if (it->desc->type == PGD)
		memset(&pt->pde[ptei], 0x00, sizeof(pt->pde[0]) * ptes);
	else
	if (it->desc->type == LPT)
		memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes);
	return nvkm_vmm_unref_ptes(it, ptei, ptes);
}

static bool
nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes);
	return nvkm_vmm_ref_ptes(it, ptei, ptes);
}

static bool
nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
	const bool zero = !pgt->sparse && !desc->func->invalid;
	struct nvkm_vmm *vmm = it->vmm;
	struct nvkm_mmu *mmu = vmm->mmu;
	struct nvkm_mmu_pt *pt;
	u32 pten = 1 << desc->bits;
	u32 pteb, ptei, ptes;
	u32 size = desc->size * pten;

	pgd->refs[0]++;

	pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
	if (!pgt->pt[type]) {
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false;
	}

	if (zero)
		goto done;

	pt = pgt->pt[type];

	if (desc->type == LPT && pgt->refs[1]) {
		/* SPT already exists covering the same range as this LPT,
		 * which means we need to be careful that any LPTEs which
		 * overlap valid SPTEs are unmapped as opposed to invalid
		 * or sparse, which would prevent the MMU from looking at
		 * the SPTEs on some GPUs.
		 */
		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
				if (spte != next)
					break;
			}

			if (!spte) {
				if (pgt->sparse)
					desc->func->sparse(vmm, pt, pteb, ptes);
				else
					desc->func->invalid(vmm, pt, pteb, ptes);
				memset(&pgt->pte[pteb], 0x00, ptes);
			} else {
				desc->func->unmap(vmm, pt, pteb, ptes);
				while (ptes--)
					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
			}
		}
	} else {
		if (pgt->sparse) {
			nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
			desc->func->sparse(vmm, pt, 0, pten);
		} else {
			desc->func->invalid(vmm, pt, 0, pten);
		}
	}

done:
	TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
	it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
	nvkm_vmm_flush_mark(it);
	return true;
}

static bool
nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];

	pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
	if (!pgt) {
		if (!pgd->refs[0])
			nvkm_vmm_unref_pdes(it);
		return false;
	}

	pgd->pde[pdei] = pgt;
	return true;
}

static inline u64
nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
	      u64 addr, u64 size, const char *name, bool ref,
	      bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32),
	      nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
	      nvkm_vmm_pxe_func CLR_PTES)
{
	const struct nvkm_vmm_desc *desc = page->desc;
	struct nvkm_vmm_iter it;
	u64 bits = addr >> page->shift;

	it.page = page;
	it.desc = desc;
	it.vmm = vmm;
	it.cnt = size >> page->shift;
	it.flush = NVKM_VMM_LEVELS_MAX;

	/* Deconstruct address into PTE indices for each mapping level. */
	for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
		it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
		bits >>= desc[it.lvl].bits;
	}
	it.max = --it.lvl;
	it.pt[it.max] = vmm->pd;

	it.lvl = 0;
	TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
	    addr, size, page->shift, it.cnt);
	it.lvl = it.max;

	/* Depth-first traversal of page tables. */
	while (it.cnt) {
		struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
		const int type = desc->type == SPT;
		const u32 pten = 1 << desc->bits;
		const u32 ptei = it.pte[0];
		const u32 ptes = min_t(u64, it.cnt, pten - ptei);

		/* Walk down the tree, finding page tables for each level. */
		for (; it.lvl; it.lvl--) {
			const u32 pdei = it.pte[it.lvl];
			struct nvkm_vmm_pt *pgd = pgt;

			/* Software PT. */
			if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
				if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
					goto fail;
			}
			it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];

			/* Hardware PT.
			 *
			 * This is a separate step from above due to GF100 and
			 * newer having dual page tables at some levels, which
			 * are refcounted independently.
			 */
			if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
				if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
					goto fail;
			}
		}

		/* Handle PTE updates. */
		if (!REF_PTES || REF_PTES(&it, ptei, ptes)) {
			struct nvkm_mmu_pt *pt = pgt->pt[type];
			if (MAP_PTES || CLR_PTES) {
				if (MAP_PTES)
					MAP_PTES(vmm, pt, ptei, ptes, map);
				else
					CLR_PTES(vmm, pt, ptei, ptes);
				nvkm_vmm_flush_mark(&it);
			}
		}

		/* Walk back up the tree to the next position. */
		it.pte[it.lvl] += ptes;
		it.cnt -= ptes;
		if (it.cnt) {
			while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
				it.pte[it.lvl++] = 0;
				it.pte[it.lvl]++;
			}
		}
	}

	nvkm_vmm_flush(&it);
	return ~0ULL;

fail:
	/* Reconstruct the failure address so the caller is able to
	 * reverse any partially completed operations.
	 */
	addr = it.pte[it.max--];
	do {
		addr  = addr << desc[it.max].bits;
		addr |= it.pte[it.max];
	} while (it.max--);

	return addr << page->shift;
}

static void
nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			 u64 addr, u64 size)
{
	nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false,
		      nvkm_vmm_sparse_unref_ptes, NULL, NULL,
		      page->desc->func->invalid ?
		      page->desc->func->invalid : page->desc->func->unmap);
}

static int
nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			 u64 addr, u64 size)
{
	if ((page->type & NVKM_VMM_PAGE_SPARSE)) {
		u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref",
					 true, nvkm_vmm_sparse_ref_ptes, NULL,
					 NULL, page->desc->func->sparse);
		if (fail != ~0ULL) {
			if ((size = fail - addr))
				nvkm_vmm_ptes_sparse_put(vmm, page, addr, size);
			return -ENOMEM;
		}
		return 0;
	}
	return -EINVAL;
}

static int
nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	int m = 0, i;
	u64 start = addr;
	u64 block;

	while (size) {
		/* Limit maximum page size based on remaining size. */
		while (size < (1ULL << page[m].shift))
			m++;
		i = m;

		/* Find largest page size suitable for alignment. */
		while (!IS_ALIGNED(addr, 1ULL << page[i].shift))
			i++;

		/* Determine number of PTEs at this page size. */
		if (i != m) {
			/* Limited to alignment boundary of next page size. */
			u64 next = 1ULL << page[i - 1].shift;
			u64 part = ALIGN(addr, next) - addr;
			if (size - part >= next)
				block = (part >> page[i].shift) << page[i].shift;
			else
				block = (size >> page[i].shift) << page[i].shift;
		} else {
			block = (size >> page[i].shift) << page[i].shift;
		}

		/* Perform operation. */
		if (ref) {
			int ret = nvkm_vmm_ptes_sparse_get(vmm, &page[i], addr, block);
			if (ret) {
				if ((size = addr - start))
					nvkm_vmm_ptes_sparse(vmm, start, size, false);
				return ret;
			}
		} else {
			nvkm_vmm_ptes_sparse_put(vmm, &page[i], addr, block);
		}

		size -= block;
		addr += block;
	}

	return 0;
}

static void
nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
			u64 addr, u64 size, bool sparse)
{
	const struct nvkm_vmm_desc_func *func = page->desc->func;
	nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref",
		      false, nvkm_vmm_unref_ptes, NULL, NULL,
		      sparse ? func->sparse : func->invalid ? func->invalid :
							      func->unmap);
}

static int
nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		      u64 addr, u64 size, struct nvkm_vmm_map *map,
		      nvkm_vmm_pte_func func)
{
	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
				 nvkm_vmm_ref_ptes, func, map, NULL);
	if (fail != ~0ULL) {
		if ((size = fail - addr))
			nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false);
		return -ENOMEM;
	}
	return 0;
}

static void
nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		    u64 addr, u64 size, bool sparse)
{
	const struct nvkm_vmm_desc_func *func = page->desc->func;
	nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
		      sparse ? func->sparse : func->invalid ? func->invalid :
							      func->unmap);
}

static void
nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size, struct nvkm_vmm_map *map,
		  nvkm_vmm_pte_func func)
{
	nvkm_vmm_iter(vmm, page, addr, size, "map", false,
		      NULL, func, map, NULL);
}

static void
nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	nvkm_vmm_iter(vmm, page, addr, size, "unref", false,
		      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
}

static int
nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true,
				 nvkm_vmm_ref_ptes, NULL, NULL, NULL);
	if (fail != ~0ULL) {
		if (fail != addr)
			nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
		return -ENOMEM;
	}
	return 0;
}

static inline struct nvkm_vma *
nvkm_vma_new(u64 addr, u64 size)
{
	struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma) {
		vma->addr = addr;
		vma->size = size;
		vma->page = NVKM_VMA_PAGE_NONE;
		vma->refd = NVKM_VMA_PAGE_NONE;
	}
	return vma;
}

struct nvkm_vma *
nvkm_vma_tail(struct nvkm_vma *vma, u64 tail)
{
	struct nvkm_vma *new;

	BUG_ON(vma->size == tail);

	if (!(new = nvkm_vma_new(vma->addr + (vma->size - tail), tail)))
		return NULL;
	vma->size -= tail;

	new->mapref = vma->mapref;
	new->sparse = vma->sparse;
	new->page = vma->page;
	new->refd = vma->refd;
	new->used = vma->used;
	new->part = vma->part;
	new->user = vma->user;
	new->busy = vma->busy;
	list_add(&new->head, &vma->head);
	return new;
}

static void
nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct rb_node **ptr = &vmm->free.rb_node;
	struct rb_node *parent = NULL;

	/* Free nodes are keyed by size, then address, so the allocator can
	 * quickly locate the smallest block able to satisfy a request.
	 */
	while (*ptr) {
		struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
		parent = *ptr;
		if (vma->size < this->size)
			ptr = &parent->rb_left;
		else
		if (vma->size > this->size)
			ptr = &parent->rb_right;
		else
		if (vma->addr < this->addr)
			ptr = &parent->rb_left;
		else
		if (vma->addr > this->addr)
			ptr = &parent->rb_right;
		else
			BUG();
	}

	rb_link_node(&vma->tree, parent, ptr);
	rb_insert_color(&vma->tree, &vmm->free);
}

void
nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct rb_node **ptr = &vmm->root.rb_node;
	struct rb_node *parent = NULL;

	/* Allocated nodes are keyed by address only. */
	while (*ptr) {
		struct nvkm_vma *this = rb_entry(*ptr, typeof(*this), tree);
		parent = *ptr;
		if (vma->addr < this->addr)
			ptr = &parent->rb_left;
		else
		if (vma->addr > this->addr)
			ptr = &parent->rb_right;
		else
			BUG();
	}

	rb_link_node(&vma->tree, parent, ptr);
	rb_insert_color(&vma->tree, &vmm->root);
}

struct nvkm_vma *
nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr)
{
	struct rb_node *node = vmm->root.rb_node;
	while (node) {
		struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
		if (addr < vma->addr)
			node = node->rb_left;
		else
		if (addr >= vma->addr + vma->size)
			node = node->rb_right;
		else
			return vma;
	}
	return NULL;
}

static void
nvkm_vmm_dtor(struct nvkm_vmm *vmm)
{
	struct nvkm_vma *vma;
	struct rb_node *node;

	while ((node = rb_first(&vmm->root))) {
		struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree);
		nvkm_vmm_put(vmm, &vma);
	}

	if (vmm->bootstrapped) {
		const struct nvkm_vmm_page *page = vmm->func->page;
		const u64 limit = vmm->limit - vmm->start;

		while (page[1].shift)
			page++;

		nvkm_mmu_ptc_dump(vmm->mmu);
		nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
	}

	vma = list_first_entry(&vmm->list, typeof(*vma), head);
	list_del(&vma->head);
	kfree(vma);
	WARN_ON(!list_empty(&vmm->list));

	if (vmm->nullp) {
		dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
				  vmm->nullp, vmm->null);
	}

	if (vmm->pd) {
		nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]);
		nvkm_vmm_pt_del(&vmm->pd);
	}
}

int
nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 pd_header, u64 addr, u64 size, struct lock_class_key *key,
	      const char *name, struct nvkm_vmm *vmm)
{
	static struct lock_class_key _key;
	const struct nvkm_vmm_page *page = func->page;
	const struct nvkm_vmm_desc *desc;
	struct nvkm_vma *vma;
	int levels, bits = 0;

	vmm->func = func;
	vmm->mmu = mmu;
	vmm->name = name;
	vmm->debug = mmu->subdev.debug;
	kref_init(&vmm->kref);

	__mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);

	/* Locate the smallest page size supported by the backend, it will
	 * have the deepest nesting of page tables.
	 */
	while (page[1].shift)
		page++;

	/* Locate the structure that describes the layout of the top-level
	 * page table, and determine the number of valid bits in a virtual
	 * address.
	 */
	for (levels = 0, desc = page->desc; desc->bits; desc++, levels++)
		bits += desc->bits;
	bits += page->shift;
	desc--;

	if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX))
		return -EINVAL;

	vmm->start = addr;
	vmm->limit = size ? (addr + size) : (1ULL << bits);
	if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits))
		return -EINVAL;

	/* Allocate top-level page table. */
	vmm->pd = nvkm_vmm_pt_new(desc, false, NULL);
	if (!vmm->pd)
		return -ENOMEM;
	vmm->pd->refs[0] = 1;
	INIT_LIST_HEAD(&vmm->join);

	/* ... and the GPU storage for it, except on Tesla-class GPUs that
	 * have the PD embedded in the instance structure.
	 */
	if (desc->size) {
		const u32 size = pd_header + desc->size * (1 << desc->bits);
		vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true);
		if (!vmm->pd->pt[0])
			return -ENOMEM;
	}

	/* Initialise address-space MM. */
	INIT_LIST_HEAD(&vmm->list);
	vmm->free = RB_ROOT;
	vmm->root = RB_ROOT;

	if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start)))
		return -ENOMEM;

	nvkm_vmm_free_insert(vmm, vma);
	list_add(&vma->head, &vmm->list);
	return 0;
}

int
nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 hdr, u64 addr, u64 size, struct lock_class_key *key,
	      const char *name, struct nvkm_vmm **pvmm)
{
	if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL)))
		return -ENOMEM;
	return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
}

#define node(root, dir) ((root)->head.dir == &vmm->list) ? NULL :              \
	list_entry((root)->head.dir, struct nvkm_vma, head)

void
nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct nvkm_vma *next;

	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
	nvkm_memory_unref(&vma->memory);

	if (vma->part) {
		struct nvkm_vma *prev = node(vma, prev);
		if (!prev->memory) {
			prev->size += vma->size;
			rb_erase(&vma->tree, &vmm->root);
			list_del(&vma->head);
			kfree(vma);
			vma = prev;
		}
	}

	next = node(vma, next);
	if (next && next->part) {
		if (!next->memory) {
			vma->size += next->size;
			rb_erase(&next->tree, &vmm->root);
			list_del(&next->head);
			kfree(next);
		}
	}
}

void
nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd];

	if (vma->mapref) {
		nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse);
		vma->refd = NVKM_VMA_PAGE_NONE;
	} else {
		nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse);
	}

	nvkm_vmm_unmap_region(vmm, vma);
}

void
nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	if (vma->memory) {
		mutex_lock(&vmm->mutex);
		nvkm_vmm_unmap_locked(vmm, vma);
		mutex_unlock(&vmm->mutex);
	}
}

static int
nvkm_vmm_map_valid(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		   void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	switch (nvkm_memory_target(map->memory)) {
	case NVKM_MEM_TARGET_VRAM:
		if (!(map->page->type & NVKM_VMM_PAGE_VRAM)) {
			VMM_DEBUG(vmm, "%d !VRAM", map->page->shift);
			return -EINVAL;
		}
		break;
	case NVKM_MEM_TARGET_HOST:
	case NVKM_MEM_TARGET_NCOH:
		if (!(map->page->type & NVKM_VMM_PAGE_HOST)) {
			VMM_DEBUG(vmm, "%d !HOST", map->page->shift);
			return -EINVAL;
		}
		break;
	default:
		WARN_ON(1);
		return -ENOSYS;
	}

	if (!IS_ALIGNED(     vma->addr, 1ULL << map->page->shift) ||
	    !IS_ALIGNED((u64)vma->size, 1ULL << map->page->shift) ||
	    !IS_ALIGNED(   map->offset, 1ULL << map->page->shift) ||
	    nvkm_memory_page(map->memory) < map->page->shift) {
		VMM_DEBUG(vmm, "alignment %016llx %016llx %016llx %d %d",
		    vma->addr, (u64)vma->size, map->offset, map->page->shift,
		    nvkm_memory_page(map->memory));
		return -EINVAL;
	}

	return vmm->func->valid(vmm, argv, argc, map);
}

static int
nvkm_vmm_map_choose(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		    void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	for (map->page = vmm->func->page; map->page->shift; map->page++) {
		VMM_DEBUG(vmm, "trying %d", map->page->shift);
		if (!nvkm_vmm_map_valid(vmm, vma, argv, argc, map))
			return 0;
	}
	return -EINVAL;
}

static int
nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
		    void *argv, u32 argc, struct nvkm_vmm_map *map)
{
	nvkm_vmm_pte_func func;
	int ret;

	/* Make sure we won't overrun the end of the memory object. */
	if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) {
		VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx",
			  nvkm_memory_size(map->memory),
			  map->offset, (u64)vma->size);
		return -EINVAL;
	}

	/* Check remaining arguments for validity. */
	if (vma->page == NVKM_VMA_PAGE_NONE &&
	    vma->refd == NVKM_VMA_PAGE_NONE) {
		/* Find the largest page size we can perform the mapping at. */
		const u32 debug = vmm->debug;
		vmm->debug = 0;
		ret = nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
		vmm->debug = debug;
		if (ret) {
			VMM_DEBUG(vmm, "invalid at any page size");
			nvkm_vmm_map_choose(vmm, vma, argv, argc, map);
			return -EINVAL;
		}
	} else {
		/* Page size of the VMA is already pre-determined. */
		if (vma->refd != NVKM_VMA_PAGE_NONE)
			map->page = &vmm->func->page[vma->refd];
		else
			map->page = &vmm->func->page[vma->page];

		ret = nvkm_vmm_map_valid(vmm, vma, argv, argc, map);
		if (ret) {
			VMM_DEBUG(vmm, "invalid %d\n", ret);
			return ret;
		}
	}

	/* Deal with the 'offset' argument, and fetch the backend function. */
	map->off = map->offset;
	if (map->mem) {
		for (; map->off; map->mem = map->mem->next) {
			u64 size = (u64)map->mem->length << NVKM_RAM_MM_SHIFT;
			if (size > map->off)
				break;
			map->off -= size;
		}
		func = map->page->desc->func->mem;
	} else
	if (map->sgl) {
		for (; map->off; map->sgl = sg_next(map->sgl)) {
			u64 size = sg_dma_len(map->sgl);
			if (size > map->off)
				break;
			map->off -= size;
		}
		func = map->page->desc->func->sgl;
	} else {
		map->dma += map->offset >> PAGE_SHIFT;
		map->off  = map->offset & PAGE_MASK;
		func = map->page->desc->func->dma;
	}

	/* Perform the map. */
	if (vma->refd == NVKM_VMA_PAGE_NONE) {
		ret = nvkm_vmm_ptes_get_map(vmm, map->page, vma->addr, vma->size, map, func);
		if (ret)
			return ret;

		vma->refd = map->page - vmm->func->page;
	} else {
		nvkm_vmm_ptes_map(vmm, map->page, vma->addr, vma->size, map, func);
	}

	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
	nvkm_memory_unref(&vma->memory);
	vma->memory = nvkm_memory_ref(map->memory);
	vma->tags = map->tags;
	return 0;
}

int
nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc,
	     struct nvkm_vmm_map *map)
{
	int ret;
	mutex_lock(&vmm->mutex);
	ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map);
	vma->busy = false;
	mutex_unlock(&vmm->mutex);
	return ret;
}

static void
nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	struct nvkm_vma *prev, *next;

	if ((prev = node(vma, prev)) && !prev->used) {
		rb_erase(&prev->tree, &vmm->free);
		list_del(&prev->head);
		vma->addr = prev->addr;
		vma->size += prev->size;
		kfree(prev);
	}

	if ((next = node(vma, next)) && !next->used) {
		rb_erase(&next->tree, &vmm->free);
		list_del(&next->head);
		vma->size += next->size;
		kfree(next);
	}

	nvkm_vmm_free_insert(vmm, vma);
}

void
nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	struct nvkm_vma *next = vma;

	BUG_ON(vma->part);

	if (vma->mapref || !vma->sparse) {
		do {
			const bool map = next->memory != NULL;
			const u8  refd = next->refd;
			const u64 addr = next->addr;
			u64 size = next->size;

			/* Merge regions that are in the same state. */
			while ((next = node(next, next)) && next->part &&
			       (next->memory != NULL) == map &&
			       (next->refd == refd))
				size += next->size;

			if (map) {
				/* Region(s) are mapped, merge the unmap
				 * and dereference into a single walk of
				 * the page tree.
				 */
				nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr,
							size, vma->sparse);
			} else
			if (refd != NVKM_VMA_PAGE_NONE) {
				/* Drop allocation-time PTE references. */
				nvkm_vmm_ptes_put(vmm, &page[refd], addr, size);
			}
		} while (next && next->part);
	}

	/* Merge any mapped regions that were split from the initial
	 * address-space allocation back into the allocated VMA, and
	 * release memory/compression resources.
	 */
	next = vma;
	do {
		if (next->memory)
			nvkm_vmm_unmap_region(vmm, next);
	} while ((next = node(vma, next)) && next->part);

	if (vma->sparse && !vma->mapref) {
		/* Sparse region that was allocated with a fixed page size,
		 * meaning all relevant PTEs were referenced once when the
		 * region was allocated, and remained that way, regardless
		 * of whether memory was mapped into it afterwards.
		 *
		 * The process of unmapping, unsparsing, and dereferencing
		 * PTEs can be done in a single page tree walk.
		 */
		nvkm_vmm_ptes_sparse_put(vmm, &page[vma->refd], vma->addr, vma->size);
	} else
	if (vma->sparse) {
		/* Sparse region that wasn't allocated with a fixed page size,
		 * PTE references were taken both at allocation time (to make
		 * the GPU see the region as sparse), and when mapping memory
		 * into the region.
		 *
		 * The latter was handled above, and the remaining references
		 * are dealt with here.
		 */
		nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, false);
	}

	/* Remove VMA from the list of allocated nodes. */
	rb_erase(&vma->tree, &vmm->root);

	/* Merge VMA back into the free list. */
	vma->page = NVKM_VMA_PAGE_NONE;
	vma->refd = NVKM_VMA_PAGE_NONE;
	vma->used = false;
	vma->user = false;
	nvkm_vmm_put_region(vmm, vma);
}

void
nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma)
{
	struct nvkm_vma *vma = *pvma;
	if (vma) {
		mutex_lock(&vmm->mutex);
		nvkm_vmm_put_locked(vmm, vma);
		mutex_unlock(&vmm->mutex);
		*pvma = NULL;
	}
}

int
nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
		    u8 shift, u8 align, u64 size, struct nvkm_vma **pvma)
{
	const struct nvkm_vmm_page *page = &vmm->func->page[NVKM_VMA_PAGE_NONE];
	struct rb_node *node = NULL, *temp;
	struct nvkm_vma *vma = NULL, *tmp;
	u64 addr, tail;
	int ret;

	VMM_TRACE(vmm, "getref %d mapref %d sparse %d "
		       "shift: %d align: %d size: %016llx",
		  getref, mapref, sparse, shift, align, size);

	/* Zero-sized, or lazily-allocated sparse VMAs, make no sense. */
	if (unlikely(!size || (!getref && !mapref && sparse))) {
		VMM_DEBUG(vmm, "args %016llx %d %d %d",
			  size, getref, mapref, sparse);
		return -EINVAL;
	}

	/* Tesla-class GPUs can only select page size per-PDE, which means
	 * we're required to know the mapping granularity up-front to find
	 * a suitable region of address-space.
	 *
	 * The same goes if we're requesting up-front allocation of PTEs.
	 */
	if (unlikely((getref || vmm->func->page_block) && !shift)) {
		VMM_DEBUG(vmm, "page size required: %d %016llx",
			  getref, vmm->func->page_block);
		return -EINVAL;
	}

	/* If a specific page size was requested, determine its index and
	 * make sure the requested size is a multiple of the page size.
	 */
	if (shift) {
		for (page = vmm->func->page; page->shift; page++) {
			if (shift == page->shift)
				break;
		}

		if (!page->shift || !IS_ALIGNED(size, 1ULL << page->shift)) {
			VMM_DEBUG(vmm, "page %d %016llx", shift, size);
			return -EINVAL;
		}
		align = max_t(u8, align, shift);
	} else {
		align = max_t(u8, align, 12);
	}

	/* Locate smallest block that can possibly satisfy the allocation. */
	temp = vmm->free.rb_node;
	while (temp) {
		struct nvkm_vma *this = rb_entry(temp, typeof(*this), tree);
		if (this->size < size) {
			temp = temp->rb_right;
		} else {
			node = temp;
			temp = temp->rb_left;
		}
	}

	if (unlikely(!node))
		return -ENOSPC;

	/* Take into account alignment restrictions, trying larger blocks
	 * in turn until we find a suitable free block.
	 */
	do {
		struct nvkm_vma *this = rb_entry(node, typeof(*this), tree);
		struct nvkm_vma *prev = node(this, prev);
		struct nvkm_vma *next = node(this, next);
		const int p = page - vmm->func->page;

		addr = this->addr;
		if (vmm->func->page_block && prev && prev->page != p)
			addr = ALIGN(addr, vmm->func->page_block);
		addr = ALIGN(addr, 1ULL << align);

		tail = this->addr + this->size;
		if (vmm->func->page_block && next && next->page != p)
			tail = ALIGN_DOWN(tail, vmm->func->page_block);

		if (addr <= tail && tail - addr >= size) {
			rb_erase(&this->tree, &vmm->free);
			vma = this;
			break;
		}
	} while ((node = rb_next(node)));

	if (unlikely(!vma))
		return -ENOSPC;

	/* If the VMA we found isn't already exactly the requested size,
	 * it needs to be split, and the remaining free blocks returned.
	 */
	if (addr != vma->addr) {
		if (!(tmp = nvkm_vma_tail(vma, vma->size + vma->addr - addr))) {
			nvkm_vmm_put_region(vmm, vma);
			return -ENOMEM;
		}
		nvkm_vmm_free_insert(vmm, vma);
		vma = tmp;
	}

	if (size != vma->size) {
		if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
			nvkm_vmm_put_region(vmm, vma);
			return -ENOMEM;
		}
		nvkm_vmm_free_insert(vmm, tmp);
	}

	/* Pre-allocate page tables and/or setup sparse mappings. */
	if (sparse && getref)
		ret = nvkm_vmm_ptes_sparse_get(vmm, page, vma->addr, vma->size);
	else if (sparse)
		ret = nvkm_vmm_ptes_sparse(vmm, vma->addr, vma->size, true);
	else if (getref)
		ret = nvkm_vmm_ptes_get(vmm, page, vma->addr, vma->size);
	else
		ret = 0;
	if (ret) {
		nvkm_vmm_put_region(vmm, vma);
		return ret;
	}

	vma->mapref = mapref && !getref;
	vma->sparse = sparse;
	vma->page = page - vmm->func->page;
	vma->refd = getref ? vma->page : NVKM_VMA_PAGE_NONE;
	vma->used = true;
	nvkm_vmm_node_insert(vmm, vma);
	*pvma = vma;
	return 0;
}

int
nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma)
{
	int ret;
	mutex_lock(&vmm->mutex);
	ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma);
	mutex_unlock(&vmm->mutex);
	return ret;
}

void
nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
	if (inst && vmm->func->part) {
		mutex_lock(&vmm->mutex);
		vmm->func->part(vmm, inst);
		mutex_unlock(&vmm->mutex);
	}
}

int
nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
	int ret = 0;
	if (vmm->func->join) {
		mutex_lock(&vmm->mutex);
		ret = vmm->func->join(vmm, inst);
		mutex_unlock(&vmm->mutex);
	}
	return ret;
}

static bool
nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
	return false;
}

int
nvkm_vmm_boot(struct nvkm_vmm *vmm)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	const u64 limit = vmm->limit - vmm->start;
	int ret;

	while (page[1].shift)
		page++;

	ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
	if (ret)
		return ret;

	nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
		      nvkm_vmm_boot_ptes, NULL, NULL, NULL);
	vmm->bootstrapped = true;
	return 0;
}

static void
nvkm_vmm_del(struct kref *kref)
{
	struct nvkm_vmm *vmm = container_of(kref, typeof(*vmm), kref);
	nvkm_vmm_dtor(vmm);
	kfree(vmm);
}

void
nvkm_vmm_unref(struct nvkm_vmm **pvmm)
{
	struct nvkm_vmm *vmm = *pvmm;
	if (vmm) {
		kref_put(&vmm->kref, nvkm_vmm_del);
		*pvmm = NULL;
	}
}

struct nvkm_vmm *
nvkm_vmm_ref(struct nvkm_vmm *vmm)
{
	if (vmm)
		kref_get(&vmm->kref);
	return vmm;
}

int
nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv,
	     u32 argc, struct lock_class_key *key, const char *name,
	     struct nvkm_vmm **pvmm)
{
	struct nvkm_mmu *mmu = device->mmu;
	struct nvkm_vmm *vmm = NULL;
	int ret;
	ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm);
	if (ret)
		nvkm_vmm_unref(&vmm);
	*pvmm = vmm;
	return ret;
}