/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/pagevec.h>
#include <linux/swap.h>

#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

/*
 * Move pages to appropriate lru and release the pagevec, decrementing the
 * ref count of those pages.
 */
static void check_release_pagevec(struct pagevec *pvec)
{
	check_move_unevictable_pages(pvec);
	__pagevec_release(pvec);
	cond_resched();
}

static int shmem_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	const unsigned long page_count = obj->base.size / PAGE_SIZE;
	unsigned long i;
	struct address_space *mapping;
	struct sg_table *st;
	struct scatterlist *sg;
	struct sgt_iter sgt_iter;
	struct page *page;
	unsigned long last_pfn = 0;	/* suppress gcc warning */
	unsigned int max_segment = i915_sg_segment_size();
	unsigned int sg_page_sizes;
	struct pagevec pvec;
	gfp_t noreclaim;
	int ret;

	/*
	 * Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);

	/*
	 * If there's no chance of allocating enough pages for the whole
	 * object, bail early.
	 */
	if (page_count > totalram_pages())
		return -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return -ENOMEM;

rebuild_st:
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	/*
	 * Get the list of pages out of our struct file. They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping = obj->base.filp->f_mapping;
	mapping_set_unevictable(mapping);
	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;

	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;
	for (i = 0; i < page_count; i++) {
		const unsigned int shrink[] = {
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
			0,
		}, *s = shrink;
		gfp_t gfp = noreclaim;

		do {
			cond_resched();
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (!IS_ERR(page))
				break;

			if (!*s) {
				ret = PTR_ERR(page);
				goto err_sg;
			}

			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);

			/*
			 * We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 *
			 * However, since graphics tend to be disposable,
			 * defer the oom here by reporting the ENOMEM back
			 * to userspace.
			 */
			if (!*s) {
				/* reclaim and warn, but no oom */
				gfp = mapping_gfp_mask(mapping);

				/*
				 * Our bo are always dirty and so we require
				 * kswapd to reclaim our pages (direct reclaim
				 * does not effectively begin pageout of our
				 * buffers on its own). However, direct reclaim
				 * only waits for kswapd when under allocation
				 * congestion. So as a result __GFP_RECLAIM is
				 * unreliable and fails to actually reclaim our
				 * dirty pages -- unless you try over and over
				 * again with !__GFP_NORETRY. However, we still
				 * want to fail this allocation rather than
				 * trigger the out-of-memory killer and for
				 * this we want __GFP_RETRY_MAYFAIL.
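				 *
				 * __GFP_RETRY_MAYFAIL retries the allocation
				 * with reclaim, but returns NULL instead of
				 * invoking the OOM killer once the VM decides
				 * no further progress can be made.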
				 */
				gfp |= __GFP_RETRY_MAYFAIL;
			}
		} while (1);

		if (!i ||
		    sg->length >= max_segment ||
		    page_to_pfn(page) != last_pfn + 1) {
			if (i) {
				sg_page_sizes |= sg->length;
				sg = sg_next(sg);
			}
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);

		/* Check that the i965g/gm workaround works. */
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
	}
	if (sg) { /* loop terminated early; short sg table */
		sg_page_sizes |= sg->length;
		sg_mark_end(sg);
	}

	/* Trim unused sg entries to avoid wasting memory. */
	i915_sg_trim(st);

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		/*
		 * DMA remapping failed? One possible cause is that
		 * it could not reserve enough large entries, asking
		 * for PAGE_SIZE chunks instead may be helpful.
		 */
		if (max_segment > PAGE_SIZE) {
			for_each_sgt_page(page, sgt_iter, st)
				put_page(page);
			sg_free_table(st);

			max_segment = PAGE_SIZE;
			goto rebuild_st;
		} else {
			dev_warn(&i915->drm.pdev->dev,
				 "Failed to DMA remap %lu pages\n",
				 page_count);
			goto err_pages;
		}
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj, st);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return 0;

err_sg:
	sg_mark_end(sg);
err_pages:
	mapping_clear_unevictable(mapping);
	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, st) {
		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	sg_free_table(st);
	kfree(st);

	/*
	 * shmemfs first checks if there is enough memory to allocate the page
	 * and reports ENOSPC should there be insufficient, along with the usual
	 * ENOMEM for a genuine allocation failure.
	 *
	 * We use ENOSPC in our driver to mean that we have run out of aperture
	 * space and so want to translate the error from shmemfs back to our
	 * usual understanding of ENOMEM.
	 */
	if (ret == -ENOSPC)
		ret = -ENOMEM;

	return ret;
}

static void
shmem_truncate(struct drm_i915_gem_object *obj)
{
	/*
	 * Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct shmemfs to drop all of its
	 * backing pages, *now*.
	 */
	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
	obj->mm.madv = __I915_MADV_PURGED;
	obj->mm.pages = ERR_PTR(-EFAULT);
}

static void
shmem_writeback(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = SWAP_CLUSTER_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1,
	};
	unsigned long i;

	/*
	 * Leave mmappings intact (GTT will have been revoked on unbinding,
	 * leaving only CPU mmappings around) and add those pages to the LRU
	 * instead of invoking writeback so they are aged and paged out
	 * as normal.
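	 *
	 * Hence the loop below only starts writeback on dirty pages that
	 * are no longer mapped; PageReclaim is set first so that reclaim
	 * can drop each page as soon as its writeback completes.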
	 */
	mapping = obj->base.filp->f_mapping;

	/* Begin writeback on each dirty page */
	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
		struct page *page;

		page = find_lock_entry(mapping, i);
		if (!page || xa_is_value(page))
			continue;

		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
			int ret;

			SetPageReclaim(page);
			ret = mapping->a_ops->writepage(page, &wbc);
			if (!PageWriteback(page))
				ClearPageReclaim(page);
			if (!ret)
				goto put;
		}
		unlock_page(page);
put:
		put_page(page);
	}
}

void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct pagevec pvec;
	struct page *page;

	__i915_gem_object_release_shmem(obj, pages, true);

	i915_gem_gtt_finish_pages(obj, pages);

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj, pages);

	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);

	pagevec_init(&pvec);
	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty)
			set_page_dirty(page);

		if (obj->mm.madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		if (!pagevec_add(&pvec, page))
			check_release_pagevec(&pvec);
	}
	if (pagevec_count(&pvec))
		check_release_pagevec(&pvec);
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static int
shmem_pwrite(struct drm_i915_gem_object *obj,
	     const struct drm_i915_gem_pwrite *arg)
{
	struct address_space *mapping = obj->base.filp->f_mapping;
	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
	u64 remain, offset;
	unsigned int pg;

	/* Caller already validated user args */
	GEM_BUG_ON(!access_ok(user_data, arg->size));

	/*
	 * Before we instantiate/pin the backing store for our use, we
	 * can prepopulate the shmemfs filp efficiently using a write into
	 * the pagecache. We avoid the penalty of instantiating all the
	 * pages, important if the user is just writing to a few and never
	 * uses the object on the GPU, and using a direct write into shmemfs
	 * allows it to avoid the cost of retrieving a page (either swapin
	 * or clearing-before-use) before it is overwritten.
	 */
	if (i915_gem_object_has_pages(obj))
		return -ENODEV;

	if (obj->mm.madv != I915_MADV_WILLNEED)
		return -EFAULT;

	/*
	 * Before the pages are instantiated the object is treated as being
	 * in the CPU domain. The pages will be clflushed as required before
	 * use, and we can freely write into the pages directly. If userspace
	 * races pwrite with any other operation, corruption will ensue -
	 * that is userspace's prerogative!
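	 *
	 * The copy below goes through pagecache_write_begin() and
	 * pagecache_write_end(), so shmemfs allocates each backing page
	 * on demand and marks it dirty for us.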
	 */

	remain = arg->size;
	offset = arg->offset;
	pg = offset_in_page(offset);

	do {
		unsigned int len, unwritten;
		struct page *page;
		void *data, *vaddr;
		int err;
		char c;

		len = PAGE_SIZE - pg;
		if (len > remain)
			len = remain;

		/* Prefault the user page to reduce potential recursion */
		err = __get_user(c, user_data);
		if (err)
			return err;

		err = __get_user(c, user_data + len - 1);
		if (err)
			return err;

		err = pagecache_write_begin(obj->base.filp, mapping,
					    offset, len, 0,
					    &page, &data);
		if (err < 0)
			return err;

		vaddr = kmap_atomic(page);
		unwritten = __copy_from_user_inatomic(vaddr + pg,
						      user_data,
						      len);
		kunmap_atomic(vaddr);

		err = pagecache_write_end(obj->base.filp, mapping,
					  offset, len, len - unwritten,
					  page, data);
		if (err < 0)
			return err;

		/* We don't handle -EFAULT, leave it to the caller to check */
		if (unwritten)
			return -ENODEV;

		remain -= len;
		user_data += len;
		offset += len;
		pg = 0;
	} while (remain);

	return 0;
}

const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE,

	.get_pages = shmem_get_pages,
	.put_pages = shmem_put_pages,
	.truncate = shmem_truncate,
	.writeback = shmem_writeback,

	.pwrite = shmem_pwrite,
};

static int create_shmem(struct drm_i915_private *i915,
			struct drm_gem_object *obj,
			size_t size)
{
	unsigned long flags = VM_NORESERVE;
	struct file *filp;

	drm_gem_private_object_init(&i915->drm, obj, size);

	if (i915->mm.gemfs)
		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
						 flags);
	else
		filp = shmem_file_setup("i915", size, flags);
	if (IS_ERR(filp))
		return PTR_ERR(filp);

	obj->filp = filp;
	return 0;
}

struct drm_i915_gem_object *
i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
{
	struct drm_i915_gem_object *obj;
	struct address_space *mapping;
	unsigned int cache_level;
	gfp_t mask;
	int ret;

	/* There is a prevalence of the assumption that we fit the object's
	 * page count inside a 32bit _signed_ variable. Let's document this and
	 * catch if we ever need to fix it. In the meantime, if you do spot
	 * such a local variable, please consider fixing!
	 */
	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	ret = create_shmem(i915, &obj->base, size);
	if (ret)
		goto fail;

	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_I965GM(i915) || IS_I965G(i915)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = obj->base.filp->f_mapping;
	mapping_set_gfp_mask(mapping, mask);
	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));

	i915_gem_object_init(obj, &i915_gem_shmem_ops);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(i915))
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached.
		 * Graphics requests other than display scanout are coherent
		 * with the CPU in accessing this cache. This means in this
		 * mode we don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to get data
		 * visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		cache_level = I915_CACHE_LLC;
	else
		cache_level = I915_CACHE_NONE;

	i915_gem_object_set_cache_coherency(obj, cache_level);

	trace_i915_gem_object_create(obj);

	return obj;

fail:
	i915_gem_object_free(obj);
	return ERR_PTR(ret);
}

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
				       const void *data, size_t size)
{
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(page);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}