/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>

#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))

static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						      uint64_t offset,
						      uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev, int min_size);
static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);

static LIST_HEAD(shrink_list);
static DEFINE_SPINLOCK(shrink_list_lock);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char __iomem *vaddr;
	int unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;

	return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline int
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap_atomic(dst_page, KM_USER0);
	if (dst_vaddr == NULL)
		return -ENOMEM;

	src_vaddr = kmap_atomic(src_page, KM_USER1);
	if (src_vaddr == NULL) {
		kunmap_atomic(dst_vaddr, KM_USER0);
		return -ENOMEM;
	}

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap_atomic(src_vaddr, KM_USER1);
	kunmap_atomic(dst_vaddr, KM_USER0);

	return 0;
}

static inline int
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
	if (gpu_vaddr == NULL)
		return -ENOMEM;

	cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
	if (cpu_vaddr == NULL) {
		kunmap_atomic(gpu_vaddr, KM_USER0);
		return -ENOMEM;
	}

	/* Copy the data, XORing A6 with A17 (1).
	 * The user already knows he's XORing with the other bits
	 * (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap_atomic(cpu_vaddr, KM_USER1);
	kunmap_atomic(gpu_vaddr, KM_USER0);

	return 0;
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

static int
i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);

	/* If we've insufficient memory to map in the pages, attempt
	 * to make some space by throwing out some old buffers.
	 */
	if (ret == -ENOMEM) {
		struct drm_device *dev = obj->dev;

		ret = i915_gem_evict_something(dev, obj->size);
		if (ret)
			return ret;

		ret = i915_gem_object_get_pages(obj, 0);
	}

	return ret;
}

/**
 * This is the fallback shmem pread path, which pins the user pages with
 * get_user_pages() and maps them with kmap_atomic for the copy, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    1);
		} else {
			ret = slow_shmem_copy(user_pages[data_page_index],
					      data_page_offset,
					      obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
		if (ret != 0)
			ret = i915_gem_shmem_pread_slow(dev, obj, args,
							file_priv);
	}

	drm_gem_object_unreference(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline int
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char *src_vaddr, *dst_vaddr;
	unsigned long unwritten;

	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
	src_vaddr = kmap_atomic(user_page, KM_USER1);
	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
						      src_vaddr + user_offset,
						      length);
	kunmap_atomic(src_vaddr, KM_USER1);
	io_mapping_unmap_atomic(dst_vaddr);
	if (unwritten)
		return -EFAULT;
	return 0;
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;


	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				      page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
					gtt_page_base, gtt_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto out_unpin_object;

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
						    shmem_page_offset,
						    user_pages[data_page_index],
						    data_page_offset,
						    page_length,
						    0);
		} else {
			ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length);
		}
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference(obj);

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	mutex_lock(&dev->struct_mutex);

	intel_mark_busy(dev, obj);

#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
			list_move_tail(&obj_priv->fence_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = obj->driver_private;

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret)
			goto unlock;

		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	/* Need a new fence register? */
	if (obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj);
		if (ret)
			goto unlock;
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call. The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated...
	 */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
		DRM_ERROR("failed to add to map hash\n");
		ret = -ENOMEM;
		goto out_free_mm;
	}

	/* By now we should be all set, any drm_mmap request on the offset
	 * below will get to our mmap & fault handler */
	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	kfree(list->map);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	if (dev->dev_mapping)
		unmap_mapping_range(dev->dev_mapping,
				    obj_priv->mmap_offset, obj->size, 1);
}

static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;

	list = &obj->map_list;
	drm_ht_remove_item(&mm->offset_hash, &list->hash);

	if (list->file_offset_node) {
		drm_mm_put_block(list->file_offset_node);
		list->file_offset_node = NULL;
	}

	if (list->map) {
		kfree(list->map);
		list->map = NULL;
	}

	obj_priv->mmap_offset = 0;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	if (IS_I9XX(dev))
		start = 1024*1024;
	else
		start = 512*1024;

	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);

	obj_priv = obj->driver_private;

	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}


	if (!obj_priv->mmap_offset) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	args->offset = obj_priv->mmap_offset;

	/*
	 * Pull it into the GTT so that we have a page list (makes the
	 * initial fault faster and any subsequent flushing possible).
	 */
	if (!obj_priv->agp_mem) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
		list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

void
i915_gem_object_put_pages(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count = obj->size / PAGE_SIZE;
	int i;

	BUG_ON(obj_priv->pages_refcount == 0);
	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);

	if (--obj_priv->pages_refcount != 0)
		return;

	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj_priv->madv == I915_MADV_DONTNEED)
		obj_priv->dirty = 0;

	for (i = 0; i < page_count; i++) {
		if (obj_priv->pages[i] == NULL)
			break;

		if (obj_priv->dirty)
			set_page_dirty(obj_priv->pages[i]);

		if (obj_priv->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj_priv->pages[i]);

		page_cache_release(obj_priv->pages[i]);
	}
	obj_priv->dirty = 0;

	drm_free_large(obj_priv->pages);
	obj_priv->pages = NULL;
}

static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution.
	 */
	spin_lock(&dev_priv->mm.active_list_lock);
	list_move_tail(&obj_priv->list,
		       &dev_priv->mm.active_list);
	spin_unlock(&dev_priv->mm.active_list_lock);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	BUG_ON(!obj_priv->active);
	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
	obj_priv->last_rendering_seqno = 0;
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct inode *inode;

	inode = obj->filp->f_path.dentry->d_inode;
	if (inode->i_op->truncate)
		inode->i_op->truncate (inode);

	obj_priv->madv = __I915_MADV_PURGED;
}

static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
{
	return obj_priv->madv == I915_MADV_DONTNEED;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj_priv->gpu_write_list));

	obj_priv->last_rendering_seqno = 0;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_mutex held.
 *
 * Returned sequence numbers are nonzero on success.
 */
uint32_t
i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
		 uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_file_private *i915_file_priv = NULL;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	if (file_priv != NULL)
		i915_file_priv = file_priv->driver_priv;

	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL)
		return 0;

	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);

	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	DRM_DEBUG_DRIVER("%d\n", seqno);

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list);
	if (i915_file_priv) {
		list_add_tail(&request->client_list,
			      &i915_file_priv->mm.request_list);
	} else {
		INIT_LIST_HEAD(&request->client_list);
	}

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
	 */
	if (flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		list_for_each_entry_safe(obj_priv, next,
					 &dev_priv->mm.gpu_write_list,
					 gpu_write_list) {
			struct drm_gem_object *obj = obj_priv->obj;

			if ((obj->write_domain & flush_domains) ==
			    obj->write_domain) {
				uint32_t old_write_domain = obj->write_domain;

				obj->write_domain = 0;
				list_del_init(&obj_priv->gpu_write_list);
				i915_gem_object_move_to_active(obj, seqno);

				trace_i915_gem_object_change_domain(obj,
								    obj->read_domains,
								    old_write_domain);
			}
		}

	}

	if (!dev_priv->mm.suspended) {
		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
		if (was_empty)
			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	}
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	trace_i915_gem_request_retire(dev, request->seqno);

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	spin_lock(&dev_priv->mm.active_list_lock);
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&dev_priv->mm.active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			goto out;

#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else {
			/* Take a reference on the object so it won't be
			 * freed while the spinlock is held.  The list
			 * protection for this spinlock is safe when breaking
			 * the lock like this since the next thing we do
			 * is just get the head of the list again.
			 */
			drm_gem_object_reference(obj);
			i915_gem_object_move_to_inactive(obj);
			spin_unlock(&dev_priv->mm.active_list_lock);
			drm_gem_object_unreference(obj);
			spin_lock(&dev_priv->mm.active_list_lock);
		}
	}
out:
	spin_unlock(&dev_priv->mm.active_list_lock);
}

/**
 * Returns true if seq1 is later than seq2.
 */
bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!dev_priv->hw_status_page || list_empty(&dev_priv->mm.request_list))
		return;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&dev_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    atomic_read(&dev_priv->mm.wedged)) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			list_del(&request->client_list);
			kfree(request);
		} else
			break;
	}

	if (unlikely (dev_priv->trace_irq_seqno &&
		      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
		i915_user_irq_put(dev);
		dev_priv->trace_irq_seqno = 0;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended &&
	    !list_empty(&dev_priv->mm.request_list))
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

int
i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 ier;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		if (IS_IRONLAKE(dev))
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

		trace_i915_gem_request_wait_begin(dev, seqno);

		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_get(dev);
		if (interruptible)
			ret = wait_event_interruptible(dev_priv->irq_queue,
				i915_seqno_passed(i915_get_gem_seqno(dev), seqno) ||
				atomic_read(&dev_priv->mm.wedged));
		else
			wait_event(dev_priv->irq_queue,
				i915_seqno_passed(i915_get_gem_seqno(dev), seqno) ||
				atomic_read(&dev_priv->mm.wedged));

		i915_user_irq_put(dev);
		dev_priv->mm.waiting_gem_seqno = 0;

		trace_i915_gem_request_wait_end(dev, seqno);
	}
	if (atomic_read(&dev_priv->mm.wedged))
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, i915_get_gem_seqno(dev));

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	return i915_do_wait_request(dev, seqno, 1);
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

#if WATCH_EXEC
	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
		 invalidate_domains, flush_domains);
#endif
	trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
				     invalidate_domains, flush_domains);

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
		 *
		 * I915_GEM_DOMAIN_COMMAND may not exist?
		 *
		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
		 * invalidated when MI_EXE_FLUSH is set.
		 *
		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
		 * invalidated with every MI_FLUSH.
		 *
		 * TLBs:
		 *
		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
		 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
		 * are flushed at any MI_FLUSH.
		 */

		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
		if ((invalidate_domains|flush_domains) &
		    I915_GEM_DOMAIN_RENDER)
			cmd &= ~MI_NO_WRITE_FLUSH;
		if (!IS_I965G(dev)) {
			/*
			 * On the 965, the sampler cache always gets flushed
			 * and this bit is reserved.
1938 */ 1939 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 1940 cmd |= MI_READ_FLUSH; 1941 } 1942 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 1943 cmd |= MI_EXE_FLUSH; 1944 1945 #if WATCH_EXEC 1946 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); 1947 #endif 1948 BEGIN_LP_RING(2); 1949 OUT_RING(cmd); 1950 OUT_RING(MI_NOOP); 1951 ADVANCE_LP_RING(); 1952 } 1953 } 1954 1955 /** 1956 * Ensures that all rendering to the object has completed and the object is 1957 * safe to unbind from the GTT or access from the CPU. 1958 */ 1959 static int 1960 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1961 { 1962 struct drm_device *dev = obj->dev; 1963 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1964 int ret; 1965 1966 /* This function only exists to support waiting for existing rendering, 1967 * not for emitting required flushes. 1968 */ 1969 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0); 1970 1971 /* If there is rendering queued on the buffer being evicted, wait for 1972 * it. 1973 */ 1974 if (obj_priv->active) { 1975 #if WATCH_BUF 1976 DRM_INFO("%s: object %p wait for seqno %08x\n", 1977 __func__, obj, obj_priv->last_rendering_seqno); 1978 #endif 1979 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); 1980 if (ret != 0) 1981 return ret; 1982 } 1983 1984 return 0; 1985 } 1986 1987 /** 1988 * Unbinds an object from the GTT aperture. 1989 */ 1990 int 1991 i915_gem_object_unbind(struct drm_gem_object *obj) 1992 { 1993 struct drm_device *dev = obj->dev; 1994 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1995 int ret = 0; 1996 1997 #if WATCH_BUF 1998 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1999 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 2000 #endif 2001 if (obj_priv->gtt_space == NULL) 2002 return 0; 2003 2004 if (obj_priv->pin_count != 0) { 2005 DRM_ERROR("Attempting to unbind pinned buffer\n"); 2006 return -EINVAL; 2007 } 2008 2009 /* blow away mappings if mapped through GTT */ 2010 i915_gem_release_mmap(obj); 2011 2012 /* Move the object to the CPU domain to ensure that 2013 * any possible CPU writes while it's not in the GTT 2014 * are flushed when we go to remap it. This will 2015 * also ensure that all pending GPU writes are finished 2016 * before we unbind. 2017 */ 2018 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 2019 if (ret) { 2020 if (ret != -ERESTARTSYS) 2021 DRM_ERROR("set_domain failed: %d\n", ret); 2022 return ret; 2023 } 2024 2025 BUG_ON(obj_priv->active); 2026 2027 /* release the fence reg _after_ flushing */ 2028 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 2029 i915_gem_clear_fence_reg(obj); 2030 2031 if (obj_priv->agp_mem != NULL) { 2032 drm_unbind_agp(obj_priv->agp_mem); 2033 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 2034 obj_priv->agp_mem = NULL; 2035 } 2036 2037 i915_gem_object_put_pages(obj); 2038 BUG_ON(obj_priv->pages_refcount); 2039 2040 if (obj_priv->gtt_space) { 2041 atomic_dec(&dev->gtt_count); 2042 atomic_sub(obj->size, &dev->gtt_memory); 2043 2044 drm_mm_put_block(obj_priv->gtt_space); 2045 obj_priv->gtt_space = NULL; 2046 } 2047 2048 /* Remove ourselves from the LRU list if present. 
*/ 2049 if (!list_empty(&obj_priv->list)) 2050 list_del_init(&obj_priv->list); 2051 2052 if (i915_gem_object_is_purgeable(obj_priv)) 2053 i915_gem_object_truncate(obj); 2054 2055 trace_i915_gem_object_unbind(obj); 2056 2057 return 0; 2058 } 2059 2060 static struct drm_gem_object * 2061 i915_gem_find_inactive_object(struct drm_device *dev, int min_size) 2062 { 2063 drm_i915_private_t *dev_priv = dev->dev_private; 2064 struct drm_i915_gem_object *obj_priv; 2065 struct drm_gem_object *best = NULL; 2066 struct drm_gem_object *first = NULL; 2067 2068 /* Try to find the smallest clean object */ 2069 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { 2070 struct drm_gem_object *obj = obj_priv->obj; 2071 if (obj->size >= min_size) { 2072 if ((!obj_priv->dirty || 2073 i915_gem_object_is_purgeable(obj_priv)) && 2074 (!best || obj->size < best->size)) { 2075 best = obj; 2076 if (best->size == min_size) 2077 return best; 2078 } 2079 if (!first) 2080 first = obj; 2081 } 2082 } 2083 2084 return best ? best : first; 2085 } 2086 2087 static int 2088 i915_gem_evict_everything(struct drm_device *dev) 2089 { 2090 drm_i915_private_t *dev_priv = dev->dev_private; 2091 int ret; 2092 uint32_t seqno; 2093 bool lists_empty; 2094 2095 spin_lock(&dev_priv->mm.active_list_lock); 2096 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2097 list_empty(&dev_priv->mm.flushing_list) && 2098 list_empty(&dev_priv->mm.active_list)); 2099 spin_unlock(&dev_priv->mm.active_list_lock); 2100 2101 if (lists_empty) 2102 return -ENOSPC; 2103 2104 /* Flush everything (on to the inactive lists) and evict */ 2105 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 2106 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); 2107 if (seqno == 0) 2108 return -ENOMEM; 2109 2110 ret = i915_wait_request(dev, seqno); 2111 if (ret) 2112 return ret; 2113 2114 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 2115 2116 ret = i915_gem_evict_from_inactive_list(dev); 2117 if (ret) 2118 return ret; 2119 2120 spin_lock(&dev_priv->mm.active_list_lock); 2121 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2122 list_empty(&dev_priv->mm.flushing_list) && 2123 list_empty(&dev_priv->mm.active_list)); 2124 spin_unlock(&dev_priv->mm.active_list_lock); 2125 BUG_ON(!lists_empty); 2126 2127 return 0; 2128 } 2129 2130 static int 2131 i915_gem_evict_something(struct drm_device *dev, int min_size) 2132 { 2133 drm_i915_private_t *dev_priv = dev->dev_private; 2134 struct drm_gem_object *obj; 2135 int ret; 2136 2137 for (;;) { 2138 i915_gem_retire_requests(dev); 2139 2140 /* If there's an inactive buffer available now, grab it 2141 * and be done. 2142 */ 2143 obj = i915_gem_find_inactive_object(dev, min_size); 2144 if (obj) { 2145 struct drm_i915_gem_object *obj_priv; 2146 2147 #if WATCH_LRU 2148 DRM_INFO("%s: evicting %p\n", __func__, obj); 2149 #endif 2150 obj_priv = obj->driver_private; 2151 BUG_ON(obj_priv->pin_count != 0); 2152 BUG_ON(obj_priv->active); 2153 2154 /* Wait on the rendering and unbind the buffer. */ 2155 return i915_gem_object_unbind(obj); 2156 } 2157 2158 /* If we didn't get anything, but the ring is still processing 2159 * things, wait for the next to finish and hopefully leave us 2160 * a buffer to evict. 
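 * Retiring that request moves its buffers off the active list (to the
 * flushing or inactive list), so the next pass around this loop has a
 * chance of finding something to unbind.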
2161 */ 2162 if (!list_empty(&dev_priv->mm.request_list)) { 2163 struct drm_i915_gem_request *request; 2164 2165 request = list_first_entry(&dev_priv->mm.request_list, 2166 struct drm_i915_gem_request, 2167 list); 2168 2169 ret = i915_wait_request(dev, request->seqno); 2170 if (ret) 2171 return ret; 2172 2173 continue; 2174 } 2175 2176 /* If we didn't have anything on the request list but there 2177 * are buffers awaiting a flush, emit one and try again. 2178 * When we wait on it, those buffers waiting for that flush 2179 * will get moved to inactive. 2180 */ 2181 if (!list_empty(&dev_priv->mm.flushing_list)) { 2182 struct drm_i915_gem_object *obj_priv; 2183 2184 /* Find an object that we can immediately reuse */ 2185 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { 2186 obj = obj_priv->obj; 2187 if (obj->size >= min_size) 2188 break; 2189 2190 obj = NULL; 2191 } 2192 2193 if (obj != NULL) { 2194 uint32_t seqno; 2195 2196 i915_gem_flush(dev, 2197 obj->write_domain, 2198 obj->write_domain); 2199 seqno = i915_add_request(dev, NULL, obj->write_domain); 2200 if (seqno == 0) 2201 return -ENOMEM; 2202 2203 ret = i915_wait_request(dev, seqno); 2204 if (ret) 2205 return ret; 2206 2207 continue; 2208 } 2209 } 2210 2211 /* If we didn't do any of the above, there's no single buffer 2212 * large enough to swap out for the new one, so just evict 2213 * everything and start again. (This should be rare.) 2214 */ 2215 if (!list_empty (&dev_priv->mm.inactive_list)) 2216 return i915_gem_evict_from_inactive_list(dev); 2217 else 2218 return i915_gem_evict_everything(dev); 2219 } 2220 } 2221 2222 int 2223 i915_gem_object_get_pages(struct drm_gem_object *obj, 2224 gfp_t gfpmask) 2225 { 2226 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2227 int page_count, i; 2228 struct address_space *mapping; 2229 struct inode *inode; 2230 struct page *page; 2231 int ret; 2232 2233 if (obj_priv->pages_refcount++ != 0) 2234 return 0; 2235 2236 /* Get the list of pages out of our struct file. They'll be pinned 2237 * at this point until we release them. 
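 * read_cache_page_gfp() hands back each page with a reference held;
 * those references are what keep the pages from being reclaimed until
 * they are put again via i915_gem_object_put_pages().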
2238 */ 2239 page_count = obj->size / PAGE_SIZE; 2240 BUG_ON(obj_priv->pages != NULL); 2241 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); 2242 if (obj_priv->pages == NULL) { 2243 obj_priv->pages_refcount--; 2244 return -ENOMEM; 2245 } 2246 2247 inode = obj->filp->f_path.dentry->d_inode; 2248 mapping = inode->i_mapping; 2249 for (i = 0; i < page_count; i++) { 2250 page = read_cache_page_gfp(mapping, i, 2251 mapping_gfp_mask (mapping) | 2252 __GFP_COLD | 2253 gfpmask); 2254 if (IS_ERR(page)) { 2255 ret = PTR_ERR(page); 2256 i915_gem_object_put_pages(obj); 2257 return ret; 2258 } 2259 obj_priv->pages[i] = page; 2260 } 2261 2262 if (obj_priv->tiling_mode != I915_TILING_NONE) 2263 i915_gem_object_do_bit_17_swizzle(obj); 2264 2265 return 0; 2266 } 2267 2268 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2269 { 2270 struct drm_gem_object *obj = reg->obj; 2271 struct drm_device *dev = obj->dev; 2272 drm_i915_private_t *dev_priv = dev->dev_private; 2273 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2274 int regnum = obj_priv->fence_reg; 2275 uint64_t val; 2276 2277 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2278 0xfffff000) << 32; 2279 val |= obj_priv->gtt_offset & 0xfffff000; 2280 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2281 if (obj_priv->tiling_mode == I915_TILING_Y) 2282 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2283 val |= I965_FENCE_REG_VALID; 2284 2285 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2286 } 2287 2288 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2289 { 2290 struct drm_gem_object *obj = reg->obj; 2291 struct drm_device *dev = obj->dev; 2292 drm_i915_private_t *dev_priv = dev->dev_private; 2293 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2294 int regnum = obj_priv->fence_reg; 2295 int tile_width; 2296 uint32_t fence_reg, val; 2297 uint32_t pitch_val; 2298 2299 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2300 (obj_priv->gtt_offset & (obj->size - 1))) { 2301 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2302 __func__, obj_priv->gtt_offset, obj->size); 2303 return; 2304 } 2305 2306 if (obj_priv->tiling_mode == I915_TILING_Y && 2307 HAS_128_BYTE_Y_TILING(dev)) 2308 tile_width = 128; 2309 else 2310 tile_width = 512; 2311 2312 /* Note: pitch better be a power of two tile widths */ 2313 pitch_val = obj_priv->stride / tile_width; 2314 pitch_val = ffs(pitch_val) - 1; 2315 2316 val = obj_priv->gtt_offset; 2317 if (obj_priv->tiling_mode == I915_TILING_Y) 2318 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2319 val |= I915_FENCE_SIZE_BITS(obj->size); 2320 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2321 val |= I830_FENCE_REG_VALID; 2322 2323 if (regnum < 8) 2324 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2325 else 2326 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2327 I915_WRITE(fence_reg, val); 2328 } 2329 2330 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2331 { 2332 struct drm_gem_object *obj = reg->obj; 2333 struct drm_device *dev = obj->dev; 2334 drm_i915_private_t *dev_priv = dev->dev_private; 2335 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2336 int regnum = obj_priv->fence_reg; 2337 uint32_t val; 2338 uint32_t pitch_val; 2339 uint32_t fence_size_bits; 2340 2341 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2342 (obj_priv->gtt_offset & (obj->size - 1))) { 2343 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2344 __func__, obj_priv->gtt_offset); 2345 return; 2346 } 2347 2348 
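/* As with the i915 fence above, the register wants log2 of the pitch,
 * here in 128-byte units: e.g. a 512-byte stride gives 512 / 128 = 4,
 * and ffs(4) - 1 = 2 is what lands in the pitch field.
 */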
pitch_val = obj_priv->stride / 128; 2349 pitch_val = ffs(pitch_val) - 1; 2350 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); 2351 2352 val = obj_priv->gtt_offset; 2353 if (obj_priv->tiling_mode == I915_TILING_Y) 2354 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2355 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2356 WARN_ON(fence_size_bits & ~0x00000f00); 2357 val |= fence_size_bits; 2358 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2359 val |= I830_FENCE_REG_VALID; 2360 2361 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2362 } 2363 2364 /** 2365 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2366 * @obj: object to map through a fence reg 2367 * 2368 * When mapping objects through the GTT, userspace wants to be able to write 2369 * to them without having to worry about swizzling if the object is tiled. 2370 * 2371 * This function walks the fence regs looking for a free one for @obj, 2372 * stealing one if it can't find any. 2373 * 2374 * It then sets up the reg based on the object's properties: address, pitch 2375 * and tiling format. 2376 */ 2377 int 2378 i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 2379 { 2380 struct drm_device *dev = obj->dev; 2381 struct drm_i915_private *dev_priv = dev->dev_private; 2382 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2383 struct drm_i915_fence_reg *reg = NULL; 2384 struct drm_i915_gem_object *old_obj_priv = NULL; 2385 int i, ret, avail; 2386 2387 /* Just update our place in the LRU if our fence is getting used. */ 2388 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { 2389 list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); 2390 return 0; 2391 } 2392 2393 switch (obj_priv->tiling_mode) { 2394 case I915_TILING_NONE: 2395 WARN(1, "allocating a fence for non-tiled object?\n"); 2396 break; 2397 case I915_TILING_X: 2398 if (!obj_priv->stride) 2399 return -EINVAL; 2400 WARN((obj_priv->stride & (512 - 1)), 2401 "object 0x%08x is X tiled but has non-512B pitch\n", 2402 obj_priv->gtt_offset); 2403 break; 2404 case I915_TILING_Y: 2405 if (!obj_priv->stride) 2406 return -EINVAL; 2407 WARN((obj_priv->stride & (128 - 1)), 2408 "object 0x%08x is Y tiled but has non-128B pitch\n", 2409 obj_priv->gtt_offset); 2410 break; 2411 } 2412 2413 /* First try to find a free reg */ 2414 avail = 0; 2415 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2416 reg = &dev_priv->fence_regs[i]; 2417 if (!reg->obj) 2418 break; 2419 2420 old_obj_priv = reg->obj->driver_private; 2421 if (!old_obj_priv->pin_count) 2422 avail++; 2423 } 2424 2425 /* None available, try to steal one or wait for a user to finish */ 2426 if (i == dev_priv->num_fence_regs) { 2427 struct drm_gem_object *old_obj = NULL; 2428 2429 if (avail == 0) 2430 return -ENOSPC; 2431 2432 list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list, 2433 fence_list) { 2434 old_obj = old_obj_priv->obj; 2435 2436 if (old_obj_priv->pin_count) 2437 continue; 2438 2439 /* Take a reference, as otherwise the wait_rendering 2440 * below may cause the object to get freed out from 2441 * under us. 2442 */ 2443 drm_gem_object_reference(old_obj); 2444 2445 /* i915 uses fences for GPU access to tiled buffers */ 2446 if (IS_I965G(dev) || !old_obj_priv->active) 2447 break; 2448 2449 /* This brings the object to the head of the LRU if it 2450 * had been written to. The only way this should 2451 * result in us waiting longer than the expected 2452 * optimal amount of time is if there was a 2453 * fence-using buffer later that was read-only. 
2454 */ 2455 i915_gem_object_flush_gpu_write_domain(old_obj); 2456 ret = i915_gem_object_wait_rendering(old_obj); 2457 if (ret != 0) { 2458 drm_gem_object_unreference(old_obj); 2459 return ret; 2460 } 2461 2462 break; 2463 } 2464 2465 /* 2466 * Zap this virtual mapping so we can set up a fence again 2467 * for this object next time we need it. 2468 */ 2469 i915_gem_release_mmap(old_obj); 2470 2471 i = old_obj_priv->fence_reg; 2472 reg = &dev_priv->fence_regs[i]; 2473 2474 old_obj_priv->fence_reg = I915_FENCE_REG_NONE; 2475 list_del_init(&old_obj_priv->fence_list); 2476 2477 drm_gem_object_unreference(old_obj); 2478 } 2479 2480 obj_priv->fence_reg = i; 2481 list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); 2482 2483 reg->obj = obj; 2484 2485 if (IS_I965G(dev)) 2486 i965_write_fence_reg(reg); 2487 else if (IS_I9XX(dev)) 2488 i915_write_fence_reg(reg); 2489 else 2490 i830_write_fence_reg(reg); 2491 2492 trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); 2493 2494 return 0; 2495 } 2496 2497 /** 2498 * i915_gem_clear_fence_reg - clear out fence register info 2499 * @obj: object to clear 2500 * 2501 * Zeroes out the fence register itself and clears out the associated 2502 * data structures in dev_priv and obj_priv. 2503 */ 2504 static void 2505 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2506 { 2507 struct drm_device *dev = obj->dev; 2508 drm_i915_private_t *dev_priv = dev->dev_private; 2509 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2510 2511 if (IS_I965G(dev)) 2512 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2513 else { 2514 uint32_t fence_reg; 2515 2516 if (obj_priv->fence_reg < 8) 2517 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2518 else 2519 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2520 8) * 4; 2521 2522 I915_WRITE(fence_reg, 0); 2523 } 2524 2525 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; 2526 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2527 list_del_init(&obj_priv->fence_list); 2528 } 2529 2530 /** 2531 * i915_gem_object_put_fence_reg - waits on outstanding fenced access 2532 * to the buffer to finish, and then resets the fence register. 2533 * @obj: tiled object holding a fence register. 2534 * 2535 * Zeroes out the fence register itself and clears out the associated 2536 * data structures in dev_priv and obj_priv. 2537 */ 2538 int 2539 i915_gem_object_put_fence_reg(struct drm_gem_object *obj) 2540 { 2541 struct drm_device *dev = obj->dev; 2542 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2543 2544 if (obj_priv->fence_reg == I915_FENCE_REG_NONE) 2545 return 0; 2546 2547 /* On the i915, GPU access to tiled buffers is via a fence, 2548 * therefore we must wait for any outstanding access to complete 2549 * before clearing the fence. 2550 */ 2551 if (!IS_I965G(dev)) { 2552 int ret; 2553 2554 i915_gem_object_flush_gpu_write_domain(obj); 2555 i915_gem_object_flush_gtt_write_domain(obj); 2556 ret = i915_gem_object_wait_rendering(obj); 2557 if (ret != 0) 2558 return ret; 2559 } 2560 2561 i915_gem_clear_fence_reg (obj); 2562 2563 return 0; 2564 } 2565 2566 /** 2567 * Finds free space in the GTT aperture and binds the object there. 
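 *
 * Roughly: reserve a range of GTT address space with drm_mm_search_free()
 * and drm_mm_get_block(), pin the shmem backing pages with
 * i915_gem_object_get_pages(), then map them at gtt_offset with
 * drm_agp_bind_pages().  An out-of-space failure at any step falls back
 * to i915_gem_evict_something() and retries the search.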
2568 */ 2569 static int 2570 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2571 { 2572 struct drm_device *dev = obj->dev; 2573 drm_i915_private_t *dev_priv = dev->dev_private; 2574 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2575 struct drm_mm_node *free_space; 2576 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN; 2577 int ret; 2578 2579 if (obj_priv->madv != I915_MADV_WILLNEED) { 2580 DRM_ERROR("Attempting to bind a purgeable object\n"); 2581 return -EINVAL; 2582 } 2583 2584 if (alignment == 0) 2585 alignment = i915_gem_get_gtt_alignment(obj); 2586 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2587 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2588 return -EINVAL; 2589 } 2590 2591 search_free: 2592 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2593 obj->size, alignment, 0); 2594 if (free_space != NULL) { 2595 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2596 alignment); 2597 if (obj_priv->gtt_space != NULL) { 2598 obj_priv->gtt_space->private = obj; 2599 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2600 } 2601 } 2602 if (obj_priv->gtt_space == NULL) { 2603 /* If the gtt is empty and we're still having trouble 2604 * fitting our object in, we're out of memory. 2605 */ 2606 #if WATCH_LRU 2607 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2608 #endif 2609 ret = i915_gem_evict_something(dev, obj->size); 2610 if (ret) 2611 return ret; 2612 2613 goto search_free; 2614 } 2615 2616 #if WATCH_BUF 2617 DRM_INFO("Binding object of size %zd at 0x%08x\n", 2618 obj->size, obj_priv->gtt_offset); 2619 #endif 2620 ret = i915_gem_object_get_pages(obj, gfpmask); 2621 if (ret) { 2622 drm_mm_put_block(obj_priv->gtt_space); 2623 obj_priv->gtt_space = NULL; 2624 2625 if (ret == -ENOMEM) { 2626 /* first try to clear up some space from the GTT */ 2627 ret = i915_gem_evict_something(dev, obj->size); 2628 if (ret) { 2629 /* now try to shrink everyone else */ 2630 if (gfpmask) { 2631 gfpmask = 0; 2632 goto search_free; 2633 } 2634 2635 return ret; 2636 } 2637 2638 goto search_free; 2639 } 2640 2641 return ret; 2642 } 2643 2644 /* Create an AGP memory structure pointing at our pages, and bind it 2645 * into the GTT. 2646 */ 2647 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2648 obj_priv->pages, 2649 obj->size >> PAGE_SHIFT, 2650 obj_priv->gtt_offset, 2651 obj_priv->agp_type); 2652 if (obj_priv->agp_mem == NULL) { 2653 i915_gem_object_put_pages(obj); 2654 drm_mm_put_block(obj_priv->gtt_space); 2655 obj_priv->gtt_space = NULL; 2656 2657 ret = i915_gem_evict_something(dev, obj->size); 2658 if (ret) 2659 return ret; 2660 2661 goto search_free; 2662 } 2663 atomic_inc(&dev->gtt_count); 2664 atomic_add(obj->size, &dev->gtt_memory); 2665 2666 /* Assert that the object is not currently in any GPU domain. As it 2667 * wasn't in the GTT, there shouldn't be any way it could have been in 2668 * a GPU cache 2669 */ 2670 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2671 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2672 2673 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); 2674 2675 return 0; 2676 } 2677 2678 void 2679 i915_gem_clflush_object(struct drm_gem_object *obj) 2680 { 2681 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2682 2683 /* If we don't have a page list set up, then we're not pinned 2684 * to GPU, and we can ignore the cache flush because it'll happen 2685 * again at bind time. 
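 * (Once the object is bound again it is still in the CPU domain, so the
 * normal domain transitions below take care of any flushing that is
 * still needed before the GPU touches it.)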
2686 */ 2687 if (obj_priv->pages == NULL) 2688 return; 2689 2690 trace_i915_gem_object_clflush(obj); 2691 2692 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2693 } 2694 2695 /** Flushes any GPU write domain for the object if it's dirty. */ 2696 static void 2697 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2698 { 2699 struct drm_device *dev = obj->dev; 2700 uint32_t seqno; 2701 uint32_t old_write_domain; 2702 2703 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2704 return; 2705 2706 /* Queue the GPU write cache flushing we need. */ 2707 old_write_domain = obj->write_domain; 2708 i915_gem_flush(dev, 0, obj->write_domain); 2709 seqno = i915_add_request(dev, NULL, obj->write_domain); 2710 BUG_ON(obj->write_domain); 2711 i915_gem_object_move_to_active(obj, seqno); 2712 2713 trace_i915_gem_object_change_domain(obj, 2714 obj->read_domains, 2715 old_write_domain); 2716 } 2717 2718 /** Flushes the GTT write domain for the object if it's dirty. */ 2719 static void 2720 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2721 { 2722 uint32_t old_write_domain; 2723 2724 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2725 return; 2726 2727 /* No actual flushing is required for the GTT write domain. Writes 2728 * to it immediately go to main memory as far as we know, so there's 2729 * no chipset flush. It also doesn't land in render cache. 2730 */ 2731 old_write_domain = obj->write_domain; 2732 obj->write_domain = 0; 2733 2734 trace_i915_gem_object_change_domain(obj, 2735 obj->read_domains, 2736 old_write_domain); 2737 } 2738 2739 /** Flushes the CPU write domain for the object if it's dirty. */ 2740 static void 2741 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2742 { 2743 struct drm_device *dev = obj->dev; 2744 uint32_t old_write_domain; 2745 2746 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2747 return; 2748 2749 i915_gem_clflush_object(obj); 2750 drm_agp_chipset_flush(dev); 2751 old_write_domain = obj->write_domain; 2752 obj->write_domain = 0; 2753 2754 trace_i915_gem_object_change_domain(obj, 2755 obj->read_domains, 2756 old_write_domain); 2757 } 2758 2759 void 2760 i915_gem_object_flush_write_domain(struct drm_gem_object *obj) 2761 { 2762 switch (obj->write_domain) { 2763 case I915_GEM_DOMAIN_GTT: 2764 i915_gem_object_flush_gtt_write_domain(obj); 2765 break; 2766 case I915_GEM_DOMAIN_CPU: 2767 i915_gem_object_flush_cpu_write_domain(obj); 2768 break; 2769 default: 2770 i915_gem_object_flush_gpu_write_domain(obj); 2771 break; 2772 } 2773 } 2774 2775 /** 2776 * Moves a single object to the GTT read, and possibly write domain. 2777 * 2778 * This function returns when the move is complete, including waiting on 2779 * flushes to occur. 2780 */ 2781 int 2782 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2783 { 2784 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2785 uint32_t old_write_domain, old_read_domains; 2786 int ret; 2787 2788 /* Not valid to be called on unbound objects. */ 2789 if (obj_priv->gtt_space == NULL) 2790 return -EINVAL; 2791 2792 i915_gem_object_flush_gpu_write_domain(obj); 2793 /* Wait on any GPU rendering and flushing to occur. */ 2794 ret = i915_gem_object_wait_rendering(obj); 2795 if (ret != 0) 2796 return ret; 2797 2798 old_write_domain = obj->write_domain; 2799 old_read_domains = obj->read_domains; 2800 2801 /* If we're writing through the GTT domain, then CPU and GPU caches 2802 * will need to be invalidated at next use. 
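 * In the (read_domains, write_domain) notation of the big comment ahead
 * of i915_gem_object_set_to_gpu_domain(), a write leaves the object as
 * (GTT, GTT), while a read-only transition just adds GTT to whatever
 * read domains were already valid.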
2803 */ 2804 if (write) 2805 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2806 2807 i915_gem_object_flush_cpu_write_domain(obj); 2808 2809 /* It should now be out of any other write domains, and we can update 2810 * the domain values for our changes. 2811 */ 2812 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2813 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2814 if (write) { 2815 obj->write_domain = I915_GEM_DOMAIN_GTT; 2816 obj_priv->dirty = 1; 2817 } 2818 2819 trace_i915_gem_object_change_domain(obj, 2820 old_read_domains, 2821 old_write_domain); 2822 2823 return 0; 2824 } 2825 2826 /* 2827 * Prepare buffer for display plane. Use uninterruptible for possible flush 2828 * wait, as in modesetting process we're not supposed to be interrupted. 2829 */ 2830 int 2831 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj) 2832 { 2833 struct drm_device *dev = obj->dev; 2834 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2835 uint32_t old_write_domain, old_read_domains; 2836 int ret; 2837 2838 /* Not valid to be called on unbound objects. */ 2839 if (obj_priv->gtt_space == NULL) 2840 return -EINVAL; 2841 2842 i915_gem_object_flush_gpu_write_domain(obj); 2843 2844 /* Wait on any GPU rendering and flushing to occur. */ 2845 if (obj_priv->active) { 2846 #if WATCH_BUF 2847 DRM_INFO("%s: object %p wait for seqno %08x\n", 2848 __func__, obj, obj_priv->last_rendering_seqno); 2849 #endif 2850 ret = i915_do_wait_request(dev, obj_priv->last_rendering_seqno, 0); 2851 if (ret != 0) 2852 return ret; 2853 } 2854 2855 old_write_domain = obj->write_domain; 2856 old_read_domains = obj->read_domains; 2857 2858 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2859 2860 i915_gem_object_flush_cpu_write_domain(obj); 2861 2862 /* It should now be out of any other write domains, and we can update 2863 * the domain values for our changes. 2864 */ 2865 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2866 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2867 obj->write_domain = I915_GEM_DOMAIN_GTT; 2868 obj_priv->dirty = 1; 2869 2870 trace_i915_gem_object_change_domain(obj, 2871 old_read_domains, 2872 old_write_domain); 2873 2874 return 0; 2875 } 2876 2877 /** 2878 * Moves a single object to the CPU read, and possibly write domain. 2879 * 2880 * This function returns when the move is complete, including waiting on 2881 * flushes to occur. 2882 */ 2883 static int 2884 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2885 { 2886 uint32_t old_write_domain, old_read_domains; 2887 int ret; 2888 2889 i915_gem_object_flush_gpu_write_domain(obj); 2890 /* Wait on any GPU rendering and flushing to occur. */ 2891 ret = i915_gem_object_wait_rendering(obj); 2892 if (ret != 0) 2893 return ret; 2894 2895 i915_gem_object_flush_gtt_write_domain(obj); 2896 2897 /* If we have a partially-valid cache of the object in the CPU, 2898 * finish invalidating it and free the per-page flags. 2899 */ 2900 i915_gem_object_set_to_full_cpu_read_domain(obj); 2901 2902 old_write_domain = obj->write_domain; 2903 old_read_domains = obj->read_domains; 2904 2905 /* Flush the CPU cache if it's still invalid. */ 2906 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2907 i915_gem_clflush_object(obj); 2908 2909 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2910 } 2911 2912 /* It should now be out of any other write domains, and we can update 2913 * the domain values for our changes. 
2914 */
2915 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2916
2917 /* If we're writing through the CPU, then the GPU read domains will
2918 * need to be invalidated at next use.
2919 */
2920 if (write) {
2921 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2922 obj->write_domain = I915_GEM_DOMAIN_CPU;
2923 }
2924
2925 trace_i915_gem_object_change_domain(obj,
2926 old_read_domains,
2927 old_write_domain);
2928
2929 return 0;
2930 }
2931
2932 /*
2933 * Set the next domain for the specified object. This
2934 * may not actually perform the necessary flushing/invalidating though,
2935 * as that may want to be batched with other set_domain operations
2936 *
2937 * This is (we hope) the only really tricky part of gem. The goal
2938 * is fairly simple -- track which caches hold bits of the object
2939 * and make sure they remain coherent. A few concrete examples may
2940 * help to explain how it works. For shorthand, we use the notation
2941 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2942 * a pair of read and write domain masks.
2943 *
2944 * Case 1: the batch buffer
2945 *
2946 * 1. Allocated
2947 * 2. Written by CPU
2948 * 3. Mapped to GTT
2949 * 4. Read by GPU
2950 * 5. Unmapped from GTT
2951 * 6. Freed
2952 *
2953 * Let's take these a step at a time
2954 *
2955 * 1. Allocated
2956 * Pages allocated from the kernel may still have
2957 * cache contents, so we set them to (CPU, CPU) always.
2958 * 2. Written by CPU (using pwrite)
2959 * The pwrite function calls set_domain (CPU, CPU) and
2960 * this function does nothing (as nothing changes)
2961 * 3. Mapped to GTT
2962 * This function asserts that the object is not
2963 * currently in any GPU-based read or write domains
2964 * 4. Read by GPU
2965 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
2966 * As write_domain is zero, this function adds in the
2967 * current read domains (CPU+COMMAND, 0).
2968 * flush_domains is set to CPU.
2969 * invalidate_domains is set to COMMAND
2970 * clflush is run to get data out of the CPU caches
2971 * then i915_dev_set_domain calls i915_gem_flush to
2972 * emit an MI_FLUSH and drm_agp_chipset_flush
2973 * 5. Unmapped from GTT
2974 * i915_gem_object_unbind calls set_domain (CPU, CPU)
2975 * flush_domains and invalidate_domains end up both zero
2976 * so no flushing/invalidating happens
2977 * 6. Freed
2978 * yay, done
2979 *
2980 * Case 2: The shared render buffer
2981 *
2982 * 1. Allocated
2983 * 2. Mapped to GTT
2984 * 3. Read/written by GPU
2985 * 4. set_domain to (CPU,CPU)
2986 * 5. Read/written by CPU
2987 * 6. Read/written by GPU
2988 *
2989 * 1. Allocated
2990 * Same as last example, (CPU, CPU)
2991 * 2. Mapped to GTT
2992 * Nothing changes (assertions find that it is not in the GPU)
2993 * 3. Read/written by GPU
2994 * execbuffer calls set_domain (RENDER, RENDER)
2995 * flush_domains gets CPU
2996 * invalidate_domains gets GPU
2997 * clflush (obj)
2998 * MI_FLUSH and drm_agp_chipset_flush
2999 * 4. set_domain (CPU, CPU)
3000 * flush_domains gets GPU
3001 * invalidate_domains gets CPU
3002 * wait_rendering (obj) to make sure all drawing is complete.
3003 * This will include an MI_FLUSH to get the data from GPU
3004 * to memory
3005 * clflush (obj) to invalidate the CPU cache
3006 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3007 * 5. Read/written by CPU
3008 * cache lines are loaded and dirtied
3009 * 6. Read/written by GPU
3010 * Same as last GPU access
3011 *
3012 * Case 3: The constant buffer
3013 *
3014 * 1. Allocated
3015 * 2. Written by CPU
3016 * 3.
Read by GPU 3017 * 4. Updated (written) by CPU again 3018 * 5. Read by GPU 3019 * 3020 * 1. Allocated 3021 * (CPU, CPU) 3022 * 2. Written by CPU 3023 * (CPU, CPU) 3024 * 3. Read by GPU 3025 * (CPU+RENDER, 0) 3026 * flush_domains = CPU 3027 * invalidate_domains = RENDER 3028 * clflush (obj) 3029 * MI_FLUSH 3030 * drm_agp_chipset_flush 3031 * 4. Updated (written) by CPU again 3032 * (CPU, CPU) 3033 * flush_domains = 0 (no previous write domain) 3034 * invalidate_domains = 0 (no new read domains) 3035 * 5. Read by GPU 3036 * (CPU+RENDER, 0) 3037 * flush_domains = CPU 3038 * invalidate_domains = RENDER 3039 * clflush (obj) 3040 * MI_FLUSH 3041 * drm_agp_chipset_flush 3042 */ 3043 static void 3044 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 3045 { 3046 struct drm_device *dev = obj->dev; 3047 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3048 uint32_t invalidate_domains = 0; 3049 uint32_t flush_domains = 0; 3050 uint32_t old_read_domains; 3051 3052 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 3053 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 3054 3055 intel_mark_busy(dev, obj); 3056 3057 #if WATCH_BUF 3058 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 3059 __func__, obj, 3060 obj->read_domains, obj->pending_read_domains, 3061 obj->write_domain, obj->pending_write_domain); 3062 #endif 3063 /* 3064 * If the object isn't moving to a new write domain, 3065 * let the object stay in multiple read domains 3066 */ 3067 if (obj->pending_write_domain == 0) 3068 obj->pending_read_domains |= obj->read_domains; 3069 else 3070 obj_priv->dirty = 1; 3071 3072 /* 3073 * Flush the current write domain if 3074 * the new read domains don't match. Invalidate 3075 * any read domains which differ from the old 3076 * write domain 3077 */ 3078 if (obj->write_domain && 3079 obj->write_domain != obj->pending_read_domains) { 3080 flush_domains |= obj->write_domain; 3081 invalidate_domains |= 3082 obj->pending_read_domains & ~obj->write_domain; 3083 } 3084 /* 3085 * Invalidate any read caches which may have 3086 * stale data. That is, any new read domains. 3087 */ 3088 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 3089 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 3090 #if WATCH_BUF 3091 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 3092 __func__, flush_domains, invalidate_domains); 3093 #endif 3094 i915_gem_clflush_object(obj); 3095 } 3096 3097 old_read_domains = obj->read_domains; 3098 3099 /* The actual obj->write_domain will be updated with 3100 * pending_write_domain after we emit the accumulated flush for all 3101 * of our domain changes in execbuffers (which clears objects' 3102 * write_domains). So if we have a current write domain that we 3103 * aren't changing, set pending_write_domain to that. 3104 */ 3105 if (flush_domains == 0 && obj->pending_write_domain == 0) 3106 obj->pending_write_domain = obj->write_domain; 3107 obj->read_domains = obj->pending_read_domains; 3108 3109 dev->invalidate_domains |= invalidate_domains; 3110 dev->flush_domains |= flush_domains; 3111 #if WATCH_BUF 3112 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 3113 __func__, 3114 obj->read_domains, obj->write_domain, 3115 dev->invalidate_domains, dev->flush_domains); 3116 #endif 3117 3118 trace_i915_gem_object_change_domain(obj, 3119 old_read_domains, 3120 obj->write_domain); 3121 } 3122 3123 /** 3124 * Moves the object from a partially CPU read to a full one. 
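 * That is, it clflushes any pages that page_cpu_valid has not yet marked
 * valid and then frees the per-page tracking array.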
3125 * 3126 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 3127 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 3128 */ 3129 static void 3130 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 3131 { 3132 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3133 3134 if (!obj_priv->page_cpu_valid) 3135 return; 3136 3137 /* If we're partially in the CPU read domain, finish moving it in. 3138 */ 3139 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 3140 int i; 3141 3142 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 3143 if (obj_priv->page_cpu_valid[i]) 3144 continue; 3145 drm_clflush_pages(obj_priv->pages + i, 1); 3146 } 3147 } 3148 3149 /* Free the page_cpu_valid mappings which are now stale, whether 3150 * or not we've got I915_GEM_DOMAIN_CPU. 3151 */ 3152 kfree(obj_priv->page_cpu_valid); 3153 obj_priv->page_cpu_valid = NULL; 3154 } 3155 3156 /** 3157 * Set the CPU read domain on a range of the object. 3158 * 3159 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 3160 * not entirely valid. The page_cpu_valid member of the object flags which 3161 * pages have been flushed, and will be respected by 3162 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 3163 * of the whole object. 3164 * 3165 * This function returns when the move is complete, including waiting on 3166 * flushes to occur. 3167 */ 3168 static int 3169 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 3170 uint64_t offset, uint64_t size) 3171 { 3172 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3173 uint32_t old_read_domains; 3174 int i, ret; 3175 3176 if (offset == 0 && size == obj->size) 3177 return i915_gem_object_set_to_cpu_domain(obj, 0); 3178 3179 i915_gem_object_flush_gpu_write_domain(obj); 3180 /* Wait on any GPU rendering and flushing to occur. */ 3181 ret = i915_gem_object_wait_rendering(obj); 3182 if (ret != 0) 3183 return ret; 3184 i915_gem_object_flush_gtt_write_domain(obj); 3185 3186 /* If we're already fully in the CPU read domain, we're done. */ 3187 if (obj_priv->page_cpu_valid == NULL && 3188 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 3189 return 0; 3190 3191 /* Otherwise, create/clear the per-page CPU read domain flag if we're 3192 * newly adding I915_GEM_DOMAIN_CPU 3193 */ 3194 if (obj_priv->page_cpu_valid == NULL) { 3195 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE, 3196 GFP_KERNEL); 3197 if (obj_priv->page_cpu_valid == NULL) 3198 return -ENOMEM; 3199 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 3200 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 3201 3202 /* Flush the cache on any pages that are still invalid from the CPU's 3203 * perspective. 3204 */ 3205 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 3206 i++) { 3207 if (obj_priv->page_cpu_valid[i]) 3208 continue; 3209 3210 drm_clflush_pages(obj_priv->pages + i, 1); 3211 3212 obj_priv->page_cpu_valid[i] = 1; 3213 } 3214 3215 /* It should now be out of any other write domains, and we can update 3216 * the domain values for our changes. 3217 */ 3218 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3219 3220 old_read_domains = obj->read_domains; 3221 obj->read_domains |= I915_GEM_DOMAIN_CPU; 3222 3223 trace_i915_gem_object_change_domain(obj, 3224 old_read_domains, 3225 obj->write_domain); 3226 3227 return 0; 3228 } 3229 3230 /** 3231 * Pin an object to the GTT and evaluate the relocations landing in it. 
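 *
 * Each relocation names a target object and asks for the dword at
 * reloc->offset inside this object to be patched; conceptually the loop
 * below does
 *
 *	*(u32 *)(this object + reloc->offset) = target gtt_offset + reloc->delta
 *
 * through a temporary GTT mapping, then records the target's offset in
 * reloc->presumed_offset so an unmoved target can be skipped next time.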
3232 */ 3233 static int 3234 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 3235 struct drm_file *file_priv, 3236 struct drm_i915_gem_exec_object2 *entry, 3237 struct drm_i915_gem_relocation_entry *relocs) 3238 { 3239 struct drm_device *dev = obj->dev; 3240 drm_i915_private_t *dev_priv = dev->dev_private; 3241 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3242 int i, ret; 3243 void __iomem *reloc_page; 3244 bool need_fence; 3245 3246 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && 3247 obj_priv->tiling_mode != I915_TILING_NONE; 3248 3249 /* Check fence reg constraints and rebind if necessary */ 3250 if (need_fence && !i915_obj_fenceable(dev, obj)) 3251 i915_gem_object_unbind(obj); 3252 3253 /* Choose the GTT offset for our buffer and put it there. */ 3254 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 3255 if (ret) 3256 return ret; 3257 3258 /* 3259 * Pre-965 chips need a fence register set up in order to 3260 * properly handle blits to/from tiled surfaces. 3261 */ 3262 if (need_fence) { 3263 ret = i915_gem_object_get_fence_reg(obj); 3264 if (ret != 0) { 3265 if (ret != -EBUSY && ret != -ERESTARTSYS) 3266 DRM_ERROR("Failure to install fence: %d\n", 3267 ret); 3268 i915_gem_object_unpin(obj); 3269 return ret; 3270 } 3271 } 3272 3273 entry->offset = obj_priv->gtt_offset; 3274 3275 /* Apply the relocations, using the GTT aperture to avoid cache 3276 * flushing requirements. 3277 */ 3278 for (i = 0; i < entry->relocation_count; i++) { 3279 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 3280 struct drm_gem_object *target_obj; 3281 struct drm_i915_gem_object *target_obj_priv; 3282 uint32_t reloc_val, reloc_offset; 3283 uint32_t __iomem *reloc_entry; 3284 3285 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 3286 reloc->target_handle); 3287 if (target_obj == NULL) { 3288 i915_gem_object_unpin(obj); 3289 return -EBADF; 3290 } 3291 target_obj_priv = target_obj->driver_private; 3292 3293 #if WATCH_RELOC 3294 DRM_INFO("%s: obj %p offset %08x target %d " 3295 "read %08x write %08x gtt %08x " 3296 "presumed %08x delta %08x\n", 3297 __func__, 3298 obj, 3299 (int) reloc->offset, 3300 (int) reloc->target_handle, 3301 (int) reloc->read_domains, 3302 (int) reloc->write_domain, 3303 (int) target_obj_priv->gtt_offset, 3304 (int) reloc->presumed_offset, 3305 reloc->delta); 3306 #endif 3307 3308 /* The target buffer should have appeared before us in the 3309 * exec_object list, so it should have a GTT space bound by now. 
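 * (Userspace orders the exec list so that relocation targets precede the
 * objects that reference them; the batch buffer itself always goes last.)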
3310 */ 3311 if (target_obj_priv->gtt_space == NULL) { 3312 DRM_ERROR("No GTT space found for object %d\n", 3313 reloc->target_handle); 3314 drm_gem_object_unreference(target_obj); 3315 i915_gem_object_unpin(obj); 3316 return -EINVAL; 3317 } 3318 3319 /* Validate that the target is in a valid r/w GPU domain */ 3320 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3321 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3322 DRM_ERROR("reloc with read/write CPU domains: " 3323 "obj %p target %d offset %d " 3324 "read %08x write %08x", 3325 obj, reloc->target_handle, 3326 (int) reloc->offset, 3327 reloc->read_domains, 3328 reloc->write_domain); 3329 drm_gem_object_unreference(target_obj); 3330 i915_gem_object_unpin(obj); 3331 return -EINVAL; 3332 } 3333 if (reloc->write_domain && target_obj->pending_write_domain && 3334 reloc->write_domain != target_obj->pending_write_domain) { 3335 DRM_ERROR("Write domain conflict: " 3336 "obj %p target %d offset %d " 3337 "new %08x old %08x\n", 3338 obj, reloc->target_handle, 3339 (int) reloc->offset, 3340 reloc->write_domain, 3341 target_obj->pending_write_domain); 3342 drm_gem_object_unreference(target_obj); 3343 i915_gem_object_unpin(obj); 3344 return -EINVAL; 3345 } 3346 3347 target_obj->pending_read_domains |= reloc->read_domains; 3348 target_obj->pending_write_domain |= reloc->write_domain; 3349 3350 /* If the relocation already has the right value in it, no 3351 * more work needs to be done. 3352 */ 3353 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3354 drm_gem_object_unreference(target_obj); 3355 continue; 3356 } 3357 3358 /* Check that the relocation address is valid... */ 3359 if (reloc->offset > obj->size - 4) { 3360 DRM_ERROR("Relocation beyond object bounds: " 3361 "obj %p target %d offset %d size %d.\n", 3362 obj, reloc->target_handle, 3363 (int) reloc->offset, (int) obj->size); 3364 drm_gem_object_unreference(target_obj); 3365 i915_gem_object_unpin(obj); 3366 return -EINVAL; 3367 } 3368 if (reloc->offset & 3) { 3369 DRM_ERROR("Relocation not 4-byte aligned: " 3370 "obj %p target %d offset %d.\n", 3371 obj, reloc->target_handle, 3372 (int) reloc->offset); 3373 drm_gem_object_unreference(target_obj); 3374 i915_gem_object_unpin(obj); 3375 return -EINVAL; 3376 } 3377 3378 /* and points to somewhere within the target object. */ 3379 if (reloc->delta >= target_obj->size) { 3380 DRM_ERROR("Relocation beyond target object bounds: " 3381 "obj %p target %d delta %d size %d.\n", 3382 obj, reloc->target_handle, 3383 (int) reloc->delta, (int) target_obj->size); 3384 drm_gem_object_unreference(target_obj); 3385 i915_gem_object_unpin(obj); 3386 return -EINVAL; 3387 } 3388 3389 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 3390 if (ret != 0) { 3391 drm_gem_object_unreference(target_obj); 3392 i915_gem_object_unpin(obj); 3393 return -EINVAL; 3394 } 3395 3396 /* Map the page containing the relocation we're going to 3397 * perform. 
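 * reloc_offset is an offset into the GTT aperture; masking off the low
 * PAGE_SIZE - 1 bits selects the page handed to io_mapping_map_atomic_wc(),
 * and the remainder indexes the dword within that temporary mapping.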
3398 */ 3399 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3400 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3401 (reloc_offset & 3402 ~(PAGE_SIZE - 1))); 3403 reloc_entry = (uint32_t __iomem *)(reloc_page + 3404 (reloc_offset & (PAGE_SIZE - 1))); 3405 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3406 3407 #if WATCH_BUF 3408 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 3409 obj, (unsigned int) reloc->offset, 3410 readl(reloc_entry), reloc_val); 3411 #endif 3412 writel(reloc_val, reloc_entry); 3413 io_mapping_unmap_atomic(reloc_page); 3414 3415 /* The updated presumed offset for this entry will be 3416 * copied back out to the user. 3417 */ 3418 reloc->presumed_offset = target_obj_priv->gtt_offset; 3419 3420 drm_gem_object_unreference(target_obj); 3421 } 3422 3423 #if WATCH_BUF 3424 if (0) 3425 i915_gem_dump_object(obj, 128, __func__, ~0); 3426 #endif 3427 return 0; 3428 } 3429 3430 /** Dispatch a batchbuffer to the ring 3431 */ 3432 static int 3433 i915_dispatch_gem_execbuffer(struct drm_device *dev, 3434 struct drm_i915_gem_execbuffer2 *exec, 3435 struct drm_clip_rect *cliprects, 3436 uint64_t exec_offset) 3437 { 3438 drm_i915_private_t *dev_priv = dev->dev_private; 3439 int nbox = exec->num_cliprects; 3440 int i = 0, count; 3441 uint32_t exec_start, exec_len; 3442 RING_LOCALS; 3443 3444 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3445 exec_len = (uint32_t) exec->batch_len; 3446 3447 trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno + 1); 3448 3449 count = nbox ? nbox : 1; 3450 3451 for (i = 0; i < count; i++) { 3452 if (i < nbox) { 3453 int ret = i915_emit_box(dev, cliprects, i, 3454 exec->DR1, exec->DR4); 3455 if (ret) 3456 return ret; 3457 } 3458 3459 if (IS_I830(dev) || IS_845G(dev)) { 3460 BEGIN_LP_RING(4); 3461 OUT_RING(MI_BATCH_BUFFER); 3462 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3463 OUT_RING(exec_start + exec_len - 4); 3464 OUT_RING(0); 3465 ADVANCE_LP_RING(); 3466 } else { 3467 BEGIN_LP_RING(2); 3468 if (IS_I965G(dev)) { 3469 OUT_RING(MI_BATCH_BUFFER_START | 3470 (2 << 6) | 3471 MI_BATCH_NON_SECURE_I965); 3472 OUT_RING(exec_start); 3473 } else { 3474 OUT_RING(MI_BATCH_BUFFER_START | 3475 (2 << 6)); 3476 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3477 } 3478 ADVANCE_LP_RING(); 3479 } 3480 } 3481 3482 /* XXX breadcrumb */ 3483 return 0; 3484 } 3485 3486 /* Throttle our rendering by waiting until the ring has completed our requests 3487 * emitted over 20 msec ago. 3488 * 3489 * Note that if we were to use the current jiffies each time around the loop, 3490 * we wouldn't escape the function with any frames outstanding if the time to 3491 * render a frame was over 20ms. 3492 * 3493 * This should get us reasonable parallelism between CPU and GPU but also 3494 * relatively low latency when blocking on a particular request to finish. 
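 *
 * As a rough example, a client submitting a new frame every ~16ms (60 fps)
 * can keep about one frame's worth of requests outstanding before this
 * starts putting it to sleep.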
3495 */ 3496 static int 3497 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3498 { 3499 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3500 int ret = 0; 3501 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3502 3503 mutex_lock(&dev->struct_mutex); 3504 while (!list_empty(&i915_file_priv->mm.request_list)) { 3505 struct drm_i915_gem_request *request; 3506 3507 request = list_first_entry(&i915_file_priv->mm.request_list, 3508 struct drm_i915_gem_request, 3509 client_list); 3510 3511 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3512 break; 3513 3514 ret = i915_wait_request(dev, request->seqno); 3515 if (ret != 0) 3516 break; 3517 } 3518 mutex_unlock(&dev->struct_mutex); 3519 3520 return ret; 3521 } 3522 3523 static int 3524 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, 3525 uint32_t buffer_count, 3526 struct drm_i915_gem_relocation_entry **relocs) 3527 { 3528 uint32_t reloc_count = 0, reloc_index = 0, i; 3529 int ret; 3530 3531 *relocs = NULL; 3532 for (i = 0; i < buffer_count; i++) { 3533 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3534 return -EINVAL; 3535 reloc_count += exec_list[i].relocation_count; 3536 } 3537 3538 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); 3539 if (*relocs == NULL) { 3540 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); 3541 return -ENOMEM; 3542 } 3543 3544 for (i = 0; i < buffer_count; i++) { 3545 struct drm_i915_gem_relocation_entry __user *user_relocs; 3546 3547 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3548 3549 ret = copy_from_user(&(*relocs)[reloc_index], 3550 user_relocs, 3551 exec_list[i].relocation_count * 3552 sizeof(**relocs)); 3553 if (ret != 0) { 3554 drm_free_large(*relocs); 3555 *relocs = NULL; 3556 return -EFAULT; 3557 } 3558 3559 reloc_index += exec_list[i].relocation_count; 3560 } 3561 3562 return 0; 3563 } 3564 3565 static int 3566 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, 3567 uint32_t buffer_count, 3568 struct drm_i915_gem_relocation_entry *relocs) 3569 { 3570 uint32_t reloc_count = 0, i; 3571 int ret = 0; 3572 3573 if (relocs == NULL) 3574 return 0; 3575 3576 for (i = 0; i < buffer_count; i++) { 3577 struct drm_i915_gem_relocation_entry __user *user_relocs; 3578 int unwritten; 3579 3580 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3581 3582 unwritten = copy_to_user(user_relocs, 3583 &relocs[reloc_count], 3584 exec_list[i].relocation_count * 3585 sizeof(*relocs)); 3586 3587 if (unwritten) { 3588 ret = -EFAULT; 3589 goto err; 3590 } 3591 3592 reloc_count += exec_list[i].relocation_count; 3593 } 3594 3595 err: 3596 drm_free_large(relocs); 3597 3598 return ret; 3599 } 3600 3601 static int 3602 i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, 3603 uint64_t exec_offset) 3604 { 3605 uint32_t exec_start, exec_len; 3606 3607 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3608 exec_len = (uint32_t) exec->batch_len; 3609 3610 if ((exec_start | exec_len) & 0x7) 3611 return -EINVAL; 3612 3613 if (!exec_start) 3614 return -EINVAL; 3615 3616 return 0; 3617 } 3618 3619 static int 3620 i915_gem_wait_for_pending_flip(struct drm_device *dev, 3621 struct drm_gem_object **object_list, 3622 int count) 3623 { 3624 drm_i915_private_t *dev_priv = dev->dev_private; 3625 struct drm_i915_gem_object *obj_priv; 3626 DEFINE_WAIT(wait); 3627 int i, ret = 0; 3628 3629 for (;;) { 3630 
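/* Standard prepare_to_wait()/schedule() loop: struct_mutex is dropped
 * while we sleep so other ioctls can make progress, and every object's
 * pending_flip count is re-checked after each wakeup.
 */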
prepare_to_wait(&dev_priv->pending_flip_queue, 3631 &wait, TASK_INTERRUPTIBLE); 3632 for (i = 0; i < count; i++) { 3633 obj_priv = object_list[i]->driver_private; 3634 if (atomic_read(&obj_priv->pending_flip) > 0) 3635 break; 3636 } 3637 if (i == count) 3638 break; 3639 3640 if (!signal_pending(current)) { 3641 mutex_unlock(&dev->struct_mutex); 3642 schedule(); 3643 mutex_lock(&dev->struct_mutex); 3644 continue; 3645 } 3646 ret = -ERESTARTSYS; 3647 break; 3648 } 3649 finish_wait(&dev_priv->pending_flip_queue, &wait); 3650 3651 return ret; 3652 } 3653 3654 int 3655 i915_gem_do_execbuffer(struct drm_device *dev, void *data, 3656 struct drm_file *file_priv, 3657 struct drm_i915_gem_execbuffer2 *args, 3658 struct drm_i915_gem_exec_object2 *exec_list) 3659 { 3660 drm_i915_private_t *dev_priv = dev->dev_private; 3661 struct drm_gem_object **object_list = NULL; 3662 struct drm_gem_object *batch_obj; 3663 struct drm_i915_gem_object *obj_priv; 3664 struct drm_clip_rect *cliprects = NULL; 3665 struct drm_i915_gem_relocation_entry *relocs = NULL; 3666 int ret = 0, ret2, i, pinned = 0; 3667 uint64_t exec_offset; 3668 uint32_t seqno, flush_domains, reloc_index; 3669 int pin_tries, flips; 3670 3671 #if WATCH_EXEC 3672 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3673 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3674 #endif 3675 3676 if (args->buffer_count < 1) { 3677 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3678 return -EINVAL; 3679 } 3680 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); 3681 if (object_list == NULL) { 3682 DRM_ERROR("Failed to allocate object list for %d buffers\n", 3683 args->buffer_count); 3684 ret = -ENOMEM; 3685 goto pre_mutex_err; 3686 } 3687 3688 if (args->num_cliprects != 0) { 3689 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), 3690 GFP_KERNEL); 3691 if (cliprects == NULL) { 3692 ret = -ENOMEM; 3693 goto pre_mutex_err; 3694 } 3695 3696 ret = copy_from_user(cliprects, 3697 (struct drm_clip_rect __user *) 3698 (uintptr_t) args->cliprects_ptr, 3699 sizeof(*cliprects) * args->num_cliprects); 3700 if (ret != 0) { 3701 DRM_ERROR("copy %d cliprects failed: %d\n", 3702 args->num_cliprects, ret); 3703 goto pre_mutex_err; 3704 } 3705 } 3706 3707 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3708 &relocs); 3709 if (ret != 0) 3710 goto pre_mutex_err; 3711 3712 mutex_lock(&dev->struct_mutex); 3713 3714 i915_verify_inactive(dev, __FILE__, __LINE__); 3715 3716 if (atomic_read(&dev_priv->mm.wedged)) { 3717 mutex_unlock(&dev->struct_mutex); 3718 ret = -EIO; 3719 goto pre_mutex_err; 3720 } 3721 3722 if (dev_priv->mm.suspended) { 3723 mutex_unlock(&dev->struct_mutex); 3724 ret = -EBUSY; 3725 goto pre_mutex_err; 3726 } 3727 3728 /* Look up object handles */ 3729 flips = 0; 3730 for (i = 0; i < args->buffer_count; i++) { 3731 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3732 exec_list[i].handle); 3733 if (object_list[i] == NULL) { 3734 DRM_ERROR("Invalid object handle %d at index %d\n", 3735 exec_list[i].handle, i); 3736 /* prevent error path from reading uninitialized data */ 3737 args->buffer_count = i + 1; 3738 ret = -EBADF; 3739 goto err; 3740 } 3741 3742 obj_priv = object_list[i]->driver_private; 3743 if (obj_priv->in_execbuffer) { 3744 DRM_ERROR("Object %p appears more than once in object list\n", 3745 object_list[i]); 3746 /* prevent error path from reading uninitialized data */ 3747 args->buffer_count = i + 1; 3748 ret = -EBADF; 3749 goto err; 3750 } 3751 obj_priv->in_execbuffer = 
true; 3752 flips += atomic_read(&obj_priv->pending_flip); 3753 } 3754 3755 if (flips > 0) { 3756 ret = i915_gem_wait_for_pending_flip(dev, object_list, 3757 args->buffer_count); 3758 if (ret) 3759 goto err; 3760 } 3761 3762 /* Pin and relocate */ 3763 for (pin_tries = 0; ; pin_tries++) { 3764 ret = 0; 3765 reloc_index = 0; 3766 3767 for (i = 0; i < args->buffer_count; i++) { 3768 object_list[i]->pending_read_domains = 0; 3769 object_list[i]->pending_write_domain = 0; 3770 ret = i915_gem_object_pin_and_relocate(object_list[i], 3771 file_priv, 3772 &exec_list[i], 3773 &relocs[reloc_index]); 3774 if (ret) 3775 break; 3776 pinned = i + 1; 3777 reloc_index += exec_list[i].relocation_count; 3778 } 3779 /* success */ 3780 if (ret == 0) 3781 break; 3782 3783 /* error other than GTT full, or we've already tried again */ 3784 if (ret != -ENOSPC || pin_tries >= 1) { 3785 if (ret != -ERESTARTSYS) { 3786 unsigned long long total_size = 0; 3787 for (i = 0; i < args->buffer_count; i++) 3788 total_size += object_list[i]->size; 3789 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n", 3790 pinned+1, args->buffer_count, 3791 total_size, ret); 3792 DRM_ERROR("%d objects [%d pinned], " 3793 "%d object bytes [%d pinned], " 3794 "%d/%d gtt bytes\n", 3795 atomic_read(&dev->object_count), 3796 atomic_read(&dev->pin_count), 3797 atomic_read(&dev->object_memory), 3798 atomic_read(&dev->pin_memory), 3799 atomic_read(&dev->gtt_memory), 3800 dev->gtt_total); 3801 } 3802 goto err; 3803 } 3804 3805 /* unpin all of our buffers */ 3806 for (i = 0; i < pinned; i++) 3807 i915_gem_object_unpin(object_list[i]); 3808 pinned = 0; 3809 3810 /* evict everyone we can from the aperture */ 3811 ret = i915_gem_evict_everything(dev); 3812 if (ret && ret != -ENOSPC) 3813 goto err; 3814 } 3815 3816 /* Set the pending read domains for the batch buffer to COMMAND */ 3817 batch_obj = object_list[args->buffer_count-1]; 3818 if (batch_obj->pending_write_domain) { 3819 DRM_ERROR("Attempting to use self-modifying batch buffer\n"); 3820 ret = -EINVAL; 3821 goto err; 3822 } 3823 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND; 3824 3825 /* Sanity check the batch buffer, prior to moving objects */ 3826 exec_offset = exec_list[args->buffer_count - 1].offset; 3827 ret = i915_gem_check_execbuffer (args, exec_offset); 3828 if (ret != 0) { 3829 DRM_ERROR("execbuf with invalid offset/length\n"); 3830 goto err; 3831 } 3832 3833 i915_verify_inactive(dev, __FILE__, __LINE__); 3834 3835 /* Zero the global flush/invalidate flags. 
These 3836 * will be modified as new domains are computed 3837 * for each object 3838 */ 3839 dev->invalidate_domains = 0; 3840 dev->flush_domains = 0; 3841 3842 for (i = 0; i < args->buffer_count; i++) { 3843 struct drm_gem_object *obj = object_list[i]; 3844 3845 /* Compute new gpu domains and update invalidate/flush */ 3846 i915_gem_object_set_to_gpu_domain(obj); 3847 } 3848 3849 i915_verify_inactive(dev, __FILE__, __LINE__); 3850 3851 if (dev->invalidate_domains | dev->flush_domains) { 3852 #if WATCH_EXEC 3853 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3854 __func__, 3855 dev->invalidate_domains, 3856 dev->flush_domains); 3857 #endif 3858 i915_gem_flush(dev, 3859 dev->invalidate_domains, 3860 dev->flush_domains); 3861 if (dev->flush_domains & I915_GEM_GPU_DOMAINS) 3862 (void)i915_add_request(dev, file_priv, 3863 dev->flush_domains); 3864 } 3865 3866 for (i = 0; i < args->buffer_count; i++) { 3867 struct drm_gem_object *obj = object_list[i]; 3868 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3869 uint32_t old_write_domain = obj->write_domain; 3870 3871 obj->write_domain = obj->pending_write_domain; 3872 if (obj->write_domain) 3873 list_move_tail(&obj_priv->gpu_write_list, 3874 &dev_priv->mm.gpu_write_list); 3875 else 3876 list_del_init(&obj_priv->gpu_write_list); 3877 3878 trace_i915_gem_object_change_domain(obj, 3879 obj->read_domains, 3880 old_write_domain); 3881 } 3882 3883 i915_verify_inactive(dev, __FILE__, __LINE__); 3884 3885 #if WATCH_COHERENCY 3886 for (i = 0; i < args->buffer_count; i++) { 3887 i915_gem_object_check_coherency(object_list[i], 3888 exec_list[i].handle); 3889 } 3890 #endif 3891 3892 #if WATCH_EXEC 3893 i915_gem_dump_object(batch_obj, 3894 args->batch_len, 3895 __func__, 3896 ~0); 3897 #endif 3898 3899 /* Exec the batchbuffer */ 3900 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); 3901 if (ret) { 3902 DRM_ERROR("dispatch failed %d\n", ret); 3903 goto err; 3904 } 3905 3906 /* 3907 * Ensure that the commands in the batch buffer are 3908 * finished before the interrupt fires 3909 */ 3910 flush_domains = i915_retire_commands(dev); 3911 3912 i915_verify_inactive(dev, __FILE__, __LINE__); 3913 3914 /* 3915 * Get a seqno representing the execution of the current buffer, 3916 * which we can wait on. We would like to mitigate these interrupts, 3917 * likely by only creating seqnos occasionally (so that we have 3918 * *some* interrupts representing completion of buffers that we can 3919 * wait on when trying to clear up gtt space). 3920 */ 3921 seqno = i915_add_request(dev, file_priv, flush_domains); 3922 BUG_ON(seqno == 0); 3923 for (i = 0; i < args->buffer_count; i++) { 3924 struct drm_gem_object *obj = object_list[i]; 3925 3926 i915_gem_object_move_to_active(obj, seqno); 3927 #if WATCH_LRU 3928 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3929 #endif 3930 } 3931 #if WATCH_LRU 3932 i915_dump_lru(dev, __func__); 3933 #endif 3934 3935 i915_verify_inactive(dev, __FILE__, __LINE__); 3936 3937 err: 3938 for (i = 0; i < pinned; i++) 3939 i915_gem_object_unpin(object_list[i]); 3940 3941 for (i = 0; i < args->buffer_count; i++) { 3942 if (object_list[i]) { 3943 obj_priv = object_list[i]->driver_private; 3944 obj_priv->in_execbuffer = false; 3945 } 3946 drm_gem_object_unreference(object_list[i]); 3947 } 3948 3949 mutex_unlock(&dev->struct_mutex); 3950 3951 pre_mutex_err: 3952 /* Copy the updated relocations out regardless of current error 3953 * state. 
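* The presumed offsets in these entries were updated as the objects were pinned above.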
Failure to update the relocs would mean that the next 3954 * time userland calls execbuf, it would do so with presumed offset 3955 * state that didn't match the actual object state. 3956 */ 3957 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3958 relocs); 3959 if (ret2 != 0) { 3960 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3961 3962 if (ret == 0) 3963 ret = ret2; 3964 } 3965 3966 drm_free_large(object_list); 3967 kfree(cliprects); 3968 3969 return ret; 3970 } 3971 3972 /* 3973 * Legacy execbuffer just creates an exec2 list from the original exec object 3974 * list array and passes it to the real function. 3975 */ 3976 int 3977 i915_gem_execbuffer(struct drm_device *dev, void *data, 3978 struct drm_file *file_priv) 3979 { 3980 struct drm_i915_gem_execbuffer *args = data; 3981 struct drm_i915_gem_execbuffer2 exec2; 3982 struct drm_i915_gem_exec_object *exec_list = NULL; 3983 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 3984 int ret, i; 3985 3986 #if WATCH_EXEC 3987 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3988 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3989 #endif 3990 3991 if (args->buffer_count < 1) { 3992 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3993 return -EINVAL; 3994 } 3995 3996 /* Copy in the exec list from userland */ 3997 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); 3998 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); 3999 if (exec_list == NULL || exec2_list == NULL) { 4000 DRM_ERROR("Failed to allocate exec list for %d buffers\n", 4001 args->buffer_count); 4002 drm_free_large(exec_list); 4003 drm_free_large(exec2_list); 4004 return -ENOMEM; 4005 } 4006 ret = copy_from_user(exec_list, 4007 (struct drm_i915_relocation_entry __user *) 4008 (uintptr_t) args->buffers_ptr, 4009 sizeof(*exec_list) * args->buffer_count); 4010 if (ret != 0) { 4011 DRM_ERROR("copy %d exec entries failed %d\n", 4012 args->buffer_count, ret); 4013 drm_free_large(exec_list); 4014 drm_free_large(exec2_list); 4015 return -EFAULT; 4016 } 4017 4018 for (i = 0; i < args->buffer_count; i++) { 4019 exec2_list[i].handle = exec_list[i].handle; 4020 exec2_list[i].relocation_count = exec_list[i].relocation_count; 4021 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; 4022 exec2_list[i].alignment = exec_list[i].alignment; 4023 exec2_list[i].offset = exec_list[i].offset; 4024 if (!IS_I965G(dev)) 4025 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; 4026 else 4027 exec2_list[i].flags = 0; 4028 } 4029 4030 exec2.buffers_ptr = args->buffers_ptr; 4031 exec2.buffer_count = args->buffer_count; 4032 exec2.batch_start_offset = args->batch_start_offset; 4033 exec2.batch_len = args->batch_len; 4034 exec2.DR1 = args->DR1; 4035 exec2.DR4 = args->DR4; 4036 exec2.num_cliprects = args->num_cliprects; 4037 exec2.cliprects_ptr = args->cliprects_ptr; 4038 exec2.flags = 0; 4039 4040 ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); 4041 if (!ret) { 4042 /* Copy the new buffer offsets back to the user's exec list. */ 4043 for (i = 0; i < args->buffer_count; i++) 4044 exec_list[i].offset = exec2_list[i].offset; 4045 /* ... 
and back out to userspace */ 4046 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 4047 (uintptr_t) args->buffers_ptr, 4048 exec_list, 4049 sizeof(*exec_list) * args->buffer_count); 4050 if (ret) { 4051 ret = -EFAULT; 4052 DRM_ERROR("failed to copy %d exec entries " 4053 "back to user (%d)\n", 4054 args->buffer_count, ret); 4055 } 4056 } 4057 4058 drm_free_large(exec_list); 4059 drm_free_large(exec2_list); 4060 return ret; 4061 } 4062 4063 int 4064 i915_gem_execbuffer2(struct drm_device *dev, void *data, 4065 struct drm_file *file_priv) 4066 { 4067 struct drm_i915_gem_execbuffer2 *args = data; 4068 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 4069 int ret; 4070 4071 #if WATCH_EXEC 4072 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 4073 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 4074 #endif 4075 4076 if (args->buffer_count < 1) { 4077 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); 4078 return -EINVAL; 4079 } 4080 4081 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); 4082 if (exec2_list == NULL) { 4083 DRM_ERROR("Failed to allocate exec list for %d buffers\n", 4084 args->buffer_count); 4085 return -ENOMEM; 4086 } 4087 ret = copy_from_user(exec2_list, 4088 (struct drm_i915_relocation_entry __user *) 4089 (uintptr_t) args->buffers_ptr, 4090 sizeof(*exec2_list) * args->buffer_count); 4091 if (ret != 0) { 4092 DRM_ERROR("copy %d exec entries failed %d\n", 4093 args->buffer_count, ret); 4094 drm_free_large(exec2_list); 4095 return -EFAULT; 4096 } 4097 4098 ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); 4099 if (!ret) { 4100 /* Copy the new buffer offsets back to the user's exec list. */ 4101 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 4102 (uintptr_t) args->buffers_ptr, 4103 exec2_list, 4104 sizeof(*exec2_list) * args->buffer_count); 4105 if (ret) { 4106 ret = -EFAULT; 4107 DRM_ERROR("failed to copy %d exec entries " 4108 "back to user (%d)\n", 4109 args->buffer_count, ret); 4110 } 4111 } 4112 4113 drm_free_large(exec2_list); 4114 return ret; 4115 } 4116 4117 int 4118 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 4119 { 4120 struct drm_device *dev = obj->dev; 4121 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4122 int ret; 4123 4124 i915_verify_inactive(dev, __FILE__, __LINE__); 4125 if (obj_priv->gtt_space == NULL) { 4126 ret = i915_gem_object_bind_to_gtt(obj, alignment); 4127 if (ret) 4128 return ret; 4129 } 4130 4131 obj_priv->pin_count++; 4132 4133 /* If the object is not active and not pending a flush, 4134 * remove it from the inactive list 4135 */ 4136 if (obj_priv->pin_count == 1) { 4137 atomic_inc(&dev->pin_count); 4138 atomic_add(obj->size, &dev->pin_memory); 4139 if (!obj_priv->active && 4140 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 && 4141 !list_empty(&obj_priv->list)) 4142 list_del_init(&obj_priv->list); 4143 } 4144 i915_verify_inactive(dev, __FILE__, __LINE__); 4145 4146 return 0; 4147 } 4148 4149 void 4150 i915_gem_object_unpin(struct drm_gem_object *obj) 4151 { 4152 struct drm_device *dev = obj->dev; 4153 drm_i915_private_t *dev_priv = dev->dev_private; 4154 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4155 4156 i915_verify_inactive(dev, __FILE__, __LINE__); 4157 obj_priv->pin_count--; 4158 BUG_ON(obj_priv->pin_count < 0); 4159 BUG_ON(obj_priv->gtt_space == NULL); 4160 4161 /* If the object is no longer pinned, and is 4162 * neither active nor being flushed, then stick it on 4163 * the inactive list 
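* where the eviction code can reclaim its GTT space if needed.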
4164 */ 4165 if (obj_priv->pin_count == 0) { 4166 if (!obj_priv->active && 4167 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 4168 list_move_tail(&obj_priv->list, 4169 &dev_priv->mm.inactive_list); 4170 atomic_dec(&dev->pin_count); 4171 atomic_sub(obj->size, &dev->pin_memory); 4172 } 4173 i915_verify_inactive(dev, __FILE__, __LINE__); 4174 } 4175 4176 int 4177 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 4178 struct drm_file *file_priv) 4179 { 4180 struct drm_i915_gem_pin *args = data; 4181 struct drm_gem_object *obj; 4182 struct drm_i915_gem_object *obj_priv; 4183 int ret; 4184 4185 mutex_lock(&dev->struct_mutex); 4186 4187 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4188 if (obj == NULL) { 4189 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 4190 args->handle); 4191 mutex_unlock(&dev->struct_mutex); 4192 return -EBADF; 4193 } 4194 obj_priv = obj->driver_private; 4195 4196 if (obj_priv->madv != I915_MADV_WILLNEED) { 4197 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 4198 drm_gem_object_unreference(obj); 4199 mutex_unlock(&dev->struct_mutex); 4200 return -EINVAL; 4201 } 4202 4203 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 4204 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4205 args->handle); 4206 drm_gem_object_unreference(obj); 4207 mutex_unlock(&dev->struct_mutex); 4208 return -EINVAL; 4209 } 4210 4211 obj_priv->user_pin_count++; 4212 obj_priv->pin_filp = file_priv; 4213 if (obj_priv->user_pin_count == 1) { 4214 ret = i915_gem_object_pin(obj, args->alignment); 4215 if (ret != 0) { 4216 drm_gem_object_unreference(obj); 4217 mutex_unlock(&dev->struct_mutex); 4218 return ret; 4219 } 4220 } 4221 4222 /* XXX - flush the CPU caches for pinned objects 4223 * as the X server doesn't manage domains yet 4224 */ 4225 i915_gem_object_flush_cpu_write_domain(obj); 4226 args->offset = obj_priv->gtt_offset; 4227 drm_gem_object_unreference(obj); 4228 mutex_unlock(&dev->struct_mutex); 4229 4230 return 0; 4231 } 4232 4233 int 4234 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4235 struct drm_file *file_priv) 4236 { 4237 struct drm_i915_gem_pin *args = data; 4238 struct drm_gem_object *obj; 4239 struct drm_i915_gem_object *obj_priv; 4240 4241 mutex_lock(&dev->struct_mutex); 4242 4243 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4244 if (obj == NULL) { 4245 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 4246 args->handle); 4247 mutex_unlock(&dev->struct_mutex); 4248 return -EBADF; 4249 } 4250 4251 obj_priv = obj->driver_private; 4252 if (obj_priv->pin_filp != file_priv) { 4253 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 4254 args->handle); 4255 drm_gem_object_unreference(obj); 4256 mutex_unlock(&dev->struct_mutex); 4257 return -EINVAL; 4258 } 4259 obj_priv->user_pin_count--; 4260 if (obj_priv->user_pin_count == 0) { 4261 obj_priv->pin_filp = NULL; 4262 i915_gem_object_unpin(obj); 4263 } 4264 4265 drm_gem_object_unreference(obj); 4266 mutex_unlock(&dev->struct_mutex); 4267 return 0; 4268 } 4269 4270 int 4271 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4272 struct drm_file *file_priv) 4273 { 4274 struct drm_i915_gem_busy *args = data; 4275 struct drm_gem_object *obj; 4276 struct drm_i915_gem_object *obj_priv; 4277 4278 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4279 if (obj == NULL) { 4280 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 4281 args->handle); 4282 return -EBADF; 4283 } 4284 4285 mutex_lock(&dev->struct_mutex); 4286 /* Update the 
active list for the hardware's current position. 4287 * Otherwise this only updates on a delayed timer or when irqs are 4288 * actually unmasked, and our working set ends up being larger than 4289 * required. 4290 */ 4291 i915_gem_retire_requests(dev); 4292 4293 obj_priv = obj->driver_private; 4294 /* Don't count being on the flushing list against the object being 4295 * done. Otherwise, a buffer left on the flushing list but not getting 4296 * flushed (because nobody's flushing that domain) won't ever return 4297 * unbusy and get reused by libdrm's bo cache. The other expected 4298 * consumer of this interface, OpenGL's occlusion queries, also specs 4299 * that the objects get unbusy "eventually" without any interference. 4300 */ 4301 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 4302 4303 drm_gem_object_unreference(obj); 4304 mutex_unlock(&dev->struct_mutex); 4305 return 0; 4306 } 4307 4308 int 4309 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4310 struct drm_file *file_priv) 4311 { 4312 return i915_gem_ring_throttle(dev, file_priv); 4313 } 4314 4315 int 4316 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4317 struct drm_file *file_priv) 4318 { 4319 struct drm_i915_gem_madvise *args = data; 4320 struct drm_gem_object *obj; 4321 struct drm_i915_gem_object *obj_priv; 4322 4323 switch (args->madv) { 4324 case I915_MADV_DONTNEED: 4325 case I915_MADV_WILLNEED: 4326 break; 4327 default: 4328 return -EINVAL; 4329 } 4330 4331 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 4332 if (obj == NULL) { 4333 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", 4334 args->handle); 4335 return -EBADF; 4336 } 4337 4338 mutex_lock(&dev->struct_mutex); 4339 obj_priv = obj->driver_private; 4340 4341 if (obj_priv->pin_count) { 4342 drm_gem_object_unreference(obj); 4343 mutex_unlock(&dev->struct_mutex); 4344 4345 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); 4346 return -EINVAL; 4347 } 4348 4349 if (obj_priv->madv != __I915_MADV_PURGED) 4350 obj_priv->madv = args->madv; 4351 4352 /* if the object is no longer bound, discard its backing storage */ 4353 if (i915_gem_object_is_purgeable(obj_priv) && 4354 obj_priv->gtt_space == NULL) 4355 i915_gem_object_truncate(obj); 4356 4357 args->retained = obj_priv->madv != __I915_MADV_PURGED; 4358 4359 drm_gem_object_unreference(obj); 4360 mutex_unlock(&dev->struct_mutex); 4361 4362 return 0; 4363 } 4364 4365 int i915_gem_init_object(struct drm_gem_object *obj) 4366 { 4367 struct drm_i915_gem_object *obj_priv; 4368 4369 obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL); 4370 if (obj_priv == NULL) 4371 return -ENOMEM; 4372 4373 /* 4374 * We've just allocated pages from the kernel, 4375 * so they've just been written by the CPU with 4376 * zeros. They'll need to be clflushed before we 4377 * use them with the GPU. 
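* Hence both the read and write domains start out as CPU.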
4378 */ 4379 obj->write_domain = I915_GEM_DOMAIN_CPU; 4380 obj->read_domains = I915_GEM_DOMAIN_CPU; 4381 4382 obj_priv->agp_type = AGP_USER_MEMORY; 4383 4384 obj->driver_private = obj_priv; 4385 obj_priv->obj = obj; 4386 obj_priv->fence_reg = I915_FENCE_REG_NONE; 4387 INIT_LIST_HEAD(&obj_priv->list); 4388 INIT_LIST_HEAD(&obj_priv->gpu_write_list); 4389 INIT_LIST_HEAD(&obj_priv->fence_list); 4390 obj_priv->madv = I915_MADV_WILLNEED; 4391 4392 trace_i915_gem_object_create(obj); 4393 4394 return 0; 4395 } 4396 4397 void i915_gem_free_object(struct drm_gem_object *obj) 4398 { 4399 struct drm_device *dev = obj->dev; 4400 struct drm_i915_gem_object *obj_priv = obj->driver_private; 4401 4402 trace_i915_gem_object_destroy(obj); 4403 4404 while (obj_priv->pin_count > 0) 4405 i915_gem_object_unpin(obj); 4406 4407 if (obj_priv->phys_obj) 4408 i915_gem_detach_phys_object(dev, obj); 4409 4410 i915_gem_object_unbind(obj); 4411 4412 if (obj_priv->mmap_offset) 4413 i915_gem_free_mmap_offset(obj); 4414 4415 kfree(obj_priv->page_cpu_valid); 4416 kfree(obj_priv->bit_17); 4417 kfree(obj->driver_private); 4418 } 4419 4420 /** Unbinds all inactive objects. */ 4421 static int 4422 i915_gem_evict_from_inactive_list(struct drm_device *dev) 4423 { 4424 drm_i915_private_t *dev_priv = dev->dev_private; 4425 4426 while (!list_empty(&dev_priv->mm.inactive_list)) { 4427 struct drm_gem_object *obj; 4428 int ret; 4429 4430 obj = list_first_entry(&dev_priv->mm.inactive_list, 4431 struct drm_i915_gem_object, 4432 list)->obj; 4433 4434 ret = i915_gem_object_unbind(obj); 4435 if (ret != 0) { 4436 DRM_ERROR("Error unbinding object: %d\n", ret); 4437 return ret; 4438 } 4439 } 4440 4441 return 0; 4442 } 4443 4444 int 4445 i915_gem_idle(struct drm_device *dev) 4446 { 4447 drm_i915_private_t *dev_priv = dev->dev_private; 4448 uint32_t seqno, cur_seqno, last_seqno; 4449 int stuck, ret; 4450 4451 mutex_lock(&dev->struct_mutex); 4452 4453 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 4454 mutex_unlock(&dev->struct_mutex); 4455 return 0; 4456 } 4457 4458 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4459 * We need to replace this with a semaphore, or something. 
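* While mm.suspended is set, execbuf bails out early with -EBUSY.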
4460 */ 4461 dev_priv->mm.suspended = 1; 4462 del_timer(&dev_priv->hangcheck_timer); 4463 4464 /* Cancel the retire work handler, wait for it to finish if running 4465 */ 4466 mutex_unlock(&dev->struct_mutex); 4467 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4468 mutex_lock(&dev->struct_mutex); 4469 4470 i915_kernel_lost_context(dev); 4471 4472 /* Flush the GPU along with all non-CPU write domains 4473 */ 4474 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 4475 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); 4476 4477 if (seqno == 0) { 4478 mutex_unlock(&dev->struct_mutex); 4479 return -ENOMEM; 4480 } 4481 4482 dev_priv->mm.waiting_gem_seqno = seqno; 4483 last_seqno = 0; 4484 stuck = 0; 4485 for (;;) { 4486 cur_seqno = i915_get_gem_seqno(dev); 4487 if (i915_seqno_passed(cur_seqno, seqno)) 4488 break; 4489 if (last_seqno == cur_seqno) { 4490 if (stuck++ > 100) { 4491 DRM_ERROR("hardware wedged\n"); 4492 atomic_set(&dev_priv->mm.wedged, 1); 4493 DRM_WAKEUP(&dev_priv->irq_queue); 4494 break; 4495 } 4496 } 4497 msleep(10); 4498 last_seqno = cur_seqno; 4499 } 4500 dev_priv->mm.waiting_gem_seqno = 0; 4501 4502 i915_gem_retire_requests(dev); 4503 4504 spin_lock(&dev_priv->mm.active_list_lock); 4505 if (!atomic_read(&dev_priv->mm.wedged)) { 4506 /* Active and flushing should now be empty as we've 4507 * waited for a sequence higher than any pending execbuffer 4508 */ 4509 WARN_ON(!list_empty(&dev_priv->mm.active_list)); 4510 WARN_ON(!list_empty(&dev_priv->mm.flushing_list)); 4511 /* Request should now be empty as we've also waited 4512 * for the last request in the list 4513 */ 4514 WARN_ON(!list_empty(&dev_priv->mm.request_list)); 4515 } 4516 4517 /* Empty the active and flushing lists to inactive. If there's 4518 * anything left at this point, it means that we're wedged and 4519 * nothing good's going to happen by leaving them there. So strip 4520 * the GPU domains and just stuff them onto inactive. 4521 */ 4522 while (!list_empty(&dev_priv->mm.active_list)) { 4523 struct drm_gem_object *obj; 4524 uint32_t old_write_domain; 4525 4526 obj = list_first_entry(&dev_priv->mm.active_list, 4527 struct drm_i915_gem_object, 4528 list)->obj; 4529 old_write_domain = obj->write_domain; 4530 obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 4531 i915_gem_object_move_to_inactive(obj); 4532 4533 trace_i915_gem_object_change_domain(obj, 4534 obj->read_domains, 4535 old_write_domain); 4536 } 4537 spin_unlock(&dev_priv->mm.active_list_lock); 4538 4539 while (!list_empty(&dev_priv->mm.flushing_list)) { 4540 struct drm_gem_object *obj; 4541 uint32_t old_write_domain; 4542 4543 obj = list_first_entry(&dev_priv->mm.flushing_list, 4544 struct drm_i915_gem_object, 4545 list)->obj; 4546 old_write_domain = obj->write_domain; 4547 obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 4548 i915_gem_object_move_to_inactive(obj); 4549 4550 trace_i915_gem_object_change_domain(obj, 4551 obj->read_domains, 4552 old_write_domain); 4553 } 4554 4555 4556 /* Move all inactive buffers out of the GTT. 
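* With the active and flushing lists drained above, this unbinds every inactive object and frees its GTT space.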
*/ 4557 ret = i915_gem_evict_from_inactive_list(dev); 4558 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 4559 if (ret) { 4560 mutex_unlock(&dev->struct_mutex); 4561 return ret; 4562 } 4563 4564 i915_gem_cleanup_ringbuffer(dev); 4565 mutex_unlock(&dev->struct_mutex); 4566 4567 return 0; 4568 } 4569 4570 static int 4571 i915_gem_init_hws(struct drm_device *dev) 4572 { 4573 drm_i915_private_t *dev_priv = dev->dev_private; 4574 struct drm_gem_object *obj; 4575 struct drm_i915_gem_object *obj_priv; 4576 int ret; 4577 4578 /* If we need a physical address for the status page, it's already 4579 * initialized at driver load time. 4580 */ 4581 if (!I915_NEED_GFX_HWS(dev)) 4582 return 0; 4583 4584 obj = drm_gem_object_alloc(dev, 4096); 4585 if (obj == NULL) { 4586 DRM_ERROR("Failed to allocate status page\n"); 4587 return -ENOMEM; 4588 } 4589 obj_priv = obj->driver_private; 4590 obj_priv->agp_type = AGP_USER_CACHED_MEMORY; 4591 4592 ret = i915_gem_object_pin(obj, 4096); 4593 if (ret != 0) { 4594 drm_gem_object_unreference(obj); 4595 return ret; 4596 } 4597 4598 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 4599 4600 dev_priv->hw_status_page = kmap(obj_priv->pages[0]); 4601 if (dev_priv->hw_status_page == NULL) { 4602 DRM_ERROR("Failed to map status page.\n"); 4603 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4604 i915_gem_object_unpin(obj); 4605 drm_gem_object_unreference(obj); 4606 return -EINVAL; 4607 } 4608 dev_priv->hws_obj = obj; 4609 memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 4610 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 4611 I915_READ(HWS_PGA); /* posting read */ 4612 DRM_DEBUG_DRIVER("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 4613 4614 return 0; 4615 } 4616 4617 static void 4618 i915_gem_cleanup_hws(struct drm_device *dev) 4619 { 4620 drm_i915_private_t *dev_priv = dev->dev_private; 4621 struct drm_gem_object *obj; 4622 struct drm_i915_gem_object *obj_priv; 4623 4624 if (dev_priv->hws_obj == NULL) 4625 return; 4626 4627 obj = dev_priv->hws_obj; 4628 obj_priv = obj->driver_private; 4629 4630 kunmap(obj_priv->pages[0]); 4631 i915_gem_object_unpin(obj); 4632 drm_gem_object_unreference(obj); 4633 dev_priv->hws_obj = NULL; 4634 4635 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 4636 dev_priv->hw_status_page = NULL; 4637 4638 /* Write high address into HWS_PGA when disabling. */ 4639 I915_WRITE(HWS_PGA, 0x1ffff000); 4640 } 4641 4642 int 4643 i915_gem_init_ringbuffer(struct drm_device *dev) 4644 { 4645 drm_i915_private_t *dev_priv = dev->dev_private; 4646 struct drm_gem_object *obj; 4647 struct drm_i915_gem_object *obj_priv; 4648 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 4649 int ret; 4650 u32 head; 4651 4652 ret = i915_gem_init_hws(dev); 4653 if (ret != 0) 4654 return ret; 4655 4656 obj = drm_gem_object_alloc(dev, 128 * 1024); 4657 if (obj == NULL) { 4658 DRM_ERROR("Failed to allocate ringbuffer\n"); 4659 i915_gem_cleanup_hws(dev); 4660 return -ENOMEM; 4661 } 4662 obj_priv = obj->driver_private; 4663 4664 ret = i915_gem_object_pin(obj, 4096); 4665 if (ret != 0) { 4666 drm_gem_object_unreference(obj); 4667 i915_gem_cleanup_hws(dev); 4668 return ret; 4669 } 4670 4671 /* Set up the kernel mapping for the ring. 
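* The buffer is pinned in the GTT, so it is ioremapped write-combined through the aperture.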
*/ 4672 ring->Size = obj->size; 4673 4674 ring->map.offset = dev->agp->base + obj_priv->gtt_offset; 4675 ring->map.size = obj->size; 4676 ring->map.type = 0; 4677 ring->map.flags = 0; 4678 ring->map.mtrr = 0; 4679 4680 drm_core_ioremap_wc(&ring->map, dev); 4681 if (ring->map.handle == NULL) { 4682 DRM_ERROR("Failed to map ringbuffer.\n"); 4683 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 4684 i915_gem_object_unpin(obj); 4685 drm_gem_object_unreference(obj); 4686 i915_gem_cleanup_hws(dev); 4687 return -EINVAL; 4688 } 4689 ring->ring_obj = obj; 4690 ring->virtual_start = ring->map.handle; 4691 4692 /* Stop the ring if it's running. */ 4693 I915_WRITE(PRB0_CTL, 0); 4694 I915_WRITE(PRB0_TAIL, 0); 4695 I915_WRITE(PRB0_HEAD, 0); 4696 4697 /* Initialize the ring. */ 4698 I915_WRITE(PRB0_START, obj_priv->gtt_offset); 4699 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4700 4701 /* G45 ring initialization fails to reset head to zero */ 4702 if (head != 0) { 4703 DRM_ERROR("Ring head not reset to zero " 4704 "ctl %08x head %08x tail %08x start %08x\n", 4705 I915_READ(PRB0_CTL), 4706 I915_READ(PRB0_HEAD), 4707 I915_READ(PRB0_TAIL), 4708 I915_READ(PRB0_START)); 4709 I915_WRITE(PRB0_HEAD, 0); 4710 4711 DRM_ERROR("Ring head forced to zero " 4712 "ctl %08x head %08x tail %08x start %08x\n", 4713 I915_READ(PRB0_CTL), 4714 I915_READ(PRB0_HEAD), 4715 I915_READ(PRB0_TAIL), 4716 I915_READ(PRB0_START)); 4717 } 4718 4719 I915_WRITE(PRB0_CTL, 4720 ((obj->size - 4096) & RING_NR_PAGES) | 4721 RING_NO_REPORT | 4722 RING_VALID); 4723 4724 head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4725 4726 /* If the head is still not zero, the ring is dead */ 4727 if (head != 0) { 4728 DRM_ERROR("Ring initialization failed " 4729 "ctl %08x head %08x tail %08x start %08x\n", 4730 I915_READ(PRB0_CTL), 4731 I915_READ(PRB0_HEAD), 4732 I915_READ(PRB0_TAIL), 4733 I915_READ(PRB0_START)); 4734 return -EIO; 4735 } 4736 4737 /* Update our cache of the ring state */ 4738 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4739 i915_kernel_lost_context(dev); 4740 else { 4741 ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; 4742 ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; 4743 ring->space = ring->head - (ring->tail + 8); 4744 if (ring->space < 0) 4745 ring->space += ring->Size; 4746 } 4747 4748 return 0; 4749 } 4750 4751 void 4752 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4753 { 4754 drm_i915_private_t *dev_priv = dev->dev_private; 4755 4756 if (dev_priv->ring.ring_obj == NULL) 4757 return; 4758 4759 drm_core_ioremapfree(&dev_priv->ring.map, dev); 4760 4761 i915_gem_object_unpin(dev_priv->ring.ring_obj); 4762 drm_gem_object_unreference(dev_priv->ring.ring_obj); 4763 dev_priv->ring.ring_obj = NULL; 4764 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 4765 4766 i915_gem_cleanup_hws(dev); 4767 } 4768 4769 int 4770 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4771 struct drm_file *file_priv) 4772 { 4773 drm_i915_private_t *dev_priv = dev->dev_private; 4774 int ret; 4775 4776 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4777 return 0; 4778 4779 if (atomic_read(&dev_priv->mm.wedged)) { 4780 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4781 atomic_set(&dev_priv->mm.wedged, 0); 4782 } 4783 4784 mutex_lock(&dev->struct_mutex); 4785 dev_priv->mm.suspended = 0; 4786 4787 ret = i915_gem_init_ringbuffer(dev); 4788 if (ret != 0) { 4789 mutex_unlock(&dev->struct_mutex); 4790 return ret; 4791 } 4792 4793 spin_lock(&dev_priv->mm.active_list_lock); 4794 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 4795 
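/* The remaining lists are checked outside the active list lock; all must still be empty here. */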
spin_unlock(&dev_priv->mm.active_list_lock); 4796 4797 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 4798 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 4799 BUG_ON(!list_empty(&dev_priv->mm.request_list)); 4800 mutex_unlock(&dev->struct_mutex); 4801 4802 drm_irq_install(dev); 4803 4804 return 0; 4805 } 4806 4807 int 4808 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4809 struct drm_file *file_priv) 4810 { 4811 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4812 return 0; 4813 4814 drm_irq_uninstall(dev); 4815 return i915_gem_idle(dev); 4816 } 4817 4818 void 4819 i915_gem_lastclose(struct drm_device *dev) 4820 { 4821 int ret; 4822 4823 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4824 return; 4825 4826 ret = i915_gem_idle(dev); 4827 if (ret) 4828 DRM_ERROR("failed to idle hardware: %d\n", ret); 4829 } 4830 4831 void 4832 i915_gem_load(struct drm_device *dev) 4833 { 4834 int i; 4835 drm_i915_private_t *dev_priv = dev->dev_private; 4836 4837 spin_lock_init(&dev_priv->mm.active_list_lock); 4838 INIT_LIST_HEAD(&dev_priv->mm.active_list); 4839 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 4840 INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list); 4841 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4842 INIT_LIST_HEAD(&dev_priv->mm.request_list); 4843 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4844 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4845 i915_gem_retire_work_handler); 4846 dev_priv->mm.next_gem_seqno = 1; 4847 4848 spin_lock(&shrink_list_lock); 4849 list_add(&dev_priv->mm.shrink_list, &shrink_list); 4850 spin_unlock(&shrink_list_lock); 4851 4852 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4853 dev_priv->fence_reg_start = 3; 4854 4855 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4856 dev_priv->num_fence_regs = 16; 4857 else 4858 dev_priv->num_fence_regs = 8; 4859 4860 /* Initialize fence registers to zero */ 4861 if (IS_I965G(dev)) { 4862 for (i = 0; i < 16; i++) 4863 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0); 4864 } else { 4865 for (i = 0; i < 8; i++) 4866 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0); 4867 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4868 for (i = 0; i < 8; i++) 4869 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); 4870 } 4871 i915_gem_detect_bit_6_swizzle(dev); 4872 init_waitqueue_head(&dev_priv->pending_flip_queue); 4873 } 4874 4875 /* 4876 * Create a physically contiguous memory object for this object 4877 * e.g. 
for cursor + overlay regs 4878 */ 4879 int i915_gem_init_phys_object(struct drm_device *dev, 4880 int id, int size) 4881 { 4882 drm_i915_private_t *dev_priv = dev->dev_private; 4883 struct drm_i915_gem_phys_object *phys_obj; 4884 int ret; 4885 4886 if (dev_priv->mm.phys_objs[id - 1] || !size) 4887 return 0; 4888 4889 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4890 if (!phys_obj) 4891 return -ENOMEM; 4892 4893 phys_obj->id = id; 4894 4895 phys_obj->handle = drm_pci_alloc(dev, size, 0); 4896 if (!phys_obj->handle) { 4897 ret = -ENOMEM; 4898 goto kfree_obj; 4899 } 4900 #ifdef CONFIG_X86 4901 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4902 #endif 4903 4904 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4905 4906 return 0; 4907 kfree_obj: 4908 kfree(phys_obj); 4909 return ret; 4910 } 4911 4912 void i915_gem_free_phys_object(struct drm_device *dev, int id) 4913 { 4914 drm_i915_private_t *dev_priv = dev->dev_private; 4915 struct drm_i915_gem_phys_object *phys_obj; 4916 4917 if (!dev_priv->mm.phys_objs[id - 1]) 4918 return; 4919 4920 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4921 if (phys_obj->cur_obj) { 4922 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4923 } 4924 4925 #ifdef CONFIG_X86 4926 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4927 #endif 4928 drm_pci_free(dev, phys_obj->handle); 4929 kfree(phys_obj); 4930 dev_priv->mm.phys_objs[id - 1] = NULL; 4931 } 4932 4933 void i915_gem_free_all_phys_object(struct drm_device *dev) 4934 { 4935 int i; 4936 4937 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4938 i915_gem_free_phys_object(dev, i); 4939 } 4940 4941 void i915_gem_detach_phys_object(struct drm_device *dev, 4942 struct drm_gem_object *obj) 4943 { 4944 struct drm_i915_gem_object *obj_priv; 4945 int i; 4946 int ret; 4947 int page_count; 4948 4949 obj_priv = obj->driver_private; 4950 if (!obj_priv->phys_obj) 4951 return; 4952 4953 ret = i915_gem_object_get_pages(obj, 0); 4954 if (ret) 4955 goto out; 4956 4957 page_count = obj->size / PAGE_SIZE; 4958 4959 for (i = 0; i < page_count; i++) { 4960 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0); 4961 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4962 4963 memcpy(dst, src, PAGE_SIZE); 4964 kunmap_atomic(dst, KM_USER0); 4965 } 4966 drm_clflush_pages(obj_priv->pages, page_count); 4967 drm_agp_chipset_flush(dev); 4968 4969 i915_gem_object_put_pages(obj); 4970 out: 4971 obj_priv->phys_obj->cur_obj = NULL; 4972 obj_priv->phys_obj = NULL; 4973 } 4974 4975 int 4976 i915_gem_attach_phys_object(struct drm_device *dev, 4977 struct drm_gem_object *obj, int id) 4978 { 4979 drm_i915_private_t *dev_priv = dev->dev_private; 4980 struct drm_i915_gem_object *obj_priv; 4981 int ret = 0; 4982 int page_count; 4983 int i; 4984 4985 if (id > I915_MAX_PHYS_OBJECT) 4986 return -EINVAL; 4987 4988 obj_priv = obj->driver_private; 4989 4990 if (obj_priv->phys_obj) { 4991 if (obj_priv->phys_obj->id == id) 4992 return 0; 4993 i915_gem_detach_phys_object(dev, obj); 4994 } 4995 4996 4997 /* create a new object */ 4998 if (!dev_priv->mm.phys_objs[id - 1]) { 4999 ret = i915_gem_init_phys_object(dev, id, 5000 obj->size); 5001 if (ret) { 5002 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size); 5003 goto out; 5004 } 5005 } 5006 5007 /* bind to the object */ 5008 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1]; 5009 obj_priv->phys_obj->cur_obj = obj; 5010 5011 ret = 
i915_gem_object_get_pages(obj, 0); 5012 if (ret) { 5013 DRM_ERROR("failed to get page list\n"); 5014 goto out; 5015 } 5016 5017 page_count = obj->size / PAGE_SIZE; 5018 5019 for (i = 0; i < page_count; i++) { 5020 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0); 5021 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); 5022 5023 memcpy(dst, src, PAGE_SIZE); 5024 kunmap_atomic(src, KM_USER0); 5025 } 5026 5027 i915_gem_object_put_pages(obj); 5028 5029 return 0; 5030 out: 5031 return ret; 5032 } 5033 5034 static int 5035 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 5036 struct drm_i915_gem_pwrite *args, 5037 struct drm_file *file_priv) 5038 { 5039 struct drm_i915_gem_object *obj_priv = obj->driver_private; 5040 void *obj_addr; 5041 int ret; 5042 char __user *user_data; 5043 5044 user_data = (char __user *) (uintptr_t) args->data_ptr; 5045 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset; 5046 5047 DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size); 5048 ret = copy_from_user(obj_addr, user_data, args->size); 5049 if (ret) 5050 return -EFAULT; 5051 5052 drm_agp_chipset_flush(dev); 5053 return 0; 5054 } 5055 5056 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) 5057 { 5058 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 5059 5060 /* Clean up our request list when the client is going away, so that 5061 * later retire_requests won't dereference our soon-to-be-gone 5062 * file_priv. 5063 */ 5064 mutex_lock(&dev->struct_mutex); 5065 while (!list_empty(&i915_file_priv->mm.request_list)) 5066 list_del_init(i915_file_priv->mm.request_list.next); 5067 mutex_unlock(&dev->struct_mutex); 5068 } 5069 5070 static int 5071 i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) 5072 { 5073 drm_i915_private_t *dev_priv, *next_dev; 5074 struct drm_i915_gem_object *obj_priv, *next_obj; 5075 int cnt = 0; 5076 int would_deadlock = 1; 5077 5078 /* "fast-path" to count number of available objects */ 5079 if (nr_to_scan == 0) { 5080 spin_lock(&shrink_list_lock); 5081 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { 5082 struct drm_device *dev = dev_priv->dev; 5083 5084 if (mutex_trylock(&dev->struct_mutex)) { 5085 list_for_each_entry(obj_priv, 5086 &dev_priv->mm.inactive_list, 5087 list) 5088 cnt++; 5089 mutex_unlock(&dev->struct_mutex); 5090 } 5091 } 5092 spin_unlock(&shrink_list_lock); 5093 5094 return (cnt / 100) * sysctl_vfs_cache_pressure; 5095 } 5096 5097 spin_lock(&shrink_list_lock); 5098 5099 /* first scan for clean buffers */ 5100 list_for_each_entry_safe(dev_priv, next_dev, 5101 &shrink_list, mm.shrink_list) { 5102 struct drm_device *dev = dev_priv->dev; 5103 5104 if (! mutex_trylock(&dev->struct_mutex)) 5105 continue; 5106 5107 spin_unlock(&shrink_list_lock); 5108 5109 i915_gem_retire_requests(dev); 5110 5111 list_for_each_entry_safe(obj_priv, next_obj, 5112 &dev_priv->mm.inactive_list, 5113 list) { 5114 if (i915_gem_object_is_purgeable(obj_priv)) { 5115 i915_gem_object_unbind(obj_priv->obj); 5116 if (--nr_to_scan <= 0) 5117 break; 5118 } 5119 } 5120 5121 spin_lock(&shrink_list_lock); 5122 mutex_unlock(&dev->struct_mutex); 5123 5124 would_deadlock = 0; 5125 5126 if (nr_to_scan <= 0) 5127 break; 5128 } 5129 5130 /* second pass, evict/count anything still on the inactive list */ 5131 list_for_each_entry_safe(dev_priv, next_dev, 5132 &shrink_list, mm.shrink_list) { 5133 struct drm_device *dev = dev_priv->dev; 5134 5135 if (! 
mutex_trylock(&dev->struct_mutex)) 5136 continue; 5137 5138 spin_unlock(&shrink_list_lock); 5139 5140 list_for_each_entry_safe(obj_priv, next_obj, 5141 &dev_priv->mm.inactive_list, 5142 list) { 5143 if (nr_to_scan > 0) { 5144 i915_gem_object_unbind(obj_priv->obj); 5145 nr_to_scan--; 5146 } else 5147 cnt++; 5148 } 5149 5150 spin_lock(&shrink_list_lock); 5151 mutex_unlock(&dev->struct_mutex); 5152 5153 would_deadlock = 0; 5154 } 5155 5156 spin_unlock(&shrink_list_lock); 5157 5158 if (would_deadlock) 5159 return -1; 5160 else if (cnt > 0) 5161 return (cnt / 100) * sysctl_vfs_cache_pressure; 5162 else 5163 return 0; 5164 } 5165 5166 static struct shrinker shrinker = { 5167 .shrink = i915_gem_shrink, 5168 .seeks = DEFAULT_SEEKS, 5169 }; 5170 5171 __init void 5172 i915_gem_shrinker_init(void) 5173 { 5174 register_shrinker(&shrinker); 5175 } 5176 5177 __exit void 5178 i915_gem_shrinker_exit(void) 5179 { 5180 unregister_shrinker(&shrinker); 5181 } 5182