1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include "drmP.h" 29 #include "drm.h" 30 #include "i915_drm.h" 31 #include "i915_drv.h" 32 #include <linux/swap.h> 33 #include <linux/pci.h> 34 35 #define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 36 37 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); 38 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj); 39 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj); 40 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, 41 int write); 42 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 43 uint64_t offset, 44 uint64_t size); 45 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); 46 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); 47 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, 48 unsigned alignment); 49 static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write); 50 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); 51 static int i915_gem_evict_something(struct drm_device *dev); 52 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 53 struct drm_i915_gem_pwrite *args, 54 struct drm_file *file_priv); 55 56 int i915_gem_do_init(struct drm_device *dev, unsigned long start, 57 unsigned long end) 58 { 59 drm_i915_private_t *dev_priv = dev->dev_private; 60 61 if (start >= end || 62 (start & (PAGE_SIZE - 1)) != 0 || 63 (end & (PAGE_SIZE - 1)) != 0) { 64 return -EINVAL; 65 } 66 67 drm_mm_init(&dev_priv->mm.gtt_space, start, 68 end - start); 69 70 dev->gtt_total = (uint32_t) (end - start); 71 72 return 0; 73 } 74 75 int 76 i915_gem_init_ioctl(struct drm_device *dev, void *data, 77 struct drm_file *file_priv) 78 { 79 struct drm_i915_gem_init *args = data; 80 int ret; 81 82 mutex_lock(&dev->struct_mutex); 83 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end); 84 mutex_unlock(&dev->struct_mutex); 85 86 return ret; 87 } 88 89 int 90 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 91 struct drm_file *file_priv) 92 { 93 struct drm_i915_gem_get_aperture *args = data; 94 95 if (!(dev->driver->driver_features & DRIVER_GEM)) 96 return -ENODEV; 97 98 
args->aper_size = dev->gtt_total; 99 args->aper_available_size = (args->aper_size - 100 atomic_read(&dev->pin_memory)); 101 102 return 0; 103 } 104 105 106 /** 107 * Creates a new mm object and returns a handle to it. 108 */ 109 int 110 i915_gem_create_ioctl(struct drm_device *dev, void *data, 111 struct drm_file *file_priv) 112 { 113 struct drm_i915_gem_create *args = data; 114 struct drm_gem_object *obj; 115 int handle, ret; 116 117 args->size = roundup(args->size, PAGE_SIZE); 118 119 /* Allocate the new object */ 120 obj = drm_gem_object_alloc(dev, args->size); 121 if (obj == NULL) 122 return -ENOMEM; 123 124 ret = drm_gem_handle_create(file_priv, obj, &handle); 125 mutex_lock(&dev->struct_mutex); 126 drm_gem_object_handle_unreference(obj); 127 mutex_unlock(&dev->struct_mutex); 128 129 if (ret) 130 return ret; 131 132 args->handle = handle; 133 134 return 0; 135 } 136 137 static inline int 138 fast_shmem_read(struct page **pages, 139 loff_t page_base, int page_offset, 140 char __user *data, 141 int length) 142 { 143 char __iomem *vaddr; 144 int unwritten; 145 146 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); 147 if (vaddr == NULL) 148 return -ENOMEM; 149 unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length); 150 kunmap_atomic(vaddr, KM_USER0); 151 152 if (unwritten) 153 return -EFAULT; 154 155 return 0; 156 } 157 158 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) 159 { 160 drm_i915_private_t *dev_priv = obj->dev->dev_private; 161 struct drm_i915_gem_object *obj_priv = obj->driver_private; 162 163 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 164 obj_priv->tiling_mode != I915_TILING_NONE; 165 } 166 167 static inline int 168 slow_shmem_copy(struct page *dst_page, 169 int dst_offset, 170 struct page *src_page, 171 int src_offset, 172 int length) 173 { 174 char *dst_vaddr, *src_vaddr; 175 176 dst_vaddr = kmap_atomic(dst_page, KM_USER0); 177 if (dst_vaddr == NULL) 178 return -ENOMEM; 179 180 src_vaddr = kmap_atomic(src_page, KM_USER1); 181 if (src_vaddr == NULL) { 182 kunmap_atomic(dst_vaddr, KM_USER0); 183 return -ENOMEM; 184 } 185 186 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length); 187 188 kunmap_atomic(src_vaddr, KM_USER1); 189 kunmap_atomic(dst_vaddr, KM_USER0); 190 191 return 0; 192 } 193 194 static inline int 195 slow_shmem_bit17_copy(struct page *gpu_page, 196 int gpu_offset, 197 struct page *cpu_page, 198 int cpu_offset, 199 int length, 200 int is_read) 201 { 202 char *gpu_vaddr, *cpu_vaddr; 203 204 /* Use the unswizzled path if this page isn't affected. */ 205 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { 206 if (is_read) 207 return slow_shmem_copy(cpu_page, cpu_offset, 208 gpu_page, gpu_offset, length); 209 else 210 return slow_shmem_copy(gpu_page, gpu_offset, 211 cpu_page, cpu_offset, length); 212 } 213 214 gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); 215 if (gpu_vaddr == NULL) 216 return -ENOMEM; 217 218 cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); 219 if (cpu_vaddr == NULL) { 220 kunmap_atomic(gpu_vaddr, KM_USER0); 221 return -ENOMEM; 222 } 223 224 /* Copy the data, XORing A6 with A17 (1). 
The user already knows he's 225 * XORing with the other bits (A9 for Y, A9 and A10 for X) 226 */ 227 while (length > 0) { 228 int cacheline_end = ALIGN(gpu_offset + 1, 64); 229 int this_length = min(cacheline_end - gpu_offset, length); 230 int swizzled_gpu_offset = gpu_offset ^ 64; 231 232 if (is_read) { 233 memcpy(cpu_vaddr + cpu_offset, 234 gpu_vaddr + swizzled_gpu_offset, 235 this_length); 236 } else { 237 memcpy(gpu_vaddr + swizzled_gpu_offset, 238 cpu_vaddr + cpu_offset, 239 this_length); 240 } 241 cpu_offset += this_length; 242 gpu_offset += this_length; 243 length -= this_length; 244 } 245 246 kunmap_atomic(cpu_vaddr, KM_USER1); 247 kunmap_atomic(gpu_vaddr, KM_USER0); 248 249 return 0; 250 } 251 252 /** 253 * This is the fast shmem pread path, which attempts to copy_from_user directly 254 * from the backing pages of the object to the user's address space. On a 255 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow(). 256 */ 257 static int 258 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj, 259 struct drm_i915_gem_pread *args, 260 struct drm_file *file_priv) 261 { 262 struct drm_i915_gem_object *obj_priv = obj->driver_private; 263 ssize_t remain; 264 loff_t offset, page_base; 265 char __user *user_data; 266 int page_offset, page_length; 267 int ret; 268 269 user_data = (char __user *) (uintptr_t) args->data_ptr; 270 remain = args->size; 271 272 mutex_lock(&dev->struct_mutex); 273 274 ret = i915_gem_object_get_pages(obj); 275 if (ret != 0) 276 goto fail_unlock; 277 278 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, 279 args->size); 280 if (ret != 0) 281 goto fail_put_pages; 282 283 obj_priv = obj->driver_private; 284 offset = args->offset; 285 286 while (remain > 0) { 287 /* Operation in this page 288 * 289 * page_base = page offset within aperture 290 * page_offset = offset within page 291 * page_length = bytes to copy for this page 292 */ 293 page_base = (offset & ~(PAGE_SIZE-1)); 294 page_offset = offset & (PAGE_SIZE-1); 295 page_length = remain; 296 if ((page_offset + remain) > PAGE_SIZE) 297 page_length = PAGE_SIZE - page_offset; 298 299 ret = fast_shmem_read(obj_priv->pages, 300 page_base, page_offset, 301 user_data, page_length); 302 if (ret) 303 goto fail_put_pages; 304 305 remain -= page_length; 306 user_data += page_length; 307 offset += page_length; 308 } 309 310 fail_put_pages: 311 i915_gem_object_put_pages(obj); 312 fail_unlock: 313 mutex_unlock(&dev->struct_mutex); 314 315 return ret; 316 } 317 318 /** 319 * This is the fallback shmem pread path, which allocates temporary storage 320 * in kernel space to copy_to_user into outside of the struct_mutex, so we 321 * can copy out of the object's backing pages while holding the struct mutex 322 * and not take page faults. 323 */ 324 static int 325 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, 326 struct drm_i915_gem_pread *args, 327 struct drm_file *file_priv) 328 { 329 struct drm_i915_gem_object *obj_priv = obj->driver_private; 330 struct mm_struct *mm = current->mm; 331 struct page **user_pages; 332 ssize_t remain; 333 loff_t offset, pinned_pages, i; 334 loff_t first_data_page, last_data_page, num_pages; 335 int shmem_page_index, shmem_page_offset; 336 int data_page_index, data_page_offset; 337 int page_length; 338 int ret; 339 uint64_t data_ptr = args->data_ptr; 340 int do_bit17_swizzling; 341 342 remain = args->size; 343 344 /* Pin the user pages containing the data. 
We can't fault while 345 * holding the struct mutex, yet we want to hold it while 346 * dereferencing the user data. 347 */ 348 first_data_page = data_ptr / PAGE_SIZE; 349 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 350 num_pages = last_data_page - first_data_page + 1; 351 352 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 353 if (user_pages == NULL) 354 return -ENOMEM; 355 356 down_read(&mm->mmap_sem); 357 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 358 num_pages, 1, 0, user_pages, NULL); 359 up_read(&mm->mmap_sem); 360 if (pinned_pages < num_pages) { 361 ret = -EFAULT; 362 goto fail_put_user_pages; 363 } 364 365 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 366 367 mutex_lock(&dev->struct_mutex); 368 369 ret = i915_gem_object_get_pages(obj); 370 if (ret != 0) 371 goto fail_unlock; 372 373 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, 374 args->size); 375 if (ret != 0) 376 goto fail_put_pages; 377 378 obj_priv = obj->driver_private; 379 offset = args->offset; 380 381 while (remain > 0) { 382 /* Operation in this page 383 * 384 * shmem_page_index = page number within shmem file 385 * shmem_page_offset = offset within page in shmem file 386 * data_page_index = page number in get_user_pages return 387 * data_page_offset = offset with data_page_index page. 388 * page_length = bytes to copy for this page 389 */ 390 shmem_page_index = offset / PAGE_SIZE; 391 shmem_page_offset = offset & ~PAGE_MASK; 392 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 393 data_page_offset = data_ptr & ~PAGE_MASK; 394 395 page_length = remain; 396 if ((shmem_page_offset + page_length) > PAGE_SIZE) 397 page_length = PAGE_SIZE - shmem_page_offset; 398 if ((data_page_offset + page_length) > PAGE_SIZE) 399 page_length = PAGE_SIZE - data_page_offset; 400 401 if (do_bit17_swizzling) { 402 ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], 403 shmem_page_offset, 404 user_pages[data_page_index], 405 data_page_offset, 406 page_length, 407 1); 408 } else { 409 ret = slow_shmem_copy(user_pages[data_page_index], 410 data_page_offset, 411 obj_priv->pages[shmem_page_index], 412 shmem_page_offset, 413 page_length); 414 } 415 if (ret) 416 goto fail_put_pages; 417 418 remain -= page_length; 419 data_ptr += page_length; 420 offset += page_length; 421 } 422 423 fail_put_pages: 424 i915_gem_object_put_pages(obj); 425 fail_unlock: 426 mutex_unlock(&dev->struct_mutex); 427 fail_put_user_pages: 428 for (i = 0; i < pinned_pages; i++) { 429 SetPageDirty(user_pages[i]); 430 page_cache_release(user_pages[i]); 431 } 432 kfree(user_pages); 433 434 return ret; 435 } 436 437 /** 438 * Reads data from the object referenced by handle. 439 * 440 * On error, the contents of *data are undefined. 441 */ 442 int 443 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 444 struct drm_file *file_priv) 445 { 446 struct drm_i915_gem_pread *args = data; 447 struct drm_gem_object *obj; 448 struct drm_i915_gem_object *obj_priv; 449 int ret; 450 451 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 452 if (obj == NULL) 453 return -EBADF; 454 obj_priv = obj->driver_private; 455 456 /* Bounds check source. 457 * 458 * XXX: This could use review for overflow issues... 
459 */ 460 if (args->offset > obj->size || args->size > obj->size || 461 args->offset + args->size > obj->size) { 462 drm_gem_object_unreference(obj); 463 return -EINVAL; 464 } 465 466 if (i915_gem_object_needs_bit17_swizzle(obj)) { 467 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); 468 } else { 469 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); 470 if (ret != 0) 471 ret = i915_gem_shmem_pread_slow(dev, obj, args, 472 file_priv); 473 } 474 475 drm_gem_object_unreference(obj); 476 477 return ret; 478 } 479 480 /* This is the fast write path which cannot handle 481 * page faults in the source data 482 */ 483 484 static inline int 485 fast_user_write(struct io_mapping *mapping, 486 loff_t page_base, int page_offset, 487 char __user *user_data, 488 int length) 489 { 490 char *vaddr_atomic; 491 unsigned long unwritten; 492 493 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 494 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, 495 user_data, length); 496 io_mapping_unmap_atomic(vaddr_atomic); 497 if (unwritten) 498 return -EFAULT; 499 return 0; 500 } 501 502 /* Here's the write path which can sleep for 503 * page faults 504 */ 505 506 static inline int 507 slow_kernel_write(struct io_mapping *mapping, 508 loff_t gtt_base, int gtt_offset, 509 struct page *user_page, int user_offset, 510 int length) 511 { 512 char *src_vaddr, *dst_vaddr; 513 unsigned long unwritten; 514 515 dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base); 516 src_vaddr = kmap_atomic(user_page, KM_USER1); 517 unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset, 518 src_vaddr + user_offset, 519 length); 520 kunmap_atomic(src_vaddr, KM_USER1); 521 io_mapping_unmap_atomic(dst_vaddr); 522 if (unwritten) 523 return -EFAULT; 524 return 0; 525 } 526 527 static inline int 528 fast_shmem_write(struct page **pages, 529 loff_t page_base, int page_offset, 530 char __user *data, 531 int length) 532 { 533 char __iomem *vaddr; 534 unsigned long unwritten; 535 536 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); 537 if (vaddr == NULL) 538 return -ENOMEM; 539 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length); 540 kunmap_atomic(vaddr, KM_USER0); 541 542 if (unwritten) 543 return -EFAULT; 544 return 0; 545 } 546 547 /** 548 * This is the fast pwrite path, where we copy the data directly from the 549 * user into the GTT, uncached. 
550 */ 551 static int 552 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, 553 struct drm_i915_gem_pwrite *args, 554 struct drm_file *file_priv) 555 { 556 struct drm_i915_gem_object *obj_priv = obj->driver_private; 557 drm_i915_private_t *dev_priv = dev->dev_private; 558 ssize_t remain; 559 loff_t offset, page_base; 560 char __user *user_data; 561 int page_offset, page_length; 562 int ret; 563 564 user_data = (char __user *) (uintptr_t) args->data_ptr; 565 remain = args->size; 566 if (!access_ok(VERIFY_READ, user_data, remain)) 567 return -EFAULT; 568 569 570 mutex_lock(&dev->struct_mutex); 571 ret = i915_gem_object_pin(obj, 0); 572 if (ret) { 573 mutex_unlock(&dev->struct_mutex); 574 return ret; 575 } 576 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 577 if (ret) 578 goto fail; 579 580 obj_priv = obj->driver_private; 581 offset = obj_priv->gtt_offset + args->offset; 582 583 while (remain > 0) { 584 /* Operation in this page 585 * 586 * page_base = page offset within aperture 587 * page_offset = offset within page 588 * page_length = bytes to copy for this page 589 */ 590 page_base = (offset & ~(PAGE_SIZE-1)); 591 page_offset = offset & (PAGE_SIZE-1); 592 page_length = remain; 593 if ((page_offset + remain) > PAGE_SIZE) 594 page_length = PAGE_SIZE - page_offset; 595 596 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base, 597 page_offset, user_data, page_length); 598 599 /* If we get a fault while copying data, then (presumably) our 600 * source page isn't available. Return the error and we'll 601 * retry in the slow path. 602 */ 603 if (ret) 604 goto fail; 605 606 remain -= page_length; 607 user_data += page_length; 608 offset += page_length; 609 } 610 611 fail: 612 i915_gem_object_unpin(obj); 613 mutex_unlock(&dev->struct_mutex); 614 615 return ret; 616 } 617 618 /** 619 * This is the fallback GTT pwrite path, which uses get_user_pages to pin 620 * the memory and maps it using kmap_atomic for copying. 621 * 622 * This code resulted in x11perf -rgb10text consuming about 10% more CPU 623 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). 624 */ 625 static int 626 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, 627 struct drm_i915_gem_pwrite *args, 628 struct drm_file *file_priv) 629 { 630 struct drm_i915_gem_object *obj_priv = obj->driver_private; 631 drm_i915_private_t *dev_priv = dev->dev_private; 632 ssize_t remain; 633 loff_t gtt_page_base, offset; 634 loff_t first_data_page, last_data_page, num_pages; 635 loff_t pinned_pages, i; 636 struct page **user_pages; 637 struct mm_struct *mm = current->mm; 638 int gtt_page_offset, data_page_offset, data_page_index, page_length; 639 int ret; 640 uint64_t data_ptr = args->data_ptr; 641 642 remain = args->size; 643 644 /* Pin the user pages containing the data. We can't fault while 645 * holding the struct mutex, and all of the pwrite implementations 646 * want to hold it while dereferencing the user data. 
647 */ 648 first_data_page = data_ptr / PAGE_SIZE; 649 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 650 num_pages = last_data_page - first_data_page + 1; 651 652 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 653 if (user_pages == NULL) 654 return -ENOMEM; 655 656 down_read(&mm->mmap_sem); 657 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 658 num_pages, 0, 0, user_pages, NULL); 659 up_read(&mm->mmap_sem); 660 if (pinned_pages < num_pages) { 661 ret = -EFAULT; 662 goto out_unpin_pages; 663 } 664 665 mutex_lock(&dev->struct_mutex); 666 ret = i915_gem_object_pin(obj, 0); 667 if (ret) 668 goto out_unlock; 669 670 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 671 if (ret) 672 goto out_unpin_object; 673 674 obj_priv = obj->driver_private; 675 offset = obj_priv->gtt_offset + args->offset; 676 677 while (remain > 0) { 678 /* Operation in this page 679 * 680 * gtt_page_base = page offset within aperture 681 * gtt_page_offset = offset within page in aperture 682 * data_page_index = page number in get_user_pages return 683 * data_page_offset = offset with data_page_index page. 684 * page_length = bytes to copy for this page 685 */ 686 gtt_page_base = offset & PAGE_MASK; 687 gtt_page_offset = offset & ~PAGE_MASK; 688 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 689 data_page_offset = data_ptr & ~PAGE_MASK; 690 691 page_length = remain; 692 if ((gtt_page_offset + page_length) > PAGE_SIZE) 693 page_length = PAGE_SIZE - gtt_page_offset; 694 if ((data_page_offset + page_length) > PAGE_SIZE) 695 page_length = PAGE_SIZE - data_page_offset; 696 697 ret = slow_kernel_write(dev_priv->mm.gtt_mapping, 698 gtt_page_base, gtt_page_offset, 699 user_pages[data_page_index], 700 data_page_offset, 701 page_length); 702 703 /* If we get a fault while copying data, then (presumably) our 704 * source page isn't available. Return the error and we'll 705 * retry in the slow path. 706 */ 707 if (ret) 708 goto out_unpin_object; 709 710 remain -= page_length; 711 offset += page_length; 712 data_ptr += page_length; 713 } 714 715 out_unpin_object: 716 i915_gem_object_unpin(obj); 717 out_unlock: 718 mutex_unlock(&dev->struct_mutex); 719 out_unpin_pages: 720 for (i = 0; i < pinned_pages; i++) 721 page_cache_release(user_pages[i]); 722 kfree(user_pages); 723 724 return ret; 725 } 726 727 /** 728 * This is the fast shmem pwrite path, which attempts to directly 729 * copy_from_user into the kmapped pages backing the object. 
730 */ 731 static int 732 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, 733 struct drm_i915_gem_pwrite *args, 734 struct drm_file *file_priv) 735 { 736 struct drm_i915_gem_object *obj_priv = obj->driver_private; 737 ssize_t remain; 738 loff_t offset, page_base; 739 char __user *user_data; 740 int page_offset, page_length; 741 int ret; 742 743 user_data = (char __user *) (uintptr_t) args->data_ptr; 744 remain = args->size; 745 746 mutex_lock(&dev->struct_mutex); 747 748 ret = i915_gem_object_get_pages(obj); 749 if (ret != 0) 750 goto fail_unlock; 751 752 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 753 if (ret != 0) 754 goto fail_put_pages; 755 756 obj_priv = obj->driver_private; 757 offset = args->offset; 758 obj_priv->dirty = 1; 759 760 while (remain > 0) { 761 /* Operation in this page 762 * 763 * page_base = page offset within aperture 764 * page_offset = offset within page 765 * page_length = bytes to copy for this page 766 */ 767 page_base = (offset & ~(PAGE_SIZE-1)); 768 page_offset = offset & (PAGE_SIZE-1); 769 page_length = remain; 770 if ((page_offset + remain) > PAGE_SIZE) 771 page_length = PAGE_SIZE - page_offset; 772 773 ret = fast_shmem_write(obj_priv->pages, 774 page_base, page_offset, 775 user_data, page_length); 776 if (ret) 777 goto fail_put_pages; 778 779 remain -= page_length; 780 user_data += page_length; 781 offset += page_length; 782 } 783 784 fail_put_pages: 785 i915_gem_object_put_pages(obj); 786 fail_unlock: 787 mutex_unlock(&dev->struct_mutex); 788 789 return ret; 790 } 791 792 /** 793 * This is the fallback shmem pwrite path, which uses get_user_pages to pin 794 * the memory and maps it using kmap_atomic for copying. 795 * 796 * This avoids taking mmap_sem for faulting on the user's address while the 797 * struct_mutex is held. 798 */ 799 static int 800 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, 801 struct drm_i915_gem_pwrite *args, 802 struct drm_file *file_priv) 803 { 804 struct drm_i915_gem_object *obj_priv = obj->driver_private; 805 struct mm_struct *mm = current->mm; 806 struct page **user_pages; 807 ssize_t remain; 808 loff_t offset, pinned_pages, i; 809 loff_t first_data_page, last_data_page, num_pages; 810 int shmem_page_index, shmem_page_offset; 811 int data_page_index, data_page_offset; 812 int page_length; 813 int ret; 814 uint64_t data_ptr = args->data_ptr; 815 int do_bit17_swizzling; 816 817 remain = args->size; 818 819 /* Pin the user pages containing the data. We can't fault while 820 * holding the struct mutex, and all of the pwrite implementations 821 * want to hold it while dereferencing the user data. 
822 */ 823 first_data_page = data_ptr / PAGE_SIZE; 824 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; 825 num_pages = last_data_page - first_data_page + 1; 826 827 user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 828 if (user_pages == NULL) 829 return -ENOMEM; 830 831 down_read(&mm->mmap_sem); 832 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, 833 num_pages, 0, 0, user_pages, NULL); 834 up_read(&mm->mmap_sem); 835 if (pinned_pages < num_pages) { 836 ret = -EFAULT; 837 goto fail_put_user_pages; 838 } 839 840 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 841 842 mutex_lock(&dev->struct_mutex); 843 844 ret = i915_gem_object_get_pages(obj); 845 if (ret != 0) 846 goto fail_unlock; 847 848 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 849 if (ret != 0) 850 goto fail_put_pages; 851 852 obj_priv = obj->driver_private; 853 offset = args->offset; 854 obj_priv->dirty = 1; 855 856 while (remain > 0) { 857 /* Operation in this page 858 * 859 * shmem_page_index = page number within shmem file 860 * shmem_page_offset = offset within page in shmem file 861 * data_page_index = page number in get_user_pages return 862 * data_page_offset = offset with data_page_index page. 863 * page_length = bytes to copy for this page 864 */ 865 shmem_page_index = offset / PAGE_SIZE; 866 shmem_page_offset = offset & ~PAGE_MASK; 867 data_page_index = data_ptr / PAGE_SIZE - first_data_page; 868 data_page_offset = data_ptr & ~PAGE_MASK; 869 870 page_length = remain; 871 if ((shmem_page_offset + page_length) > PAGE_SIZE) 872 page_length = PAGE_SIZE - shmem_page_offset; 873 if ((data_page_offset + page_length) > PAGE_SIZE) 874 page_length = PAGE_SIZE - data_page_offset; 875 876 if (do_bit17_swizzling) { 877 ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], 878 shmem_page_offset, 879 user_pages[data_page_index], 880 data_page_offset, 881 page_length, 882 0); 883 } else { 884 ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], 885 shmem_page_offset, 886 user_pages[data_page_index], 887 data_page_offset, 888 page_length); 889 } 890 if (ret) 891 goto fail_put_pages; 892 893 remain -= page_length; 894 data_ptr += page_length; 895 offset += page_length; 896 } 897 898 fail_put_pages: 899 i915_gem_object_put_pages(obj); 900 fail_unlock: 901 mutex_unlock(&dev->struct_mutex); 902 fail_put_user_pages: 903 for (i = 0; i < pinned_pages; i++) 904 page_cache_release(user_pages[i]); 905 kfree(user_pages); 906 907 return ret; 908 } 909 910 /** 911 * Writes data to the object referenced by handle. 912 * 913 * On error, the contents of the buffer that were to be modified are undefined. 914 */ 915 int 916 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 917 struct drm_file *file_priv) 918 { 919 struct drm_i915_gem_pwrite *args = data; 920 struct drm_gem_object *obj; 921 struct drm_i915_gem_object *obj_priv; 922 int ret = 0; 923 924 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 925 if (obj == NULL) 926 return -EBADF; 927 obj_priv = obj->driver_private; 928 929 /* Bounds check destination. 930 * 931 * XXX: This could use review for overflow issues... 932 */ 933 if (args->offset > obj->size || args->size > obj->size || 934 args->offset + args->size > obj->size) { 935 drm_gem_object_unreference(obj); 936 return -EINVAL; 937 } 938 939 /* We can only do the GTT pwrite on untiled buffers, as otherwise 940 * it would end up going through the fenced access, and we'll get 941 * different detiling behavior between reading and writing. 
942 * pread/pwrite currently are reading and writing from the CPU 943 * perspective, requiring manual detiling by the client. 944 */ 945 if (obj_priv->phys_obj) 946 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv); 947 else if (obj_priv->tiling_mode == I915_TILING_NONE && 948 dev->gtt_total != 0) { 949 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv); 950 if (ret == -EFAULT) { 951 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, 952 file_priv); 953 } 954 } else if (i915_gem_object_needs_bit17_swizzle(obj)) { 955 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); 956 } else { 957 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); 958 if (ret == -EFAULT) { 959 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, 960 file_priv); 961 } 962 } 963 964 #if WATCH_PWRITE 965 if (ret) 966 DRM_INFO("pwrite failed %d\n", ret); 967 #endif 968 969 drm_gem_object_unreference(obj); 970 971 return ret; 972 } 973 974 /** 975 * Called when user space prepares to use an object with the CPU, either 976 * through the mmap ioctl's mapping or a GTT mapping. 977 */ 978 int 979 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 980 struct drm_file *file_priv) 981 { 982 struct drm_i915_gem_set_domain *args = data; 983 struct drm_gem_object *obj; 984 uint32_t read_domains = args->read_domains; 985 uint32_t write_domain = args->write_domain; 986 int ret; 987 988 if (!(dev->driver->driver_features & DRIVER_GEM)) 989 return -ENODEV; 990 991 /* Only handle setting domains to types used by the CPU. */ 992 if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 993 return -EINVAL; 994 995 if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) 996 return -EINVAL; 997 998 /* Having something in the write domain implies it's in the read 999 * domain, and only that read domain. Enforce that in the request. 1000 */ 1001 if (write_domain != 0 && read_domains != write_domain) 1002 return -EINVAL; 1003 1004 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1005 if (obj == NULL) 1006 return -EBADF; 1007 1008 mutex_lock(&dev->struct_mutex); 1009 #if WATCH_BUF 1010 DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n", 1011 obj, obj->size, read_domains, write_domain); 1012 #endif 1013 if (read_domains & I915_GEM_DOMAIN_GTT) { 1014 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1015 1016 /* Silently promote "you're not bound, there was nothing to do" 1017 * to success, since the client was just asking us to 1018 * make sure everything was done. 
1019 */ 1020 if (ret == -EINVAL) 1021 ret = 0; 1022 } else { 1023 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1024 } 1025 1026 drm_gem_object_unreference(obj); 1027 mutex_unlock(&dev->struct_mutex); 1028 return ret; 1029 } 1030 1031 /** 1032 * Called when user space has done writes to this buffer 1033 */ 1034 int 1035 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1036 struct drm_file *file_priv) 1037 { 1038 struct drm_i915_gem_sw_finish *args = data; 1039 struct drm_gem_object *obj; 1040 struct drm_i915_gem_object *obj_priv; 1041 int ret = 0; 1042 1043 if (!(dev->driver->driver_features & DRIVER_GEM)) 1044 return -ENODEV; 1045 1046 mutex_lock(&dev->struct_mutex); 1047 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1048 if (obj == NULL) { 1049 mutex_unlock(&dev->struct_mutex); 1050 return -EBADF; 1051 } 1052 1053 #if WATCH_BUF 1054 DRM_INFO("%s: sw_finish %d (%p %d)\n", 1055 __func__, args->handle, obj, obj->size); 1056 #endif 1057 obj_priv = obj->driver_private; 1058 1059 /* Pinned buffers may be scanout, so flush the cache */ 1060 if (obj_priv->pin_count) 1061 i915_gem_object_flush_cpu_write_domain(obj); 1062 1063 drm_gem_object_unreference(obj); 1064 mutex_unlock(&dev->struct_mutex); 1065 return ret; 1066 } 1067 1068 /** 1069 * Maps the contents of an object, returning the address it is mapped 1070 * into. 1071 * 1072 * While the mapping holds a reference on the contents of the object, it doesn't 1073 * imply a ref on the object itself. 1074 */ 1075 int 1076 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1077 struct drm_file *file_priv) 1078 { 1079 struct drm_i915_gem_mmap *args = data; 1080 struct drm_gem_object *obj; 1081 loff_t offset; 1082 unsigned long addr; 1083 1084 if (!(dev->driver->driver_features & DRIVER_GEM)) 1085 return -ENODEV; 1086 1087 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1088 if (obj == NULL) 1089 return -EBADF; 1090 1091 offset = args->offset; 1092 1093 down_write(¤t->mm->mmap_sem); 1094 addr = do_mmap(obj->filp, 0, args->size, 1095 PROT_READ | PROT_WRITE, MAP_SHARED, 1096 args->offset); 1097 up_write(¤t->mm->mmap_sem); 1098 mutex_lock(&dev->struct_mutex); 1099 drm_gem_object_unreference(obj); 1100 mutex_unlock(&dev->struct_mutex); 1101 if (IS_ERR((void *)addr)) 1102 return addr; 1103 1104 args->addr_ptr = (uint64_t) addr; 1105 1106 return 0; 1107 } 1108 1109 /** 1110 * i915_gem_fault - fault a page into the GTT 1111 * vma: VMA in question 1112 * vmf: fault info 1113 * 1114 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1115 * from userspace. The fault handler takes care of binding the object to 1116 * the GTT (if needed), allocating and programming a fence register (again, 1117 * only if needed based on whether the old reg is still valid or the object 1118 * is tiled) and inserting a new PTE into the faulting process. 1119 * 1120 * Note that the faulting process may involve evicting existing objects 1121 * from the GTT and/or fence registers to make room. So performance may 1122 * suffer if the GTT working set is large or there are few fence registers 1123 * left. 
1124 */ 1125 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1126 { 1127 struct drm_gem_object *obj = vma->vm_private_data; 1128 struct drm_device *dev = obj->dev; 1129 struct drm_i915_private *dev_priv = dev->dev_private; 1130 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1131 pgoff_t page_offset; 1132 unsigned long pfn; 1133 int ret = 0; 1134 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1135 1136 /* We don't use vmf->pgoff since that has the fake offset */ 1137 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1138 PAGE_SHIFT; 1139 1140 /* Now bind it into the GTT if needed */ 1141 mutex_lock(&dev->struct_mutex); 1142 if (!obj_priv->gtt_space) { 1143 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); 1144 if (ret) { 1145 mutex_unlock(&dev->struct_mutex); 1146 return VM_FAULT_SIGBUS; 1147 } 1148 list_add(&obj_priv->list, &dev_priv->mm.inactive_list); 1149 } 1150 1151 /* Need a new fence register? */ 1152 if (obj_priv->fence_reg == I915_FENCE_REG_NONE && 1153 obj_priv->tiling_mode != I915_TILING_NONE) { 1154 ret = i915_gem_object_get_fence_reg(obj, write); 1155 if (ret) { 1156 mutex_unlock(&dev->struct_mutex); 1157 return VM_FAULT_SIGBUS; 1158 } 1159 } 1160 1161 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + 1162 page_offset; 1163 1164 /* Finally, remap it using the new GTT offset */ 1165 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1166 1167 mutex_unlock(&dev->struct_mutex); 1168 1169 switch (ret) { 1170 case -ENOMEM: 1171 case -EAGAIN: 1172 return VM_FAULT_OOM; 1173 case -EFAULT: 1174 case -EINVAL: 1175 return VM_FAULT_SIGBUS; 1176 default: 1177 return VM_FAULT_NOPAGE; 1178 } 1179 } 1180 1181 /** 1182 * i915_gem_create_mmap_offset - create a fake mmap offset for an object 1183 * @obj: obj in question 1184 * 1185 * GEM memory mapping works by handing back to userspace a fake mmap offset 1186 * it can use in a subsequent mmap(2) call. The DRM core code then looks 1187 * up the object based on the offset and sets up the various memory mapping 1188 * structures. 1189 * 1190 * This routine allocates and attaches a fake offset for @obj. 1191 */ 1192 static int 1193 i915_gem_create_mmap_offset(struct drm_gem_object *obj) 1194 { 1195 struct drm_device *dev = obj->dev; 1196 struct drm_gem_mm *mm = dev->mm_private; 1197 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1198 struct drm_map_list *list; 1199 struct drm_local_map *map; 1200 int ret = 0; 1201 1202 /* Set the object up for mmap'ing */ 1203 list = &obj->map_list; 1204 list->map = drm_calloc(1, sizeof(struct drm_map_list), 1205 DRM_MEM_DRIVER); 1206 if (!list->map) 1207 return -ENOMEM; 1208 1209 map = list->map; 1210 map->type = _DRM_GEM; 1211 map->size = obj->size; 1212 map->handle = obj; 1213 1214 /* Get a DRM GEM mmap offset allocated... 
*/ 1215 list->file_offset_node = drm_mm_search_free(&mm->offset_manager, 1216 obj->size / PAGE_SIZE, 0, 0); 1217 if (!list->file_offset_node) { 1218 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name); 1219 ret = -ENOMEM; 1220 goto out_free_list; 1221 } 1222 1223 list->file_offset_node = drm_mm_get_block(list->file_offset_node, 1224 obj->size / PAGE_SIZE, 0); 1225 if (!list->file_offset_node) { 1226 ret = -ENOMEM; 1227 goto out_free_list; 1228 } 1229 1230 list->hash.key = list->file_offset_node->start; 1231 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) { 1232 DRM_ERROR("failed to add to map hash\n"); 1233 goto out_free_mm; 1234 } 1235 1236 /* By now we should be all set, any drm_mmap request on the offset 1237 * below will get to our mmap & fault handler */ 1238 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT; 1239 1240 return 0; 1241 1242 out_free_mm: 1243 drm_mm_put_block(list->file_offset_node); 1244 out_free_list: 1245 drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER); 1246 1247 return ret; 1248 } 1249 1250 static void 1251 i915_gem_free_mmap_offset(struct drm_gem_object *obj) 1252 { 1253 struct drm_device *dev = obj->dev; 1254 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1255 struct drm_gem_mm *mm = dev->mm_private; 1256 struct drm_map_list *list; 1257 1258 list = &obj->map_list; 1259 drm_ht_remove_item(&mm->offset_hash, &list->hash); 1260 1261 if (list->file_offset_node) { 1262 drm_mm_put_block(list->file_offset_node); 1263 list->file_offset_node = NULL; 1264 } 1265 1266 if (list->map) { 1267 drm_free(list->map, sizeof(struct drm_map), DRM_MEM_DRIVER); 1268 list->map = NULL; 1269 } 1270 1271 obj_priv->mmap_offset = 0; 1272 } 1273 1274 /** 1275 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1276 * @obj: object to check 1277 * 1278 * Return the required GTT alignment for an object, taking into account 1279 * potential fence register mapping if needed. 1280 */ 1281 static uint32_t 1282 i915_gem_get_gtt_alignment(struct drm_gem_object *obj) 1283 { 1284 struct drm_device *dev = obj->dev; 1285 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1286 int start, i; 1287 1288 /* 1289 * Minimum alignment is 4k (GTT page size), but might be greater 1290 * if a fence register is needed for the object. 1291 */ 1292 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE) 1293 return 4096; 1294 1295 /* 1296 * Previous chips need to be aligned to the size of the smallest 1297 * fence register that can contain the object. 1298 */ 1299 if (IS_I9XX(dev)) 1300 start = 1024*1024; 1301 else 1302 start = 512*1024; 1303 1304 for (i = start; i < obj->size; i <<= 1) 1305 ; 1306 1307 return i; 1308 } 1309 1310 /** 1311 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1312 * @dev: DRM device 1313 * @data: GTT mapping ioctl data 1314 * @file_priv: GEM object info 1315 * 1316 * Simply returns the fake offset to userspace so it can mmap it. 1317 * The mmap call will end up in drm_gem_mmap(), which will set things 1318 * up so we can get faults in the handler above. 1319 * 1320 * The fault handler will take care of binding the object into the GTT 1321 * (since it may have been evicted to make room for something), allocating 1322 * a fence register, and mapping the appropriate aperture address into 1323 * userspace. 
1324 */ 1325 int 1326 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1327 struct drm_file *file_priv) 1328 { 1329 struct drm_i915_gem_mmap_gtt *args = data; 1330 struct drm_i915_private *dev_priv = dev->dev_private; 1331 struct drm_gem_object *obj; 1332 struct drm_i915_gem_object *obj_priv; 1333 int ret; 1334 1335 if (!(dev->driver->driver_features & DRIVER_GEM)) 1336 return -ENODEV; 1337 1338 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 1339 if (obj == NULL) 1340 return -EBADF; 1341 1342 mutex_lock(&dev->struct_mutex); 1343 1344 obj_priv = obj->driver_private; 1345 1346 if (!obj_priv->mmap_offset) { 1347 ret = i915_gem_create_mmap_offset(obj); 1348 if (ret) { 1349 drm_gem_object_unreference(obj); 1350 mutex_unlock(&dev->struct_mutex); 1351 return ret; 1352 } 1353 } 1354 1355 args->offset = obj_priv->mmap_offset; 1356 1357 obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj); 1358 1359 /* Make sure the alignment is correct for fence regs etc */ 1360 if (obj_priv->agp_mem && 1361 (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) { 1362 drm_gem_object_unreference(obj); 1363 mutex_unlock(&dev->struct_mutex); 1364 return -EINVAL; 1365 } 1366 1367 /* 1368 * Pull it into the GTT so that we have a page list (makes the 1369 * initial fault faster and any subsequent flushing possible). 1370 */ 1371 if (!obj_priv->agp_mem) { 1372 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); 1373 if (ret) { 1374 drm_gem_object_unreference(obj); 1375 mutex_unlock(&dev->struct_mutex); 1376 return ret; 1377 } 1378 list_add(&obj_priv->list, &dev_priv->mm.inactive_list); 1379 } 1380 1381 drm_gem_object_unreference(obj); 1382 mutex_unlock(&dev->struct_mutex); 1383 1384 return 0; 1385 } 1386 1387 void 1388 i915_gem_object_put_pages(struct drm_gem_object *obj) 1389 { 1390 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1391 int page_count = obj->size / PAGE_SIZE; 1392 int i; 1393 1394 BUG_ON(obj_priv->pages_refcount == 0); 1395 1396 if (--obj_priv->pages_refcount != 0) 1397 return; 1398 1399 if (obj_priv->tiling_mode != I915_TILING_NONE) 1400 i915_gem_object_save_bit_17_swizzle(obj); 1401 1402 for (i = 0; i < page_count; i++) 1403 if (obj_priv->pages[i] != NULL) { 1404 if (obj_priv->dirty) 1405 set_page_dirty(obj_priv->pages[i]); 1406 mark_page_accessed(obj_priv->pages[i]); 1407 page_cache_release(obj_priv->pages[i]); 1408 } 1409 obj_priv->dirty = 0; 1410 1411 drm_free(obj_priv->pages, 1412 page_count * sizeof(struct page *), 1413 DRM_MEM_DRIVER); 1414 obj_priv->pages = NULL; 1415 } 1416 1417 static void 1418 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno) 1419 { 1420 struct drm_device *dev = obj->dev; 1421 drm_i915_private_t *dev_priv = dev->dev_private; 1422 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1423 1424 /* Add a reference if we're newly entering the active list. */ 1425 if (!obj_priv->active) { 1426 drm_gem_object_reference(obj); 1427 obj_priv->active = 1; 1428 } 1429 /* Move from whatever list we were on to the tail of execution. 
*/ 1430 spin_lock(&dev_priv->mm.active_list_lock); 1431 list_move_tail(&obj_priv->list, 1432 &dev_priv->mm.active_list); 1433 spin_unlock(&dev_priv->mm.active_list_lock); 1434 obj_priv->last_rendering_seqno = seqno; 1435 } 1436 1437 static void 1438 i915_gem_object_move_to_flushing(struct drm_gem_object *obj) 1439 { 1440 struct drm_device *dev = obj->dev; 1441 drm_i915_private_t *dev_priv = dev->dev_private; 1442 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1443 1444 BUG_ON(!obj_priv->active); 1445 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list); 1446 obj_priv->last_rendering_seqno = 0; 1447 } 1448 1449 static void 1450 i915_gem_object_move_to_inactive(struct drm_gem_object *obj) 1451 { 1452 struct drm_device *dev = obj->dev; 1453 drm_i915_private_t *dev_priv = dev->dev_private; 1454 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1455 1456 i915_verify_inactive(dev, __FILE__, __LINE__); 1457 if (obj_priv->pin_count != 0) 1458 list_del_init(&obj_priv->list); 1459 else 1460 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); 1461 1462 obj_priv->last_rendering_seqno = 0; 1463 if (obj_priv->active) { 1464 obj_priv->active = 0; 1465 drm_gem_object_unreference(obj); 1466 } 1467 i915_verify_inactive(dev, __FILE__, __LINE__); 1468 } 1469 1470 /** 1471 * Creates a new sequence number, emitting a write of it to the status page 1472 * plus an interrupt, which will trigger i915_user_interrupt_handler. 1473 * 1474 * Must be called with struct_lock held. 1475 * 1476 * Returned sequence numbers are nonzero on success. 1477 */ 1478 static uint32_t 1479 i915_add_request(struct drm_device *dev, uint32_t flush_domains) 1480 { 1481 drm_i915_private_t *dev_priv = dev->dev_private; 1482 struct drm_i915_gem_request *request; 1483 uint32_t seqno; 1484 int was_empty; 1485 RING_LOCALS; 1486 1487 request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); 1488 if (request == NULL) 1489 return 0; 1490 1491 /* Grab the seqno we're going to make this request be, and bump the 1492 * next (skipping 0 so it can be the reserved no-seqno value). 1493 */ 1494 seqno = dev_priv->mm.next_gem_seqno; 1495 dev_priv->mm.next_gem_seqno++; 1496 if (dev_priv->mm.next_gem_seqno == 0) 1497 dev_priv->mm.next_gem_seqno++; 1498 1499 BEGIN_LP_RING(4); 1500 OUT_RING(MI_STORE_DWORD_INDEX); 1501 OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 1502 OUT_RING(seqno); 1503 1504 OUT_RING(MI_USER_INTERRUPT); 1505 ADVANCE_LP_RING(); 1506 1507 DRM_DEBUG("%d\n", seqno); 1508 1509 request->seqno = seqno; 1510 request->emitted_jiffies = jiffies; 1511 was_empty = list_empty(&dev_priv->mm.request_list); 1512 list_add_tail(&request->list, &dev_priv->mm.request_list); 1513 1514 /* Associate any objects on the flushing list matching the write 1515 * domain we're flushing with our flush. 
1516 */ 1517 if (flush_domains != 0) { 1518 struct drm_i915_gem_object *obj_priv, *next; 1519 1520 list_for_each_entry_safe(obj_priv, next, 1521 &dev_priv->mm.flushing_list, list) { 1522 struct drm_gem_object *obj = obj_priv->obj; 1523 1524 if ((obj->write_domain & flush_domains) == 1525 obj->write_domain) { 1526 obj->write_domain = 0; 1527 i915_gem_object_move_to_active(obj, seqno); 1528 } 1529 } 1530 1531 } 1532 1533 if (was_empty && !dev_priv->mm.suspended) 1534 schedule_delayed_work(&dev_priv->mm.retire_work, HZ); 1535 return seqno; 1536 } 1537 1538 /** 1539 * Command execution barrier 1540 * 1541 * Ensures that all commands in the ring are finished 1542 * before signalling the CPU 1543 */ 1544 static uint32_t 1545 i915_retire_commands(struct drm_device *dev) 1546 { 1547 drm_i915_private_t *dev_priv = dev->dev_private; 1548 uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 1549 uint32_t flush_domains = 0; 1550 RING_LOCALS; 1551 1552 /* The sampler always gets flushed on i965 (sigh) */ 1553 if (IS_I965G(dev)) 1554 flush_domains |= I915_GEM_DOMAIN_SAMPLER; 1555 BEGIN_LP_RING(2); 1556 OUT_RING(cmd); 1557 OUT_RING(0); /* noop */ 1558 ADVANCE_LP_RING(); 1559 return flush_domains; 1560 } 1561 1562 /** 1563 * Moves buffers associated only with the given active seqno from the active 1564 * to inactive list, potentially freeing them. 1565 */ 1566 static void 1567 i915_gem_retire_request(struct drm_device *dev, 1568 struct drm_i915_gem_request *request) 1569 { 1570 drm_i915_private_t *dev_priv = dev->dev_private; 1571 1572 /* Move any buffers on the active list that are no longer referenced 1573 * by the ringbuffer to the flushing/inactive lists as appropriate. 1574 */ 1575 spin_lock(&dev_priv->mm.active_list_lock); 1576 while (!list_empty(&dev_priv->mm.active_list)) { 1577 struct drm_gem_object *obj; 1578 struct drm_i915_gem_object *obj_priv; 1579 1580 obj_priv = list_first_entry(&dev_priv->mm.active_list, 1581 struct drm_i915_gem_object, 1582 list); 1583 obj = obj_priv->obj; 1584 1585 /* If the seqno being retired doesn't match the oldest in the 1586 * list, then the oldest in the list must still be newer than 1587 * this seqno. 1588 */ 1589 if (obj_priv->last_rendering_seqno != request->seqno) 1590 goto out; 1591 1592 #if WATCH_LRU 1593 DRM_INFO("%s: retire %d moves to inactive list %p\n", 1594 __func__, request->seqno, obj); 1595 #endif 1596 1597 if (obj->write_domain != 0) 1598 i915_gem_object_move_to_flushing(obj); 1599 else { 1600 /* Take a reference on the object so it won't be 1601 * freed while the spinlock is held. The list 1602 * protection for this spinlock is safe when breaking 1603 * the lock like this since the next thing we do 1604 * is just get the head of the list again. 1605 */ 1606 drm_gem_object_reference(obj); 1607 i915_gem_object_move_to_inactive(obj); 1608 spin_unlock(&dev_priv->mm.active_list_lock); 1609 drm_gem_object_unreference(obj); 1610 spin_lock(&dev_priv->mm.active_list_lock); 1611 } 1612 } 1613 out: 1614 spin_unlock(&dev_priv->mm.active_list_lock); 1615 } 1616 1617 /** 1618 * Returns true if seq1 is later than seq2. 1619 */ 1620 static int 1621 i915_seqno_passed(uint32_t seq1, uint32_t seq2) 1622 { 1623 return (int32_t)(seq1 - seq2) >= 0; 1624 } 1625 1626 uint32_t 1627 i915_get_gem_seqno(struct drm_device *dev) 1628 { 1629 drm_i915_private_t *dev_priv = dev->dev_private; 1630 1631 return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); 1632 } 1633 1634 /** 1635 * This function clears the request list as sequence numbers are passed. 
1636 */ 1637 void 1638 i915_gem_retire_requests(struct drm_device *dev) 1639 { 1640 drm_i915_private_t *dev_priv = dev->dev_private; 1641 uint32_t seqno; 1642 1643 if (!dev_priv->hw_status_page) 1644 return; 1645 1646 seqno = i915_get_gem_seqno(dev); 1647 1648 while (!list_empty(&dev_priv->mm.request_list)) { 1649 struct drm_i915_gem_request *request; 1650 uint32_t retiring_seqno; 1651 1652 request = list_first_entry(&dev_priv->mm.request_list, 1653 struct drm_i915_gem_request, 1654 list); 1655 retiring_seqno = request->seqno; 1656 1657 if (i915_seqno_passed(seqno, retiring_seqno) || 1658 dev_priv->mm.wedged) { 1659 i915_gem_retire_request(dev, request); 1660 1661 list_del(&request->list); 1662 drm_free(request, sizeof(*request), DRM_MEM_DRIVER); 1663 } else 1664 break; 1665 } 1666 } 1667 1668 void 1669 i915_gem_retire_work_handler(struct work_struct *work) 1670 { 1671 drm_i915_private_t *dev_priv; 1672 struct drm_device *dev; 1673 1674 dev_priv = container_of(work, drm_i915_private_t, 1675 mm.retire_work.work); 1676 dev = dev_priv->dev; 1677 1678 mutex_lock(&dev->struct_mutex); 1679 i915_gem_retire_requests(dev); 1680 if (!dev_priv->mm.suspended && 1681 !list_empty(&dev_priv->mm.request_list)) 1682 schedule_delayed_work(&dev_priv->mm.retire_work, HZ); 1683 mutex_unlock(&dev->struct_mutex); 1684 } 1685 1686 /** 1687 * Waits for a sequence number to be signaled, and cleans up the 1688 * request and object lists appropriately for that event. 1689 */ 1690 static int 1691 i915_wait_request(struct drm_device *dev, uint32_t seqno) 1692 { 1693 drm_i915_private_t *dev_priv = dev->dev_private; 1694 int ret = 0; 1695 1696 BUG_ON(seqno == 0); 1697 1698 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { 1699 dev_priv->mm.waiting_gem_seqno = seqno; 1700 i915_user_irq_get(dev); 1701 ret = wait_event_interruptible(dev_priv->irq_queue, 1702 i915_seqno_passed(i915_get_gem_seqno(dev), 1703 seqno) || 1704 dev_priv->mm.wedged); 1705 i915_user_irq_put(dev); 1706 dev_priv->mm.waiting_gem_seqno = 0; 1707 } 1708 if (dev_priv->mm.wedged) 1709 ret = -EIO; 1710 1711 if (ret && ret != -ERESTARTSYS) 1712 DRM_ERROR("%s returns %d (awaiting %d at %d)\n", 1713 __func__, ret, seqno, i915_get_gem_seqno(dev)); 1714 1715 /* Directly dispatch request retiring. While we have the work queue 1716 * to handle this, the waiter on a request often wants an associated 1717 * buffer to have made it to the inactive list, and we would need 1718 * a separate wait queue to handle that. 1719 */ 1720 if (ret == 0) 1721 i915_gem_retire_requests(dev); 1722 1723 return ret; 1724 } 1725 1726 static void 1727 i915_gem_flush(struct drm_device *dev, 1728 uint32_t invalidate_domains, 1729 uint32_t flush_domains) 1730 { 1731 drm_i915_private_t *dev_priv = dev->dev_private; 1732 uint32_t cmd; 1733 RING_LOCALS; 1734 1735 #if WATCH_EXEC 1736 DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, 1737 invalidate_domains, flush_domains); 1738 #endif 1739 1740 if (flush_domains & I915_GEM_DOMAIN_CPU) 1741 drm_agp_chipset_flush(dev); 1742 1743 if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU | 1744 I915_GEM_DOMAIN_GTT)) { 1745 /* 1746 * read/write caches: 1747 * 1748 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 1749 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 1750 * also flushed at 2d versus 3d pipeline switches. 1751 * 1752 * read-only caches: 1753 * 1754 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 1755 * MI_READ_FLUSH is set, and is always flushed on 965. 
1756 * 1757 * I915_GEM_DOMAIN_COMMAND may not exist? 1758 * 1759 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 1760 * invalidated when MI_EXE_FLUSH is set. 1761 * 1762 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 1763 * invalidated with every MI_FLUSH. 1764 * 1765 * TLBs: 1766 * 1767 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 1768 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 1769 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 1770 * are flushed at any MI_FLUSH. 1771 */ 1772 1773 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 1774 if ((invalidate_domains|flush_domains) & 1775 I915_GEM_DOMAIN_RENDER) 1776 cmd &= ~MI_NO_WRITE_FLUSH; 1777 if (!IS_I965G(dev)) { 1778 /* 1779 * On the 965, the sampler cache always gets flushed 1780 * and this bit is reserved. 1781 */ 1782 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 1783 cmd |= MI_READ_FLUSH; 1784 } 1785 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 1786 cmd |= MI_EXE_FLUSH; 1787 1788 #if WATCH_EXEC 1789 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); 1790 #endif 1791 BEGIN_LP_RING(2); 1792 OUT_RING(cmd); 1793 OUT_RING(0); /* noop */ 1794 ADVANCE_LP_RING(); 1795 } 1796 } 1797 1798 /** 1799 * Ensures that all rendering to the object has completed and the object is 1800 * safe to unbind from the GTT or access from the CPU. 1801 */ 1802 static int 1803 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1804 { 1805 struct drm_device *dev = obj->dev; 1806 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1807 int ret; 1808 1809 /* This function only exists to support waiting for existing rendering, 1810 * not for emitting required flushes. 1811 */ 1812 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0); 1813 1814 /* If there is rendering queued on the buffer being evicted, wait for 1815 * it. 1816 */ 1817 if (obj_priv->active) { 1818 #if WATCH_BUF 1819 DRM_INFO("%s: object %p wait for seqno %08x\n", 1820 __func__, obj, obj_priv->last_rendering_seqno); 1821 #endif 1822 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); 1823 if (ret != 0) 1824 return ret; 1825 } 1826 1827 return 0; 1828 } 1829 1830 /** 1831 * Unbinds an object from the GTT aperture. 1832 */ 1833 int 1834 i915_gem_object_unbind(struct drm_gem_object *obj) 1835 { 1836 struct drm_device *dev = obj->dev; 1837 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1838 loff_t offset; 1839 int ret = 0; 1840 1841 #if WATCH_BUF 1842 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1843 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 1844 #endif 1845 if (obj_priv->gtt_space == NULL) 1846 return 0; 1847 1848 if (obj_priv->pin_count != 0) { 1849 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1850 return -EINVAL; 1851 } 1852 1853 /* Move the object to the CPU domain to ensure that 1854 * any possible CPU writes while it's not in the GTT 1855 * are flushed when we go to remap it. This will 1856 * also ensure that all pending GPU writes are finished 1857 * before we unbind. 
1858 */ 1859 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1860 if (ret) { 1861 if (ret != -ERESTARTSYS) 1862 DRM_ERROR("set_domain failed: %d\n", ret); 1863 return ret; 1864 } 1865 1866 if (obj_priv->agp_mem != NULL) { 1867 drm_unbind_agp(obj_priv->agp_mem); 1868 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 1869 obj_priv->agp_mem = NULL; 1870 } 1871 1872 BUG_ON(obj_priv->active); 1873 1874 /* blow away mappings if mapped through GTT */ 1875 offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT; 1876 if (dev->dev_mapping) 1877 unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1); 1878 1879 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 1880 i915_gem_clear_fence_reg(obj); 1881 1882 i915_gem_object_put_pages(obj); 1883 1884 if (obj_priv->gtt_space) { 1885 atomic_dec(&dev->gtt_count); 1886 atomic_sub(obj->size, &dev->gtt_memory); 1887 1888 drm_mm_put_block(obj_priv->gtt_space); 1889 obj_priv->gtt_space = NULL; 1890 } 1891 1892 /* Remove ourselves from the LRU list if present. */ 1893 if (!list_empty(&obj_priv->list)) 1894 list_del_init(&obj_priv->list); 1895 1896 return 0; 1897 } 1898 1899 static int 1900 i915_gem_evict_something(struct drm_device *dev) 1901 { 1902 drm_i915_private_t *dev_priv = dev->dev_private; 1903 struct drm_gem_object *obj; 1904 struct drm_i915_gem_object *obj_priv; 1905 int ret = 0; 1906 1907 for (;;) { 1908 /* If there's an inactive buffer available now, grab it 1909 * and be done. 1910 */ 1911 if (!list_empty(&dev_priv->mm.inactive_list)) { 1912 obj_priv = list_first_entry(&dev_priv->mm.inactive_list, 1913 struct drm_i915_gem_object, 1914 list); 1915 obj = obj_priv->obj; 1916 BUG_ON(obj_priv->pin_count != 0); 1917 #if WATCH_LRU 1918 DRM_INFO("%s: evicting %p\n", __func__, obj); 1919 #endif 1920 BUG_ON(obj_priv->active); 1921 1922 /* Wait on the rendering and unbind the buffer. */ 1923 ret = i915_gem_object_unbind(obj); 1924 break; 1925 } 1926 1927 /* If we didn't get anything, but the ring is still processing 1928 * things, wait for one of those things to finish and hopefully 1929 * leave us a buffer to evict. 1930 */ 1931 if (!list_empty(&dev_priv->mm.request_list)) { 1932 struct drm_i915_gem_request *request; 1933 1934 request = list_first_entry(&dev_priv->mm.request_list, 1935 struct drm_i915_gem_request, 1936 list); 1937 1938 ret = i915_wait_request(dev, request->seqno); 1939 if (ret) 1940 break; 1941 1942 /* if waiting caused an object to become inactive, 1943 * then loop around and wait for it. Otherwise, we 1944 * assume that waiting freed and unbound something, 1945 * so there should now be some space in the GTT 1946 */ 1947 if (!list_empty(&dev_priv->mm.inactive_list)) 1948 continue; 1949 break; 1950 } 1951 1952 /* If we didn't have anything on the request list but there 1953 * are buffers awaiting a flush, emit one and try again. 1954 * When we wait on it, those buffers waiting for that flush 1955 * will get moved to inactive. 
1956 */ 1957 if (!list_empty(&dev_priv->mm.flushing_list)) { 1958 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 1959 struct drm_i915_gem_object, 1960 list); 1961 obj = obj_priv->obj; 1962 1963 i915_gem_flush(dev, 1964 obj->write_domain, 1965 obj->write_domain); 1966 i915_add_request(dev, obj->write_domain); 1967 1968 obj = NULL; 1969 continue; 1970 } 1971 1972 DRM_ERROR("inactive empty %d request empty %d " 1973 "flushing empty %d\n", 1974 list_empty(&dev_priv->mm.inactive_list), 1975 list_empty(&dev_priv->mm.request_list), 1976 list_empty(&dev_priv->mm.flushing_list)); 1977 /* If we didn't do any of the above, there's nothing to be done 1978 * and we just can't fit it in. 1979 */ 1980 return -ENOMEM; 1981 } 1982 return ret; 1983 } 1984 1985 static int 1986 i915_gem_evict_everything(struct drm_device *dev) 1987 { 1988 int ret; 1989 1990 for (;;) { 1991 ret = i915_gem_evict_something(dev); 1992 if (ret != 0) 1993 break; 1994 } 1995 if (ret == -ENOMEM) 1996 return 0; 1997 return ret; 1998 } 1999 2000 int 2001 i915_gem_object_get_pages(struct drm_gem_object *obj) 2002 { 2003 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2004 int page_count, i; 2005 struct address_space *mapping; 2006 struct inode *inode; 2007 struct page *page; 2008 int ret; 2009 2010 if (obj_priv->pages_refcount++ != 0) 2011 return 0; 2012 2013 /* Get the list of pages out of our struct file. They'll be pinned 2014 * at this point until we release them. 2015 */ 2016 page_count = obj->size / PAGE_SIZE; 2017 BUG_ON(obj_priv->pages != NULL); 2018 obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), 2019 DRM_MEM_DRIVER); 2020 if (obj_priv->pages == NULL) { 2021 DRM_ERROR("Failed to allocate page list\n"); 2022 obj_priv->pages_refcount--; 2023 return -ENOMEM; 2024 } 2025 2026 inode = obj->filp->f_path.dentry->d_inode; 2027 mapping = inode->i_mapping; 2028 for (i = 0; i < page_count; i++) { 2029 page = read_mapping_page(mapping, i, NULL); 2030 if (IS_ERR(page)) { 2031 ret = PTR_ERR(page); 2032 DRM_ERROR("read_mapping_page failed: %d\n", ret); 2033 i915_gem_object_put_pages(obj); 2034 return ret; 2035 } 2036 obj_priv->pages[i] = page; 2037 } 2038 2039 if (obj_priv->tiling_mode != I915_TILING_NONE) 2040 i915_gem_object_do_bit_17_swizzle(obj); 2041 2042 return 0; 2043 } 2044 2045 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 2046 { 2047 struct drm_gem_object *obj = reg->obj; 2048 struct drm_device *dev = obj->dev; 2049 drm_i915_private_t *dev_priv = dev->dev_private; 2050 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2051 int regnum = obj_priv->fence_reg; 2052 uint64_t val; 2053 2054 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 2055 0xfffff000) << 32; 2056 val |= obj_priv->gtt_offset & 0xfffff000; 2057 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 2058 if (obj_priv->tiling_mode == I915_TILING_Y) 2059 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2060 val |= I965_FENCE_REG_VALID; 2061 2062 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 2063 } 2064 2065 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 2066 { 2067 struct drm_gem_object *obj = reg->obj; 2068 struct drm_device *dev = obj->dev; 2069 drm_i915_private_t *dev_priv = dev->dev_private; 2070 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2071 int regnum = obj_priv->fence_reg; 2072 int tile_width; 2073 uint32_t fence_reg, val; 2074 uint32_t pitch_val; 2075 2076 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 2077 (obj_priv->gtt_offset &
(obj->size - 1))) { 2078 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 2079 __func__, obj_priv->gtt_offset, obj->size); 2080 return; 2081 } 2082 2083 if (obj_priv->tiling_mode == I915_TILING_Y && 2084 HAS_128_BYTE_Y_TILING(dev)) 2085 tile_width = 128; 2086 else 2087 tile_width = 512; 2088 2089 /* Note: pitch better be a power of two tile widths */ 2090 pitch_val = obj_priv->stride / tile_width; 2091 pitch_val = ffs(pitch_val) - 1; 2092 2093 val = obj_priv->gtt_offset; 2094 if (obj_priv->tiling_mode == I915_TILING_Y) 2095 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2096 val |= I915_FENCE_SIZE_BITS(obj->size); 2097 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2098 val |= I830_FENCE_REG_VALID; 2099 2100 if (regnum < 8) 2101 fence_reg = FENCE_REG_830_0 + (regnum * 4); 2102 else 2103 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 2104 I915_WRITE(fence_reg, val); 2105 } 2106 2107 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 2108 { 2109 struct drm_gem_object *obj = reg->obj; 2110 struct drm_device *dev = obj->dev; 2111 drm_i915_private_t *dev_priv = dev->dev_private; 2112 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2113 int regnum = obj_priv->fence_reg; 2114 uint32_t val; 2115 uint32_t pitch_val; 2116 uint32_t fence_size_bits; 2117 2118 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || 2119 (obj_priv->gtt_offset & (obj->size - 1))) { 2120 WARN(1, "%s: object 0x%08x not 512K or size aligned\n", 2121 __func__, obj_priv->gtt_offset); 2122 return; 2123 } 2124 2125 pitch_val = (obj_priv->stride / 128) - 1; 2126 WARN_ON(pitch_val & ~0x0000000f); 2127 val = obj_priv->gtt_offset; 2128 if (obj_priv->tiling_mode == I915_TILING_Y) 2129 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2130 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size); 2131 WARN_ON(fence_size_bits & ~0x00000f00); 2132 val |= fence_size_bits; 2133 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2134 val |= I830_FENCE_REG_VALID; 2135 2136 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2137 2138 } 2139 2140 /** 2141 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2142 * @obj: object to map through a fence reg 2143 * @write: object is about to be written 2144 * 2145 * When mapping objects through the GTT, userspace wants to be able to write 2146 * to them without having to worry about swizzling if the object is tiled. 2147 * 2148 * This function walks the fence regs looking for a free one for @obj, 2149 * stealing one if it can't find any. 2150 * 2151 * It then sets up the reg based on the object's properties: address, pitch 2152 * and tiling format. 
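 *
 * Fence registers whose objects are currently pinned are never stolen;
 * if every in-use fence is pinned this returns -ENOMEM.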
2153 */ 2154 static int 2155 i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) 2156 { 2157 struct drm_device *dev = obj->dev; 2158 struct drm_i915_private *dev_priv = dev->dev_private; 2159 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2160 struct drm_i915_fence_reg *reg = NULL; 2161 struct drm_i915_gem_object *old_obj_priv = NULL; 2162 int i, ret, avail; 2163 2164 switch (obj_priv->tiling_mode) { 2165 case I915_TILING_NONE: 2166 WARN(1, "allocating a fence for non-tiled object?\n"); 2167 break; 2168 case I915_TILING_X: 2169 if (!obj_priv->stride) 2170 return -EINVAL; 2171 WARN((obj_priv->stride & (512 - 1)), 2172 "object 0x%08x is X tiled but has non-512B pitch\n", 2173 obj_priv->gtt_offset); 2174 break; 2175 case I915_TILING_Y: 2176 if (!obj_priv->stride) 2177 return -EINVAL; 2178 WARN((obj_priv->stride & (128 - 1)), 2179 "object 0x%08x is Y tiled but has non-128B pitch\n", 2180 obj_priv->gtt_offset); 2181 break; 2182 } 2183 2184 /* First try to find a free reg */ 2185 try_again: 2186 avail = 0; 2187 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2188 reg = &dev_priv->fence_regs[i]; 2189 if (!reg->obj) 2190 break; 2191 2192 old_obj_priv = reg->obj->driver_private; 2193 if (!old_obj_priv->pin_count) 2194 avail++; 2195 } 2196 2197 /* None available, try to steal one or wait for a user to finish */ 2198 if (i == dev_priv->num_fence_regs) { 2199 uint32_t seqno = dev_priv->mm.next_gem_seqno; 2200 loff_t offset; 2201 2202 if (avail == 0) 2203 return -ENOMEM; 2204 2205 for (i = dev_priv->fence_reg_start; 2206 i < dev_priv->num_fence_regs; i++) { 2207 uint32_t this_seqno; 2208 2209 reg = &dev_priv->fence_regs[i]; 2210 old_obj_priv = reg->obj->driver_private; 2211 2212 if (old_obj_priv->pin_count) 2213 continue; 2214 2215 /* i915 uses fences for GPU access to tiled buffers */ 2216 if (IS_I965G(dev) || !old_obj_priv->active) 2217 break; 2218 2219 /* find the seqno of the first available fence */ 2220 this_seqno = old_obj_priv->last_rendering_seqno; 2221 if (this_seqno != 0 && 2222 reg->obj->write_domain == 0 && 2223 i915_seqno_passed(seqno, this_seqno)) 2224 seqno = this_seqno; 2225 } 2226 2227 /* 2228 * Now things get ugly... we have to wait for one of the 2229 * objects to finish before trying again. 2230 */ 2231 if (i == dev_priv->num_fence_regs) { 2232 if (seqno == dev_priv->mm.next_gem_seqno) { 2233 i915_gem_flush(dev, 2234 I915_GEM_GPU_DOMAINS, 2235 I915_GEM_GPU_DOMAINS); 2236 seqno = i915_add_request(dev, 2237 I915_GEM_GPU_DOMAINS); 2238 if (seqno == 0) 2239 return -ENOMEM; 2240 } 2241 2242 ret = i915_wait_request(dev, seqno); 2243 if (ret) 2244 return ret; 2245 goto try_again; 2246 } 2247 2248 BUG_ON(old_obj_priv->active || 2249 (reg->obj->write_domain & I915_GEM_GPU_DOMAINS)); 2250 2251 /* 2252 * Zap this virtual mapping so we can set up a fence again 2253 * for this object next time we need it. 
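 * Accesses through the stale user mapping will then fault and have to
 * be re-established, presumably picking up a fence again at that point.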
2254 */ 2255 offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT; 2256 if (dev->dev_mapping) 2257 unmap_mapping_range(dev->dev_mapping, offset, 2258 reg->obj->size, 1); 2259 old_obj_priv->fence_reg = I915_FENCE_REG_NONE; 2260 } 2261 2262 obj_priv->fence_reg = i; 2263 reg->obj = obj; 2264 2265 if (IS_I965G(dev)) 2266 i965_write_fence_reg(reg); 2267 else if (IS_I9XX(dev)) 2268 i915_write_fence_reg(reg); 2269 else 2270 i830_write_fence_reg(reg); 2271 2272 return 0; 2273 } 2274 2275 /** 2276 * i915_gem_clear_fence_reg - clear out fence register info 2277 * @obj: object to clear 2278 * 2279 * Zeroes out the fence register itself and clears out the associated 2280 * data structures in dev_priv and obj_priv. 2281 */ 2282 static void 2283 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2284 { 2285 struct drm_device *dev = obj->dev; 2286 drm_i915_private_t *dev_priv = dev->dev_private; 2287 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2288 2289 if (IS_I965G(dev)) 2290 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2291 else { 2292 uint32_t fence_reg; 2293 2294 if (obj_priv->fence_reg < 8) 2295 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2296 else 2297 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2298 8) * 4; 2299 2300 I915_WRITE(fence_reg, 0); 2301 } 2302 2303 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; 2304 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2305 } 2306 2307 /** 2308 * Finds free space in the GTT aperture and binds the object there. 2309 */ 2310 static int 2311 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2312 { 2313 struct drm_device *dev = obj->dev; 2314 drm_i915_private_t *dev_priv = dev->dev_private; 2315 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2316 struct drm_mm_node *free_space; 2317 int page_count, ret; 2318 2319 if (dev_priv->mm.suspended) 2320 return -EBUSY; 2321 if (alignment == 0) 2322 alignment = i915_gem_get_gtt_alignment(obj); 2323 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { 2324 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2325 return -EINVAL; 2326 } 2327 2328 search_free: 2329 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2330 obj->size, alignment, 0); 2331 if (free_space != NULL) { 2332 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2333 alignment); 2334 if (obj_priv->gtt_space != NULL) { 2335 obj_priv->gtt_space->private = obj; 2336 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2337 } 2338 } 2339 if (obj_priv->gtt_space == NULL) { 2340 bool lists_empty; 2341 2342 /* If the gtt is empty and we're still having trouble 2343 * fitting our object in, we're out of memory. 
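 * ("Empty" meaning the inactive, flushing and active lists are all
 * empty, i.e. there is nothing left to evict or even wait on.)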
2344 */ 2345 #if WATCH_LRU 2346 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2347 #endif 2348 spin_lock(&dev_priv->mm.active_list_lock); 2349 lists_empty = (list_empty(&dev_priv->mm.inactive_list) && 2350 list_empty(&dev_priv->mm.flushing_list) && 2351 list_empty(&dev_priv->mm.active_list)); 2352 spin_unlock(&dev_priv->mm.active_list_lock); 2353 if (lists_empty) { 2354 DRM_ERROR("GTT full, but LRU list empty\n"); 2355 return -ENOMEM; 2356 } 2357 2358 ret = i915_gem_evict_something(dev); 2359 if (ret != 0) { 2360 if (ret != -ERESTARTSYS) 2361 DRM_ERROR("Failed to evict a buffer %d\n", ret); 2362 return ret; 2363 } 2364 goto search_free; 2365 } 2366 2367 #if WATCH_BUF 2368 DRM_INFO("Binding object of size %d at 0x%08x\n", 2369 obj->size, obj_priv->gtt_offset); 2370 #endif 2371 ret = i915_gem_object_get_pages(obj); 2372 if (ret) { 2373 drm_mm_put_block(obj_priv->gtt_space); 2374 obj_priv->gtt_space = NULL; 2375 return ret; 2376 } 2377 2378 page_count = obj->size / PAGE_SIZE; 2379 /* Create an AGP memory structure pointing at our pages, and bind it 2380 * into the GTT. 2381 */ 2382 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2383 obj_priv->pages, 2384 page_count, 2385 obj_priv->gtt_offset, 2386 obj_priv->agp_type); 2387 if (obj_priv->agp_mem == NULL) { 2388 i915_gem_object_put_pages(obj); 2389 drm_mm_put_block(obj_priv->gtt_space); 2390 obj_priv->gtt_space = NULL; 2391 return -ENOMEM; 2392 } 2393 atomic_inc(&dev->gtt_count); 2394 atomic_add(obj->size, &dev->gtt_memory); 2395 2396 /* Assert that the object is not currently in any GPU domain. As it 2397 * wasn't in the GTT, there shouldn't be any way it could have been in 2398 * a GPU cache 2399 */ 2400 BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2401 BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2402 2403 return 0; 2404 } 2405 2406 void 2407 i915_gem_clflush_object(struct drm_gem_object *obj) 2408 { 2409 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2410 2411 /* If we don't have a page list set up, then we're not pinned 2412 * to GPU, and we can ignore the cache flush because it'll happen 2413 * again at bind time. 2414 */ 2415 if (obj_priv->pages == NULL) 2416 return; 2417 2418 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2419 } 2420 2421 /** Flushes any GPU write domain for the object if it's dirty. */ 2422 static void 2423 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2424 { 2425 struct drm_device *dev = obj->dev; 2426 uint32_t seqno; 2427 2428 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2429 return; 2430 2431 /* Queue the GPU write cache flushing we need. */ 2432 i915_gem_flush(dev, 0, obj->write_domain); 2433 seqno = i915_add_request(dev, obj->write_domain); 2434 obj->write_domain = 0; 2435 i915_gem_object_move_to_active(obj, seqno); 2436 } 2437 2438 /** Flushes the GTT write domain for the object if it's dirty. */ 2439 static void 2440 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2441 { 2442 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2443 return; 2444 2445 /* No actual flushing is required for the GTT write domain. Writes 2446 * to it immediately go to main memory as far as we know, so there's 2447 * no chipset flush. It also doesn't land in render cache. 2448 */ 2449 obj->write_domain = 0; 2450 } 2451 2452 /** Flushes the CPU write domain for the object if it's dirty. 
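 *
 * This amounts to a clflush of the object's pages plus a chipset flush,
 * since dirty CPU writes may still be sitting in the cache hierarchy.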
*/ 2453 static void 2454 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2455 { 2456 struct drm_device *dev = obj->dev; 2457 2458 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2459 return; 2460 2461 i915_gem_clflush_object(obj); 2462 drm_agp_chipset_flush(dev); 2463 obj->write_domain = 0; 2464 } 2465 2466 /** 2467 * Moves a single object to the GTT read, and possibly write domain. 2468 * 2469 * This function returns when the move is complete, including waiting on 2470 * flushes to occur. 2471 */ 2472 int 2473 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2474 { 2475 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2476 int ret; 2477 2478 /* Not valid to be called on unbound objects. */ 2479 if (obj_priv->gtt_space == NULL) 2480 return -EINVAL; 2481 2482 i915_gem_object_flush_gpu_write_domain(obj); 2483 /* Wait on any GPU rendering and flushing to occur. */ 2484 ret = i915_gem_object_wait_rendering(obj); 2485 if (ret != 0) 2486 return ret; 2487 2488 /* If we're writing through the GTT domain, then CPU and GPU caches 2489 * will need to be invalidated at next use. 2490 */ 2491 if (write) 2492 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2493 2494 i915_gem_object_flush_cpu_write_domain(obj); 2495 2496 /* It should now be out of any other write domains, and we can update 2497 * the domain values for our changes. 2498 */ 2499 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2500 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2501 if (write) { 2502 obj->write_domain = I915_GEM_DOMAIN_GTT; 2503 obj_priv->dirty = 1; 2504 } 2505 2506 return 0; 2507 } 2508 2509 /** 2510 * Moves a single object to the CPU read, and possibly write domain. 2511 * 2512 * This function returns when the move is complete, including waiting on 2513 * flushes to occur. 2514 */ 2515 static int 2516 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2517 { 2518 int ret; 2519 2520 i915_gem_object_flush_gpu_write_domain(obj); 2521 /* Wait on any GPU rendering and flushing to occur. */ 2522 ret = i915_gem_object_wait_rendering(obj); 2523 if (ret != 0) 2524 return ret; 2525 2526 i915_gem_object_flush_gtt_write_domain(obj); 2527 2528 /* If we have a partially-valid cache of the object in the CPU, 2529 * finish invalidating it and free the per-page flags. 2530 */ 2531 i915_gem_object_set_to_full_cpu_read_domain(obj); 2532 2533 /* Flush the CPU cache if it's still invalid. */ 2534 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2535 i915_gem_clflush_object(obj); 2536 2537 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2538 } 2539 2540 /* It should now be out of any other write domains, and we can update 2541 * the domain values for our changes. 2542 */ 2543 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2544 2545 /* If we're writing through the CPU, then the GPU read domains will 2546 * need to be invalidated at next use. 2547 */ 2548 if (write) { 2549 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2550 obj->write_domain = I915_GEM_DOMAIN_CPU; 2551 } 2552 2553 return 0; 2554 } 2555 2556 /* 2557 * Set the next domain for the specified object. This 2558 * may not actually perform the necessary flushing/invaliding though, 2559 * as that may want to be batched with other set_domain operations 2560 * 2561 * This is (we hope) the only really tricky part of gem. The goal 2562 * is fairly simple -- track which caches hold bits of the object 2563 * and make sure they remain coherent. A few concrete examples may 2564 * help to explain how it works. 
For shorthand, we use the notation 2565 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 2566 * a pair of read and write domain masks. 2567 * 2568 * Case 1: the batch buffer 2569 * 2570 * 1. Allocated 2571 * 2. Written by CPU 2572 * 3. Mapped to GTT 2573 * 4. Read by GPU 2574 * 5. Unmapped from GTT 2575 * 6. Freed 2576 * 2577 * Let's take these a step at a time 2578 * 2579 * 1. Allocated 2580 * Pages allocated from the kernel may still have 2581 * cache contents, so we set them to (CPU, CPU) always. 2582 * 2. Written by CPU (using pwrite) 2583 * The pwrite function calls set_domain (CPU, CPU) and 2584 * this function does nothing (as nothing changes) 2585 * 3. Mapped by GTT 2586 * This function asserts that the object is not 2587 * currently in any GPU-based read or write domains 2588 * 4. Read by GPU 2589 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 2590 * As write_domain is zero, this function adds in the 2591 * current read domains (CPU+COMMAND, 0). 2592 * flush_domains is set to CPU. 2593 * invalidate_domains is set to COMMAND 2594 * clflush is run to get data out of the CPU caches 2595 * then i915_dev_set_domain calls i915_gem_flush to 2596 * emit an MI_FLUSH and drm_agp_chipset_flush 2597 * 5. Unmapped from GTT 2598 * i915_gem_object_unbind calls set_domain (CPU, CPU) 2599 * flush_domains and invalidate_domains end up both zero 2600 * so no flushing/invalidating happens 2601 * 6. Freed 2602 * yay, done 2603 * 2604 * Case 2: The shared render buffer 2605 * 2606 * 1. Allocated 2607 * 2. Mapped to GTT 2608 * 3. Read/written by GPU 2609 * 4. set_domain to (CPU,CPU) 2610 * 5. Read/written by CPU 2611 * 6. Read/written by GPU 2612 * 2613 * 1. Allocated 2614 * Same as last example, (CPU, CPU) 2615 * 2. Mapped to GTT 2616 * Nothing changes (assertions find that it is not in the GPU) 2617 * 3. Read/written by GPU 2618 * execbuffer calls set_domain (RENDER, RENDER) 2619 * flush_domains gets CPU 2620 * invalidate_domains gets GPU 2621 * clflush (obj) 2622 * MI_FLUSH and drm_agp_chipset_flush 2623 * 4. set_domain (CPU, CPU) 2624 * flush_domains gets GPU 2625 * invalidate_domains gets CPU 2626 * wait_rendering (obj) to make sure all drawing is complete. 2627 * This will include an MI_FLUSH to get the data from GPU 2628 * to memory 2629 * clflush (obj) to invalidate the CPU cache 2630 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 2631 * 5. Read/written by CPU 2632 * cache lines are loaded and dirtied 2633 * 6. Read written by GPU 2634 * Same as last GPU access 2635 * 2636 * Case 3: The constant buffer 2637 * 2638 * 1. Allocated 2639 * 2. Written by CPU 2640 * 3. Read by GPU 2641 * 4. Updated (written) by CPU again 2642 * 5. Read by GPU 2643 * 2644 * 1. Allocated 2645 * (CPU, CPU) 2646 * 2. Written by CPU 2647 * (CPU, CPU) 2648 * 3. Read by GPU 2649 * (CPU+RENDER, 0) 2650 * flush_domains = CPU 2651 * invalidate_domains = RENDER 2652 * clflush (obj) 2653 * MI_FLUSH 2654 * drm_agp_chipset_flush 2655 * 4. Updated (written) by CPU again 2656 * (CPU, CPU) 2657 * flush_domains = 0 (no previous write domain) 2658 * invalidate_domains = 0 (no new read domains) 2659 * 5. 
Read by GPU 2660 * (CPU+RENDER, 0) 2661 * flush_domains = CPU 2662 * invalidate_domains = RENDER 2663 * clflush (obj) 2664 * MI_FLUSH 2665 * drm_agp_chipset_flush 2666 */ 2667 static void 2668 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 2669 { 2670 struct drm_device *dev = obj->dev; 2671 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2672 uint32_t invalidate_domains = 0; 2673 uint32_t flush_domains = 0; 2674 2675 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2676 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2677 2678 #if WATCH_BUF 2679 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 2680 __func__, obj, 2681 obj->read_domains, obj->pending_read_domains, 2682 obj->write_domain, obj->pending_write_domain); 2683 #endif 2684 /* 2685 * If the object isn't moving to a new write domain, 2686 * let the object stay in multiple read domains 2687 */ 2688 if (obj->pending_write_domain == 0) 2689 obj->pending_read_domains |= obj->read_domains; 2690 else 2691 obj_priv->dirty = 1; 2692 2693 /* 2694 * Flush the current write domain if 2695 * the new read domains don't match. Invalidate 2696 * any read domains which differ from the old 2697 * write domain 2698 */ 2699 if (obj->write_domain && 2700 obj->write_domain != obj->pending_read_domains) { 2701 flush_domains |= obj->write_domain; 2702 invalidate_domains |= 2703 obj->pending_read_domains & ~obj->write_domain; 2704 } 2705 /* 2706 * Invalidate any read caches which may have 2707 * stale data. That is, any new read domains. 2708 */ 2709 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 2710 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 2711 #if WATCH_BUF 2712 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 2713 __func__, flush_domains, invalidate_domains); 2714 #endif 2715 i915_gem_clflush_object(obj); 2716 } 2717 2718 /* The actual obj->write_domain will be updated with 2719 * pending_write_domain after we emit the accumulated flush for all 2720 * of our domain changes in execbuffers (which clears objects' 2721 * write_domains). So if we have a current write domain that we 2722 * aren't changing, set pending_write_domain to that. 2723 */ 2724 if (flush_domains == 0 && obj->pending_write_domain == 0) 2725 obj->pending_write_domain = obj->write_domain; 2726 obj->read_domains = obj->pending_read_domains; 2727 2728 dev->invalidate_domains |= invalidate_domains; 2729 dev->flush_domains |= flush_domains; 2730 #if WATCH_BUF 2731 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 2732 __func__, 2733 obj->read_domains, obj->write_domain, 2734 dev->invalidate_domains, dev->flush_domains); 2735 #endif 2736 } 2737 2738 /** 2739 * Moves the object from a partially CPU read to a full one. 2740 * 2741 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 2742 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 2743 */ 2744 static void 2745 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 2746 { 2747 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2748 2749 if (!obj_priv->page_cpu_valid) 2750 return; 2751 2752 /* If we're partially in the CPU read domain, finish moving it in. 
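 * Only pages that were never marked in page_cpu_valid need a clflush
 * here; pages already flushed by
 * i915_gem_object_set_cpu_read_domain_range() are skipped.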
2753 */ 2754 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 2755 int i; 2756 2757 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 2758 if (obj_priv->page_cpu_valid[i]) 2759 continue; 2760 drm_clflush_pages(obj_priv->pages + i, 1); 2761 } 2762 } 2763 2764 /* Free the page_cpu_valid mappings which are now stale, whether 2765 * or not we've got I915_GEM_DOMAIN_CPU. 2766 */ 2767 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, 2768 DRM_MEM_DRIVER); 2769 obj_priv->page_cpu_valid = NULL; 2770 } 2771 2772 /** 2773 * Set the CPU read domain on a range of the object. 2774 * 2775 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 2776 * not entirely valid. The page_cpu_valid member of the object flags which 2777 * pages have been flushed, and will be respected by 2778 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 2779 * of the whole object. 2780 * 2781 * This function returns when the move is complete, including waiting on 2782 * flushes to occur. 2783 */ 2784 static int 2785 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 2786 uint64_t offset, uint64_t size) 2787 { 2788 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2789 int i, ret; 2790 2791 if (offset == 0 && size == obj->size) 2792 return i915_gem_object_set_to_cpu_domain(obj, 0); 2793 2794 i915_gem_object_flush_gpu_write_domain(obj); 2795 /* Wait on any GPU rendering and flushing to occur. */ 2796 ret = i915_gem_object_wait_rendering(obj); 2797 if (ret != 0) 2798 return ret; 2799 i915_gem_object_flush_gtt_write_domain(obj); 2800 2801 /* If we're already fully in the CPU read domain, we're done. */ 2802 if (obj_priv->page_cpu_valid == NULL && 2803 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 2804 return 0; 2805 2806 /* Otherwise, create/clear the per-page CPU read domain flag if we're 2807 * newly adding I915_GEM_DOMAIN_CPU 2808 */ 2809 if (obj_priv->page_cpu_valid == NULL) { 2810 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE, 2811 DRM_MEM_DRIVER); 2812 if (obj_priv->page_cpu_valid == NULL) 2813 return -ENOMEM; 2814 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 2815 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 2816 2817 /* Flush the cache on any pages that are still invalid from the CPU's 2818 * perspective. 2819 */ 2820 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 2821 i++) { 2822 if (obj_priv->page_cpu_valid[i]) 2823 continue; 2824 2825 drm_clflush_pages(obj_priv->pages + i, 1); 2826 2827 obj_priv->page_cpu_valid[i] = 1; 2828 } 2829 2830 /* It should now be out of any other write domains, and we can update 2831 * the domain values for our changes. 2832 */ 2833 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2834 2835 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2836 2837 return 0; 2838 } 2839 2840 /** 2841 * Pin an object to the GTT and evaluate the relocations landing in it. 2842 */ 2843 static int 2844 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 2845 struct drm_file *file_priv, 2846 struct drm_i915_gem_exec_object *entry, 2847 struct drm_i915_gem_relocation_entry *relocs) 2848 { 2849 struct drm_device *dev = obj->dev; 2850 drm_i915_private_t *dev_priv = dev->dev_private; 2851 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2852 int i, ret; 2853 void __iomem *reloc_page; 2854 2855 /* Choose the GTT offset for our buffer and put it there. 
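 *
 * Each relocation applied below then conceptually amounts to
 *
 *	*(uint32_t *)(obj_mapping + reloc->offset) =
 *			target_obj_priv->gtt_offset + reloc->delta;
 *
 * performed through the GTT aperture via an atomic io mapping rather
 * than through a CPU mapping of the pages.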
*/ 2856 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 2857 if (ret) 2858 return ret; 2859 2860 entry->offset = obj_priv->gtt_offset; 2861 2862 /* Apply the relocations, using the GTT aperture to avoid cache 2863 * flushing requirements. 2864 */ 2865 for (i = 0; i < entry->relocation_count; i++) { 2866 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 2867 struct drm_gem_object *target_obj; 2868 struct drm_i915_gem_object *target_obj_priv; 2869 uint32_t reloc_val, reloc_offset; 2870 uint32_t __iomem *reloc_entry; 2871 2872 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 2873 reloc->target_handle); 2874 if (target_obj == NULL) { 2875 i915_gem_object_unpin(obj); 2876 return -EBADF; 2877 } 2878 target_obj_priv = target_obj->driver_private; 2879 2880 /* The target buffer should have appeared before us in the 2881 * exec_object list, so it should have a GTT space bound by now. 2882 */ 2883 if (target_obj_priv->gtt_space == NULL) { 2884 DRM_ERROR("No GTT space found for object %d\n", 2885 reloc->target_handle); 2886 drm_gem_object_unreference(target_obj); 2887 i915_gem_object_unpin(obj); 2888 return -EINVAL; 2889 } 2890 2891 if (reloc->offset > obj->size - 4) { 2892 DRM_ERROR("Relocation beyond object bounds: " 2893 "obj %p target %d offset %d size %d.\n", 2894 obj, reloc->target_handle, 2895 (int) reloc->offset, (int) obj->size); 2896 drm_gem_object_unreference(target_obj); 2897 i915_gem_object_unpin(obj); 2898 return -EINVAL; 2899 } 2900 if (reloc->offset & 3) { 2901 DRM_ERROR("Relocation not 4-byte aligned: " 2902 "obj %p target %d offset %d.\n", 2903 obj, reloc->target_handle, 2904 (int) reloc->offset); 2905 drm_gem_object_unreference(target_obj); 2906 i915_gem_object_unpin(obj); 2907 return -EINVAL; 2908 } 2909 2910 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 2911 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 2912 DRM_ERROR("reloc with read/write CPU domains: " 2913 "obj %p target %d offset %d " 2914 "read %08x write %08x", 2915 obj, reloc->target_handle, 2916 (int) reloc->offset, 2917 reloc->read_domains, 2918 reloc->write_domain); 2919 drm_gem_object_unreference(target_obj); 2920 i915_gem_object_unpin(obj); 2921 return -EINVAL; 2922 } 2923 2924 if (reloc->write_domain && target_obj->pending_write_domain && 2925 reloc->write_domain != target_obj->pending_write_domain) { 2926 DRM_ERROR("Write domain conflict: " 2927 "obj %p target %d offset %d " 2928 "new %08x old %08x\n", 2929 obj, reloc->target_handle, 2930 (int) reloc->offset, 2931 reloc->write_domain, 2932 target_obj->pending_write_domain); 2933 drm_gem_object_unreference(target_obj); 2934 i915_gem_object_unpin(obj); 2935 return -EINVAL; 2936 } 2937 2938 #if WATCH_RELOC 2939 DRM_INFO("%s: obj %p offset %08x target %d " 2940 "read %08x write %08x gtt %08x " 2941 "presumed %08x delta %08x\n", 2942 __func__, 2943 obj, 2944 (int) reloc->offset, 2945 (int) reloc->target_handle, 2946 (int) reloc->read_domains, 2947 (int) reloc->write_domain, 2948 (int) target_obj_priv->gtt_offset, 2949 (int) reloc->presumed_offset, 2950 reloc->delta); 2951 #endif 2952 2953 target_obj->pending_read_domains |= reloc->read_domains; 2954 target_obj->pending_write_domain |= reloc->write_domain; 2955 2956 /* If the relocation already has the right value in it, no 2957 * more work needs to be done. 
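 * (This is the fast path for a correct presumed_offset from userspace;
 * it skips both the GTT-domain transition and the aperture write.)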
2958 */ 2959 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 2960 drm_gem_object_unreference(target_obj); 2961 continue; 2962 } 2963 2964 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 2965 if (ret != 0) { 2966 drm_gem_object_unreference(target_obj); 2967 i915_gem_object_unpin(obj); 2968 return -EINVAL; 2969 } 2970 2971 /* Map the page containing the relocation we're going to 2972 * perform. 2973 */ 2974 reloc_offset = obj_priv->gtt_offset + reloc->offset; 2975 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 2976 (reloc_offset & 2977 ~(PAGE_SIZE - 1))); 2978 reloc_entry = (uint32_t __iomem *)(reloc_page + 2979 (reloc_offset & (PAGE_SIZE - 1))); 2980 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 2981 2982 #if WATCH_BUF 2983 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 2984 obj, (unsigned int) reloc->offset, 2985 readl(reloc_entry), reloc_val); 2986 #endif 2987 writel(reloc_val, reloc_entry); 2988 io_mapping_unmap_atomic(reloc_page); 2989 2990 /* The updated presumed offset for this entry will be 2991 * copied back out to the user. 2992 */ 2993 reloc->presumed_offset = target_obj_priv->gtt_offset; 2994 2995 drm_gem_object_unreference(target_obj); 2996 } 2997 2998 #if WATCH_BUF 2999 if (0) 3000 i915_gem_dump_object(obj, 128, __func__, ~0); 3001 #endif 3002 return 0; 3003 } 3004 3005 /** Dispatch a batchbuffer to the ring 3006 */ 3007 static int 3008 i915_dispatch_gem_execbuffer(struct drm_device *dev, 3009 struct drm_i915_gem_execbuffer *exec, 3010 struct drm_clip_rect *cliprects, 3011 uint64_t exec_offset) 3012 { 3013 drm_i915_private_t *dev_priv = dev->dev_private; 3014 int nbox = exec->num_cliprects; 3015 int i = 0, count; 3016 uint32_t exec_start, exec_len; 3017 RING_LOCALS; 3018 3019 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 3020 exec_len = (uint32_t) exec->batch_len; 3021 3022 if ((exec_start | exec_len) & 0x7) { 3023 DRM_ERROR("alignment\n"); 3024 return -EINVAL; 3025 } 3026 3027 if (!exec_start) 3028 return -EINVAL; 3029 3030 count = nbox ? nbox : 1; 3031 3032 for (i = 0; i < count; i++) { 3033 if (i < nbox) { 3034 int ret = i915_emit_box(dev, cliprects, i, 3035 exec->DR1, exec->DR4); 3036 if (ret) 3037 return ret; 3038 } 3039 3040 if (IS_I830(dev) || IS_845G(dev)) { 3041 BEGIN_LP_RING(4); 3042 OUT_RING(MI_BATCH_BUFFER); 3043 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3044 OUT_RING(exec_start + exec_len - 4); 3045 OUT_RING(0); 3046 ADVANCE_LP_RING(); 3047 } else { 3048 BEGIN_LP_RING(2); 3049 if (IS_I965G(dev)) { 3050 OUT_RING(MI_BATCH_BUFFER_START | 3051 (2 << 6) | 3052 MI_BATCH_NON_SECURE_I965); 3053 OUT_RING(exec_start); 3054 } else { 3055 OUT_RING(MI_BATCH_BUFFER_START | 3056 (2 << 6)); 3057 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 3058 } 3059 ADVANCE_LP_RING(); 3060 } 3061 } 3062 3063 /* XXX breadcrumb */ 3064 return 0; 3065 } 3066 3067 /* Throttle our rendering by waiting until the ring has completed our requests 3068 * emitted over 20 msec ago. 3069 * 3070 * This should get us reasonable parallelism between CPU and GPU but also 3071 * relatively low latency when blocking on a particular request to finish. 
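 *
 * In code terms: each call records the file's most recent seqno and
 * waits on the seqno that was recorded by the previous call.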
3072 */ 3073 static int 3074 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 3075 { 3076 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3077 int ret = 0; 3078 uint32_t seqno; 3079 3080 mutex_lock(&dev->struct_mutex); 3081 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 3082 i915_file_priv->mm.last_gem_throttle_seqno = 3083 i915_file_priv->mm.last_gem_seqno; 3084 if (seqno) 3085 ret = i915_wait_request(dev, seqno); 3086 mutex_unlock(&dev->struct_mutex); 3087 return ret; 3088 } 3089 3090 static int 3091 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, 3092 uint32_t buffer_count, 3093 struct drm_i915_gem_relocation_entry **relocs) 3094 { 3095 uint32_t reloc_count = 0, reloc_index = 0, i; 3096 int ret; 3097 3098 *relocs = NULL; 3099 for (i = 0; i < buffer_count; i++) { 3100 if (reloc_count + exec_list[i].relocation_count < reloc_count) 3101 return -EINVAL; 3102 reloc_count += exec_list[i].relocation_count; 3103 } 3104 3105 *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); 3106 if (*relocs == NULL) 3107 return -ENOMEM; 3108 3109 for (i = 0; i < buffer_count; i++) { 3110 struct drm_i915_gem_relocation_entry __user *user_relocs; 3111 3112 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3113 3114 ret = copy_from_user(&(*relocs)[reloc_index], 3115 user_relocs, 3116 exec_list[i].relocation_count * 3117 sizeof(**relocs)); 3118 if (ret != 0) { 3119 drm_free(*relocs, reloc_count * sizeof(**relocs), 3120 DRM_MEM_DRIVER); 3121 *relocs = NULL; 3122 return -EFAULT; 3123 } 3124 3125 reloc_index += exec_list[i].relocation_count; 3126 } 3127 3128 return 0; 3129 } 3130 3131 static int 3132 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, 3133 uint32_t buffer_count, 3134 struct drm_i915_gem_relocation_entry *relocs) 3135 { 3136 uint32_t reloc_count = 0, i; 3137 int ret = 0; 3138 3139 for (i = 0; i < buffer_count; i++) { 3140 struct drm_i915_gem_relocation_entry __user *user_relocs; 3141 int unwritten; 3142 3143 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3144 3145 unwritten = copy_to_user(user_relocs, 3146 &relocs[reloc_count], 3147 exec_list[i].relocation_count * 3148 sizeof(*relocs)); 3149 3150 if (unwritten) { 3151 ret = -EFAULT; 3152 goto err; 3153 } 3154 3155 reloc_count += exec_list[i].relocation_count; 3156 } 3157 3158 err: 3159 drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); 3160 3161 return ret; 3162 } 3163 3164 int 3165 i915_gem_execbuffer(struct drm_device *dev, void *data, 3166 struct drm_file *file_priv) 3167 { 3168 drm_i915_private_t *dev_priv = dev->dev_private; 3169 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3170 struct drm_i915_gem_execbuffer *args = data; 3171 struct drm_i915_gem_exec_object *exec_list = NULL; 3172 struct drm_gem_object **object_list = NULL; 3173 struct drm_gem_object *batch_obj; 3174 struct drm_i915_gem_object *obj_priv; 3175 struct drm_clip_rect *cliprects = NULL; 3176 struct drm_i915_gem_relocation_entry *relocs; 3177 int ret, ret2, i, pinned = 0; 3178 uint64_t exec_offset; 3179 uint32_t seqno, flush_domains, reloc_index; 3180 int pin_tries; 3181 3182 #if WATCH_EXEC 3183 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3184 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3185 #endif 3186 3187 if (args->buffer_count < 1) { 3188 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3189 return -EINVAL; 3190 } 3191 /* Copy in the exec list from 
userland */ 3192 exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, 3193 DRM_MEM_DRIVER); 3194 object_list = drm_calloc(sizeof(*object_list), args->buffer_count, 3195 DRM_MEM_DRIVER); 3196 if (exec_list == NULL || object_list == NULL) { 3197 DRM_ERROR("Failed to allocate exec or object list " 3198 "for %d buffers\n", 3199 args->buffer_count); 3200 ret = -ENOMEM; 3201 goto pre_mutex_err; 3202 } 3203 ret = copy_from_user(exec_list, 3204 (struct drm_i915_relocation_entry __user *) 3205 (uintptr_t) args->buffers_ptr, 3206 sizeof(*exec_list) * args->buffer_count); 3207 if (ret != 0) { 3208 DRM_ERROR("copy %d exec entries failed %d\n", 3209 args->buffer_count, ret); 3210 goto pre_mutex_err; 3211 } 3212 3213 if (args->num_cliprects != 0) { 3214 cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects), 3215 DRM_MEM_DRIVER); 3216 if (cliprects == NULL) 3217 goto pre_mutex_err; 3218 3219 ret = copy_from_user(cliprects, 3220 (struct drm_clip_rect __user *) 3221 (uintptr_t) args->cliprects_ptr, 3222 sizeof(*cliprects) * args->num_cliprects); 3223 if (ret != 0) { 3224 DRM_ERROR("copy %d cliprects failed: %d\n", 3225 args->num_cliprects, ret); 3226 goto pre_mutex_err; 3227 } 3228 } 3229 3230 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3231 &relocs); 3232 if (ret != 0) 3233 goto pre_mutex_err; 3234 3235 mutex_lock(&dev->struct_mutex); 3236 3237 i915_verify_inactive(dev, __FILE__, __LINE__); 3238 3239 if (dev_priv->mm.wedged) { 3240 DRM_ERROR("Execbuf while wedged\n"); 3241 mutex_unlock(&dev->struct_mutex); 3242 ret = -EIO; 3243 goto pre_mutex_err; 3244 } 3245 3246 if (dev_priv->mm.suspended) { 3247 DRM_ERROR("Execbuf while VT-switched.\n"); 3248 mutex_unlock(&dev->struct_mutex); 3249 ret = -EBUSY; 3250 goto pre_mutex_err; 3251 } 3252 3253 /* Look up object handles */ 3254 for (i = 0; i < args->buffer_count; i++) { 3255 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3256 exec_list[i].handle); 3257 if (object_list[i] == NULL) { 3258 DRM_ERROR("Invalid object handle %d at index %d\n", 3259 exec_list[i].handle, i); 3260 ret = -EBADF; 3261 goto err; 3262 } 3263 3264 obj_priv = object_list[i]->driver_private; 3265 if (obj_priv->in_execbuffer) { 3266 DRM_ERROR("Object %p appears more than once in object list\n", 3267 object_list[i]); 3268 ret = -EBADF; 3269 goto err; 3270 } 3271 obj_priv->in_execbuffer = true; 3272 } 3273 3274 /* Pin and relocate */ 3275 for (pin_tries = 0; ; pin_tries++) { 3276 ret = 0; 3277 reloc_index = 0; 3278 3279 for (i = 0; i < args->buffer_count; i++) { 3280 object_list[i]->pending_read_domains = 0; 3281 object_list[i]->pending_write_domain = 0; 3282 ret = i915_gem_object_pin_and_relocate(object_list[i], 3283 file_priv, 3284 &exec_list[i], 3285 &relocs[reloc_index]); 3286 if (ret) 3287 break; 3288 pinned = i + 1; 3289 reloc_index += exec_list[i].relocation_count; 3290 } 3291 /* success */ 3292 if (ret == 0) 3293 break; 3294 3295 /* error other than GTT full, or we've already tried again */ 3296 if (ret != -ENOMEM || pin_tries >= 1) { 3297 if (ret != -ERESTARTSYS) 3298 DRM_ERROR("Failed to pin buffers %d\n", ret); 3299 goto err; 3300 } 3301 3302 /* unpin all of our buffers */ 3303 for (i = 0; i < pinned; i++) 3304 i915_gem_object_unpin(object_list[i]); 3305 pinned = 0; 3306 3307 /* evict everyone we can from the aperture */ 3308 ret = i915_gem_evict_everything(dev); 3309 if (ret) 3310 goto err; 3311 } 3312 3313 /* Set the pending read domains for the batch buffer to COMMAND */ 3314 batch_obj = object_list[args->buffer_count-1]; 3315 
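	/* The batch buffer is expected to be the last entry in the exec
	 * list, and it is only ever read by the GPU, so its pending write
	 * domain is cleared below.
	 */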
batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; 3316 batch_obj->pending_write_domain = 0; 3317 3318 i915_verify_inactive(dev, __FILE__, __LINE__); 3319 3320 /* Zero the global flush/invalidate flags. These 3321 * will be modified as new domains are computed 3322 * for each object 3323 */ 3324 dev->invalidate_domains = 0; 3325 dev->flush_domains = 0; 3326 3327 for (i = 0; i < args->buffer_count; i++) { 3328 struct drm_gem_object *obj = object_list[i]; 3329 3330 /* Compute new gpu domains and update invalidate/flush */ 3331 i915_gem_object_set_to_gpu_domain(obj); 3332 } 3333 3334 i915_verify_inactive(dev, __FILE__, __LINE__); 3335 3336 if (dev->invalidate_domains | dev->flush_domains) { 3337 #if WATCH_EXEC 3338 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3339 __func__, 3340 dev->invalidate_domains, 3341 dev->flush_domains); 3342 #endif 3343 i915_gem_flush(dev, 3344 dev->invalidate_domains, 3345 dev->flush_domains); 3346 if (dev->flush_domains) 3347 (void)i915_add_request(dev, dev->flush_domains); 3348 } 3349 3350 for (i = 0; i < args->buffer_count; i++) { 3351 struct drm_gem_object *obj = object_list[i]; 3352 3353 obj->write_domain = obj->pending_write_domain; 3354 } 3355 3356 i915_verify_inactive(dev, __FILE__, __LINE__); 3357 3358 #if WATCH_COHERENCY 3359 for (i = 0; i < args->buffer_count; i++) { 3360 i915_gem_object_check_coherency(object_list[i], 3361 exec_list[i].handle); 3362 } 3363 #endif 3364 3365 exec_offset = exec_list[args->buffer_count - 1].offset; 3366 3367 #if WATCH_EXEC 3368 i915_gem_dump_object(batch_obj, 3369 args->batch_len, 3370 __func__, 3371 ~0); 3372 #endif 3373 3374 /* Exec the batchbuffer */ 3375 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); 3376 if (ret) { 3377 DRM_ERROR("dispatch failed %d\n", ret); 3378 goto err; 3379 } 3380 3381 /* 3382 * Ensure that the commands in the batch buffer are 3383 * finished before the interrupt fires 3384 */ 3385 flush_domains = i915_retire_commands(dev); 3386 3387 i915_verify_inactive(dev, __FILE__, __LINE__); 3388 3389 /* 3390 * Get a seqno representing the execution of the current buffer, 3391 * which we can wait on. We would like to mitigate these interrupts, 3392 * likely by only creating seqnos occasionally (so that we have 3393 * *some* interrupts representing completion of buffers that we can 3394 * wait on when trying to clear up gtt space). 3395 */ 3396 seqno = i915_add_request(dev, flush_domains); 3397 BUG_ON(seqno == 0); 3398 i915_file_priv->mm.last_gem_seqno = seqno; 3399 for (i = 0; i < args->buffer_count; i++) { 3400 struct drm_gem_object *obj = object_list[i]; 3401 3402 i915_gem_object_move_to_active(obj, seqno); 3403 #if WATCH_LRU 3404 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3405 #endif 3406 } 3407 #if WATCH_LRU 3408 i915_dump_lru(dev, __func__); 3409 #endif 3410 3411 i915_verify_inactive(dev, __FILE__, __LINE__); 3412 3413 err: 3414 for (i = 0; i < pinned; i++) 3415 i915_gem_object_unpin(object_list[i]); 3416 3417 for (i = 0; i < args->buffer_count; i++) { 3418 if (object_list[i]) { 3419 obj_priv = object_list[i]->driver_private; 3420 obj_priv->in_execbuffer = false; 3421 } 3422 drm_gem_object_unreference(object_list[i]); 3423 } 3424 3425 mutex_unlock(&dev->struct_mutex); 3426 3427 if (!ret) { 3428 /* Copy the new buffer offsets back to the user's exec list. 
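 * Userspace can feed these offsets back in as presumed offsets on its
 * next execbuf, which lets the relocation fast path above skip work.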
*/ 3429 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 3430 (uintptr_t) args->buffers_ptr, 3431 exec_list, 3432 sizeof(*exec_list) * args->buffer_count); 3433 if (ret) { 3434 ret = -EFAULT; 3435 DRM_ERROR("failed to copy %d exec entries " 3436 "back to user (%d)\n", 3437 args->buffer_count, ret); 3438 } 3439 } 3440 3441 /* Copy the updated relocations out regardless of current error 3442 * state. Failure to update the relocs would mean that the next 3443 * time userland calls execbuf, it would do so with presumed offset 3444 * state that didn't match the actual object state. 3445 */ 3446 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3447 relocs); 3448 if (ret2 != 0) { 3449 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3450 3451 if (ret == 0) 3452 ret = ret2; 3453 } 3454 3455 pre_mutex_err: 3456 drm_free(object_list, sizeof(*object_list) * args->buffer_count, 3457 DRM_MEM_DRIVER); 3458 drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, 3459 DRM_MEM_DRIVER); 3460 drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, 3461 DRM_MEM_DRIVER); 3462 3463 return ret; 3464 } 3465 3466 int 3467 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 3468 { 3469 struct drm_device *dev = obj->dev; 3470 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3471 int ret; 3472 3473 i915_verify_inactive(dev, __FILE__, __LINE__); 3474 if (obj_priv->gtt_space == NULL) { 3475 ret = i915_gem_object_bind_to_gtt(obj, alignment); 3476 if (ret != 0) { 3477 if (ret != -EBUSY && ret != -ERESTARTSYS) 3478 DRM_ERROR("Failure to bind: %d\n", ret); 3479 return ret; 3480 } 3481 } 3482 /* 3483 * Pre-965 chips need a fence register set up in order to 3484 * properly handle tiled surfaces. 3485 */ 3486 if (!IS_I965G(dev) && 3487 obj_priv->fence_reg == I915_FENCE_REG_NONE && 3488 obj_priv->tiling_mode != I915_TILING_NONE) { 3489 ret = i915_gem_object_get_fence_reg(obj, true); 3490 if (ret != 0) { 3491 if (ret != -EBUSY && ret != -ERESTARTSYS) 3492 DRM_ERROR("Failure to install fence: %d\n", 3493 ret); 3494 return ret; 3495 } 3496 } 3497 obj_priv->pin_count++; 3498 3499 /* If the object is not active and not pending a flush, 3500 * remove it from the inactive list 3501 */ 3502 if (obj_priv->pin_count == 1) { 3503 atomic_inc(&dev->pin_count); 3504 atomic_add(obj->size, &dev->pin_memory); 3505 if (!obj_priv->active && 3506 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 3507 I915_GEM_DOMAIN_GTT)) == 0 && 3508 !list_empty(&obj_priv->list)) 3509 list_del_init(&obj_priv->list); 3510 } 3511 i915_verify_inactive(dev, __FILE__, __LINE__); 3512 3513 return 0; 3514 } 3515 3516 void 3517 i915_gem_object_unpin(struct drm_gem_object *obj) 3518 { 3519 struct drm_device *dev = obj->dev; 3520 drm_i915_private_t *dev_priv = dev->dev_private; 3521 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3522 3523 i915_verify_inactive(dev, __FILE__, __LINE__); 3524 obj_priv->pin_count--; 3525 BUG_ON(obj_priv->pin_count < 0); 3526 BUG_ON(obj_priv->gtt_space == NULL); 3527 3528 /* If the object is no longer pinned, and is 3529 * neither active nor being flushed, then stick it on 3530 * the inactive list 3531 */ 3532 if (obj_priv->pin_count == 0) { 3533 if (!obj_priv->active && 3534 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 3535 I915_GEM_DOMAIN_GTT)) == 0) 3536 list_move_tail(&obj_priv->list, 3537 &dev_priv->mm.inactive_list); 3538 atomic_dec(&dev->pin_count); 3539 atomic_sub(obj->size, &dev->pin_memory); 3540 } 3541 i915_verify_inactive(dev, __FILE__, 
__LINE__); 3542 } 3543 3544 int 3545 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3546 struct drm_file *file_priv) 3547 { 3548 struct drm_i915_gem_pin *args = data; 3549 struct drm_gem_object *obj; 3550 struct drm_i915_gem_object *obj_priv; 3551 int ret; 3552 3553 mutex_lock(&dev->struct_mutex); 3554 3555 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3556 if (obj == NULL) { 3557 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 3558 args->handle); 3559 mutex_unlock(&dev->struct_mutex); 3560 return -EBADF; 3561 } 3562 obj_priv = obj->driver_private; 3563 3564 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 3565 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3566 args->handle); 3567 drm_gem_object_unreference(obj); 3568 mutex_unlock(&dev->struct_mutex); 3569 return -EINVAL; 3570 } 3571 3572 obj_priv->user_pin_count++; 3573 obj_priv->pin_filp = file_priv; 3574 if (obj_priv->user_pin_count == 1) { 3575 ret = i915_gem_object_pin(obj, args->alignment); 3576 if (ret != 0) { 3577 drm_gem_object_unreference(obj); 3578 mutex_unlock(&dev->struct_mutex); 3579 return ret; 3580 } 3581 } 3582 3583 /* XXX - flush the CPU caches for pinned objects 3584 * as the X server doesn't manage domains yet 3585 */ 3586 i915_gem_object_flush_cpu_write_domain(obj); 3587 args->offset = obj_priv->gtt_offset; 3588 drm_gem_object_unreference(obj); 3589 mutex_unlock(&dev->struct_mutex); 3590 3591 return 0; 3592 } 3593 3594 int 3595 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3596 struct drm_file *file_priv) 3597 { 3598 struct drm_i915_gem_pin *args = data; 3599 struct drm_gem_object *obj; 3600 struct drm_i915_gem_object *obj_priv; 3601 3602 mutex_lock(&dev->struct_mutex); 3603 3604 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3605 if (obj == NULL) { 3606 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 3607 args->handle); 3608 mutex_unlock(&dev->struct_mutex); 3609 return -EBADF; 3610 } 3611 3612 obj_priv = obj->driver_private; 3613 if (obj_priv->pin_filp != file_priv) { 3614 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3615 args->handle); 3616 drm_gem_object_unreference(obj); 3617 mutex_unlock(&dev->struct_mutex); 3618 return -EINVAL; 3619 } 3620 obj_priv->user_pin_count--; 3621 if (obj_priv->user_pin_count == 0) { 3622 obj_priv->pin_filp = NULL; 3623 i915_gem_object_unpin(obj); 3624 } 3625 3626 drm_gem_object_unreference(obj); 3627 mutex_unlock(&dev->struct_mutex); 3628 return 0; 3629 } 3630 3631 int 3632 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3633 struct drm_file *file_priv) 3634 { 3635 struct drm_i915_gem_busy *args = data; 3636 struct drm_gem_object *obj; 3637 struct drm_i915_gem_object *obj_priv; 3638 3639 mutex_lock(&dev->struct_mutex); 3640 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3641 if (obj == NULL) { 3642 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 3643 args->handle); 3644 mutex_unlock(&dev->struct_mutex); 3645 return -EBADF; 3646 } 3647 3648 /* Update the active list for the hardware's current position. 3649 * Otherwise this only updates on a delayed timer or when irqs are 3650 * actually unmasked, and our working set ends up being larger than 3651 * required. 3652 */ 3653 i915_gem_retire_requests(dev); 3654 3655 obj_priv = obj->driver_private; 3656 /* Don't count being on the flushing list against the object being 3657 * done. 
Otherwise, a buffer left on the flushing list but not getting 3658 * flushed (because nobody's flushing that domain) won't ever return 3659 * unbusy and get reused by libdrm's bo cache. The other expected 3660 * consumer of this interface, OpenGL's occlusion queries, also specs 3661 * that the objects get unbusy "eventually" without any interference. 3662 */ 3663 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 3664 3665 drm_gem_object_unreference(obj); 3666 mutex_unlock(&dev->struct_mutex); 3667 return 0; 3668 } 3669 3670 int 3671 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3672 struct drm_file *file_priv) 3673 { 3674 return i915_gem_ring_throttle(dev, file_priv); 3675 } 3676 3677 int i915_gem_init_object(struct drm_gem_object *obj) 3678 { 3679 struct drm_i915_gem_object *obj_priv; 3680 3681 obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); 3682 if (obj_priv == NULL) 3683 return -ENOMEM; 3684 3685 /* 3686 * We've just allocated pages from the kernel, 3687 * so they've just been written by the CPU with 3688 * zeros. They'll need to be clflushed before we 3689 * use them with the GPU. 3690 */ 3691 obj->write_domain = I915_GEM_DOMAIN_CPU; 3692 obj->read_domains = I915_GEM_DOMAIN_CPU; 3693 3694 obj_priv->agp_type = AGP_USER_MEMORY; 3695 3696 obj->driver_private = obj_priv; 3697 obj_priv->obj = obj; 3698 obj_priv->fence_reg = I915_FENCE_REG_NONE; 3699 INIT_LIST_HEAD(&obj_priv->list); 3700 3701 return 0; 3702 } 3703 3704 void i915_gem_free_object(struct drm_gem_object *obj) 3705 { 3706 struct drm_device *dev = obj->dev; 3707 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3708 3709 while (obj_priv->pin_count > 0) 3710 i915_gem_object_unpin(obj); 3711 3712 if (obj_priv->phys_obj) 3713 i915_gem_detach_phys_object(dev, obj); 3714 3715 i915_gem_object_unbind(obj); 3716 3717 i915_gem_free_mmap_offset(obj); 3718 3719 drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); 3720 kfree(obj_priv->bit_17); 3721 drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); 3722 } 3723 3724 /** Unbinds all objects that are on the given buffer list. */ 3725 static int 3726 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) 3727 { 3728 struct drm_gem_object *obj; 3729 struct drm_i915_gem_object *obj_priv; 3730 int ret; 3731 3732 while (!list_empty(head)) { 3733 obj_priv = list_first_entry(head, 3734 struct drm_i915_gem_object, 3735 list); 3736 obj = obj_priv->obj; 3737 3738 if (obj_priv->pin_count != 0) { 3739 DRM_ERROR("Pinned object in unbind list\n"); 3740 mutex_unlock(&dev->struct_mutex); 3741 return -EINVAL; 3742 } 3743 3744 ret = i915_gem_object_unbind(obj); 3745 if (ret != 0) { 3746 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 3747 ret); 3748 mutex_unlock(&dev->struct_mutex); 3749 return ret; 3750 } 3751 } 3752 3753 3754 return 0; 3755 } 3756 3757 int 3758 i915_gem_idle(struct drm_device *dev) 3759 { 3760 drm_i915_private_t *dev_priv = dev->dev_private; 3761 uint32_t seqno, cur_seqno, last_seqno; 3762 int stuck, ret; 3763 3764 mutex_lock(&dev->struct_mutex); 3765 3766 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 3767 mutex_unlock(&dev->struct_mutex); 3768 return 0; 3769 } 3770 3771 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3772 * We need to replace this with a semaphore, or something. 
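 * Setting mm.suspended makes execbuf and new GTT bindings fail with
 * -EBUSY until the next entervt re-enables them.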
3773 */ 3774 dev_priv->mm.suspended = 1; 3775 3776 /* Cancel the retire work handler, wait for it to finish if running 3777 */ 3778 mutex_unlock(&dev->struct_mutex); 3779 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3780 mutex_lock(&dev->struct_mutex); 3781 3782 i915_kernel_lost_context(dev); 3783 3784 /* Flush the GPU along with all non-CPU write domains 3785 */ 3786 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 3787 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 3788 seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU); 3789 3790 if (seqno == 0) { 3791 mutex_unlock(&dev->struct_mutex); 3792 return -ENOMEM; 3793 } 3794 3795 dev_priv->mm.waiting_gem_seqno = seqno; 3796 last_seqno = 0; 3797 stuck = 0; 3798 for (;;) { 3799 cur_seqno = i915_get_gem_seqno(dev); 3800 if (i915_seqno_passed(cur_seqno, seqno)) 3801 break; 3802 if (last_seqno == cur_seqno) { 3803 if (stuck++ > 100) { 3804 DRM_ERROR("hardware wedged\n"); 3805 dev_priv->mm.wedged = 1; 3806 DRM_WAKEUP(&dev_priv->irq_queue); 3807 break; 3808 } 3809 } 3810 msleep(10); 3811 last_seqno = cur_seqno; 3812 } 3813 dev_priv->mm.waiting_gem_seqno = 0; 3814 3815 i915_gem_retire_requests(dev); 3816 3817 spin_lock(&dev_priv->mm.active_list_lock); 3818 if (!dev_priv->mm.wedged) { 3819 /* Active and flushing should now be empty as we've 3820 * waited for a sequence higher than any pending execbuffer 3821 */ 3822 WARN_ON(!list_empty(&dev_priv->mm.active_list)); 3823 WARN_ON(!list_empty(&dev_priv->mm.flushing_list)); 3824 /* Request should now be empty as we've also waited 3825 * for the last request in the list 3826 */ 3827 WARN_ON(!list_empty(&dev_priv->mm.request_list)); 3828 } 3829 3830 /* Empty the active and flushing lists to inactive. If there's 3831 * anything left at this point, it means that we're wedged and 3832 * nothing good's going to happen by leaving them there. So strip 3833 * the GPU domains and just stuff them onto inactive. 3834 */ 3835 while (!list_empty(&dev_priv->mm.active_list)) { 3836 struct drm_i915_gem_object *obj_priv; 3837 3838 obj_priv = list_first_entry(&dev_priv->mm.active_list, 3839 struct drm_i915_gem_object, 3840 list); 3841 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3842 i915_gem_object_move_to_inactive(obj_priv->obj); 3843 } 3844 spin_unlock(&dev_priv->mm.active_list_lock); 3845 3846 while (!list_empty(&dev_priv->mm.flushing_list)) { 3847 struct drm_i915_gem_object *obj_priv; 3848 3849 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 3850 struct drm_i915_gem_object, 3851 list); 3852 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3853 i915_gem_object_move_to_inactive(obj_priv->obj); 3854 } 3855 3856 3857 /* Move all inactive buffers out of the GTT. */ 3858 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); 3859 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 3860 if (ret) { 3861 mutex_unlock(&dev->struct_mutex); 3862 return ret; 3863 } 3864 3865 i915_gem_cleanup_ringbuffer(dev); 3866 mutex_unlock(&dev->struct_mutex); 3867 3868 return 0; 3869 } 3870 3871 static int 3872 i915_gem_init_hws(struct drm_device *dev) 3873 { 3874 drm_i915_private_t *dev_priv = dev->dev_private; 3875 struct drm_gem_object *obj; 3876 struct drm_i915_gem_object *obj_priv; 3877 int ret; 3878 3879 /* If we need a physical address for the status page, it's already 3880 * initialized at driver load time. 
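 * Chips that instead want a GTT-addressed status page get a small GEM
 * object allocated, pinned and kmapped for it below.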

static int
i915_gem_init_hws(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	/* If we need a physical address for the status page, it's already
	 * initialized at driver load time.
	 */
	if (!I915_NEED_GFX_HWS(dev))
		return 0;

	obj = drm_gem_object_alloc(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;
	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		return ret;
	}

	dev_priv->status_gfx_addr = obj_priv->gtt_offset;

	dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
	if (dev_priv->hw_status_page == NULL) {
		DRM_ERROR("Failed to map status page.\n");
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		i915_gem_object_unpin(obj);
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}
	dev_priv->hws_obj = obj;
	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
	I915_READ(HWS_PGA); /* posting read */
	DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);

	return 0;
}

static void
i915_gem_cleanup_hws(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	if (dev_priv->hws_obj == NULL)
		return;

	obj = dev_priv->hws_obj;
	obj_priv = obj->driver_private;

	kunmap(obj_priv->pages[0]);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(obj);
	dev_priv->hws_obj = NULL;

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
	dev_priv->hw_status_page = NULL;

	/* Write high address into HWS_PGA when disabling. */
	I915_WRITE(HWS_PGA, 0x1ffff000);
}
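
/*
 * Illustrative note, not taken from this file: the status page set up by
 * i915_gem_init_hws() above exists so the GPU can report progress to a
 * location the CPU can read cheaply, without an MMIO register read.
 * Assuming the usual layout, where the request emission path stores the
 * latest completed seqno at dword I915_GEM_HWS_INDEX via
 * MI_STORE_DWORD_INDEX, the seqno read used by the idle path boils down
 * to something like
 *
 *	u32 seqno = ((volatile u32 *)dev_priv->hw_status_page)[I915_GEM_HWS_INDEX];
 *
 * which is why the page is allocated cacheable (AGP_USER_CACHED_MEMORY)
 * and only its GTT address is handed to the hardware through HWS_PGA.
 */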

int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
	int ret;
	u32 head;

	ret = i915_gem_init_hws(dev);
	if (ret != 0)
		return ret;

	obj = drm_gem_object_alloc(dev, 128 * 1024);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		i915_gem_cleanup_hws(dev);
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return ret;
	}

	/* Set up the kernel mapping for the ring. */
	ring->Size = obj->size;
	ring->tail_mask = obj->size - 1;

	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
	ring->map.size = obj->size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.handle == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
		i915_gem_object_unpin(obj);
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return -EINVAL;
	}
	ring->ring_obj = obj;
	ring->virtual_start = ring->map.handle;

	/* Stop the ring if it's running. */
	I915_WRITE(PRB0_CTL, 0);
	I915_WRITE(PRB0_TAIL, 0);
	I915_WRITE(PRB0_HEAD, 0);

	/* Initialize the ring. */
	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_ERROR("Ring head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		I915_WRITE(PRB0_HEAD, 0);

		DRM_ERROR("Ring head forced to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
	}

	I915_WRITE(PRB0_CTL,
		   ((obj->size - 4096) & RING_NR_PAGES) |
		   RING_NO_REPORT |
		   RING_VALID);

	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* If the head is still not zero, the ring is dead */
	if (head != 0) {
		DRM_ERROR("Ring initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		return -EIO;
	}

	/* Update our cache of the ring state */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		i915_kernel_lost_context(dev);
	else {
		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
		ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
		ring->space = ring->head - (ring->tail + 8);
		if (ring->space < 0)
			ring->space += ring->Size;
	}

	return 0;
}
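
/*
 * Illustrative note on the free-space computation at the end of
 * i915_gem_init_ringbuffer() above: tail is where the CPU will write the
 * next command, head is where the GPU is currently reading, and 8 bytes
 * are kept in reserve so that a completely full ring never looks like an
 * empty one (head == tail means empty).  A worked example with invented
 * register values:
 *
 *	Size  = 128 * 1024 = 0x20000	(as allocated above)
 *	head  = 0x00100			(GPU read position)
 *	tail  = 0x1f000			(CPU write position)
 *
 *	space = head - (tail + 8)	= 0x00100 - 0x1f008 = -0x1ef08
 *	space += Size			= 0x20000 - 0x1ef08 =  0x010f8
 *
 * leaving a bit over 4 KB of ring space for new commands.
 */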

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (dev_priv->ring.ring_obj == NULL)
		return;

	drm_core_ioremapfree(&dev_priv->ring.map, dev);

	i915_gem_object_unpin(dev_priv->ring.ring_obj);
	drm_gem_object_unreference(dev_priv->ring.ring_obj);
	dev_priv->ring.ring_obj = NULL;
	memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));

	i915_gem_cleanup_hws(dev);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (dev_priv->mm.wedged) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		dev_priv->mm.wedged = 0;
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	spin_lock(&dev_priv->mm.active_list_lock);
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	spin_unlock(&dev_priv->mm.active_list_lock);

	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->mm.request_list));
	mutex_unlock(&dev->struct_mutex);

	drm_irq_install(dev);

	return 0;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	ret = i915_gem_idle(dev);
	drm_irq_uninstall(dev);

	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}

void
i915_gem_load(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	spin_lock_init(&dev_priv->mm.active_list_lock);
	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.request_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	dev_priv->mm.next_gem_seqno = 1;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	dev_priv->fence_reg_start = 3;

	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	i915_gem_detect_bit_6_swizzle(dev);
}

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
int i915_gem_init_phys_object(struct drm_device *dev,
			      int id, int size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER);
	return ret;
}

void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;
	int i;
	int ret;
	int page_count;

	obj_priv = obj->driver_private;
	if (!obj_priv->phys_obj)
		return;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		goto out;

	page_count = obj->size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);
	}
	drm_clflush_pages(obj_priv->pages, page_count);
	drm_agp_chipset_flush(dev);
out:
	obj_priv->phys_obj->cur_obj = NULL;
	obj_priv->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_gem_object *obj, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	obj_priv = obj->driver_private;

	if (obj_priv->phys_obj) {
		if (obj_priv->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id, obj->size);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
			goto out;
		}
	}

	/* bind to the object */
	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj_priv->phys_obj->cur_obj = obj;

	ret = i915_gem_object_get_pages(obj);
	if (ret) {
		DRM_ERROR("failed to get page list\n");
		goto out;
	}

	page_count = obj->size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER0);
	}

	return 0;
out:
	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	void *obj_addr;
	int ret;
	char __user *user_data;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;

	DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
	ret = copy_from_user(obj_addr, user_data, args->size);
	if (ret)
		return -EFAULT;

	drm_agp_chipset_flush(dev);
	return 0;
}
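
/*
 * Illustrative sketch of the userspace side of i915_gem_phys_pwrite():
 * once an object is backed by a phys object (the cursor and overlay
 * register buffers are the intended users, attached via
 * i915_gem_attach_phys_object() above), its contents are updated with the
 * ordinary pwrite ioctl and land in the contiguous allocation.  The
 * handle and image buffer are invented for the example; only
 * DRM_IOCTL_I915_GEM_PWRITE and struct drm_i915_gem_pwrite are real uapi.
 *
 *	uint32_t argb[64 * 64];		(cursor image filled in by the caller)
 *	struct drm_i915_gem_pwrite pwrite = {
 *		.handle   = cursor_bo_handle,
 *		.offset   = 0,
 *		.size     = sizeof(argb),
 *		.data_ptr = (uint64_t)(uintptr_t)argb,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite))
 *		perror("cursor pwrite failed");
 */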