/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include <linux/swap.h>
#include <linux/pci.h>

#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))

static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						      uint64_t offset,
						      uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_get_pages(struct drm_gem_object *obj);
static void i915_gem_object_put_pages(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int handle, ret;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = drm_gem_object_alloc(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_handle_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char __iomem *vaddr;
	int ret;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	ret = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	return ret;
}

static inline int
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap_atomic(dst_page, KM_USER0);
	if (dst_vaddr == NULL)
		return -ENOMEM;

	src_vaddr = kmap_atomic(src_page, KM_USER1);
	if (src_vaddr == NULL) {
		kunmap_atomic(dst_vaddr, KM_USER0);
		return -ENOMEM;
	}

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap_atomic(src_vaddr, KM_USER1);
	kunmap_atomic(dst_vaddr, KM_USER0);

	return 0;
}

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
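 *
 * The copy is done a page at a time; for example, a 6000-byte read starting
 * at object offset 0x0ff0 is split into chunks of 16, 4096 and 1888 bytes,
 * each handed to one fast_shmem_read() call below.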
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
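	 * (The fast path's __copy_to_user_inatomic can fail if the destination
	 * isn't faulted in, so here the destination pages are pinned up front
	 * with get_user_pages() and the copy is done page-to-page under
	 * kmap_atomic in slow_shmem_copy().)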
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_shmem_copy(user_pages[data_page_index],
				      data_page_offset,
				      obj_priv->pages[shmem_page_index],
				      shmem_page_offset,
				      page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	kfree(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
	if (ret != 0)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);

	drm_gem_object_unreference(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	if (unwritten)
		return -EFAULT;
	return 0;
}

/* Here's the write path which can sleep for
 * page faults
 */

static inline int
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char *src_vaddr, *dst_vaddr;
	unsigned long unwritten;

	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
	src_vaddr = kmap_atomic(user_page, KM_USER1);
	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
						      src_vaddr + user_offset,
						      length);
	kunmap_atomic(src_vaddr, KM_USER1);
	io_mapping_unmap_atomic(dst_vaddr);
	if (unwritten)
		return -EFAULT;
	return 0;
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
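 *
 * The copy goes through a write-combining io_mapping of the aperture
 * (io_mapping_map_atomic_wc + __copy_from_user_inatomic_nocache), so the
 * data bypasses the CPU cache on its way into the GTT.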
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				      page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
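	 * (The fast path bails out with -EFAULT when the source page isn't
	 * resident; the ioctl then retries here with the user pages pinned by
	 * get_user_pages() before struct_mutex is taken.)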
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = obj->driver_private;
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
					gtt_page_base, gtt_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);

		/* If the copy fails, just return the error; we are already in
		 * the slow path, so there is nothing further to fall back to.
		 */
		if (ret)
			goto out_unpin_object;

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	kfree(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
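 *
 * The object is moved to the CPU write domain and marked dirty first, so the
 * pages get written back (set_page_dirty) when they are eventually released.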
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = obj->driver_private;
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset within data_page_index page
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
				      shmem_page_offset,
				      user_pages[data_page_index],
				      data_page_offset,
				      page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	kfree(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = obj->driver_private;

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
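	 *
	 * So the path selection below is: phys objects go through
	 * i915_gem_phys_pwrite(); untiled objects with a GTT use the GTT fast
	 * path, falling back to the pinned-page slow path on -EFAULT;
	 * everything else goes through the shmem paths.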
	 */
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference(obj);

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
		return -EINVAL;

	if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);
#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%d), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %d)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = obj->driver_private;

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	mutex_lock(&dev->struct_mutex);
	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
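 *
 * The PTE inserted below points into the aperture: the pfn is computed from
 * dev->agp->base plus the object's gtt_offset plus the offset of the faulting
 * page within the object.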
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return VM_FAULT_SIGBUS;
		}
		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	/* Need a new fence register? */
	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
	    obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj, write);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return VM_FAULT_SIGBUS;
		}
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);

	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	case -EFAULT:
		return VM_FAULT_SIGBUS;
	default:
		return VM_FAULT_NOPAGE;
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_map_list *list;
	struct drm_local_map *map;
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
	list->map = drm_calloc(1, sizeof(struct drm_map_list),
			       DRM_MEM_DRIVER);
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated...
	 */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
		DRM_ERROR("failed to add to map hash\n");
		goto out_free_mm;
	}

	/* By now we should be all set, any drm_mmap request on the offset
	 * below will get to our mmap & fault handler */
	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);

	return ret;
}

static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;

	list = &obj->map_list;
	drm_ht_remove_item(&mm->offset_hash, &list->hash);

	if (list->file_offset_node) {
		drm_mm_put_block(list->file_offset_node);
		list->file_offset_node = NULL;
	}

	if (list->map) {
		drm_free(list->map, sizeof(struct drm_map), DRM_MEM_DRIVER);
		list->map = NULL;
	}

	obj_priv->mmap_offset = 0;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	if (IS_I9XX(dev))
		start = 1024*1024;
	else
		start = 512*1024;

	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
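 *
 * The offset returned is the fake offset set up by
 * i915_gem_create_mmap_offset() (the hash key shifted into byte units);
 * userspace passes it as the offset argument to mmap(2) on the DRM fd.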
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	mutex_lock(&dev->struct_mutex);

	obj_priv = obj->driver_private;

	if (!obj_priv->mmap_offset) {
		ret = i915_gem_create_mmap_offset(obj);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	args->offset = obj_priv->mmap_offset;

	obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);

	/* Make sure the alignment is correct for fence regs etc */
	if (obj_priv->agp_mem &&
	    (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
		drm_gem_object_unreference(obj);
		mutex_unlock(&dev->struct_mutex);
		return -EINVAL;
	}

	/*
	 * Pull it into the GTT so that we have a page list (makes the
	 * initial fault faster and any subsequent flushing possible).
	 */
	if (!obj_priv->agp_mem) {
		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
		if (ret) {
			drm_gem_object_unreference(obj);
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);

	return 0;
}

static void
i915_gem_object_put_pages(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int page_count = obj->size / PAGE_SIZE;
	int i;

	BUG_ON(obj_priv->pages_refcount == 0);

	if (--obj_priv->pages_refcount != 0)
		return;

	for (i = 0; i < page_count; i++)
		if (obj_priv->pages[i] != NULL) {
			if (obj_priv->dirty)
				set_page_dirty(obj_priv->pages[i]);
			mark_page_accessed(obj_priv->pages[i]);
			page_cache_release(obj_priv->pages[i]);
		}
	obj_priv->dirty = 0;

	drm_free(obj_priv->pages,
		 page_count * sizeof(struct page *),
		 DRM_MEM_DRIVER);
	obj_priv->pages = NULL;
}

static void
i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
	/* Move from whatever list we were on to the tail of execution.
	 */
	list_move_tail(&obj_priv->list,
		       &dev_priv->mm.active_list);
	obj_priv->last_rendering_seqno = seqno;
}

static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	BUG_ON(!obj_priv->active);
	list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
	obj_priv->last_rendering_seqno = 0;
}

static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv = obj->driver_private;

	i915_verify_inactive(dev, __FILE__, __LINE__);
	if (obj_priv->pin_count != 0)
		list_del_init(&obj_priv->list);
	else
		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);

	obj_priv->last_rendering_seqno = 0;
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
	i915_verify_inactive(dev, __FILE__, __LINE__);
}

/**
 * Creates a new sequence number, emitting a write of it to the status page
 * plus an interrupt, which will trigger i915_user_interrupt_handler.
 *
 * Must be called with struct_lock held.
 *
 * Returned sequence numbers are nonzero on success.
 */
static uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *request;
	uint32_t seqno;
	int was_empty;
	RING_LOCALS;

	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
	if (request == NULL)
		return 0;

	/* Grab the seqno we're going to make this request be, and bump the
	 * next (skipping 0 so it can be the reserved no-seqno value).
	 */
	seqno = dev_priv->mm.next_gem_seqno;
	dev_priv->mm.next_gem_seqno++;
	if (dev_priv->mm.next_gem_seqno == 0)
		dev_priv->mm.next_gem_seqno++;

	BEGIN_LP_RING(4);
	OUT_RING(MI_STORE_DWORD_INDEX);
	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	OUT_RING(seqno);

	OUT_RING(MI_USER_INTERRUPT);
	ADVANCE_LP_RING();

	DRM_DEBUG("%d\n", seqno);

	request->seqno = seqno;
	request->emitted_jiffies = jiffies;
	was_empty = list_empty(&dev_priv->mm.request_list);
	list_add_tail(&request->list, &dev_priv->mm.request_list);

	/* Associate any objects on the flushing list matching the write
	 * domain we're flushing with our flush.
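	 * They are moved onto the active list with this request's seqno, so
	 * they retire along with everything else rendered through this
	 * request.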
	 */
	if (flush_domains != 0) {
		struct drm_i915_gem_object *obj_priv, *next;

		list_for_each_entry_safe(obj_priv, next,
					 &dev_priv->mm.flushing_list, list) {
			struct drm_gem_object *obj = obj_priv->obj;

			if ((obj->write_domain & flush_domains) ==
			    obj->write_domain) {
				obj->write_domain = 0;
				i915_gem_object_move_to_active(obj, seqno);
			}
		}
	}

	if (was_empty && !dev_priv->mm.suspended)
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	return seqno;
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
static uint32_t
i915_retire_commands(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	uint32_t flush_domains = 0;
	RING_LOCALS;

	/* The sampler always gets flushed on i965 (sigh) */
	if (IS_I965G(dev))
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
	BEGIN_LP_RING(2);
	OUT_RING(cmd);
	OUT_RING(0); /* noop */
	ADVANCE_LP_RING();
	return flush_domains;
}

/**
 * Moves buffers associated only with the given active seqno from the active
 * to inactive list, potentially freeing them.
 */
static void
i915_gem_retire_request(struct drm_device *dev,
			struct drm_i915_gem_request *request)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&dev_priv->mm.active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&dev_priv->mm.active_list,
					    struct drm_i915_gem_object,
					    list);
		obj = obj_priv->obj;

		/* If the seqno being retired doesn't match the oldest in the
		 * list, then the oldest in the list must still be newer than
		 * this seqno.
		 */
		if (obj_priv->last_rendering_seqno != request->seqno)
			return;

#if WATCH_LRU
		DRM_INFO("%s: retire %d moves to inactive list %p\n",
			 __func__, request->seqno, obj);
#endif

		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
	}
}

/**
 * Returns true if seq1 is later than seq2.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

uint32_t
i915_get_gem_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
}

/**
 * This function clears the request list as sequence numbers are passed.
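 *
 * "Passed" is decided by i915_seqno_passed() above, which compares seqnos
 * with signed 32-bit arithmetic so the ordering survives wraparound
 * (e.g. seqno 0x00000002 counts as later than 0xfffffffe).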
 */
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

	if (!dev_priv->hw_status_page)
		return;

	seqno = i915_get_gem_seqno(dev);

	while (!list_empty(&dev_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		uint32_t retiring_seqno;

		request = list_first_entry(&dev_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   list);
		retiring_seqno = request->seqno;

		if (i915_seqno_passed(seqno, retiring_seqno) ||
		    dev_priv->mm.wedged) {
			i915_gem_retire_request(dev, request);

			list_del(&request->list);
			drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
		} else
			break;
	}
}

void
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	if (!dev_priv->mm.suspended &&
	    !list_empty(&dev_priv->mm.request_list))
		schedule_delayed_work(&dev_priv->mm.retire_work, HZ);
	mutex_unlock(&dev->struct_mutex);
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;

	BUG_ON(seqno == 0);

	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
		dev_priv->mm.waiting_gem_seqno = seqno;
		i915_user_irq_get(dev);
		ret = wait_event_interruptible(dev_priv->irq_queue,
					       i915_seqno_passed(i915_get_gem_seqno(dev),
								 seqno) ||
					       dev_priv->mm.wedged);
		i915_user_irq_put(dev);
		dev_priv->mm.waiting_gem_seqno = 0;
	}
	if (dev_priv->mm.wedged)
		ret = -EIO;

	if (ret && ret != -ERESTARTSYS)
		DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
			  __func__, ret, seqno, i915_get_gem_seqno(dev));

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
		i915_gem_retire_requests(dev);

	return ret;
}

static void
i915_gem_flush(struct drm_device *dev,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t cmd;
	RING_LOCALS;

#if WATCH_EXEC
	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
		 invalidate_domains, flush_domains);
#endif

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);

	if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
						     I915_GEM_DOMAIN_GTT)) {
		/*
		 * read/write caches:
		 *
		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
		 * also flushed at 2d versus 3d pipeline switches.
		 *
		 * read-only caches:
		 *
		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
		 * MI_READ_FLUSH is set, and is always flushed on 965.
1637 * 1638 * I915_GEM_DOMAIN_COMMAND may not exist? 1639 * 1640 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 1641 * invalidated when MI_EXE_FLUSH is set. 1642 * 1643 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 1644 * invalidated with every MI_FLUSH. 1645 * 1646 * TLBs: 1647 * 1648 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 1649 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 1650 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 1651 * are flushed at any MI_FLUSH. 1652 */ 1653 1654 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 1655 if ((invalidate_domains|flush_domains) & 1656 I915_GEM_DOMAIN_RENDER) 1657 cmd &= ~MI_NO_WRITE_FLUSH; 1658 if (!IS_I965G(dev)) { 1659 /* 1660 * On the 965, the sampler cache always gets flushed 1661 * and this bit is reserved. 1662 */ 1663 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 1664 cmd |= MI_READ_FLUSH; 1665 } 1666 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 1667 cmd |= MI_EXE_FLUSH; 1668 1669 #if WATCH_EXEC 1670 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); 1671 #endif 1672 BEGIN_LP_RING(2); 1673 OUT_RING(cmd); 1674 OUT_RING(0); /* noop */ 1675 ADVANCE_LP_RING(); 1676 } 1677 } 1678 1679 /** 1680 * Ensures that all rendering to the object has completed and the object is 1681 * safe to unbind from the GTT or access from the CPU. 1682 */ 1683 static int 1684 i915_gem_object_wait_rendering(struct drm_gem_object *obj) 1685 { 1686 struct drm_device *dev = obj->dev; 1687 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1688 int ret; 1689 1690 /* This function only exists to support waiting for existing rendering, 1691 * not for emitting required flushes. 1692 */ 1693 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0); 1694 1695 /* If there is rendering queued on the buffer being evicted, wait for 1696 * it. 1697 */ 1698 if (obj_priv->active) { 1699 #if WATCH_BUF 1700 DRM_INFO("%s: object %p wait for seqno %08x\n", 1701 __func__, obj, obj_priv->last_rendering_seqno); 1702 #endif 1703 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); 1704 if (ret != 0) 1705 return ret; 1706 } 1707 1708 return 0; 1709 } 1710 1711 /** 1712 * Unbinds an object from the GTT aperture. 1713 */ 1714 int 1715 i915_gem_object_unbind(struct drm_gem_object *obj) 1716 { 1717 struct drm_device *dev = obj->dev; 1718 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1719 loff_t offset; 1720 int ret = 0; 1721 1722 #if WATCH_BUF 1723 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); 1724 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); 1725 #endif 1726 if (obj_priv->gtt_space == NULL) 1727 return 0; 1728 1729 if (obj_priv->pin_count != 0) { 1730 DRM_ERROR("Attempting to unbind pinned buffer\n"); 1731 return -EINVAL; 1732 } 1733 1734 /* Move the object to the CPU domain to ensure that 1735 * any possible CPU writes while it's not in the GTT 1736 * are flushed when we go to remap it. This will 1737 * also ensure that all pending GPU writes are finished 1738 * before we unbind. 
1739 */ 1740 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1741 if (ret) { 1742 if (ret != -ERESTARTSYS) 1743 DRM_ERROR("set_domain failed: %d\n", ret); 1744 return ret; 1745 } 1746 1747 if (obj_priv->agp_mem != NULL) { 1748 drm_unbind_agp(obj_priv->agp_mem); 1749 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); 1750 obj_priv->agp_mem = NULL; 1751 } 1752 1753 BUG_ON(obj_priv->active); 1754 1755 /* blow away mappings if mapped through GTT */ 1756 offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT; 1757 if (dev->dev_mapping) 1758 unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1); 1759 1760 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) 1761 i915_gem_clear_fence_reg(obj); 1762 1763 i915_gem_object_put_pages(obj); 1764 1765 if (obj_priv->gtt_space) { 1766 atomic_dec(&dev->gtt_count); 1767 atomic_sub(obj->size, &dev->gtt_memory); 1768 1769 drm_mm_put_block(obj_priv->gtt_space); 1770 obj_priv->gtt_space = NULL; 1771 } 1772 1773 /* Remove ourselves from the LRU list if present. */ 1774 if (!list_empty(&obj_priv->list)) 1775 list_del_init(&obj_priv->list); 1776 1777 return 0; 1778 } 1779 1780 static int 1781 i915_gem_evict_something(struct drm_device *dev) 1782 { 1783 drm_i915_private_t *dev_priv = dev->dev_private; 1784 struct drm_gem_object *obj; 1785 struct drm_i915_gem_object *obj_priv; 1786 int ret = 0; 1787 1788 for (;;) { 1789 /* If there's an inactive buffer available now, grab it 1790 * and be done. 1791 */ 1792 if (!list_empty(&dev_priv->mm.inactive_list)) { 1793 obj_priv = list_first_entry(&dev_priv->mm.inactive_list, 1794 struct drm_i915_gem_object, 1795 list); 1796 obj = obj_priv->obj; 1797 BUG_ON(obj_priv->pin_count != 0); 1798 #if WATCH_LRU 1799 DRM_INFO("%s: evicting %p\n", __func__, obj); 1800 #endif 1801 BUG_ON(obj_priv->active); 1802 1803 /* Wait on the rendering and unbind the buffer. */ 1804 ret = i915_gem_object_unbind(obj); 1805 break; 1806 } 1807 1808 /* If we didn't get anything, but the ring is still processing 1809 * things, wait for one of those things to finish and hopefully 1810 * leave us a buffer to evict. 1811 */ 1812 if (!list_empty(&dev_priv->mm.request_list)) { 1813 struct drm_i915_gem_request *request; 1814 1815 request = list_first_entry(&dev_priv->mm.request_list, 1816 struct drm_i915_gem_request, 1817 list); 1818 1819 ret = i915_wait_request(dev, request->seqno); 1820 if (ret) 1821 break; 1822 1823 /* if waiting caused an object to become inactive, 1824 * then loop around and wait for it. Otherwise, we 1825 * assume that waiting freed and unbound something, 1826 * so there should now be some space in the GTT 1827 */ 1828 if (!list_empty(&dev_priv->mm.inactive_list)) 1829 continue; 1830 break; 1831 } 1832 1833 /* If we didn't have anything on the request list but there 1834 * are buffers awaiting a flush, emit one and try again. 1835 * When we wait on it, those buffers waiting for that flush 1836 * will get moved to inactive. 
1837 */ 1838 if (!list_empty(&dev_priv->mm.flushing_list)) { 1839 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 1840 struct drm_i915_gem_object, 1841 list); 1842 obj = obj_priv->obj; 1843 1844 i915_gem_flush(dev, 1845 obj->write_domain, 1846 obj->write_domain); 1847 i915_add_request(dev, obj->write_domain); 1848 1849 obj = NULL; 1850 continue; 1851 } 1852 1853 DRM_ERROR("inactive empty %d request empty %d " 1854 "flushing empty %d\n", 1855 list_empty(&dev_priv->mm.inactive_list), 1856 list_empty(&dev_priv->mm.request_list), 1857 list_empty(&dev_priv->mm.flushing_list)); 1858 /* If we didn't do any of the above, there's nothing to be done 1859 * and we just can't fit it in. 1860 */ 1861 return -ENOMEM; 1862 } 1863 return ret; 1864 } 1865 1866 static int 1867 i915_gem_evict_everything(struct drm_device *dev) 1868 { 1869 int ret; 1870 1871 for (;;) { 1872 ret = i915_gem_evict_something(dev); 1873 if (ret != 0) 1874 break; 1875 } 1876 if (ret == -ENOMEM) 1877 return 0; 1878 return ret; 1879 } 1880 1881 static int 1882 i915_gem_object_get_pages(struct drm_gem_object *obj) 1883 { 1884 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1885 int page_count, i; 1886 struct address_space *mapping; 1887 struct inode *inode; 1888 struct page *page; 1889 int ret; 1890 1891 if (obj_priv->pages_refcount++ != 0) 1892 return 0; 1893 1894 /* Get the list of pages out of our struct file. They'll be pinned 1895 * at this point until we release them. 1896 */ 1897 page_count = obj->size / PAGE_SIZE; 1898 BUG_ON(obj_priv->pages != NULL); 1899 obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), 1900 DRM_MEM_DRIVER); 1901 if (obj_priv->pages == NULL) { 1902 DRM_ERROR("Faled to allocate page list\n"); 1903 obj_priv->pages_refcount--; 1904 return -ENOMEM; 1905 } 1906 1907 inode = obj->filp->f_path.dentry->d_inode; 1908 mapping = inode->i_mapping; 1909 for (i = 0; i < page_count; i++) { 1910 page = read_mapping_page(mapping, i, NULL); 1911 if (IS_ERR(page)) { 1912 ret = PTR_ERR(page); 1913 DRM_ERROR("read_mapping_page failed: %d\n", ret); 1914 i915_gem_object_put_pages(obj); 1915 return ret; 1916 } 1917 obj_priv->pages[i] = page; 1918 } 1919 return 0; 1920 } 1921 1922 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) 1923 { 1924 struct drm_gem_object *obj = reg->obj; 1925 struct drm_device *dev = obj->dev; 1926 drm_i915_private_t *dev_priv = dev->dev_private; 1927 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1928 int regnum = obj_priv->fence_reg; 1929 uint64_t val; 1930 1931 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 1932 0xfffff000) << 32; 1933 val |= obj_priv->gtt_offset & 0xfffff000; 1934 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 1935 if (obj_priv->tiling_mode == I915_TILING_Y) 1936 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 1937 val |= I965_FENCE_REG_VALID; 1938 1939 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); 1940 } 1941 1942 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) 1943 { 1944 struct drm_gem_object *obj = reg->obj; 1945 struct drm_device *dev = obj->dev; 1946 drm_i915_private_t *dev_priv = dev->dev_private; 1947 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1948 int regnum = obj_priv->fence_reg; 1949 int tile_width; 1950 uint32_t fence_reg, val; 1951 uint32_t pitch_val; 1952 1953 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 1954 (obj_priv->gtt_offset & (obj->size - 1))) { 1955 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n", 1956 __func__, 
obj_priv->gtt_offset, obj->size); 1957 return; 1958 } 1959 1960 if (obj_priv->tiling_mode == I915_TILING_Y && 1961 HAS_128_BYTE_Y_TILING(dev)) 1962 tile_width = 128; 1963 else 1964 tile_width = 512; 1965 1966 /* Note: pitch better be a power of two tile widths */ 1967 pitch_val = obj_priv->stride / tile_width; 1968 pitch_val = ffs(pitch_val) - 1; 1969 1970 val = obj_priv->gtt_offset; 1971 if (obj_priv->tiling_mode == I915_TILING_Y) 1972 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 1973 val |= I915_FENCE_SIZE_BITS(obj->size); 1974 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 1975 val |= I830_FENCE_REG_VALID; 1976 1977 if (regnum < 8) 1978 fence_reg = FENCE_REG_830_0 + (regnum * 4); 1979 else 1980 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); 1981 I915_WRITE(fence_reg, val); 1982 } 1983 1984 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) 1985 { 1986 struct drm_gem_object *obj = reg->obj; 1987 struct drm_device *dev = obj->dev; 1988 drm_i915_private_t *dev_priv = dev->dev_private; 1989 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1990 int regnum = obj_priv->fence_reg; 1991 uint32_t val; 1992 uint32_t pitch_val; 1993 1994 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 1995 (obj_priv->gtt_offset & (obj->size - 1))) { 1996 WARN(1, "%s: object 0x%08x not 1M or size aligned\n", 1997 __func__, obj_priv->gtt_offset); 1998 return; 1999 } 2000 2001 pitch_val = (obj_priv->stride / 128) - 1; 2002 2003 val = obj_priv->gtt_offset; 2004 if (obj_priv->tiling_mode == I915_TILING_Y) 2005 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2006 val |= I830_FENCE_SIZE_BITS(obj->size); 2007 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2008 val |= I830_FENCE_REG_VALID; 2009 2010 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); 2011 2012 } 2013 2014 /** 2015 * i915_gem_object_get_fence_reg - set up a fence reg for an object 2016 * @obj: object to map through a fence reg 2017 * @write: object is about to be written 2018 * 2019 * When mapping objects through the GTT, userspace wants to be able to write 2020 * to them without having to worry about swizzling if the object is tiled. 2021 * 2022 * This function walks the fence regs looking for a free one for @obj, 2023 * stealing one if it can't find any. 2024 * 2025 * It then sets up the reg based on the object's properties: address, pitch 2026 * and tiling format. 
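 *
 * An illustrative call pattern (a sketch, not a requirement imposed by
 * this file) for a tiled object about to be written through its GTT
 * mapping, assuming obj_priv == obj->driver_private as elsewhere here
 * and a caller-chosen alignment:
 *
 *	ret = i915_gem_object_pin(obj, alignment);
 *	if (ret == 0 && obj_priv->tiling_mode != I915_TILING_NONE &&
 *	    obj_priv->fence_reg == I915_FENCE_REG_NONE)
 *		ret = i915_gem_object_get_fence_reg(obj, true);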
2027 */ 2028 static int 2029 i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write) 2030 { 2031 struct drm_device *dev = obj->dev; 2032 struct drm_i915_private *dev_priv = dev->dev_private; 2033 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2034 struct drm_i915_fence_reg *reg = NULL; 2035 struct drm_i915_gem_object *old_obj_priv = NULL; 2036 int i, ret, avail; 2037 2038 switch (obj_priv->tiling_mode) { 2039 case I915_TILING_NONE: 2040 WARN(1, "allocating a fence for non-tiled object?\n"); 2041 break; 2042 case I915_TILING_X: 2043 if (!obj_priv->stride) 2044 return -EINVAL; 2045 WARN((obj_priv->stride & (512 - 1)), 2046 "object 0x%08x is X tiled but has non-512B pitch\n", 2047 obj_priv->gtt_offset); 2048 break; 2049 case I915_TILING_Y: 2050 if (!obj_priv->stride) 2051 return -EINVAL; 2052 WARN((obj_priv->stride & (128 - 1)), 2053 "object 0x%08x is Y tiled but has non-128B pitch\n", 2054 obj_priv->gtt_offset); 2055 break; 2056 } 2057 2058 /* First try to find a free reg */ 2059 try_again: 2060 avail = 0; 2061 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2062 reg = &dev_priv->fence_regs[i]; 2063 if (!reg->obj) 2064 break; 2065 2066 old_obj_priv = reg->obj->driver_private; 2067 if (!old_obj_priv->pin_count) 2068 avail++; 2069 } 2070 2071 /* None available, try to steal one or wait for a user to finish */ 2072 if (i == dev_priv->num_fence_regs) { 2073 uint32_t seqno = dev_priv->mm.next_gem_seqno; 2074 loff_t offset; 2075 2076 if (avail == 0) 2077 return -ENOMEM; 2078 2079 for (i = dev_priv->fence_reg_start; 2080 i < dev_priv->num_fence_regs; i++) { 2081 uint32_t this_seqno; 2082 2083 reg = &dev_priv->fence_regs[i]; 2084 old_obj_priv = reg->obj->driver_private; 2085 2086 if (old_obj_priv->pin_count) 2087 continue; 2088 2089 /* i915 uses fences for GPU access to tiled buffers */ 2090 if (IS_I965G(dev) || !old_obj_priv->active) 2091 break; 2092 2093 /* find the seqno of the first available fence */ 2094 this_seqno = old_obj_priv->last_rendering_seqno; 2095 if (this_seqno != 0 && 2096 reg->obj->write_domain == 0 && 2097 i915_seqno_passed(seqno, this_seqno)) 2098 seqno = this_seqno; 2099 } 2100 2101 /* 2102 * Now things get ugly... we have to wait for one of the 2103 * objects to finish before trying again. 2104 */ 2105 if (i == dev_priv->num_fence_regs) { 2106 if (seqno == dev_priv->mm.next_gem_seqno) { 2107 i915_gem_flush(dev, 2108 I915_GEM_GPU_DOMAINS, 2109 I915_GEM_GPU_DOMAINS); 2110 seqno = i915_add_request(dev, 2111 I915_GEM_GPU_DOMAINS); 2112 if (seqno == 0) 2113 return -ENOMEM; 2114 } 2115 2116 ret = i915_wait_request(dev, seqno); 2117 if (ret) 2118 return ret; 2119 goto try_again; 2120 } 2121 2122 BUG_ON(old_obj_priv->active || 2123 (reg->obj->write_domain & I915_GEM_GPU_DOMAINS)); 2124 2125 /* 2126 * Zap this virtual mapping so we can set up a fence again 2127 * for this object next time we need it. 
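 * The expectation (not enforced here) is that a later CPU fault on the
 * old mapping will set up a fresh fence before access resumes.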
2128 */ 2129 offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT; 2130 if (dev->dev_mapping) 2131 unmap_mapping_range(dev->dev_mapping, offset, 2132 reg->obj->size, 1); 2133 old_obj_priv->fence_reg = I915_FENCE_REG_NONE; 2134 } 2135 2136 obj_priv->fence_reg = i; 2137 reg->obj = obj; 2138 2139 if (IS_I965G(dev)) 2140 i965_write_fence_reg(reg); 2141 else if (IS_I9XX(dev)) 2142 i915_write_fence_reg(reg); 2143 else 2144 i830_write_fence_reg(reg); 2145 2146 return 0; 2147 } 2148 2149 /** 2150 * i915_gem_clear_fence_reg - clear out fence register info 2151 * @obj: object to clear 2152 * 2153 * Zeroes out the fence register itself and clears out the associated 2154 * data structures in dev_priv and obj_priv. 2155 */ 2156 static void 2157 i915_gem_clear_fence_reg(struct drm_gem_object *obj) 2158 { 2159 struct drm_device *dev = obj->dev; 2160 drm_i915_private_t *dev_priv = dev->dev_private; 2161 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2162 2163 if (IS_I965G(dev)) 2164 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); 2165 else { 2166 uint32_t fence_reg; 2167 2168 if (obj_priv->fence_reg < 8) 2169 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4; 2170 else 2171 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 2172 8) * 4; 2173 2174 I915_WRITE(fence_reg, 0); 2175 } 2176 2177 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; 2178 obj_priv->fence_reg = I915_FENCE_REG_NONE; 2179 } 2180 2181 /** 2182 * Finds free space in the GTT aperture and binds the object there. 2183 */ 2184 static int 2185 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) 2186 { 2187 struct drm_device *dev = obj->dev; 2188 drm_i915_private_t *dev_priv = dev->dev_private; 2189 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2190 struct drm_mm_node *free_space; 2191 int page_count, ret; 2192 2193 if (dev_priv->mm.suspended) 2194 return -EBUSY; 2195 if (alignment == 0) 2196 alignment = i915_gem_get_gtt_alignment(obj); 2197 if (alignment & (PAGE_SIZE - 1)) { 2198 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2199 return -EINVAL; 2200 } 2201 2202 search_free: 2203 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2204 obj->size, alignment, 0); 2205 if (free_space != NULL) { 2206 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size, 2207 alignment); 2208 if (obj_priv->gtt_space != NULL) { 2209 obj_priv->gtt_space->private = obj; 2210 obj_priv->gtt_offset = obj_priv->gtt_space->start; 2211 } 2212 } 2213 if (obj_priv->gtt_space == NULL) { 2214 /* If the gtt is empty and we're still having trouble 2215 * fitting our object in, we're out of memory. 
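 * Otherwise evict one buffer at a time and retry the search;
 * i915_gem_evict_something() returns an error once nothing more can
 * be freed.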
2216 */ 2217 #if WATCH_LRU 2218 DRM_INFO("%s: GTT full, evicting something\n", __func__); 2219 #endif 2220 if (list_empty(&dev_priv->mm.inactive_list) && 2221 list_empty(&dev_priv->mm.flushing_list) && 2222 list_empty(&dev_priv->mm.active_list)) { 2223 DRM_ERROR("GTT full, but LRU list empty\n"); 2224 return -ENOMEM; 2225 } 2226 2227 ret = i915_gem_evict_something(dev); 2228 if (ret != 0) { 2229 if (ret != -ERESTARTSYS) 2230 DRM_ERROR("Failed to evict a buffer %d\n", ret); 2231 return ret; 2232 } 2233 goto search_free; 2234 } 2235 2236 #if WATCH_BUF 2237 DRM_INFO("Binding object of size %d at 0x%08x\n", 2238 obj->size, obj_priv->gtt_offset); 2239 #endif 2240 ret = i915_gem_object_get_pages(obj); 2241 if (ret) { 2242 drm_mm_put_block(obj_priv->gtt_space); 2243 obj_priv->gtt_space = NULL; 2244 return ret; 2245 } 2246 2247 page_count = obj->size / PAGE_SIZE; 2248 /* Create an AGP memory structure pointing at our pages, and bind it 2249 * into the GTT. 2250 */ 2251 obj_priv->agp_mem = drm_agp_bind_pages(dev, 2252 obj_priv->pages, 2253 page_count, 2254 obj_priv->gtt_offset, 2255 obj_priv->agp_type); 2256 if (obj_priv->agp_mem == NULL) { 2257 i915_gem_object_put_pages(obj); 2258 drm_mm_put_block(obj_priv->gtt_space); 2259 obj_priv->gtt_space = NULL; 2260 return -ENOMEM; 2261 } 2262 atomic_inc(&dev->gtt_count); 2263 atomic_add(obj->size, &dev->gtt_memory); 2264 2265 /* Assert that the object is not currently in any GPU domain. As it 2266 * wasn't in the GTT, there shouldn't be any way it could have been in 2267 * a GPU cache 2268 */ 2269 BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2270 BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 2271 2272 return 0; 2273 } 2274 2275 void 2276 i915_gem_clflush_object(struct drm_gem_object *obj) 2277 { 2278 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2279 2280 /* If we don't have a page list set up, then we're not pinned 2281 * to GPU, and we can ignore the cache flush because it'll happen 2282 * again at bind time. 2283 */ 2284 if (obj_priv->pages == NULL) 2285 return; 2286 2287 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); 2288 } 2289 2290 /** Flushes any GPU write domain for the object if it's dirty. */ 2291 static void 2292 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) 2293 { 2294 struct drm_device *dev = obj->dev; 2295 uint32_t seqno; 2296 2297 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) 2298 return; 2299 2300 /* Queue the GPU write cache flushing we need. */ 2301 i915_gem_flush(dev, 0, obj->write_domain); 2302 seqno = i915_add_request(dev, obj->write_domain); 2303 obj->write_domain = 0; 2304 i915_gem_object_move_to_active(obj, seqno); 2305 } 2306 2307 /** Flushes the GTT write domain for the object if it's dirty. */ 2308 static void 2309 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) 2310 { 2311 if (obj->write_domain != I915_GEM_DOMAIN_GTT) 2312 return; 2313 2314 /* No actual flushing is required for the GTT write domain. Writes 2315 * to it immediately go to main memory as far as we know, so there's 2316 * no chipset flush. It also doesn't land in render cache. 2317 */ 2318 obj->write_domain = 0; 2319 } 2320 2321 /** Flushes the CPU write domain for the object if it's dirty. 
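 *
 * This clflushes the object's backing pages and then flushes the
 * chipset write buffers so that subsequent GPU reads see the data.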
*/ 2322 static void 2323 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) 2324 { 2325 struct drm_device *dev = obj->dev; 2326 2327 if (obj->write_domain != I915_GEM_DOMAIN_CPU) 2328 return; 2329 2330 i915_gem_clflush_object(obj); 2331 drm_agp_chipset_flush(dev); 2332 obj->write_domain = 0; 2333 } 2334 2335 /** 2336 * Moves a single object to the GTT read, and possibly write domain. 2337 * 2338 * This function returns when the move is complete, including waiting on 2339 * flushes to occur. 2340 */ 2341 int 2342 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) 2343 { 2344 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2345 int ret; 2346 2347 /* Not valid to be called on unbound objects. */ 2348 if (obj_priv->gtt_space == NULL) 2349 return -EINVAL; 2350 2351 i915_gem_object_flush_gpu_write_domain(obj); 2352 /* Wait on any GPU rendering and flushing to occur. */ 2353 ret = i915_gem_object_wait_rendering(obj); 2354 if (ret != 0) 2355 return ret; 2356 2357 /* If we're writing through the GTT domain, then CPU and GPU caches 2358 * will need to be invalidated at next use. 2359 */ 2360 if (write) 2361 obj->read_domains &= I915_GEM_DOMAIN_GTT; 2362 2363 i915_gem_object_flush_cpu_write_domain(obj); 2364 2365 /* It should now be out of any other write domains, and we can update 2366 * the domain values for our changes. 2367 */ 2368 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2369 obj->read_domains |= I915_GEM_DOMAIN_GTT; 2370 if (write) { 2371 obj->write_domain = I915_GEM_DOMAIN_GTT; 2372 obj_priv->dirty = 1; 2373 } 2374 2375 return 0; 2376 } 2377 2378 /** 2379 * Moves a single object to the CPU read, and possibly write domain. 2380 * 2381 * This function returns when the move is complete, including waiting on 2382 * flushes to occur. 2383 */ 2384 static int 2385 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) 2386 { 2387 int ret; 2388 2389 i915_gem_object_flush_gpu_write_domain(obj); 2390 /* Wait on any GPU rendering and flushing to occur. */ 2391 ret = i915_gem_object_wait_rendering(obj); 2392 if (ret != 0) 2393 return ret; 2394 2395 i915_gem_object_flush_gtt_write_domain(obj); 2396 2397 /* If we have a partially-valid cache of the object in the CPU, 2398 * finish invalidating it and free the per-page flags. 2399 */ 2400 i915_gem_object_set_to_full_cpu_read_domain(obj); 2401 2402 /* Flush the CPU cache if it's still invalid. */ 2403 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2404 i915_gem_clflush_object(obj); 2405 2406 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2407 } 2408 2409 /* It should now be out of any other write domains, and we can update 2410 * the domain values for our changes. 2411 */ 2412 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2413 2414 /* If we're writing through the CPU, then the GPU read domains will 2415 * need to be invalidated at next use. 2416 */ 2417 if (write) { 2418 obj->read_domains &= I915_GEM_DOMAIN_CPU; 2419 obj->write_domain = I915_GEM_DOMAIN_CPU; 2420 } 2421 2422 return 0; 2423 } 2424 2425 /* 2426 * Set the next domain for the specified object. This 2427 * may not actually perform the necessary flushing/invaliding though, 2428 * as that may want to be batched with other set_domain operations 2429 * 2430 * This is (we hope) the only really tricky part of gem. The goal 2431 * is fairly simple -- track which caches hold bits of the object 2432 * and make sure they remain coherent. A few concrete examples may 2433 * help to explain how it works. 
For shorthand, we use the notation 2434 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the 2435 * a pair of read and write domain masks. 2436 * 2437 * Case 1: the batch buffer 2438 * 2439 * 1. Allocated 2440 * 2. Written by CPU 2441 * 3. Mapped to GTT 2442 * 4. Read by GPU 2443 * 5. Unmapped from GTT 2444 * 6. Freed 2445 * 2446 * Let's take these a step at a time 2447 * 2448 * 1. Allocated 2449 * Pages allocated from the kernel may still have 2450 * cache contents, so we set them to (CPU, CPU) always. 2451 * 2. Written by CPU (using pwrite) 2452 * The pwrite function calls set_domain (CPU, CPU) and 2453 * this function does nothing (as nothing changes) 2454 * 3. Mapped by GTT 2455 * This function asserts that the object is not 2456 * currently in any GPU-based read or write domains 2457 * 4. Read by GPU 2458 * i915_gem_execbuffer calls set_domain (COMMAND, 0). 2459 * As write_domain is zero, this function adds in the 2460 * current read domains (CPU+COMMAND, 0). 2461 * flush_domains is set to CPU. 2462 * invalidate_domains is set to COMMAND 2463 * clflush is run to get data out of the CPU caches 2464 * then i915_dev_set_domain calls i915_gem_flush to 2465 * emit an MI_FLUSH and drm_agp_chipset_flush 2466 * 5. Unmapped from GTT 2467 * i915_gem_object_unbind calls set_domain (CPU, CPU) 2468 * flush_domains and invalidate_domains end up both zero 2469 * so no flushing/invalidating happens 2470 * 6. Freed 2471 * yay, done 2472 * 2473 * Case 2: The shared render buffer 2474 * 2475 * 1. Allocated 2476 * 2. Mapped to GTT 2477 * 3. Read/written by GPU 2478 * 4. set_domain to (CPU,CPU) 2479 * 5. Read/written by CPU 2480 * 6. Read/written by GPU 2481 * 2482 * 1. Allocated 2483 * Same as last example, (CPU, CPU) 2484 * 2. Mapped to GTT 2485 * Nothing changes (assertions find that it is not in the GPU) 2486 * 3. Read/written by GPU 2487 * execbuffer calls set_domain (RENDER, RENDER) 2488 * flush_domains gets CPU 2489 * invalidate_domains gets GPU 2490 * clflush (obj) 2491 * MI_FLUSH and drm_agp_chipset_flush 2492 * 4. set_domain (CPU, CPU) 2493 * flush_domains gets GPU 2494 * invalidate_domains gets CPU 2495 * wait_rendering (obj) to make sure all drawing is complete. 2496 * This will include an MI_FLUSH to get the data from GPU 2497 * to memory 2498 * clflush (obj) to invalidate the CPU cache 2499 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) 2500 * 5. Read/written by CPU 2501 * cache lines are loaded and dirtied 2502 * 6. Read written by GPU 2503 * Same as last GPU access 2504 * 2505 * Case 3: The constant buffer 2506 * 2507 * 1. Allocated 2508 * 2. Written by CPU 2509 * 3. Read by GPU 2510 * 4. Updated (written) by CPU again 2511 * 5. Read by GPU 2512 * 2513 * 1. Allocated 2514 * (CPU, CPU) 2515 * 2. Written by CPU 2516 * (CPU, CPU) 2517 * 3. Read by GPU 2518 * (CPU+RENDER, 0) 2519 * flush_domains = CPU 2520 * invalidate_domains = RENDER 2521 * clflush (obj) 2522 * MI_FLUSH 2523 * drm_agp_chipset_flush 2524 * 4. Updated (written) by CPU again 2525 * (CPU, CPU) 2526 * flush_domains = 0 (no previous write domain) 2527 * invalidate_domains = 0 (no new read domains) 2528 * 5. 
Read by GPU 2529 * (CPU+RENDER, 0) 2530 * flush_domains = CPU 2531 * invalidate_domains = RENDER 2532 * clflush (obj) 2533 * MI_FLUSH 2534 * drm_agp_chipset_flush 2535 */ 2536 static void 2537 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) 2538 { 2539 struct drm_device *dev = obj->dev; 2540 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2541 uint32_t invalidate_domains = 0; 2542 uint32_t flush_domains = 0; 2543 2544 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); 2545 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); 2546 2547 #if WATCH_BUF 2548 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", 2549 __func__, obj, 2550 obj->read_domains, obj->pending_read_domains, 2551 obj->write_domain, obj->pending_write_domain); 2552 #endif 2553 /* 2554 * If the object isn't moving to a new write domain, 2555 * let the object stay in multiple read domains 2556 */ 2557 if (obj->pending_write_domain == 0) 2558 obj->pending_read_domains |= obj->read_domains; 2559 else 2560 obj_priv->dirty = 1; 2561 2562 /* 2563 * Flush the current write domain if 2564 * the new read domains don't match. Invalidate 2565 * any read domains which differ from the old 2566 * write domain 2567 */ 2568 if (obj->write_domain && 2569 obj->write_domain != obj->pending_read_domains) { 2570 flush_domains |= obj->write_domain; 2571 invalidate_domains |= 2572 obj->pending_read_domains & ~obj->write_domain; 2573 } 2574 /* 2575 * Invalidate any read caches which may have 2576 * stale data. That is, any new read domains. 2577 */ 2578 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains; 2579 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) { 2580 #if WATCH_BUF 2581 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n", 2582 __func__, flush_domains, invalidate_domains); 2583 #endif 2584 i915_gem_clflush_object(obj); 2585 } 2586 2587 /* The actual obj->write_domain will be updated with 2588 * pending_write_domain after we emit the accumulated flush for all 2589 * of our domain changes in execbuffers (which clears objects' 2590 * write_domains). So if we have a current write domain that we 2591 * aren't changing, set pending_write_domain to that. 2592 */ 2593 if (flush_domains == 0 && obj->pending_write_domain == 0) 2594 obj->pending_write_domain = obj->write_domain; 2595 obj->read_domains = obj->pending_read_domains; 2596 2597 dev->invalidate_domains |= invalidate_domains; 2598 dev->flush_domains |= flush_domains; 2599 #if WATCH_BUF 2600 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n", 2601 __func__, 2602 obj->read_domains, obj->write_domain, 2603 dev->invalidate_domains, dev->flush_domains); 2604 #endif 2605 } 2606 2607 /** 2608 * Moves the object from a partially CPU read to a full one. 2609 * 2610 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), 2611 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). 2612 */ 2613 static void 2614 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj) 2615 { 2616 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2617 2618 if (!obj_priv->page_cpu_valid) 2619 return; 2620 2621 /* If we're partially in the CPU read domain, finish moving it in. 
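 * Pages already marked in page_cpu_valid were flushed when the partial
 * range was set up, so only the remaining pages need a clflush here.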
2622 */ 2623 if (obj->read_domains & I915_GEM_DOMAIN_CPU) { 2624 int i; 2625 2626 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) { 2627 if (obj_priv->page_cpu_valid[i]) 2628 continue; 2629 drm_clflush_pages(obj_priv->pages + i, 1); 2630 } 2631 } 2632 2633 /* Free the page_cpu_valid mappings which are now stale, whether 2634 * or not we've got I915_GEM_DOMAIN_CPU. 2635 */ 2636 drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE, 2637 DRM_MEM_DRIVER); 2638 obj_priv->page_cpu_valid = NULL; 2639 } 2640 2641 /** 2642 * Set the CPU read domain on a range of the object. 2643 * 2644 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's 2645 * not entirely valid. The page_cpu_valid member of the object flags which 2646 * pages have been flushed, and will be respected by 2647 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping 2648 * of the whole object. 2649 * 2650 * This function returns when the move is complete, including waiting on 2651 * flushes to occur. 2652 */ 2653 static int 2654 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, 2655 uint64_t offset, uint64_t size) 2656 { 2657 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2658 int i, ret; 2659 2660 if (offset == 0 && size == obj->size) 2661 return i915_gem_object_set_to_cpu_domain(obj, 0); 2662 2663 i915_gem_object_flush_gpu_write_domain(obj); 2664 /* Wait on any GPU rendering and flushing to occur. */ 2665 ret = i915_gem_object_wait_rendering(obj); 2666 if (ret != 0) 2667 return ret; 2668 i915_gem_object_flush_gtt_write_domain(obj); 2669 2670 /* If we're already fully in the CPU read domain, we're done. */ 2671 if (obj_priv->page_cpu_valid == NULL && 2672 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0) 2673 return 0; 2674 2675 /* Otherwise, create/clear the per-page CPU read domain flag if we're 2676 * newly adding I915_GEM_DOMAIN_CPU 2677 */ 2678 if (obj_priv->page_cpu_valid == NULL) { 2679 obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE, 2680 DRM_MEM_DRIVER); 2681 if (obj_priv->page_cpu_valid == NULL) 2682 return -ENOMEM; 2683 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) 2684 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE); 2685 2686 /* Flush the cache on any pages that are still invalid from the CPU's 2687 * perspective. 2688 */ 2689 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; 2690 i++) { 2691 if (obj_priv->page_cpu_valid[i]) 2692 continue; 2693 2694 drm_clflush_pages(obj_priv->pages + i, 1); 2695 2696 obj_priv->page_cpu_valid[i] = 1; 2697 } 2698 2699 /* It should now be out of any other write domains, and we can update 2700 * the domain values for our changes. 2701 */ 2702 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2703 2704 obj->read_domains |= I915_GEM_DOMAIN_CPU; 2705 2706 return 0; 2707 } 2708 2709 /** 2710 * Pin an object to the GTT and evaluate the relocations landing in it. 2711 */ 2712 static int 2713 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 2714 struct drm_file *file_priv, 2715 struct drm_i915_gem_exec_object *entry, 2716 struct drm_i915_gem_relocation_entry *relocs) 2717 { 2718 struct drm_device *dev = obj->dev; 2719 drm_i915_private_t *dev_priv = dev->dev_private; 2720 struct drm_i915_gem_object *obj_priv = obj->driver_private; 2721 int i, ret; 2722 void __iomem *reloc_page; 2723 2724 /* Choose the GTT offset for our buffer and put it there. 
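 * Pinning also binds the object into the GTT if it is not already
 * resident, so the entry->offset recorded just below reflects the
 * final location and can be copied back to userspace later.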
*/ 2725 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 2726 if (ret) 2727 return ret; 2728 2729 entry->offset = obj_priv->gtt_offset; 2730 2731 /* Apply the relocations, using the GTT aperture to avoid cache 2732 * flushing requirements. 2733 */ 2734 for (i = 0; i < entry->relocation_count; i++) { 2735 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 2736 struct drm_gem_object *target_obj; 2737 struct drm_i915_gem_object *target_obj_priv; 2738 uint32_t reloc_val, reloc_offset; 2739 uint32_t __iomem *reloc_entry; 2740 2741 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 2742 reloc->target_handle); 2743 if (target_obj == NULL) { 2744 i915_gem_object_unpin(obj); 2745 return -EBADF; 2746 } 2747 target_obj_priv = target_obj->driver_private; 2748 2749 /* The target buffer should have appeared before us in the 2750 * exec_object list, so it should have a GTT space bound by now. 2751 */ 2752 if (target_obj_priv->gtt_space == NULL) { 2753 DRM_ERROR("No GTT space found for object %d\n", 2754 reloc->target_handle); 2755 drm_gem_object_unreference(target_obj); 2756 i915_gem_object_unpin(obj); 2757 return -EINVAL; 2758 } 2759 2760 if (reloc->offset > obj->size - 4) { 2761 DRM_ERROR("Relocation beyond object bounds: " 2762 "obj %p target %d offset %d size %d.\n", 2763 obj, reloc->target_handle, 2764 (int) reloc->offset, (int) obj->size); 2765 drm_gem_object_unreference(target_obj); 2766 i915_gem_object_unpin(obj); 2767 return -EINVAL; 2768 } 2769 if (reloc->offset & 3) { 2770 DRM_ERROR("Relocation not 4-byte aligned: " 2771 "obj %p target %d offset %d.\n", 2772 obj, reloc->target_handle, 2773 (int) reloc->offset); 2774 drm_gem_object_unreference(target_obj); 2775 i915_gem_object_unpin(obj); 2776 return -EINVAL; 2777 } 2778 2779 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 2780 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 2781 DRM_ERROR("reloc with read/write CPU domains: " 2782 "obj %p target %d offset %d " 2783 "read %08x write %08x", 2784 obj, reloc->target_handle, 2785 (int) reloc->offset, 2786 reloc->read_domains, 2787 reloc->write_domain); 2788 drm_gem_object_unreference(target_obj); 2789 i915_gem_object_unpin(obj); 2790 return -EINVAL; 2791 } 2792 2793 if (reloc->write_domain && target_obj->pending_write_domain && 2794 reloc->write_domain != target_obj->pending_write_domain) { 2795 DRM_ERROR("Write domain conflict: " 2796 "obj %p target %d offset %d " 2797 "new %08x old %08x\n", 2798 obj, reloc->target_handle, 2799 (int) reloc->offset, 2800 reloc->write_domain, 2801 target_obj->pending_write_domain); 2802 drm_gem_object_unreference(target_obj); 2803 i915_gem_object_unpin(obj); 2804 return -EINVAL; 2805 } 2806 2807 #if WATCH_RELOC 2808 DRM_INFO("%s: obj %p offset %08x target %d " 2809 "read %08x write %08x gtt %08x " 2810 "presumed %08x delta %08x\n", 2811 __func__, 2812 obj, 2813 (int) reloc->offset, 2814 (int) reloc->target_handle, 2815 (int) reloc->read_domains, 2816 (int) reloc->write_domain, 2817 (int) target_obj_priv->gtt_offset, 2818 (int) reloc->presumed_offset, 2819 reloc->delta); 2820 #endif 2821 2822 target_obj->pending_read_domains |= reloc->read_domains; 2823 target_obj->pending_write_domain |= reloc->write_domain; 2824 2825 /* If the relocation already has the right value in it, no 2826 * more work needs to be done. 
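 * Otherwise the dword is rewritten through the GTT mapping further
 * down.  Worked example (numbers made up for illustration): if the
 * target object sits at GTT offset 0x00100000 and reloc->delta is
 * 0x40, the dword at reloc->offset in this object becomes 0x00100040.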
2827 */ 2828 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 2829 drm_gem_object_unreference(target_obj); 2830 continue; 2831 } 2832 2833 ret = i915_gem_object_set_to_gtt_domain(obj, 1); 2834 if (ret != 0) { 2835 drm_gem_object_unreference(target_obj); 2836 i915_gem_object_unpin(obj); 2837 return -EINVAL; 2838 } 2839 2840 /* Map the page containing the relocation we're going to 2841 * perform. 2842 */ 2843 reloc_offset = obj_priv->gtt_offset + reloc->offset; 2844 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 2845 (reloc_offset & 2846 ~(PAGE_SIZE - 1))); 2847 reloc_entry = (uint32_t __iomem *)(reloc_page + 2848 (reloc_offset & (PAGE_SIZE - 1))); 2849 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 2850 2851 #if WATCH_BUF 2852 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", 2853 obj, (unsigned int) reloc->offset, 2854 readl(reloc_entry), reloc_val); 2855 #endif 2856 writel(reloc_val, reloc_entry); 2857 io_mapping_unmap_atomic(reloc_page); 2858 2859 /* The updated presumed offset for this entry will be 2860 * copied back out to the user. 2861 */ 2862 reloc->presumed_offset = target_obj_priv->gtt_offset; 2863 2864 drm_gem_object_unreference(target_obj); 2865 } 2866 2867 #if WATCH_BUF 2868 if (0) 2869 i915_gem_dump_object(obj, 128, __func__, ~0); 2870 #endif 2871 return 0; 2872 } 2873 2874 /** Dispatch a batchbuffer to the ring 2875 */ 2876 static int 2877 i915_dispatch_gem_execbuffer(struct drm_device *dev, 2878 struct drm_i915_gem_execbuffer *exec, 2879 struct drm_clip_rect *cliprects, 2880 uint64_t exec_offset) 2881 { 2882 drm_i915_private_t *dev_priv = dev->dev_private; 2883 int nbox = exec->num_cliprects; 2884 int i = 0, count; 2885 uint32_t exec_start, exec_len; 2886 RING_LOCALS; 2887 2888 exec_start = (uint32_t) exec_offset + exec->batch_start_offset; 2889 exec_len = (uint32_t) exec->batch_len; 2890 2891 if ((exec_start | exec_len) & 0x7) { 2892 DRM_ERROR("alignment\n"); 2893 return -EINVAL; 2894 } 2895 2896 if (!exec_start) 2897 return -EINVAL; 2898 2899 count = nbox ? nbox : 1; 2900 2901 for (i = 0; i < count; i++) { 2902 if (i < nbox) { 2903 int ret = i915_emit_box(dev, cliprects, i, 2904 exec->DR1, exec->DR4); 2905 if (ret) 2906 return ret; 2907 } 2908 2909 if (IS_I830(dev) || IS_845G(dev)) { 2910 BEGIN_LP_RING(4); 2911 OUT_RING(MI_BATCH_BUFFER); 2912 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 2913 OUT_RING(exec_start + exec_len - 4); 2914 OUT_RING(0); 2915 ADVANCE_LP_RING(); 2916 } else { 2917 BEGIN_LP_RING(2); 2918 if (IS_I965G(dev)) { 2919 OUT_RING(MI_BATCH_BUFFER_START | 2920 (2 << 6) | 2921 MI_BATCH_NON_SECURE_I965); 2922 OUT_RING(exec_start); 2923 } else { 2924 OUT_RING(MI_BATCH_BUFFER_START | 2925 (2 << 6)); 2926 OUT_RING(exec_start | MI_BATCH_NON_SECURE); 2927 } 2928 ADVANCE_LP_RING(); 2929 } 2930 } 2931 2932 /* XXX breadcrumb */ 2933 return 0; 2934 } 2935 2936 /* Throttle our rendering by waiting until the ring has completed our requests 2937 * emitted over 20 msec ago. 2938 * 2939 * This should get us reasonable parallelism between CPU and GPU but also 2940 * relatively low latency when blocking on a particular request to finish. 
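 *
 * In this implementation the "20 msec" is approximate: we simply wait
 * on the seqno recorded at the previous throttle call for this client,
 * whatever its actual age.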
2941 */ 2942 static int 2943 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) 2944 { 2945 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 2946 int ret = 0; 2947 uint32_t seqno; 2948 2949 mutex_lock(&dev->struct_mutex); 2950 seqno = i915_file_priv->mm.last_gem_throttle_seqno; 2951 i915_file_priv->mm.last_gem_throttle_seqno = 2952 i915_file_priv->mm.last_gem_seqno; 2953 if (seqno) 2954 ret = i915_wait_request(dev, seqno); 2955 mutex_unlock(&dev->struct_mutex); 2956 return ret; 2957 } 2958 2959 static int 2960 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, 2961 uint32_t buffer_count, 2962 struct drm_i915_gem_relocation_entry **relocs) 2963 { 2964 uint32_t reloc_count = 0, reloc_index = 0, i; 2965 int ret; 2966 2967 *relocs = NULL; 2968 for (i = 0; i < buffer_count; i++) { 2969 if (reloc_count + exec_list[i].relocation_count < reloc_count) 2970 return -EINVAL; 2971 reloc_count += exec_list[i].relocation_count; 2972 } 2973 2974 *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); 2975 if (*relocs == NULL) 2976 return -ENOMEM; 2977 2978 for (i = 0; i < buffer_count; i++) { 2979 struct drm_i915_gem_relocation_entry __user *user_relocs; 2980 2981 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 2982 2983 ret = copy_from_user(&(*relocs)[reloc_index], 2984 user_relocs, 2985 exec_list[i].relocation_count * 2986 sizeof(**relocs)); 2987 if (ret != 0) { 2988 drm_free(*relocs, reloc_count * sizeof(**relocs), 2989 DRM_MEM_DRIVER); 2990 *relocs = NULL; 2991 return ret; 2992 } 2993 2994 reloc_index += exec_list[i].relocation_count; 2995 } 2996 2997 return ret; 2998 } 2999 3000 static int 3001 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, 3002 uint32_t buffer_count, 3003 struct drm_i915_gem_relocation_entry *relocs) 3004 { 3005 uint32_t reloc_count = 0, i; 3006 int ret; 3007 3008 for (i = 0; i < buffer_count; i++) { 3009 struct drm_i915_gem_relocation_entry __user *user_relocs; 3010 3011 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3012 3013 if (ret == 0) { 3014 ret = copy_to_user(user_relocs, 3015 &relocs[reloc_count], 3016 exec_list[i].relocation_count * 3017 sizeof(*relocs)); 3018 } 3019 3020 reloc_count += exec_list[i].relocation_count; 3021 } 3022 3023 drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); 3024 3025 return ret; 3026 } 3027 3028 int 3029 i915_gem_execbuffer(struct drm_device *dev, void *data, 3030 struct drm_file *file_priv) 3031 { 3032 drm_i915_private_t *dev_priv = dev->dev_private; 3033 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv; 3034 struct drm_i915_gem_execbuffer *args = data; 3035 struct drm_i915_gem_exec_object *exec_list = NULL; 3036 struct drm_gem_object **object_list = NULL; 3037 struct drm_gem_object *batch_obj; 3038 struct drm_i915_gem_object *obj_priv; 3039 struct drm_clip_rect *cliprects = NULL; 3040 struct drm_i915_gem_relocation_entry *relocs; 3041 int ret, ret2, i, pinned = 0; 3042 uint64_t exec_offset; 3043 uint32_t seqno, flush_domains, reloc_index; 3044 int pin_tries; 3045 3046 #if WATCH_EXEC 3047 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3048 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3049 #endif 3050 3051 if (args->buffer_count < 1) { 3052 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); 3053 return -EINVAL; 3054 } 3055 /* Copy in the exec list from userland */ 3056 exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, 3057 
DRM_MEM_DRIVER); 3058 object_list = drm_calloc(sizeof(*object_list), args->buffer_count, 3059 DRM_MEM_DRIVER); 3060 if (exec_list == NULL || object_list == NULL) { 3061 DRM_ERROR("Failed to allocate exec or object list " 3062 "for %d buffers\n", 3063 args->buffer_count); 3064 ret = -ENOMEM; 3065 goto pre_mutex_err; 3066 } 3067 ret = copy_from_user(exec_list, 3068 (struct drm_i915_relocation_entry __user *) 3069 (uintptr_t) args->buffers_ptr, 3070 sizeof(*exec_list) * args->buffer_count); 3071 if (ret != 0) { 3072 DRM_ERROR("copy %d exec entries failed %d\n", 3073 args->buffer_count, ret); 3074 goto pre_mutex_err; 3075 } 3076 3077 if (args->num_cliprects != 0) { 3078 cliprects = drm_calloc(args->num_cliprects, sizeof(*cliprects), 3079 DRM_MEM_DRIVER); 3080 if (cliprects == NULL) 3081 goto pre_mutex_err; 3082 3083 ret = copy_from_user(cliprects, 3084 (struct drm_clip_rect __user *) 3085 (uintptr_t) args->cliprects_ptr, 3086 sizeof(*cliprects) * args->num_cliprects); 3087 if (ret != 0) { 3088 DRM_ERROR("copy %d cliprects failed: %d\n", 3089 args->num_cliprects, ret); 3090 goto pre_mutex_err; 3091 } 3092 } 3093 3094 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count, 3095 &relocs); 3096 if (ret != 0) 3097 goto pre_mutex_err; 3098 3099 mutex_lock(&dev->struct_mutex); 3100 3101 i915_verify_inactive(dev, __FILE__, __LINE__); 3102 3103 if (dev_priv->mm.wedged) { 3104 DRM_ERROR("Execbuf while wedged\n"); 3105 mutex_unlock(&dev->struct_mutex); 3106 ret = -EIO; 3107 goto pre_mutex_err; 3108 } 3109 3110 if (dev_priv->mm.suspended) { 3111 DRM_ERROR("Execbuf while VT-switched.\n"); 3112 mutex_unlock(&dev->struct_mutex); 3113 ret = -EBUSY; 3114 goto pre_mutex_err; 3115 } 3116 3117 /* Look up object handles */ 3118 for (i = 0; i < args->buffer_count; i++) { 3119 object_list[i] = drm_gem_object_lookup(dev, file_priv, 3120 exec_list[i].handle); 3121 if (object_list[i] == NULL) { 3122 DRM_ERROR("Invalid object handle %d at index %d\n", 3123 exec_list[i].handle, i); 3124 ret = -EBADF; 3125 goto err; 3126 } 3127 3128 obj_priv = object_list[i]->driver_private; 3129 if (obj_priv->in_execbuffer) { 3130 DRM_ERROR("Object %p appears more than once in object list\n", 3131 object_list[i]); 3132 ret = -EBADF; 3133 goto err; 3134 } 3135 obj_priv->in_execbuffer = true; 3136 } 3137 3138 /* Pin and relocate */ 3139 for (pin_tries = 0; ; pin_tries++) { 3140 ret = 0; 3141 reloc_index = 0; 3142 3143 for (i = 0; i < args->buffer_count; i++) { 3144 object_list[i]->pending_read_domains = 0; 3145 object_list[i]->pending_write_domain = 0; 3146 ret = i915_gem_object_pin_and_relocate(object_list[i], 3147 file_priv, 3148 &exec_list[i], 3149 &relocs[reloc_index]); 3150 if (ret) 3151 break; 3152 pinned = i + 1; 3153 reloc_index += exec_list[i].relocation_count; 3154 } 3155 /* success */ 3156 if (ret == 0) 3157 break; 3158 3159 /* error other than GTT full, or we've already tried again */ 3160 if (ret != -ENOMEM || pin_tries >= 1) { 3161 if (ret != -ERESTARTSYS) 3162 DRM_ERROR("Failed to pin buffers %d\n", ret); 3163 goto err; 3164 } 3165 3166 /* unpin all of our buffers */ 3167 for (i = 0; i < pinned; i++) 3168 i915_gem_object_unpin(object_list[i]); 3169 pinned = 0; 3170 3171 /* evict everyone we can from the aperture */ 3172 ret = i915_gem_evict_everything(dev); 3173 if (ret) 3174 goto err; 3175 } 3176 3177 /* Set the pending read domains for the batch buffer to COMMAND */ 3178 batch_obj = object_list[args->buffer_count-1]; 3179 batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND; 3180 
batch_obj->pending_write_domain = 0; 3181 3182 i915_verify_inactive(dev, __FILE__, __LINE__); 3183 3184 /* Zero the global flush/invalidate flags. These 3185 * will be modified as new domains are computed 3186 * for each object 3187 */ 3188 dev->invalidate_domains = 0; 3189 dev->flush_domains = 0; 3190 3191 for (i = 0; i < args->buffer_count; i++) { 3192 struct drm_gem_object *obj = object_list[i]; 3193 3194 /* Compute new gpu domains and update invalidate/flush */ 3195 i915_gem_object_set_to_gpu_domain(obj); 3196 } 3197 3198 i915_verify_inactive(dev, __FILE__, __LINE__); 3199 3200 if (dev->invalidate_domains | dev->flush_domains) { 3201 #if WATCH_EXEC 3202 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", 3203 __func__, 3204 dev->invalidate_domains, 3205 dev->flush_domains); 3206 #endif 3207 i915_gem_flush(dev, 3208 dev->invalidate_domains, 3209 dev->flush_domains); 3210 if (dev->flush_domains) 3211 (void)i915_add_request(dev, dev->flush_domains); 3212 } 3213 3214 for (i = 0; i < args->buffer_count; i++) { 3215 struct drm_gem_object *obj = object_list[i]; 3216 3217 obj->write_domain = obj->pending_write_domain; 3218 } 3219 3220 i915_verify_inactive(dev, __FILE__, __LINE__); 3221 3222 #if WATCH_COHERENCY 3223 for (i = 0; i < args->buffer_count; i++) { 3224 i915_gem_object_check_coherency(object_list[i], 3225 exec_list[i].handle); 3226 } 3227 #endif 3228 3229 exec_offset = exec_list[args->buffer_count - 1].offset; 3230 3231 #if WATCH_EXEC 3232 i915_gem_dump_object(object_list[args->buffer_count - 1], 3233 args->batch_len, 3234 __func__, 3235 ~0); 3236 #endif 3237 3238 /* Exec the batchbuffer */ 3239 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); 3240 if (ret) { 3241 DRM_ERROR("dispatch failed %d\n", ret); 3242 goto err; 3243 } 3244 3245 /* 3246 * Ensure that the commands in the batch buffer are 3247 * finished before the interrupt fires 3248 */ 3249 flush_domains = i915_retire_commands(dev); 3250 3251 i915_verify_inactive(dev, __FILE__, __LINE__); 3252 3253 /* 3254 * Get a seqno representing the execution of the current buffer, 3255 * which we can wait on. We would like to mitigate these interrupts, 3256 * likely by only creating seqnos occasionally (so that we have 3257 * *some* interrupts representing completion of buffers that we can 3258 * wait on when trying to clear up gtt space). 3259 */ 3260 seqno = i915_add_request(dev, flush_domains); 3261 BUG_ON(seqno == 0); 3262 i915_file_priv->mm.last_gem_seqno = seqno; 3263 for (i = 0; i < args->buffer_count; i++) { 3264 struct drm_gem_object *obj = object_list[i]; 3265 3266 i915_gem_object_move_to_active(obj, seqno); 3267 #if WATCH_LRU 3268 DRM_INFO("%s: move to exec list %p\n", __func__, obj); 3269 #endif 3270 } 3271 #if WATCH_LRU 3272 i915_dump_lru(dev, __func__); 3273 #endif 3274 3275 i915_verify_inactive(dev, __FILE__, __LINE__); 3276 3277 err: 3278 for (i = 0; i < pinned; i++) 3279 i915_gem_object_unpin(object_list[i]); 3280 3281 for (i = 0; i < args->buffer_count; i++) { 3282 if (object_list[i]) { 3283 obj_priv = object_list[i]->driver_private; 3284 obj_priv->in_execbuffer = false; 3285 } 3286 drm_gem_object_unreference(object_list[i]); 3287 } 3288 3289 mutex_unlock(&dev->struct_mutex); 3290 3291 if (!ret) { 3292 /* Copy the new buffer offsets back to the user's exec list. 
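 * This keeps the presumed offsets userspace supplies on its next
 * execbuf in sync with where the objects were actually placed,
 * avoiding needless relocation rewrites.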
*/ 3293 ret = copy_to_user((struct drm_i915_relocation_entry __user *) 3294 (uintptr_t) args->buffers_ptr, 3295 exec_list, 3296 sizeof(*exec_list) * args->buffer_count); 3297 if (ret) 3298 DRM_ERROR("failed to copy %d exec entries " 3299 "back to user (%d)\n", 3300 args->buffer_count, ret); 3301 } 3302 3303 /* Copy the updated relocations out regardless of current error 3304 * state. Failure to update the relocs would mean that the next 3305 * time userland calls execbuf, it would do so with presumed offset 3306 * state that didn't match the actual object state. 3307 */ 3308 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count, 3309 relocs); 3310 if (ret2 != 0) { 3311 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2); 3312 3313 if (ret == 0) 3314 ret = ret2; 3315 } 3316 3317 pre_mutex_err: 3318 drm_free(object_list, sizeof(*object_list) * args->buffer_count, 3319 DRM_MEM_DRIVER); 3320 drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, 3321 DRM_MEM_DRIVER); 3322 drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, 3323 DRM_MEM_DRIVER); 3324 3325 return ret; 3326 } 3327 3328 int 3329 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) 3330 { 3331 struct drm_device *dev = obj->dev; 3332 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3333 int ret; 3334 3335 i915_verify_inactive(dev, __FILE__, __LINE__); 3336 if (obj_priv->gtt_space == NULL) { 3337 ret = i915_gem_object_bind_to_gtt(obj, alignment); 3338 if (ret != 0) { 3339 if (ret != -EBUSY && ret != -ERESTARTSYS) 3340 DRM_ERROR("Failure to bind: %d\n", ret); 3341 return ret; 3342 } 3343 } 3344 /* 3345 * Pre-965 chips need a fence register set up in order to 3346 * properly handle tiled surfaces. 3347 */ 3348 if (!IS_I965G(dev) && 3349 obj_priv->fence_reg == I915_FENCE_REG_NONE && 3350 obj_priv->tiling_mode != I915_TILING_NONE) { 3351 ret = i915_gem_object_get_fence_reg(obj, true); 3352 if (ret != 0) { 3353 if (ret != -EBUSY && ret != -ERESTARTSYS) 3354 DRM_ERROR("Failure to install fence: %d\n", 3355 ret); 3356 return ret; 3357 } 3358 } 3359 obj_priv->pin_count++; 3360 3361 /* If the object is not active and not pending a flush, 3362 * remove it from the inactive list 3363 */ 3364 if (obj_priv->pin_count == 1) { 3365 atomic_inc(&dev->pin_count); 3366 atomic_add(obj->size, &dev->pin_memory); 3367 if (!obj_priv->active && 3368 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 3369 I915_GEM_DOMAIN_GTT)) == 0 && 3370 !list_empty(&obj_priv->list)) 3371 list_del_init(&obj_priv->list); 3372 } 3373 i915_verify_inactive(dev, __FILE__, __LINE__); 3374 3375 return 0; 3376 } 3377 3378 void 3379 i915_gem_object_unpin(struct drm_gem_object *obj) 3380 { 3381 struct drm_device *dev = obj->dev; 3382 drm_i915_private_t *dev_priv = dev->dev_private; 3383 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3384 3385 i915_verify_inactive(dev, __FILE__, __LINE__); 3386 obj_priv->pin_count--; 3387 BUG_ON(obj_priv->pin_count < 0); 3388 BUG_ON(obj_priv->gtt_space == NULL); 3389 3390 /* If the object is no longer pinned, and is 3391 * neither active nor being flushed, then stick it on 3392 * the inactive list 3393 */ 3394 if (obj_priv->pin_count == 0) { 3395 if (!obj_priv->active && 3396 (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | 3397 I915_GEM_DOMAIN_GTT)) == 0) 3398 list_move_tail(&obj_priv->list, 3399 &dev_priv->mm.inactive_list); 3400 atomic_dec(&dev->pin_count); 3401 atomic_sub(obj->size, &dev->pin_memory); 3402 } 3403 i915_verify_inactive(dev, __FILE__, __LINE__); 3404 } 3405 3406 int 
3407 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3408 struct drm_file *file_priv) 3409 { 3410 struct drm_i915_gem_pin *args = data; 3411 struct drm_gem_object *obj; 3412 struct drm_i915_gem_object *obj_priv; 3413 int ret; 3414 3415 mutex_lock(&dev->struct_mutex); 3416 3417 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3418 if (obj == NULL) { 3419 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", 3420 args->handle); 3421 mutex_unlock(&dev->struct_mutex); 3422 return -EBADF; 3423 } 3424 obj_priv = obj->driver_private; 3425 3426 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { 3427 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3428 args->handle); 3429 drm_gem_object_unreference(obj); 3430 mutex_unlock(&dev->struct_mutex); 3431 return -EINVAL; 3432 } 3433 3434 obj_priv->user_pin_count++; 3435 obj_priv->pin_filp = file_priv; 3436 if (obj_priv->user_pin_count == 1) { 3437 ret = i915_gem_object_pin(obj, args->alignment); 3438 if (ret != 0) { 3439 drm_gem_object_unreference(obj); 3440 mutex_unlock(&dev->struct_mutex); 3441 return ret; 3442 } 3443 } 3444 3445 /* XXX - flush the CPU caches for pinned objects 3446 * as the X server doesn't manage domains yet 3447 */ 3448 i915_gem_object_flush_cpu_write_domain(obj); 3449 args->offset = obj_priv->gtt_offset; 3450 drm_gem_object_unreference(obj); 3451 mutex_unlock(&dev->struct_mutex); 3452 3453 return 0; 3454 } 3455 3456 int 3457 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3458 struct drm_file *file_priv) 3459 { 3460 struct drm_i915_gem_pin *args = data; 3461 struct drm_gem_object *obj; 3462 struct drm_i915_gem_object *obj_priv; 3463 3464 mutex_lock(&dev->struct_mutex); 3465 3466 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3467 if (obj == NULL) { 3468 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", 3469 args->handle); 3470 mutex_unlock(&dev->struct_mutex); 3471 return -EBADF; 3472 } 3473 3474 obj_priv = obj->driver_private; 3475 if (obj_priv->pin_filp != file_priv) { 3476 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3477 args->handle); 3478 drm_gem_object_unreference(obj); 3479 mutex_unlock(&dev->struct_mutex); 3480 return -EINVAL; 3481 } 3482 obj_priv->user_pin_count--; 3483 if (obj_priv->user_pin_count == 0) { 3484 obj_priv->pin_filp = NULL; 3485 i915_gem_object_unpin(obj); 3486 } 3487 3488 drm_gem_object_unreference(obj); 3489 mutex_unlock(&dev->struct_mutex); 3490 return 0; 3491 } 3492 3493 int 3494 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3495 struct drm_file *file_priv) 3496 { 3497 struct drm_i915_gem_busy *args = data; 3498 struct drm_gem_object *obj; 3499 struct drm_i915_gem_object *obj_priv; 3500 3501 mutex_lock(&dev->struct_mutex); 3502 obj = drm_gem_object_lookup(dev, file_priv, args->handle); 3503 if (obj == NULL) { 3504 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", 3505 args->handle); 3506 mutex_unlock(&dev->struct_mutex); 3507 return -EBADF; 3508 } 3509 3510 /* Update the active list for the hardware's current position. 3511 * Otherwise this only updates on a delayed timer or when irqs are 3512 * actually unmasked, and our working set ends up being larger than 3513 * required. 3514 */ 3515 i915_gem_retire_requests(dev); 3516 3517 obj_priv = obj->driver_private; 3518 /* Don't count being on the flushing list against the object being 3519 * done. 
Otherwise, a buffer left on the flushing list but not getting 3520 * flushed (because nobody's flushing that domain) won't ever return 3521 * unbusy and get reused by libdrm's bo cache. The other expected 3522 * consumer of this interface, OpenGL's occlusion queries, also specs 3523 * that the objects get unbusy "eventually" without any interference. 3524 */ 3525 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0; 3526 3527 drm_gem_object_unreference(obj); 3528 mutex_unlock(&dev->struct_mutex); 3529 return 0; 3530 } 3531 3532 int 3533 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3534 struct drm_file *file_priv) 3535 { 3536 return i915_gem_ring_throttle(dev, file_priv); 3537 } 3538 3539 int i915_gem_init_object(struct drm_gem_object *obj) 3540 { 3541 struct drm_i915_gem_object *obj_priv; 3542 3543 obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); 3544 if (obj_priv == NULL) 3545 return -ENOMEM; 3546 3547 /* 3548 * We've just allocated pages from the kernel, 3549 * so they've just been written by the CPU with 3550 * zeros. They'll need to be clflushed before we 3551 * use them with the GPU. 3552 */ 3553 obj->write_domain = I915_GEM_DOMAIN_CPU; 3554 obj->read_domains = I915_GEM_DOMAIN_CPU; 3555 3556 obj_priv->agp_type = AGP_USER_MEMORY; 3557 3558 obj->driver_private = obj_priv; 3559 obj_priv->obj = obj; 3560 obj_priv->fence_reg = I915_FENCE_REG_NONE; 3561 INIT_LIST_HEAD(&obj_priv->list); 3562 3563 return 0; 3564 } 3565 3566 void i915_gem_free_object(struct drm_gem_object *obj) 3567 { 3568 struct drm_device *dev = obj->dev; 3569 struct drm_i915_gem_object *obj_priv = obj->driver_private; 3570 3571 while (obj_priv->pin_count > 0) 3572 i915_gem_object_unpin(obj); 3573 3574 if (obj_priv->phys_obj) 3575 i915_gem_detach_phys_object(dev, obj); 3576 3577 i915_gem_object_unbind(obj); 3578 3579 i915_gem_free_mmap_offset(obj); 3580 3581 drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); 3582 drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); 3583 } 3584 3585 /** Unbinds all objects that are on the given buffer list. */ 3586 static int 3587 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) 3588 { 3589 struct drm_gem_object *obj; 3590 struct drm_i915_gem_object *obj_priv; 3591 int ret; 3592 3593 while (!list_empty(head)) { 3594 obj_priv = list_first_entry(head, 3595 struct drm_i915_gem_object, 3596 list); 3597 obj = obj_priv->obj; 3598 3599 if (obj_priv->pin_count != 0) { 3600 DRM_ERROR("Pinned object in unbind list\n"); 3601 mutex_unlock(&dev->struct_mutex); 3602 return -EINVAL; 3603 } 3604 3605 ret = i915_gem_object_unbind(obj); 3606 if (ret != 0) { 3607 DRM_ERROR("Error unbinding object in LeaveVT: %d\n", 3608 ret); 3609 mutex_unlock(&dev->struct_mutex); 3610 return ret; 3611 } 3612 } 3613 3614 3615 return 0; 3616 } 3617 3618 int 3619 i915_gem_idle(struct drm_device *dev) 3620 { 3621 drm_i915_private_t *dev_priv = dev->dev_private; 3622 uint32_t seqno, cur_seqno, last_seqno; 3623 int stuck, ret; 3624 3625 mutex_lock(&dev->struct_mutex); 3626 3627 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) { 3628 mutex_unlock(&dev->struct_mutex); 3629 return 0; 3630 } 3631 3632 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3633 * We need to replace this with a semaphore, or something. 
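 * For now mm.suspended doubles as that lock: execbuf checks it under
 * struct_mutex and fails with -EBUSY while it is set.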
3634 */ 3635 dev_priv->mm.suspended = 1; 3636 3637 /* Cancel the retire work handler, wait for it to finish if running 3638 */ 3639 mutex_unlock(&dev->struct_mutex); 3640 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3641 mutex_lock(&dev->struct_mutex); 3642 3643 i915_kernel_lost_context(dev); 3644 3645 /* Flush the GPU along with all non-CPU write domains 3646 */ 3647 i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT), 3648 ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)); 3649 seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU); 3650 3651 if (seqno == 0) { 3652 mutex_unlock(&dev->struct_mutex); 3653 return -ENOMEM; 3654 } 3655 3656 dev_priv->mm.waiting_gem_seqno = seqno; 3657 last_seqno = 0; 3658 stuck = 0; 3659 for (;;) { 3660 cur_seqno = i915_get_gem_seqno(dev); 3661 if (i915_seqno_passed(cur_seqno, seqno)) 3662 break; 3663 if (last_seqno == cur_seqno) { 3664 if (stuck++ > 100) { 3665 DRM_ERROR("hardware wedged\n"); 3666 dev_priv->mm.wedged = 1; 3667 DRM_WAKEUP(&dev_priv->irq_queue); 3668 break; 3669 } 3670 } 3671 msleep(10); 3672 last_seqno = cur_seqno; 3673 } 3674 dev_priv->mm.waiting_gem_seqno = 0; 3675 3676 i915_gem_retire_requests(dev); 3677 3678 if (!dev_priv->mm.wedged) { 3679 /* Active and flushing should now be empty as we've 3680 * waited for a sequence higher than any pending execbuffer 3681 */ 3682 WARN_ON(!list_empty(&dev_priv->mm.active_list)); 3683 WARN_ON(!list_empty(&dev_priv->mm.flushing_list)); 3684 /* Request should now be empty as we've also waited 3685 * for the last request in the list 3686 */ 3687 WARN_ON(!list_empty(&dev_priv->mm.request_list)); 3688 } 3689 3690 /* Empty the active and flushing lists to inactive. If there's 3691 * anything left at this point, it means that we're wedged and 3692 * nothing good's going to happen by leaving them there. So strip 3693 * the GPU domains and just stuff them onto inactive. 3694 */ 3695 while (!list_empty(&dev_priv->mm.active_list)) { 3696 struct drm_i915_gem_object *obj_priv; 3697 3698 obj_priv = list_first_entry(&dev_priv->mm.active_list, 3699 struct drm_i915_gem_object, 3700 list); 3701 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3702 i915_gem_object_move_to_inactive(obj_priv->obj); 3703 } 3704 3705 while (!list_empty(&dev_priv->mm.flushing_list)) { 3706 struct drm_i915_gem_object *obj_priv; 3707 3708 obj_priv = list_first_entry(&dev_priv->mm.flushing_list, 3709 struct drm_i915_gem_object, 3710 list); 3711 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; 3712 i915_gem_object_move_to_inactive(obj_priv->obj); 3713 } 3714 3715 3716 /* Move all inactive buffers out of the GTT. */ 3717 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); 3718 WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); 3719 if (ret) { 3720 mutex_unlock(&dev->struct_mutex); 3721 return ret; 3722 } 3723 3724 i915_gem_cleanup_ringbuffer(dev); 3725 mutex_unlock(&dev->struct_mutex); 3726 3727 return 0; 3728 } 3729 3730 static int 3731 i915_gem_init_hws(struct drm_device *dev) 3732 { 3733 drm_i915_private_t *dev_priv = dev->dev_private; 3734 struct drm_gem_object *obj; 3735 struct drm_i915_gem_object *obj_priv; 3736 int ret; 3737 3738 /* If we need a physical address for the status page, it's already 3739 * initialized at driver load time. 
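 * Chips that instead want a GTT-relative status page get a 4kB GEM
 * object allocated, pinned and kmapped below, with its GTT offset
 * programmed into HWS_PGA.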
3740 */ 3741 if (!I915_NEED_GFX_HWS(dev)) 3742 return 0; 3743 3744 obj = drm_gem_object_alloc(dev, 4096); 3745 if (obj == NULL) { 3746 DRM_ERROR("Failed to allocate status page\n"); 3747 return -ENOMEM; 3748 } 3749 obj_priv = obj->driver_private; 3750 obj_priv->agp_type = AGP_USER_CACHED_MEMORY; 3751 3752 ret = i915_gem_object_pin(obj, 4096); 3753 if (ret != 0) { 3754 drm_gem_object_unreference(obj); 3755 return ret; 3756 } 3757 3758 dev_priv->status_gfx_addr = obj_priv->gtt_offset; 3759 3760 dev_priv->hw_status_page = kmap(obj_priv->pages[0]); 3761 if (dev_priv->hw_status_page == NULL) { 3762 DRM_ERROR("Failed to map status page.\n"); 3763 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 3764 i915_gem_object_unpin(obj); 3765 drm_gem_object_unreference(obj); 3766 return -EINVAL; 3767 } 3768 dev_priv->hws_obj = obj; 3769 memset(dev_priv->hw_status_page, 0, PAGE_SIZE); 3770 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); 3771 I915_READ(HWS_PGA); /* posting read */ 3772 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr); 3773 3774 return 0; 3775 } 3776 3777 static void 3778 i915_gem_cleanup_hws(struct drm_device *dev) 3779 { 3780 drm_i915_private_t *dev_priv = dev->dev_private; 3781 struct drm_gem_object *obj; 3782 struct drm_i915_gem_object *obj_priv; 3783 3784 if (dev_priv->hws_obj == NULL) 3785 return; 3786 3787 obj = dev_priv->hws_obj; 3788 obj_priv = obj->driver_private; 3789 3790 kunmap(obj_priv->pages[0]); 3791 i915_gem_object_unpin(obj); 3792 drm_gem_object_unreference(obj); 3793 dev_priv->hws_obj = NULL; 3794 3795 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); 3796 dev_priv->hw_status_page = NULL; 3797 3798 /* Write high address into HWS_PGA when disabling. */ 3799 I915_WRITE(HWS_PGA, 0x1ffff000); 3800 } 3801 3802 int 3803 i915_gem_init_ringbuffer(struct drm_device *dev) 3804 { 3805 drm_i915_private_t *dev_priv = dev->dev_private; 3806 struct drm_gem_object *obj; 3807 struct drm_i915_gem_object *obj_priv; 3808 drm_i915_ring_buffer_t *ring = &dev_priv->ring; 3809 int ret; 3810 u32 head; 3811 3812 ret = i915_gem_init_hws(dev); 3813 if (ret != 0) 3814 return ret; 3815 3816 obj = drm_gem_object_alloc(dev, 128 * 1024); 3817 if (obj == NULL) { 3818 DRM_ERROR("Failed to allocate ringbuffer\n"); 3819 i915_gem_cleanup_hws(dev); 3820 return -ENOMEM; 3821 } 3822 obj_priv = obj->driver_private; 3823 3824 ret = i915_gem_object_pin(obj, 4096); 3825 if (ret != 0) { 3826 drm_gem_object_unreference(obj); 3827 i915_gem_cleanup_hws(dev); 3828 return ret; 3829 } 3830 3831 /* Set up the kernel mapping for the ring. */ 3832 ring->Size = obj->size; 3833 ring->tail_mask = obj->size - 1; 3834 3835 ring->map.offset = dev->agp->base + obj_priv->gtt_offset; 3836 ring->map.size = obj->size; 3837 ring->map.type = 0; 3838 ring->map.flags = 0; 3839 ring->map.mtrr = 0; 3840 3841 drm_core_ioremap_wc(&ring->map, dev); 3842 if (ring->map.handle == NULL) { 3843 DRM_ERROR("Failed to map ringbuffer.\n"); 3844 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring)); 3845 i915_gem_object_unpin(obj); 3846 drm_gem_object_unreference(obj); 3847 i915_gem_cleanup_hws(dev); 3848 return -EINVAL; 3849 } 3850 ring->ring_obj = obj; 3851 ring->virtual_start = ring->map.handle; 3852 3853 /* Stop the ring if it's running. */ 3854 I915_WRITE(PRB0_CTL, 0); 3855 I915_WRITE(PRB0_TAIL, 0); 3856 I915_WRITE(PRB0_HEAD, 0); 3857 3858 /* Initialize the ring. 
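 * Program the start address, then enable the ring with its length
 * encoded in PRB0_CTL.  G45 parts may leave HEAD non-zero after the
 * reset above, which the code below detects and forces back to zero.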
int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
	int ret;
	u32 head;

	ret = i915_gem_init_hws(dev);
	if (ret != 0)
		return ret;

	obj = drm_gem_object_alloc(dev, 128 * 1024);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		i915_gem_cleanup_hws(dev);
		return -ENOMEM;
	}
	obj_priv = obj->driver_private;

	ret = i915_gem_object_pin(obj, 4096);
	if (ret != 0) {
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return ret;
	}

	/* Set up the kernel mapping for the ring. */
	ring->Size = obj->size;
	ring->tail_mask = obj->size - 1;

	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
	ring->map.size = obj->size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.handle == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
		i915_gem_object_unpin(obj);
		drm_gem_object_unreference(obj);
		i915_gem_cleanup_hws(dev);
		return -EINVAL;
	}
	ring->ring_obj = obj;
	ring->virtual_start = ring->map.handle;

	/* Stop the ring if it's running. */
	I915_WRITE(PRB0_CTL, 0);
	I915_WRITE(PRB0_TAIL, 0);
	I915_WRITE(PRB0_HEAD, 0);

	/* Initialize the ring. */
	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_ERROR("Ring head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		I915_WRITE(PRB0_HEAD, 0);

		DRM_ERROR("Ring head forced to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
	}

	I915_WRITE(PRB0_CTL,
		   ((obj->size - 4096) & RING_NR_PAGES) |
		   RING_NO_REPORT |
		   RING_VALID);

	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;

	/* If the head is still not zero, the ring is dead */
	if (head != 0) {
		DRM_ERROR("Ring initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  I915_READ(PRB0_CTL),
			  I915_READ(PRB0_HEAD),
			  I915_READ(PRB0_TAIL),
			  I915_READ(PRB0_START));
		/* Don't leak the pinned ring object, its mapping or the
		 * status page on a dead ring.
		 */
		i915_gem_cleanup_ringbuffer(dev);
		return -EIO;
	}

	/* Update our cache of the ring state */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		i915_kernel_lost_context(dev);
	else {
		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
		ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
		ring->space = ring->head - (ring->tail + 8);
		if (ring->space < 0)
			ring->space += ring->Size;
	}

	return 0;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (dev_priv->ring.ring_obj == NULL)
		return;

	drm_core_ioremapfree(&dev_priv->ring.map, dev);

	i915_gem_object_unpin(dev_priv->ring.ring_obj);
	drm_gem_object_unreference(dev_priv->ring.ring_obj);
	dev_priv->ring.ring_obj = NULL;
	memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));

	i915_gem_cleanup_hws(dev);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (dev_priv->mm.wedged) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		dev_priv->mm.wedged = 0;
	}

	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
	if (ret != 0) {
		/* Don't return with struct_mutex still held. */
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
	BUG_ON(!list_empty(&dev_priv->mm.request_list));
	mutex_unlock(&dev->struct_mutex);

	drm_irq_install(dev);

	return 0;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	ret = i915_gem_idle(dev);
	drm_irq_uninstall(dev);

	return ret;
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}
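/* One-time GEM state initialization at driver load: the object lists, the
 * retire work handler, the starting seqno, and the number of fence registers
 * available for tiled objects on this chipset generation.
 */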
void
i915_gem_load(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.request_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	dev_priv->mm.next_gem_seqno = 1;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	dev_priv->fence_reg_start = 3;

	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	i915_gem_detect_bit_6_swizzle(dev);
}

/*
 * Create a physically contiguous memory object backing this GEM object,
 * e.g. for cursor and overlay registers.
 */
int i915_gem_init_phys_object(struct drm_device *dev,
			      int id, int size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object),
			      DRM_MEM_DRIVER);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr,
		      phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object),
		 DRM_MEM_DRIVER);
	return ret;
}

void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj)
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr,
		      phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	/* Match the drm_calloc() in i915_gem_init_phys_object(). */
	drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object),
		 DRM_MEM_DRIVER);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *obj_priv;
	int i;
	int ret;
	int page_count;

	obj_priv = obj->driver_private;
	if (!obj_priv->phys_obj)
		return;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		goto out;

	page_count = obj->size / PAGE_SIZE;

	/* Copy the contents back from the phys object into the object's
	 * pages, then flush them out of the CPU caches.
	 */
	for (i = 0; i < page_count; i++) {
		char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst, KM_USER0);
	}
	drm_clflush_pages(obj_priv->pages, page_count);
	drm_agp_chipset_flush(dev);
out:
	obj_priv->phys_obj->cur_obj = NULL;
	obj_priv->phys_obj = NULL;
}
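/* Bind a GEM object to one of the physically contiguous backing objects
 * (allocating it on first use) and copy the object's current contents into
 * it.  A caller that needs, say, a physically addressed cursor image would do
 * something like the following (illustrative sketch only):
 *
 *	ret = i915_gem_attach_phys_object(dev, obj, I915_GEM_PHYS_CURSOR_0);
 *	if (ret)
 *		goto fail;
 *
 * Subsequent pwrites to such an object are then expected to be handled by
 * i915_gem_phys_pwrite() below.
 */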
int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_gem_object *obj, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	obj_priv = obj->driver_private;

	if (obj_priv->phys_obj) {
		if (obj_priv->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* Create a new backing object if we don't have one for this id yet. */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id, obj->size);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->size);
			goto out;
		}
	}

	/* Bind the GEM object to the phys object. */
	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj_priv->phys_obj->cur_obj = obj;

	ret = i915_gem_object_get_pages(obj);
	if (ret) {
		DRM_ERROR("failed to get page list\n");
		goto out;
	}

	page_count = obj->size / PAGE_SIZE;

	/* Copy the object's current contents into the phys object. */
	for (i = 0; i < page_count; i++) {
		char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER0);
	}

	return 0;
out:
	return ret;
}

/* pwrite fast path for objects backed by a physically contiguous object:
 * copy straight from userspace into the phys object's kernel mapping and
 * flush the chipset so the GPU sees the new data.
 */
static int
i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	void *obj_addr;
	int ret;
	char __user *user_data;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;

	DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
	ret = copy_from_user(obj_addr, user_data, args->size);
	if (ret)
		return -EFAULT;

	drm_agp_chipset_flush(dev);
	return 0;
}