/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	unsigned i, j;
	bool duplicate;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		for (j = 0; j < i; j++) {
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (duplicate) {
			p->relocs[i].handle = 0;
			continue;
		}

		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
							  r->handle);
		if (p->relocs[i].gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs_ptr[i] = &p->relocs[i];
		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
		p->relocs[i].lobj.bo = p->relocs[i].robj;
		p->relocs[i].lobj.written = !!r->write_domain;

		/* the first reloc of a UVD job is the msg and that must be
		   in VRAM */
		if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].lobj.domain =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].lobj.alt_domain =
				RADEON_GEM_DOMAIN_VRAM;

		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

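			/* Prefer the requested placement, but when VRAM is
			 * requested also record GTT as an alternative domain
			 * that validation may fall back to.
			 */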
			p->relocs[i].lobj.domain = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].lobj.alt_domain = domain;
		}

		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].handle = r->handle;

		radeon_bo_list_add_object(&p->relocs[i].lobj,
					  &p->validated);
	}
	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_R600) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	}
	return 0;
}

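/*
 * Make the IB wait on the last fence of every buffer referenced by the
 * relocation list, so the command stream does not run before earlier users
 * of those buffers have finished.
 */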
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	int i;

	for (i = 0; i < p->nrelocs; i++) {
		if (!p->relocs[i].robj)
			continue;

		radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
	}
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
	p->chunk_flags_idx = -1;
	p->chunk_const_ib_idx = -1;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].kdata = NULL;
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
		if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
		    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
			size = p->chunks[i].length_dw * sizeof(uint32_t);
			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
			if (p->chunks[i].kdata == NULL) {
				return -ENOMEM;
			}
			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
					       p->chunks[i].user_ptr, size)) {
				return -EFAULT;
			}
			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
				p->cs_flags = p->chunks[i].kdata[0];
				if (p->chunks[i].length_dw > 1)
					ring = p->chunks[i].kdata[1];
				if (p->chunks[i].length_dw > 2)
					priority = (s32)p->chunks[i].kdata[2];
			}
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
		    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
			DRM_ERROR("Ring %d requires VM!\n", p->ring);
			return -EINVAL;
		}
	}

	/* deal with non-vm */
	if ((p->chunk_ib_idx != -1) &&
	    ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
	    (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
		if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
			DRM_ERROR("cs IB too big: %d\n",
				  p->chunks[p->chunk_ib_idx].length_dw);
			return -EINVAL;
		}
		if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
				kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
				kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
				p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
				p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
				return -ENOMEM;
			}
		}
		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
		p->chunks[p->chunk_ib_idx].last_page_index =
			((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
	}

	return 0;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the reservation
 *
 * If error is set, back off the buffer reservations (when @backoff is true);
 * otherwise fence the validated buffers. In either case, free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->ib.fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	for (i = 0; i < parser->nchunks; i++) {
		kfree(parser->chunks[i].kdata);
		if ((parser->rdev->flags & RADEON_IS_AGP)) {
			kfree(parser->chunks[i].kpage[0]);
			kfree(parser->chunks[i].kpage[1]);
		}
	}
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	/* Copy the packet into the IB, the parser will read from the
	 * input memory (cached) and write to the IB (which can be
	 * uncached).
	 */
	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  NULL, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
	r = radeon_cs_finish_pages(parser);
	if (r) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
	radeon_cs_sync_rings(parser);
	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = parser->rdev;
	struct radeon_bo_list *lobj;
	struct radeon_bo *bo;
	int r;

	r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
	if (r) {
		return r;
	}
	list_for_each_entry(lobj, &parser->validated, tv.head) {
		bo = lobj->bo;
		r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
		if (r) {
			return r;
		}
	}
	return 0;
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
		r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
				  vm, ib_chunk->length_dw * 4);
		if (r) {
			DRM_ERROR("Failed to get const ib !\n");
			return r;
		}
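		/* Set up and copy the optional CONST_IB chunk; it is
		 * scheduled together with the main IB below. */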
		parser->const_ib.is_const_ib = true;
		parser->const_ib.length_dw = ib_chunk->length_dw;
		/* Copy the packet into the IB */
		if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
				       ib_chunk->length_dw * 4)) {
			return -EFAULT;
		}
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
		return -EINVAL;
	}
	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	/* Copy the packet into the IB */
	if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
			       ib_chunk->length_dw * 4)) {
		return -EFAULT;
	}
	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	r = radeon_vm_alloc_pt(rdev, vm);
	if (r) {
		goto out;
	}
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
	radeon_cs_sync_rings(parser);
	radeon_ib_sync_to(&parser->ib, vm->fence);
	radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
		rdev, vm, parser->ring));

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
	}

	if (!r) {
		radeon_vm_fence(rdev, vm, parser->ib.fence);
	}

out:
	radeon_vm_add_to_lru(rdev, vm);
	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);
	return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}
	r = radeon_cs_parser_relocs(&parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	/* XXX pick SD/HD/MVC */
	if (parser.ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

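/*
 * Copy any remaining pages of the IB chunk that have not yet been pulled in
 * from user space (everything after last_copied_page) into the indirect
 * buffer.
 */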
int radeon_cs_finish_pages(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;

	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
		if (i == ibc->last_page_index) {
			size = (ibc->length_dw * 4) % PAGE_SIZE;
			if (size == 0)
				size = PAGE_SIZE;
		}

		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
				       ibc->user_ptr + (i * PAGE_SIZE),
				       size))
			return -EFAULT;
	}
	return 0;
}

static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
{
	int new_page;
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;
	bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
		false : true;

	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
				       ibc->user_ptr + (i * PAGE_SIZE),
				       PAGE_SIZE)) {
			p->parser_error = -EFAULT;
			return 0;
		}
	}

	if (pg_idx == ibc->last_page_index) {
		size = (ibc->length_dw * 4) % PAGE_SIZE;
		if (size == 0)
			size = PAGE_SIZE;
	}

	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
	if (copy1)
		ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));

	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
			       size)) {
		p->parser_error = -EFAULT;
		return 0;
	}

	/* copy to the IB for the non-single-copy (AGP) case */
	if (!copy1)
		memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);

	ibc->last_copied_page = pg_idx;
	ibc->kpage_idx[new_page] = pg_idx;

	return new_page;
}

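/*
 * Return dword idx of the IB chunk, copying in the user page that contains
 * it via radeon_cs_update_pages() when it is not already cached in kpage[].
 */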
u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	u32 pg_idx, pg_offset;
	u32 idx_value = 0;
	int new_page;

	pg_idx = (idx * 4) / PAGE_SIZE;
	pg_offset = (idx * 4) % PAGE_SIZE;

	if (ibc->kpage_idx[0] == pg_idx)
		return ibc->kpage[0][pg_offset/4];
	if (ibc->kpage_idx[1] == pg_idx)
		return ibc->kpage[1][pg_offset/4];

	new_page = radeon_cs_update_pages(p, pg_idx);
	if (new_page < 0) {
		p->parser_error = new_page;
		return 0;
	}

	idx_value = ibc->kpage[new_page][pg_offset/4];
	return idx_value;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header dword in the IB chunk
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining IB size or if the packet type
 * is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the pointer to the relocation entry
 * @nomm:	non-zero when no memory manager is available (legacy UMS);
 *		the GPU offset is then taken directly from the reloc chunk
 *
 * Check that the next packet is a relocation packet3 NOP and return the
 * corresponding relocation entry.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->lobj.gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}