/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	unsigned i, j;
	bool duplicate;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		for (j = 0; j < i; j++) {
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (duplicate) {
			p->relocs[i].handle = 0;
			continue;
		}

		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
							  r->handle);
		if (p->relocs[i].gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs_ptr[i] = &p->relocs[i];
		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
		p->relocs[i].lobj.bo = p->relocs[i].robj;
		p->relocs[i].lobj.written = !!r->write_domain;

		/* the first reloc of a UVD job is the
		   msg and that must be in VRAM */
		if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].lobj.domain =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].lobj.alt_domain =
				RADEON_GEM_DOMAIN_VRAM;

		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			p->relocs[i].lobj.domain = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].lobj.alt_domain = domain;
		}

		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].handle = r->handle;

		radeon_bo_list_add_object(&p->relocs[i].lobj,
					  &p->validated);
	}
	return radeon_bo_list_validate(&p->validated, p->ring);
}
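
/**
 * radeon_cs_get_ring() - map a userspace ring id to a hardware ring index
 * @p: parser structure holding parsing context.
 * @ring: RADEON_CS_RING_* value requested by userspace.
 * @priority: requested submission priority.
 *
 * Translate the ring id supplied in the flags chunk into the hardware
 * ring index used internally, taking the ASIC family and the requested
 * priority into account. Returns 0 on success, -EINVAL for an unknown
 * or unsupported ring.
 **/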
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_R600) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	}
	return 0;
}

static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	int i;

	for (i = 0; i < p->nrelocs; i++) {
		if (!p->relocs[i].robj)
			continue;

		radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
	}
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
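/**
 * radeon_cs_parser_init() - initialize the CS parser from ioctl data
 * @p: parser structure holding parsing context.
 * @data: pointer to the drm_radeon_cs ioctl argument.
 *
 * Copy the chunk headers from userspace, remember where the relocation,
 * IB, const IB and flags chunks are, pick the destination ring and set
 * up the temporary pages used to copy the IB on AGP systems.
 * Returns 0 on success, negative error code on failure.
 **/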
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
	p->chunk_flags_idx = -1;
	p->chunk_const_ib_idx = -1;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].kdata = NULL;
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
		if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
		    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
			size = p->chunks[i].length_dw * sizeof(uint32_t);
			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
			if (p->chunks[i].kdata == NULL) {
				return -ENOMEM;
			}
			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
					       p->chunks[i].user_ptr, size)) {
				return -EFAULT;
			}
			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
				p->cs_flags = p->chunks[i].kdata[0];
				if (p->chunks[i].length_dw > 1)
					ring = p->chunks[i].kdata[1];
				if (p->chunks[i].length_dw > 2)
					priority = (s32)p->chunks[i].kdata[2];
			}
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
		    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
			DRM_ERROR("Ring %d requires VM!\n", p->ring);
			return -EINVAL;
		}
	}

	/* deal with non-vm */
	if ((p->chunk_ib_idx != -1) &&
	    ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
	    (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
		if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
			DRM_ERROR("cs IB too big: %d\n",
				  p->chunks[p->chunk_ib_idx].length_dw);
			return -EINVAL;
		}
		if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
				kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
				kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
				p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
				p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
				return -ENOMEM;
			}
		}
		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
		p->chunks[p->chunk_ib_idx].last_page_index =
			((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
	}

	return 0;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 *
 * If error is set, back off the buffer reservations, otherwise fence the
 * validated buffers; in both cases free the memory used by the parsing
 * context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
{
	unsigned i;

	if (!error) {
		ttm_eu_fence_buffer_objects(&parser->validated,
					    parser->ib.fence);
	} else {
		ttm_eu_backoff_reservation(&parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	for (i = 0; i < parser->nchunks; i++) {
		kfree(parser->chunks[i].kdata);
		if ((parser->rdev->flags & RADEON_IS_AGP)) {
			kfree(parser->chunks[i].kpage[0]);
			kfree(parser->chunks[i].kpage[1]);
		}
	}
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	/* Copy the packet into the IB, the parser will read from the
	 * input memory (cached) and write to the IB (which can be
	 * uncached).
	 */
	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  NULL, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
	r = radeon_cs_finish_pages(parser);
	if (r) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
	radeon_cs_sync_rings(parser);
	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = parser->rdev;
	struct radeon_bo_list *lobj;
	struct radeon_bo *bo;
	int r;

	r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
	if (r) {
		return r;
	}
	list_for_each_entry(lobj, &parser->validated, tv.head) {
		bo = lobj->bo;
		r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
		if (r) {
			return r;
		}
	}
	return 0;
}
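
/**
 * radeon_cs_ib_vm_chunk() - parse and schedule a VM command stream
 * @rdev: radeon device the CS was submitted on.
 * @parser: parser structure holding parsing context.
 *
 * Copy the IB (and the const IB when one was supplied on SI and newer)
 * from userspace, run the ring specific parser on it, allocate and
 * update the page tables of the submitting client and schedule the
 * IB(s) while holding the VM locks. Only used for submissions with
 * RADEON_CS_USE_VM set.
 **/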
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
		r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
				  vm, ib_chunk->length_dw * 4);
		if (r) {
			DRM_ERROR("Failed to get const ib !\n");
			return r;
		}
		parser->const_ib.is_const_ib = true;
		parser->const_ib.length_dw = ib_chunk->length_dw;
		/* Copy the packet into the IB */
		if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
				       ib_chunk->length_dw * 4)) {
			return -EFAULT;
		}
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
		return -EINVAL;
	}
	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	/* Copy the packet into the IB */
	if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
			       ib_chunk->length_dw * 4)) {
		return -EFAULT;
	}
	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);
	r = radeon_vm_alloc_pt(rdev, vm);
	if (r) {
		goto out;
	}
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
	radeon_cs_sync_rings(parser);
	radeon_ib_sync_to(&parser->ib, vm->fence);
	radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
		rdev, vm, parser->ring));

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
	}

	if (!r) {
		radeon_vm_fence(rdev, vm, parser->ib.fence);
	}

out:
	radeon_vm_add_to_lru(rdev, vm);
	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);
	return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}
	r = radeon_cs_parser_relocs(&parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
		radeon_cs_parser_fini(&parser, r);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	if (parser.ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}
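
/**
 * radeon_cs_finish_pages() - copy the remaining IB pages from userspace
 * @p: parser structure holding parsing context.
 *
 * Copy every page of the IB chunk that has not been pulled in yet by
 * radeon_get_ib_value(), so the IB handed to the hardware is complete.
 * Returns 0 on success, -EFAULT if a copy from userspace fails.
 **/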
int radeon_cs_finish_pages(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;

	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
		if (i == ibc->last_page_index) {
			size = (ibc->length_dw * 4) % PAGE_SIZE;
			if (size == 0)
				size = PAGE_SIZE;
		}

		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
				       ibc->user_ptr + (i * PAGE_SIZE),
				       size))
			return -EFAULT;
	}
	return 0;
}

static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
{
	int new_page;
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;
	bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
		false : true;

	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
				       ibc->user_ptr + (i * PAGE_SIZE),
				       PAGE_SIZE)) {
			p->parser_error = -EFAULT;
			return 0;
		}
	}

	if (pg_idx == ibc->last_page_index) {
		size = (ibc->length_dw * 4) % PAGE_SIZE;
		if (size == 0)
			size = PAGE_SIZE;
	}

	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
	if (copy1)
		ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));

	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
			       size)) {
		p->parser_error = -EFAULT;
		return 0;
	}

	/* for the AGP (non single copy) case, copy from the bounce page into the IB */
	if (!copy1)
		memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);

	ibc->last_copied_page = pg_idx;
	ibc->kpage_idx[new_page] = pg_idx;

	return new_page;
}
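
/**
 * radeon_get_ib_value() - fetch a dword of the IB being parsed
 * @p: parser structure holding parsing context.
 * @idx: dword index into the IB chunk.
 *
 * Return the dword at @idx, copying the page that contains it from
 * userspace first if it is not already available through kpage[0] or
 * kpage[1]. On a failed copy p->parser_error is set and the returned
 * value must not be trusted.
 **/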
u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	u32 pg_idx, pg_offset;
	u32 idx_value = 0;
	int new_page;

	pg_idx = (idx * 4) / PAGE_SIZE;
	pg_offset = (idx * 4) % PAGE_SIZE;

	if (ibc->kpage_idx[0] == pg_idx)
		return ibc->kpage[0][pg_offset/4];
	if (ibc->kpage_idx[1] == pg_idx)
		return ibc->kpage[1][pg_offset/4];

	new_page = radeon_cs_update_pages(p, pg_idx);
	if (new_page < 0) {
		p->parser_error = new_page;
		return 0;
	}

	idx_value = ibc->kpage[new_page][pg_offset/4];
	return idx_value;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: dword index of the packet start in the IB chunk
 *
 * Assumes that chunk_ib_idx is properly set. Returns -EINVAL if the
 * packet is bigger than the remaining IB size or if the packet type is
 * unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 NOP.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the reloc entry for the packet.
 * @nomm: no memory management path; read the GPU offset directly from
 *        the relocation chunk instead of using the validated buffer list.
 *
 * Check that the next packet is a relocation packet3 (NOP) and return
 * the matching reloc entry from the relocation chunk.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->lobj.gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}