1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include "drmP.h" 29 #include "drm.h" 30 #include "radeon_drm.h" 31 #include "radeon_reg.h" 32 #include "radeon.h" 33 #include "radeon_asic.h" 34 35 #include "r100d.h" 36 #include "r200_reg_safe.h" 37 38 #include "r100_track.h" 39 40 static int r200_get_vtx_size_0(uint32_t vtx_fmt_0) 41 { 42 int vtx_size, i; 43 vtx_size = 2; 44 45 if (vtx_fmt_0 & R200_VTX_Z0) 46 vtx_size++; 47 if (vtx_fmt_0 & R200_VTX_W0) 48 vtx_size++; 49 /* blend weight */ 50 if (vtx_fmt_0 & (0x7 << R200_VTX_WEIGHT_COUNT_SHIFT)) 51 vtx_size += (vtx_fmt_0 >> R200_VTX_WEIGHT_COUNT_SHIFT) & 0x7; 52 if (vtx_fmt_0 & R200_VTX_PV_MATRIX_SEL) 53 vtx_size++; 54 if (vtx_fmt_0 & R200_VTX_N0) 55 vtx_size += 3; 56 if (vtx_fmt_0 & R200_VTX_POINT_SIZE) 57 vtx_size++; 58 if (vtx_fmt_0 & R200_VTX_DISCRETE_FOG) 59 vtx_size++; 60 if (vtx_fmt_0 & R200_VTX_SHININESS_0) 61 vtx_size++; 62 if (vtx_fmt_0 & R200_VTX_SHININESS_1) 63 vtx_size++; 64 for (i = 0; i < 8; i++) { 65 int color_size = (vtx_fmt_0 >> (11 + 2*i)) & 0x3; 66 switch (color_size) { 67 case 0: break; 68 case 1: vtx_size++; break; 69 case 2: vtx_size += 3; break; 70 case 3: vtx_size += 4; break; 71 } 72 } 73 if (vtx_fmt_0 & R200_VTX_XY1) 74 vtx_size += 2; 75 if (vtx_fmt_0 & R200_VTX_Z1) 76 vtx_size++; 77 if (vtx_fmt_0 & R200_VTX_W1) 78 vtx_size++; 79 if (vtx_fmt_0 & R200_VTX_N1) 80 vtx_size += 3; 81 return vtx_size; 82 } 83 84 int r200_copy_dma(struct radeon_device *rdev, 85 uint64_t src_offset, 86 uint64_t dst_offset, 87 unsigned num_pages, 88 struct radeon_fence *fence) 89 { 90 uint32_t size; 91 uint32_t cur_size; 92 int i, num_loops; 93 int r = 0; 94 95 /* radeon pitch is /64 */ 96 size = num_pages << PAGE_SHIFT; 97 num_loops = DIV_ROUND_UP(size, 0x1FFFFF); 98 r = radeon_ring_lock(rdev, num_loops * 4 + 64); 99 if (r) { 100 DRM_ERROR("radeon: moving bo (%d).\n", r); 101 return r; 102 } 103 /* Must wait for 2D idle & clean before DMA or hangs might happen */ 104 radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); 105 radeon_ring_write(rdev, (1 << 16)); 106 for (i = 0; i < num_loops; i++) { 107 cur_size = size; 108 if (cur_size > 0x1FFFFF) { 109 cur_size = 0x1FFFFF; 110 } 111 size -= cur_size; 112 radeon_ring_write(rdev, PACKET0(0x720, 2)); 113 radeon_ring_write(rdev, src_offset); 114 radeon_ring_write(rdev, dst_offset); 115 radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30)); 116 src_offset += cur_size; 117 dst_offset += cur_size; 118 } 119 radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0)); 120 radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE); 121 if (fence) { 122 r = radeon_fence_emit(rdev, fence); 123 } 124 radeon_ring_unlock_commit(rdev); 125 return r; 126 } 127 128 129 static int r200_get_vtx_size_1(uint32_t vtx_fmt_1) 130 { 131 int vtx_size, i, tex_size; 132 vtx_size = 0; 133 for (i = 0; i < 6; i++) { 134 tex_size = (vtx_fmt_1 >> (i * 3)) & 0x7; 135 if (tex_size > 4) 136 continue; 137 vtx_size += tex_size; 138 } 139 return vtx_size; 140 } 141 142 int r200_packet0_check(struct radeon_cs_parser *p, 143 struct radeon_cs_packet *pkt, 144 unsigned idx, unsigned reg) 145 { 146 struct radeon_cs_reloc *reloc; 147 struct r100_cs_track *track; 148 volatile uint32_t *ib; 149 uint32_t tmp; 150 int r; 151 int i; 152 int face; 153 u32 tile_flags = 0; 154 u32 idx_value; 155 156 ib = p->ib->ptr; 157 track = (struct r100_cs_track *)p->track; 158 idx_value = radeon_get_ib_value(p, idx); 159 switch (reg) { 160 case RADEON_CRTC_GUI_TRIG_VLINE: 161 r = r100_cs_packet_parse_vline(p); 162 if (r) { 163 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 164 idx, reg); 165 r100_cs_dump_packet(p, pkt); 166 return r; 167 } 168 break; 169 /* FIXME: only allow PACKET3 blit? easier to check for out of 170 * range access */ 171 case RADEON_DST_PITCH_OFFSET: 172 case RADEON_SRC_PITCH_OFFSET: 173 r = r100_reloc_pitch_offset(p, pkt, idx, reg); 174 if (r) 175 return r; 176 break; 177 case RADEON_RB3D_DEPTHOFFSET: 178 r = r100_cs_packet_next_reloc(p, &reloc); 179 if (r) { 180 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 181 idx, reg); 182 r100_cs_dump_packet(p, pkt); 183 return r; 184 } 185 track->zb.robj = reloc->robj; 186 track->zb.offset = idx_value; 187 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 188 break; 189 case RADEON_RB3D_COLOROFFSET: 190 r = r100_cs_packet_next_reloc(p, &reloc); 191 if (r) { 192 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 193 idx, reg); 194 r100_cs_dump_packet(p, pkt); 195 return r; 196 } 197 track->cb[0].robj = reloc->robj; 198 track->cb[0].offset = idx_value; 199 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 200 break; 201 case R200_PP_TXOFFSET_0: 202 case R200_PP_TXOFFSET_1: 203 case R200_PP_TXOFFSET_2: 204 case R200_PP_TXOFFSET_3: 205 case R200_PP_TXOFFSET_4: 206 case R200_PP_TXOFFSET_5: 207 i = (reg - R200_PP_TXOFFSET_0) / 24; 208 r = r100_cs_packet_next_reloc(p, &reloc); 209 if (r) { 210 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 211 idx, reg); 212 r100_cs_dump_packet(p, pkt); 213 return r; 214 } 215 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 216 track->textures[i].robj = reloc->robj; 217 break; 218 case R200_PP_CUBIC_OFFSET_F1_0: 219 case R200_PP_CUBIC_OFFSET_F2_0: 220 case R200_PP_CUBIC_OFFSET_F3_0: 221 case R200_PP_CUBIC_OFFSET_F4_0: 222 case R200_PP_CUBIC_OFFSET_F5_0: 223 case R200_PP_CUBIC_OFFSET_F1_1: 224 case R200_PP_CUBIC_OFFSET_F2_1: 225 case R200_PP_CUBIC_OFFSET_F3_1: 226 case R200_PP_CUBIC_OFFSET_F4_1: 227 case R200_PP_CUBIC_OFFSET_F5_1: 228 case R200_PP_CUBIC_OFFSET_F1_2: 229 case R200_PP_CUBIC_OFFSET_F2_2: 230 case R200_PP_CUBIC_OFFSET_F3_2: 231 case R200_PP_CUBIC_OFFSET_F4_2: 232 case R200_PP_CUBIC_OFFSET_F5_2: 233 case R200_PP_CUBIC_OFFSET_F1_3: 234 case R200_PP_CUBIC_OFFSET_F2_3: 235 case R200_PP_CUBIC_OFFSET_F3_3: 236 case R200_PP_CUBIC_OFFSET_F4_3: 237 case R200_PP_CUBIC_OFFSET_F5_3: 238 case R200_PP_CUBIC_OFFSET_F1_4: 239 case R200_PP_CUBIC_OFFSET_F2_4: 240 case R200_PP_CUBIC_OFFSET_F3_4: 241 case R200_PP_CUBIC_OFFSET_F4_4: 242 case R200_PP_CUBIC_OFFSET_F5_4: 243 case R200_PP_CUBIC_OFFSET_F1_5: 244 case R200_PP_CUBIC_OFFSET_F2_5: 245 case R200_PP_CUBIC_OFFSET_F3_5: 246 case R200_PP_CUBIC_OFFSET_F4_5: 247 case R200_PP_CUBIC_OFFSET_F5_5: 248 i = (reg - R200_PP_TXOFFSET_0) / 24; 249 face = (reg - ((i * 24) + R200_PP_TXOFFSET_0)) / 4; 250 r = r100_cs_packet_next_reloc(p, &reloc); 251 if (r) { 252 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 253 idx, reg); 254 r100_cs_dump_packet(p, pkt); 255 return r; 256 } 257 track->textures[i].cube_info[face - 1].offset = idx_value; 258 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 259 track->textures[i].cube_info[face - 1].robj = reloc->robj; 260 break; 261 case RADEON_RE_WIDTH_HEIGHT: 262 track->maxy = ((idx_value >> 16) & 0x7FF); 263 break; 264 case RADEON_RB3D_COLORPITCH: 265 r = r100_cs_packet_next_reloc(p, &reloc); 266 if (r) { 267 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 268 idx, reg); 269 r100_cs_dump_packet(p, pkt); 270 return r; 271 } 272 273 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 274 tile_flags |= RADEON_COLOR_TILE_ENABLE; 275 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 276 tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; 277 278 tmp = idx_value & ~(0x7 << 16); 279 tmp |= tile_flags; 280 ib[idx] = tmp; 281 282 track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK; 283 break; 284 case RADEON_RB3D_DEPTHPITCH: 285 track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK; 286 break; 287 case RADEON_RB3D_CNTL: 288 switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) { 289 case 7: 290 case 8: 291 case 9: 292 case 11: 293 case 12: 294 track->cb[0].cpp = 1; 295 break; 296 case 3: 297 case 4: 298 case 15: 299 track->cb[0].cpp = 2; 300 break; 301 case 6: 302 track->cb[0].cpp = 4; 303 break; 304 default: 305 DRM_ERROR("Invalid color buffer format (%d) !\n", 306 ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f)); 307 return -EINVAL; 308 } 309 if (idx_value & RADEON_DEPTHXY_OFFSET_ENABLE) { 310 DRM_ERROR("No support for depth xy offset in kms\n"); 311 return -EINVAL; 312 } 313 314 track->z_enabled = !!(idx_value & RADEON_Z_ENABLE); 315 break; 316 case RADEON_RB3D_ZSTENCILCNTL: 317 switch (idx_value & 0xf) { 318 case 0: 319 track->zb.cpp = 2; 320 break; 321 case 2: 322 case 3: 323 case 4: 324 case 5: 325 case 9: 326 case 11: 327 track->zb.cpp = 4; 328 break; 329 default: 330 break; 331 } 332 break; 333 case RADEON_RB3D_ZPASS_ADDR: 334 r = r100_cs_packet_next_reloc(p, &reloc); 335 if (r) { 336 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 337 idx, reg); 338 r100_cs_dump_packet(p, pkt); 339 return r; 340 } 341 ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); 342 break; 343 case RADEON_PP_CNTL: 344 { 345 uint32_t temp = idx_value >> 4; 346 for (i = 0; i < track->num_texture; i++) 347 track->textures[i].enabled = !!(temp & (1 << i)); 348 } 349 break; 350 case RADEON_SE_VF_CNTL: 351 track->vap_vf_cntl = idx_value; 352 break; 353 case 0x210c: 354 /* VAP_VF_MAX_VTX_INDX */ 355 track->max_indx = idx_value & 0x00FFFFFFUL; 356 break; 357 case R200_SE_VTX_FMT_0: 358 track->vtx_size = r200_get_vtx_size_0(idx_value); 359 break; 360 case R200_SE_VTX_FMT_1: 361 track->vtx_size += r200_get_vtx_size_1(idx_value); 362 break; 363 case R200_PP_TXSIZE_0: 364 case R200_PP_TXSIZE_1: 365 case R200_PP_TXSIZE_2: 366 case R200_PP_TXSIZE_3: 367 case R200_PP_TXSIZE_4: 368 case R200_PP_TXSIZE_5: 369 i = (reg - R200_PP_TXSIZE_0) / 32; 370 track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1; 371 track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1; 372 break; 373 case R200_PP_TXPITCH_0: 374 case R200_PP_TXPITCH_1: 375 case R200_PP_TXPITCH_2: 376 case R200_PP_TXPITCH_3: 377 case R200_PP_TXPITCH_4: 378 case R200_PP_TXPITCH_5: 379 i = (reg - R200_PP_TXPITCH_0) / 32; 380 track->textures[i].pitch = idx_value + 32; 381 break; 382 case R200_PP_TXFILTER_0: 383 case R200_PP_TXFILTER_1: 384 case R200_PP_TXFILTER_2: 385 case R200_PP_TXFILTER_3: 386 case R200_PP_TXFILTER_4: 387 case R200_PP_TXFILTER_5: 388 i = (reg - R200_PP_TXFILTER_0) / 32; 389 track->textures[i].num_levels = ((idx_value & R200_MAX_MIP_LEVEL_MASK) 390 >> R200_MAX_MIP_LEVEL_SHIFT); 391 tmp = (idx_value >> 23) & 0x7; 392 if (tmp == 2 || tmp == 6) 393 track->textures[i].roundup_w = false; 394 tmp = (idx_value >> 27) & 0x7; 395 if (tmp == 2 || tmp == 6) 396 track->textures[i].roundup_h = false; 397 break; 398 case R200_PP_TXMULTI_CTL_0: 399 case R200_PP_TXMULTI_CTL_1: 400 case R200_PP_TXMULTI_CTL_2: 401 case R200_PP_TXMULTI_CTL_3: 402 case R200_PP_TXMULTI_CTL_4: 403 case R200_PP_TXMULTI_CTL_5: 404 i = (reg - R200_PP_TXMULTI_CTL_0) / 32; 405 break; 406 case R200_PP_TXFORMAT_X_0: 407 case R200_PP_TXFORMAT_X_1: 408 case R200_PP_TXFORMAT_X_2: 409 case R200_PP_TXFORMAT_X_3: 410 case R200_PP_TXFORMAT_X_4: 411 case R200_PP_TXFORMAT_X_5: 412 i = (reg - R200_PP_TXFORMAT_X_0) / 32; 413 track->textures[i].txdepth = idx_value & 0x7; 414 tmp = (idx_value >> 16) & 0x3; 415 /* 2D, 3D, CUBE */ 416 switch (tmp) { 417 case 0: 418 case 3: 419 case 4: 420 case 5: 421 case 6: 422 case 7: 423 /* 1D/2D */ 424 track->textures[i].tex_coord_type = 0; 425 break; 426 case 1: 427 /* CUBE */ 428 track->textures[i].tex_coord_type = 2; 429 break; 430 case 2: 431 /* 3D */ 432 track->textures[i].tex_coord_type = 1; 433 break; 434 } 435 break; 436 case R200_PP_TXFORMAT_0: 437 case R200_PP_TXFORMAT_1: 438 case R200_PP_TXFORMAT_2: 439 case R200_PP_TXFORMAT_3: 440 case R200_PP_TXFORMAT_4: 441 case R200_PP_TXFORMAT_5: 442 i = (reg - R200_PP_TXFORMAT_0) / 32; 443 if (idx_value & R200_TXFORMAT_NON_POWER2) { 444 track->textures[i].use_pitch = 1; 445 } else { 446 track->textures[i].use_pitch = 0; 447 track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); 448 track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); 449 } 450 if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) 451 track->textures[i].lookup_disable = true; 452 switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) { 453 case R200_TXFORMAT_I8: 454 case R200_TXFORMAT_RGB332: 455 case R200_TXFORMAT_Y8: 456 track->textures[i].cpp = 1; 457 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 458 break; 459 case R200_TXFORMAT_AI88: 460 case R200_TXFORMAT_ARGB1555: 461 case R200_TXFORMAT_RGB565: 462 case R200_TXFORMAT_ARGB4444: 463 case R200_TXFORMAT_VYUY422: 464 case R200_TXFORMAT_YVYU422: 465 case R200_TXFORMAT_LDVDU655: 466 case R200_TXFORMAT_DVDU88: 467 case R200_TXFORMAT_AVYU4444: 468 track->textures[i].cpp = 2; 469 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 470 break; 471 case R200_TXFORMAT_ARGB8888: 472 case R200_TXFORMAT_RGBA8888: 473 case R200_TXFORMAT_ABGR8888: 474 case R200_TXFORMAT_BGR111110: 475 case R200_TXFORMAT_LDVDU8888: 476 track->textures[i].cpp = 4; 477 track->textures[i].compress_format = R100_TRACK_COMP_NONE; 478 break; 479 case R200_TXFORMAT_DXT1: 480 track->textures[i].cpp = 1; 481 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 482 break; 483 case R200_TXFORMAT_DXT23: 484 case R200_TXFORMAT_DXT45: 485 track->textures[i].cpp = 1; 486 track->textures[i].compress_format = R100_TRACK_COMP_DXT1; 487 break; 488 } 489 track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf); 490 track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf); 491 break; 492 case R200_PP_CUBIC_FACES_0: 493 case R200_PP_CUBIC_FACES_1: 494 case R200_PP_CUBIC_FACES_2: 495 case R200_PP_CUBIC_FACES_3: 496 case R200_PP_CUBIC_FACES_4: 497 case R200_PP_CUBIC_FACES_5: 498 tmp = idx_value; 499 i = (reg - R200_PP_CUBIC_FACES_0) / 32; 500 for (face = 0; face < 4; face++) { 501 track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf); 502 track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf); 503 } 504 break; 505 default: 506 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 507 reg, idx); 508 return -EINVAL; 509 } 510 return 0; 511 } 512 513 void r200_set_safe_registers(struct radeon_device *rdev) 514 { 515 rdev->config.r100.reg_safe_bm = r200_reg_safe_bm; 516 rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r200_reg_safe_bm); 517 } 518