/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <linux/firmware.h> 29 #include <linux/platform_device.h> 30 #include <linux/slab.h> 31 #include "drmP.h" 32 #include "radeon.h" 33 #include "radeon_asic.h" 34 #include "radeon_drm.h" 35 #include "rv770d.h" 36 #include "atom.h" 37 #include "avivod.h" 38 39 #define R700_PFP_UCODE_SIZE 848 40 #define R700_PM4_UCODE_SIZE 1360 41 42 static void rv770_gpu_init(struct radeon_device *rdev); 43 void rv770_fini(struct radeon_device *rdev); 44 static void rv770_pcie_gen2_enable(struct radeon_device *rdev); 45 46 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) 47 { 48 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; 49 u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); 50 51 /* Lock the graphics update lock */ 52 tmp |= AVIVO_D1GRPH_UPDATE_LOCK; 53 WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); 54 55 /* update the scanout addresses */ 56 if (radeon_crtc->crtc_id) { 57 WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 58 WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 59 } else { 60 WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 61 WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); 62 } 63 WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, 64 (u32)crtc_base); 65 WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, 66 (u32)crtc_base); 67 68 /* Wait for update_pending to go high. */ 69 while (!(RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING)); 70 DRM_DEBUG("Update pending now high. 
Unlocking vupdate_lock.\n"); 71 72 /* Unlock the lock, so double-buffering can take place inside vblank */ 73 tmp &= ~AVIVO_D1GRPH_UPDATE_LOCK; 74 WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); 75 76 /* Return current update_pending status: */ 77 return RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset) & AVIVO_D1GRPH_SURFACE_UPDATE_PENDING; 78 } 79 80 /* get temperature in millidegrees */ 81 int rv770_get_temp(struct radeon_device *rdev) 82 { 83 u32 temp = (RREG32(CG_MULT_THERMAL_STATUS) & ASIC_T_MASK) >> 84 ASIC_T_SHIFT; 85 int actual_temp; 86 87 if (temp & 0x400) 88 actual_temp = -256; 89 else if (temp & 0x200) 90 actual_temp = 255; 91 else if (temp & 0x100) { 92 actual_temp = temp & 0x1ff; 93 actual_temp |= ~0x1ff; 94 } else 95 actual_temp = temp & 0xff; 96 97 return (actual_temp * 1000) / 2; 98 } 99 100 void rv770_pm_misc(struct radeon_device *rdev) 101 { 102 int req_ps_idx = rdev->pm.requested_power_state_index; 103 int req_cm_idx = rdev->pm.requested_clock_mode_index; 104 struct radeon_power_state *ps = &rdev->pm.power_state[req_ps_idx]; 105 struct radeon_voltage *voltage = &ps->clock_info[req_cm_idx].voltage; 106 107 if ((voltage->type == VOLTAGE_SW) && voltage->voltage) { 108 if (voltage->voltage != rdev->pm.current_vddc) { 109 radeon_atom_set_voltage(rdev, voltage->voltage); 110 rdev->pm.current_vddc = voltage->voltage; 111 DRM_DEBUG("Setting: v: %d\n", voltage->voltage); 112 } 113 } 114 } 115 116 /* 117 * GART 118 */ 119 int rv770_pcie_gart_enable(struct radeon_device *rdev) 120 { 121 u32 tmp; 122 int r, i; 123 124 if (rdev->gart.table.vram.robj == NULL) { 125 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 126 return -EINVAL; 127 } 128 r = radeon_gart_table_vram_pin(rdev); 129 if (r) 130 return r; 131 radeon_gart_restore(rdev); 132 /* Setup L2 cache */ 133 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | 134 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 135 EFFECTIVE_L2_QUEUE_SIZE(7)); 136 WREG32(VM_L2_CNTL2, 0); 
137 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 138 /* Setup TLB control */ 139 tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | 140 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 141 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 142 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 143 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 144 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 145 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 146 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 147 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 148 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 149 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 150 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); 151 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); 152 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); 153 WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 154 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); 155 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, 156 (u32)(rdev->dummy_page.addr >> 12)); 157 for (i = 1; i < 7; i++) 158 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 159 160 r600_pcie_gart_tlb_flush(rdev); 161 rdev->gart.ready = true; 162 return 0; 163 } 164 165 void rv770_pcie_gart_disable(struct radeon_device *rdev) 166 { 167 u32 tmp; 168 int i, r; 169 170 /* Disable all tables */ 171 for (i = 0; i < 7; i++) 172 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 173 174 /* Setup L2 cache */ 175 WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | 176 EFFECTIVE_L2_QUEUE_SIZE(7)); 177 WREG32(VM_L2_CNTL2, 0); 178 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 179 /* Setup TLB control */ 180 tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 181 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 182 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 183 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 184 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 185 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 186 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 187 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 188 if (rdev->gart.table.vram.robj) { 189 r = 
radeon_bo_reserve(rdev->gart.table.vram.robj, false); 190 if (likely(r == 0)) { 191 radeon_bo_kunmap(rdev->gart.table.vram.robj); 192 radeon_bo_unpin(rdev->gart.table.vram.robj); 193 radeon_bo_unreserve(rdev->gart.table.vram.robj); 194 } 195 } 196 } 197 198 void rv770_pcie_gart_fini(struct radeon_device *rdev) 199 { 200 radeon_gart_fini(rdev); 201 rv770_pcie_gart_disable(rdev); 202 radeon_gart_table_vram_free(rdev); 203 } 204 205 206 void rv770_agp_enable(struct radeon_device *rdev) 207 { 208 u32 tmp; 209 int i; 210 211 /* Setup L2 cache */ 212 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | 213 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | 214 EFFECTIVE_L2_QUEUE_SIZE(7)); 215 WREG32(VM_L2_CNTL2, 0); 216 WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); 217 /* Setup TLB control */ 218 tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | 219 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 220 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | 221 EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); 222 WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); 223 WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); 224 WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); 225 WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); 226 WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); 227 WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); 228 WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); 229 for (i = 0; i < 7; i++) 230 WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); 231 } 232 233 static void rv770_mc_program(struct radeon_device *rdev) 234 { 235 struct rv515_mc_save save; 236 u32 tmp; 237 int i, j; 238 239 /* Initialize HDP */ 240 for (i = 0, j = 0; i < 32; i++, j += 0x18) { 241 WREG32((0x2c14 + j), 0x00000000); 242 WREG32((0x2c18 + j), 0x00000000); 243 WREG32((0x2c1c + j), 0x00000000); 244 WREG32((0x2c20 + j), 0x00000000); 245 WREG32((0x2c24 + j), 0x00000000); 246 } 247 /* r7xx hw bug. 
Read from HDP_DEBUG1 rather 248 * than writing to HDP_REG_COHERENCY_FLUSH_CNTL 249 */ 250 tmp = RREG32(HDP_DEBUG1); 251 252 rv515_mc_stop(rdev, &save); 253 if (r600_mc_wait_for_idle(rdev)) { 254 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 255 } 256 /* Lockout access through VGA aperture*/ 257 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); 258 /* Update configuration */ 259 if (rdev->flags & RADEON_IS_AGP) { 260 if (rdev->mc.vram_start < rdev->mc.gtt_start) { 261 /* VRAM before AGP */ 262 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 263 rdev->mc.vram_start >> 12); 264 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 265 rdev->mc.gtt_end >> 12); 266 } else { 267 /* VRAM after AGP */ 268 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 269 rdev->mc.gtt_start >> 12); 270 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 271 rdev->mc.vram_end >> 12); 272 } 273 } else { 274 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, 275 rdev->mc.vram_start >> 12); 276 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, 277 rdev->mc.vram_end >> 12); 278 } 279 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); 280 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; 281 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); 282 WREG32(MC_VM_FB_LOCATION, tmp); 283 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); 284 WREG32(HDP_NONSURFACE_INFO, (2 << 7)); 285 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF); 286 if (rdev->flags & RADEON_IS_AGP) { 287 WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16); 288 WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); 289 WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); 290 } else { 291 WREG32(MC_VM_AGP_BASE, 0); 292 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); 293 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); 294 } 295 if (r600_mc_wait_for_idle(rdev)) { 296 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 297 } 298 rv515_mc_resume(rdev, &save); 299 /* we need to own VRAM, so turn off the VGA renderer here 300 * to stop it overwriting our objects */ 301 rv515_vga_render_disable(rdev); 302 } 303 304 305 /* 306 * CP. 
307 */ 308 void r700_cp_stop(struct radeon_device *rdev) 309 { 310 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 312 WREG32(SCRATCH_UMSK, 0); 313 } 314 315 static int rv770_cp_load_microcode(struct radeon_device *rdev) 316 { 317 const __be32 *fw_data; 318 int i; 319 320 if (!rdev->me_fw || !rdev->pfp_fw) 321 return -EINVAL; 322 323 r700_cp_stop(rdev); 324 WREG32(CP_RB_CNTL, 325 #ifdef __BIG_ENDIAN 326 BUF_SWAP_32BIT | 327 #endif 328 RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); 329 330 /* Reset cp */ 331 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); 332 RREG32(GRBM_SOFT_RESET); 333 mdelay(15); 334 WREG32(GRBM_SOFT_RESET, 0); 335 336 fw_data = (const __be32 *)rdev->pfp_fw->data; 337 WREG32(CP_PFP_UCODE_ADDR, 0); 338 for (i = 0; i < R700_PFP_UCODE_SIZE; i++) 339 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++)); 340 WREG32(CP_PFP_UCODE_ADDR, 0); 341 342 fw_data = (const __be32 *)rdev->me_fw->data; 343 WREG32(CP_ME_RAM_WADDR, 0); 344 for (i = 0; i < R700_PM4_UCODE_SIZE; i++) 345 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++)); 346 347 WREG32(CP_PFP_UCODE_ADDR, 0); 348 WREG32(CP_ME_RAM_WADDR, 0); 349 WREG32(CP_ME_RAM_RADDR, 0); 350 return 0; 351 } 352 353 void r700_cp_fini(struct radeon_device *rdev) 354 { 355 r700_cp_stop(rdev); 356 radeon_ring_fini(rdev); 357 } 358 359 /* 360 * Core functions 361 */ 362 static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev, 363 u32 num_tile_pipes, 364 u32 num_backends, 365 u32 backend_disable_mask) 366 { 367 u32 backend_map = 0; 368 u32 enabled_backends_mask; 369 u32 enabled_backends_count; 370 u32 cur_pipe; 371 u32 swizzle_pipe[R7XX_MAX_PIPES]; 372 u32 cur_backend; 373 u32 i; 374 bool force_no_swizzle; 375 376 if (num_tile_pipes > R7XX_MAX_PIPES) 377 num_tile_pipes = R7XX_MAX_PIPES; 378 if (num_tile_pipes < 1) 379 num_tile_pipes = 1; 380 if (num_backends > R7XX_MAX_BACKENDS) 381 num_backends = R7XX_MAX_BACKENDS; 382 if (num_backends < 1) 383 
num_backends = 1; 384 385 enabled_backends_mask = 0; 386 enabled_backends_count = 0; 387 for (i = 0; i < R7XX_MAX_BACKENDS; ++i) { 388 if (((backend_disable_mask >> i) & 1) == 0) { 389 enabled_backends_mask |= (1 << i); 390 ++enabled_backends_count; 391 } 392 if (enabled_backends_count == num_backends) 393 break; 394 } 395 396 if (enabled_backends_count == 0) { 397 enabled_backends_mask = 1; 398 enabled_backends_count = 1; 399 } 400 401 if (enabled_backends_count != num_backends) 402 num_backends = enabled_backends_count; 403 404 switch (rdev->family) { 405 case CHIP_RV770: 406 case CHIP_RV730: 407 force_no_swizzle = false; 408 break; 409 case CHIP_RV710: 410 case CHIP_RV740: 411 default: 412 force_no_swizzle = true; 413 break; 414 } 415 416 memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES); 417 switch (num_tile_pipes) { 418 case 1: 419 swizzle_pipe[0] = 0; 420 break; 421 case 2: 422 swizzle_pipe[0] = 0; 423 swizzle_pipe[1] = 1; 424 break; 425 case 3: 426 if (force_no_swizzle) { 427 swizzle_pipe[0] = 0; 428 swizzle_pipe[1] = 1; 429 swizzle_pipe[2] = 2; 430 } else { 431 swizzle_pipe[0] = 0; 432 swizzle_pipe[1] = 2; 433 swizzle_pipe[2] = 1; 434 } 435 break; 436 case 4: 437 if (force_no_swizzle) { 438 swizzle_pipe[0] = 0; 439 swizzle_pipe[1] = 1; 440 swizzle_pipe[2] = 2; 441 swizzle_pipe[3] = 3; 442 } else { 443 swizzle_pipe[0] = 0; 444 swizzle_pipe[1] = 2; 445 swizzle_pipe[2] = 3; 446 swizzle_pipe[3] = 1; 447 } 448 break; 449 case 5: 450 if (force_no_swizzle) { 451 swizzle_pipe[0] = 0; 452 swizzle_pipe[1] = 1; 453 swizzle_pipe[2] = 2; 454 swizzle_pipe[3] = 3; 455 swizzle_pipe[4] = 4; 456 } else { 457 swizzle_pipe[0] = 0; 458 swizzle_pipe[1] = 2; 459 swizzle_pipe[2] = 4; 460 swizzle_pipe[3] = 1; 461 swizzle_pipe[4] = 3; 462 } 463 break; 464 case 6: 465 if (force_no_swizzle) { 466 swizzle_pipe[0] = 0; 467 swizzle_pipe[1] = 1; 468 swizzle_pipe[2] = 2; 469 swizzle_pipe[3] = 3; 470 swizzle_pipe[4] = 4; 471 swizzle_pipe[5] = 5; 472 } else { 473 
swizzle_pipe[0] = 0; 474 swizzle_pipe[1] = 2; 475 swizzle_pipe[2] = 4; 476 swizzle_pipe[3] = 5; 477 swizzle_pipe[4] = 3; 478 swizzle_pipe[5] = 1; 479 } 480 break; 481 case 7: 482 if (force_no_swizzle) { 483 swizzle_pipe[0] = 0; 484 swizzle_pipe[1] = 1; 485 swizzle_pipe[2] = 2; 486 swizzle_pipe[3] = 3; 487 swizzle_pipe[4] = 4; 488 swizzle_pipe[5] = 5; 489 swizzle_pipe[6] = 6; 490 } else { 491 swizzle_pipe[0] = 0; 492 swizzle_pipe[1] = 2; 493 swizzle_pipe[2] = 4; 494 swizzle_pipe[3] = 6; 495 swizzle_pipe[4] = 3; 496 swizzle_pipe[5] = 1; 497 swizzle_pipe[6] = 5; 498 } 499 break; 500 case 8: 501 if (force_no_swizzle) { 502 swizzle_pipe[0] = 0; 503 swizzle_pipe[1] = 1; 504 swizzle_pipe[2] = 2; 505 swizzle_pipe[3] = 3; 506 swizzle_pipe[4] = 4; 507 swizzle_pipe[5] = 5; 508 swizzle_pipe[6] = 6; 509 swizzle_pipe[7] = 7; 510 } else { 511 swizzle_pipe[0] = 0; 512 swizzle_pipe[1] = 2; 513 swizzle_pipe[2] = 4; 514 swizzle_pipe[3] = 6; 515 swizzle_pipe[4] = 3; 516 swizzle_pipe[5] = 1; 517 swizzle_pipe[6] = 7; 518 swizzle_pipe[7] = 5; 519 } 520 break; 521 } 522 523 cur_backend = 0; 524 for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { 525 while (((1 << cur_backend) & enabled_backends_mask) == 0) 526 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 527 528 backend_map |= (u32)(((cur_backend & 3) << (swizzle_pipe[cur_pipe] * 2))); 529 530 cur_backend = (cur_backend + 1) % R7XX_MAX_BACKENDS; 531 } 532 533 return backend_map; 534 } 535 536 static void rv770_program_channel_remap(struct radeon_device *rdev) 537 { 538 u32 tcp_chan_steer, mc_shared_chremap, tmp; 539 bool force_no_swizzle; 540 541 switch (rdev->family) { 542 case CHIP_RV770: 543 case CHIP_RV730: 544 force_no_swizzle = false; 545 break; 546 case CHIP_RV710: 547 case CHIP_RV740: 548 default: 549 force_no_swizzle = true; 550 break; 551 } 552 553 tmp = RREG32(MC_SHARED_CHMAP); 554 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 555 case 0: 556 case 1: 557 default: 558 /* default mapping */ 559 
mc_shared_chremap = 0x00fac688; 560 break; 561 case 2: 562 case 3: 563 if (force_no_swizzle) 564 mc_shared_chremap = 0x00fac688; 565 else 566 mc_shared_chremap = 0x00bbc298; 567 break; 568 } 569 570 if (rdev->family == CHIP_RV740) 571 tcp_chan_steer = 0x00ef2a60; 572 else 573 tcp_chan_steer = 0x00fac688; 574 575 WREG32(TCP_CHAN_STEER, tcp_chan_steer); 576 WREG32(MC_SHARED_CHREMAP, mc_shared_chremap); 577 } 578 579 static void rv770_gpu_init(struct radeon_device *rdev) 580 { 581 int i, j, num_qd_pipes; 582 u32 ta_aux_cntl; 583 u32 sx_debug_1; 584 u32 smx_dc_ctl0; 585 u32 db_debug3; 586 u32 num_gs_verts_per_thread; 587 u32 vgt_gs_per_es; 588 u32 gs_prim_buffer_depth = 0; 589 u32 sq_ms_fifo_sizes; 590 u32 sq_config; 591 u32 sq_thread_resource_mgmt; 592 u32 hdp_host_path_cntl; 593 u32 sq_dyn_gpr_size_simd_ab_0; 594 u32 backend_map; 595 u32 gb_tiling_config = 0; 596 u32 cc_rb_backend_disable = 0; 597 u32 cc_gc_shader_pipe_config = 0; 598 u32 mc_arb_ramcfg; 599 u32 db_debug4; 600 601 /* setup chip specs */ 602 switch (rdev->family) { 603 case CHIP_RV770: 604 rdev->config.rv770.max_pipes = 4; 605 rdev->config.rv770.max_tile_pipes = 8; 606 rdev->config.rv770.max_simds = 10; 607 rdev->config.rv770.max_backends = 4; 608 rdev->config.rv770.max_gprs = 256; 609 rdev->config.rv770.max_threads = 248; 610 rdev->config.rv770.max_stack_entries = 512; 611 rdev->config.rv770.max_hw_contexts = 8; 612 rdev->config.rv770.max_gs_threads = 16 * 2; 613 rdev->config.rv770.sx_max_export_size = 128; 614 rdev->config.rv770.sx_max_export_pos_size = 16; 615 rdev->config.rv770.sx_max_export_smx_size = 112; 616 rdev->config.rv770.sq_num_cf_insts = 2; 617 618 rdev->config.rv770.sx_num_of_sets = 7; 619 rdev->config.rv770.sc_prim_fifo_size = 0xF9; 620 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 621 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 622 break; 623 case CHIP_RV730: 624 rdev->config.rv770.max_pipes = 2; 625 rdev->config.rv770.max_tile_pipes = 4; 626 rdev->config.rv770.max_simds = 
8; 627 rdev->config.rv770.max_backends = 2; 628 rdev->config.rv770.max_gprs = 128; 629 rdev->config.rv770.max_threads = 248; 630 rdev->config.rv770.max_stack_entries = 256; 631 rdev->config.rv770.max_hw_contexts = 8; 632 rdev->config.rv770.max_gs_threads = 16 * 2; 633 rdev->config.rv770.sx_max_export_size = 256; 634 rdev->config.rv770.sx_max_export_pos_size = 32; 635 rdev->config.rv770.sx_max_export_smx_size = 224; 636 rdev->config.rv770.sq_num_cf_insts = 2; 637 638 rdev->config.rv770.sx_num_of_sets = 7; 639 rdev->config.rv770.sc_prim_fifo_size = 0xf9; 640 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 641 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 642 if (rdev->config.rv770.sx_max_export_pos_size > 16) { 643 rdev->config.rv770.sx_max_export_pos_size -= 16; 644 rdev->config.rv770.sx_max_export_smx_size += 16; 645 } 646 break; 647 case CHIP_RV710: 648 rdev->config.rv770.max_pipes = 2; 649 rdev->config.rv770.max_tile_pipes = 2; 650 rdev->config.rv770.max_simds = 2; 651 rdev->config.rv770.max_backends = 1; 652 rdev->config.rv770.max_gprs = 256; 653 rdev->config.rv770.max_threads = 192; 654 rdev->config.rv770.max_stack_entries = 256; 655 rdev->config.rv770.max_hw_contexts = 4; 656 rdev->config.rv770.max_gs_threads = 8 * 2; 657 rdev->config.rv770.sx_max_export_size = 128; 658 rdev->config.rv770.sx_max_export_pos_size = 16; 659 rdev->config.rv770.sx_max_export_smx_size = 112; 660 rdev->config.rv770.sq_num_cf_insts = 1; 661 662 rdev->config.rv770.sx_num_of_sets = 7; 663 rdev->config.rv770.sc_prim_fifo_size = 0x40; 664 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 665 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 666 break; 667 case CHIP_RV740: 668 rdev->config.rv770.max_pipes = 4; 669 rdev->config.rv770.max_tile_pipes = 4; 670 rdev->config.rv770.max_simds = 8; 671 rdev->config.rv770.max_backends = 4; 672 rdev->config.rv770.max_gprs = 256; 673 rdev->config.rv770.max_threads = 248; 674 rdev->config.rv770.max_stack_entries = 512; 675 
rdev->config.rv770.max_hw_contexts = 8; 676 rdev->config.rv770.max_gs_threads = 16 * 2; 677 rdev->config.rv770.sx_max_export_size = 256; 678 rdev->config.rv770.sx_max_export_pos_size = 32; 679 rdev->config.rv770.sx_max_export_smx_size = 224; 680 rdev->config.rv770.sq_num_cf_insts = 2; 681 682 rdev->config.rv770.sx_num_of_sets = 7; 683 rdev->config.rv770.sc_prim_fifo_size = 0x100; 684 rdev->config.rv770.sc_hiz_tile_fifo_size = 0x30; 685 rdev->config.rv770.sc_earlyz_tile_fifo_fize = 0x130; 686 687 if (rdev->config.rv770.sx_max_export_pos_size > 16) { 688 rdev->config.rv770.sx_max_export_pos_size -= 16; 689 rdev->config.rv770.sx_max_export_smx_size += 16; 690 } 691 break; 692 default: 693 break; 694 } 695 696 /* Initialize HDP */ 697 j = 0; 698 for (i = 0; i < 32; i++) { 699 WREG32((0x2c14 + j), 0x00000000); 700 WREG32((0x2c18 + j), 0x00000000); 701 WREG32((0x2c1c + j), 0x00000000); 702 WREG32((0x2c20 + j), 0x00000000); 703 WREG32((0x2c24 + j), 0x00000000); 704 j += 0x18; 705 } 706 707 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); 708 709 /* setup tiling, simd, pipe config */ 710 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); 711 712 switch (rdev->config.rv770.max_tile_pipes) { 713 case 1: 714 default: 715 gb_tiling_config |= PIPE_TILING(0); 716 break; 717 case 2: 718 gb_tiling_config |= PIPE_TILING(1); 719 break; 720 case 4: 721 gb_tiling_config |= PIPE_TILING(2); 722 break; 723 case 8: 724 gb_tiling_config |= PIPE_TILING(3); 725 break; 726 } 727 rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes; 728 729 if (rdev->family == CHIP_RV770) 730 gb_tiling_config |= BANK_TILING(1); 731 else 732 gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); 733 rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3); 734 gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT); 735 if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) 736 rdev->config.rv770.tiling_group_size = 512; 737 else 
738 rdev->config.rv770.tiling_group_size = 256; 739 if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { 740 gb_tiling_config |= ROW_TILING(3); 741 gb_tiling_config |= SAMPLE_SPLIT(3); 742 } else { 743 gb_tiling_config |= 744 ROW_TILING(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); 745 gb_tiling_config |= 746 SAMPLE_SPLIT(((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT)); 747 } 748 749 gb_tiling_config |= BANK_SWAPS(1); 750 751 cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000; 752 cc_rb_backend_disable |= 753 BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK); 754 755 cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00; 756 cc_gc_shader_pipe_config |= 757 INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK); 758 cc_gc_shader_pipe_config |= 759 INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK); 760 761 if (rdev->family == CHIP_RV740) 762 backend_map = 0x28; 763 else 764 backend_map = r700_get_tile_pipe_to_backend_map(rdev, 765 rdev->config.rv770.max_tile_pipes, 766 (R7XX_MAX_BACKENDS - 767 r600_count_pipe_bits((cc_rb_backend_disable & 768 R7XX_MAX_BACKENDS_MASK) >> 16)), 769 (cc_rb_backend_disable >> 16)); 770 771 rdev->config.rv770.tile_config = gb_tiling_config; 772 gb_tiling_config |= BACKEND_MAP(backend_map); 773 774 WREG32(GB_TILING_CONFIG, gb_tiling_config); 775 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 776 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 777 778 rv770_program_channel_remap(rdev); 779 780 WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); 781 WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 782 WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config); 783 WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); 784 785 WREG32(CGTS_SYS_TCC_DISABLE, 0); 786 WREG32(CGTS_TCC_DISABLE, 0); 787 
WREG32(CGTS_USER_SYS_TCC_DISABLE, 0); 788 WREG32(CGTS_USER_TCC_DISABLE, 0); 789 790 num_qd_pipes = 791 R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 792 WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK); 793 WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK); 794 795 /* set HW defaults for 3D engine */ 796 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | 797 ROQ_IB2_START(0x2b))); 798 799 WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); 800 801 ta_aux_cntl = RREG32(TA_CNTL_AUX); 802 WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO); 803 804 sx_debug_1 = RREG32(SX_DEBUG_1); 805 sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; 806 WREG32(SX_DEBUG_1, sx_debug_1); 807 808 smx_dc_ctl0 = RREG32(SMX_DC_CTL0); 809 smx_dc_ctl0 &= ~CACHE_DEPTH(0x1ff); 810 smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1); 811 WREG32(SMX_DC_CTL0, smx_dc_ctl0); 812 813 if (rdev->family != CHIP_RV740) 814 WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | 815 GS_FLUSH_CTL(4) | 816 ACK_FLUSH_CTL(3) | 817 SYNC_FLUSH_CTL)); 818 819 db_debug3 = RREG32(DB_DEBUG3); 820 db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f); 821 switch (rdev->family) { 822 case CHIP_RV770: 823 case CHIP_RV740: 824 db_debug3 |= DB_CLK_OFF_DELAY(0x1f); 825 break; 826 case CHIP_RV710: 827 case CHIP_RV730: 828 default: 829 db_debug3 |= DB_CLK_OFF_DELAY(2); 830 break; 831 } 832 WREG32(DB_DEBUG3, db_debug3); 833 834 if (rdev->family != CHIP_RV770) { 835 db_debug4 = RREG32(DB_DEBUG4); 836 db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER; 837 WREG32(DB_DEBUG4, db_debug4); 838 } 839 840 WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | 841 POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | 842 SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); 843 844 WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | 845 
SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | 846 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); 847 848 WREG32(PA_SC_MULTI_CHIP_CNTL, 0); 849 850 WREG32(VGT_NUM_INSTANCES, 1); 851 852 WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0)); 853 854 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4)); 855 856 WREG32(CP_PERFMON_CNTL, 0); 857 858 sq_ms_fifo_sizes = (CACHE_FIFO_SIZE(16 * rdev->config.rv770.sq_num_cf_insts) | 859 DONE_FIFO_HIWATER(0xe0) | 860 ALU_UPDATE_FIFO_HIWATER(0x8)); 861 switch (rdev->family) { 862 case CHIP_RV770: 863 case CHIP_RV730: 864 case CHIP_RV710: 865 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1); 866 break; 867 case CHIP_RV740: 868 default: 869 sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4); 870 break; 871 } 872 WREG32(SQ_MS_FIFO_SIZES, sq_ms_fifo_sizes); 873 874 /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT 875 * should be adjusted as needed by the 2D/3D drivers. This just sets default values 876 */ 877 sq_config = RREG32(SQ_CONFIG); 878 sq_config &= ~(PS_PRIO(3) | 879 VS_PRIO(3) | 880 GS_PRIO(3) | 881 ES_PRIO(3)); 882 sq_config |= (DX9_CONSTS | 883 VC_ENABLE | 884 EXPORT_SRC_C | 885 PS_PRIO(0) | 886 VS_PRIO(1) | 887 GS_PRIO(2) | 888 ES_PRIO(3)); 889 if (rdev->family == CHIP_RV710) 890 /* no vertex cache */ 891 sq_config &= ~VC_ENABLE; 892 893 WREG32(SQ_CONFIG, sq_config); 894 895 WREG32(SQ_GPR_RESOURCE_MGMT_1, (NUM_PS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | 896 NUM_VS_GPRS((rdev->config.rv770.max_gprs * 24)/64) | 897 NUM_CLAUSE_TEMP_GPRS(((rdev->config.rv770.max_gprs * 24)/64)/2))); 898 899 WREG32(SQ_GPR_RESOURCE_MGMT_2, (NUM_GS_GPRS((rdev->config.rv770.max_gprs * 7)/64) | 900 NUM_ES_GPRS((rdev->config.rv770.max_gprs * 7)/64))); 901 902 sq_thread_resource_mgmt = (NUM_PS_THREADS((rdev->config.rv770.max_threads * 4)/8) | 903 NUM_VS_THREADS((rdev->config.rv770.max_threads * 2)/8) | 904 NUM_ES_THREADS((rdev->config.rv770.max_threads * 1)/8)); 905 if 
(((rdev->config.rv770.max_threads * 1) / 8) > rdev->config.rv770.max_gs_threads) 906 sq_thread_resource_mgmt |= NUM_GS_THREADS(rdev->config.rv770.max_gs_threads); 907 else 908 sq_thread_resource_mgmt |= NUM_GS_THREADS((rdev->config.rv770.max_gs_threads * 1)/8); 909 WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt); 910 911 WREG32(SQ_STACK_RESOURCE_MGMT_1, (NUM_PS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | 912 NUM_VS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); 913 914 WREG32(SQ_STACK_RESOURCE_MGMT_2, (NUM_GS_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4) | 915 NUM_ES_STACK_ENTRIES((rdev->config.rv770.max_stack_entries * 1)/4))); 916 917 sq_dyn_gpr_size_simd_ab_0 = (SIMDA_RING0((rdev->config.rv770.max_gprs * 38)/64) | 918 SIMDA_RING1((rdev->config.rv770.max_gprs * 38)/64) | 919 SIMDB_RING0((rdev->config.rv770.max_gprs * 38)/64) | 920 SIMDB_RING1((rdev->config.rv770.max_gprs * 38)/64)); 921 922 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_0, sq_dyn_gpr_size_simd_ab_0); 923 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_1, sq_dyn_gpr_size_simd_ab_0); 924 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_2, sq_dyn_gpr_size_simd_ab_0); 925 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_3, sq_dyn_gpr_size_simd_ab_0); 926 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_4, sq_dyn_gpr_size_simd_ab_0); 927 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_5, sq_dyn_gpr_size_simd_ab_0); 928 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_6, sq_dyn_gpr_size_simd_ab_0); 929 WREG32(SQ_DYN_GPR_SIZE_SIMD_AB_7, sq_dyn_gpr_size_simd_ab_0); 930 931 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | 932 FORCE_EOV_MAX_REZ_CNT(255))); 933 934 if (rdev->family == CHIP_RV710) 935 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(TC_ONLY) | 936 AUTO_INVLD_EN(ES_AND_GS_AUTO))); 937 else 938 WREG32(VGT_CACHE_INVALIDATION, (CACHE_INVALIDATION(VC_AND_TC) | 939 AUTO_INVLD_EN(ES_AND_GS_AUTO))); 940 941 switch (rdev->family) { 942 case CHIP_RV770: 943 case CHIP_RV730: 944 case CHIP_RV740: 945 gs_prim_buffer_depth = 384; 946 break; 947 case 
CHIP_RV710: 948 gs_prim_buffer_depth = 128; 949 break; 950 default: 951 break; 952 } 953 954 num_gs_verts_per_thread = rdev->config.rv770.max_pipes * 16; 955 vgt_gs_per_es = gs_prim_buffer_depth + num_gs_verts_per_thread; 956 /* Max value for this is 256 */ 957 if (vgt_gs_per_es > 256) 958 vgt_gs_per_es = 256; 959 960 WREG32(VGT_ES_PER_GS, 128); 961 WREG32(VGT_GS_PER_ES, vgt_gs_per_es); 962 WREG32(VGT_GS_PER_VS, 2); 963 964 /* more default values. 2D/3D driver should adjust as needed */ 965 WREG32(VGT_GS_VERTEX_REUSE, 16); 966 WREG32(PA_SC_LINE_STIPPLE_STATE, 0); 967 WREG32(VGT_STRMOUT_EN, 0); 968 WREG32(SX_MISC, 0); 969 WREG32(PA_SC_MODE_CNTL, 0); 970 WREG32(PA_SC_EDGERULE, 0xaaaaaaaa); 971 WREG32(PA_SC_AA_CONFIG, 0); 972 WREG32(PA_SC_CLIPRECT_RULE, 0xffff); 973 WREG32(PA_SC_LINE_STIPPLE, 0); 974 WREG32(SPI_INPUT_Z, 0); 975 WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2)); 976 WREG32(CB_COLOR7_FRAG, 0); 977 978 /* clear render buffer base addresses */ 979 WREG32(CB_COLOR0_BASE, 0); 980 WREG32(CB_COLOR1_BASE, 0); 981 WREG32(CB_COLOR2_BASE, 0); 982 WREG32(CB_COLOR3_BASE, 0); 983 WREG32(CB_COLOR4_BASE, 0); 984 WREG32(CB_COLOR5_BASE, 0); 985 WREG32(CB_COLOR6_BASE, 0); 986 WREG32(CB_COLOR7_BASE, 0); 987 988 WREG32(TCP_CNTL, 0); 989 990 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); 991 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); 992 993 WREG32(PA_SC_MULTI_CHIP_CNTL, 0); 994 995 WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA | 996 NUM_CLIP_SEQ(3))); 997 998 } 999 1000 static int rv770_vram_scratch_init(struct radeon_device *rdev) 1001 { 1002 int r; 1003 u64 gpu_addr; 1004 1005 if (rdev->vram_scratch.robj == NULL) { 1006 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, 1007 PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 1008 &rdev->vram_scratch.robj); 1009 if (r) { 1010 return r; 1011 } 1012 } 1013 1014 r = radeon_bo_reserve(rdev->vram_scratch.robj, false); 1015 if (unlikely(r != 0)) 1016 return r; 1017 r = radeon_bo_pin(rdev->vram_scratch.robj, 1018 RADEON_GEM_DOMAIN_VRAM, 
&gpu_addr); 1019 if (r) { 1020 radeon_bo_unreserve(rdev->vram_scratch.robj); 1021 return r; 1022 } 1023 r = radeon_bo_kmap(rdev->vram_scratch.robj, 1024 (void **)&rdev->vram_scratch.ptr); 1025 if (r) 1026 radeon_bo_unpin(rdev->vram_scratch.robj); 1027 radeon_bo_unreserve(rdev->vram_scratch.robj); 1028 1029 return r; 1030 } 1031 1032 static void rv770_vram_scratch_fini(struct radeon_device *rdev) 1033 { 1034 int r; 1035 1036 if (rdev->vram_scratch.robj == NULL) { 1037 return; 1038 } 1039 r = radeon_bo_reserve(rdev->vram_scratch.robj, false); 1040 if (likely(r == 0)) { 1041 radeon_bo_kunmap(rdev->vram_scratch.robj); 1042 radeon_bo_unpin(rdev->vram_scratch.robj); 1043 radeon_bo_unreserve(rdev->vram_scratch.robj); 1044 } 1045 radeon_bo_unref(&rdev->vram_scratch.robj); 1046 } 1047 1048 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) 1049 { 1050 u64 size_bf, size_af; 1051 1052 if (mc->mc_vram_size > 0xE0000000) { 1053 /* leave room for at least 512M GTT */ 1054 dev_warn(rdev->dev, "limiting VRAM\n"); 1055 mc->real_vram_size = 0xE0000000; 1056 mc->mc_vram_size = 0xE0000000; 1057 } 1058 if (rdev->flags & RADEON_IS_AGP) { 1059 size_bf = mc->gtt_start; 1060 size_af = 0xFFFFFFFF - mc->gtt_end + 1; 1061 if (size_bf > size_af) { 1062 if (mc->mc_vram_size > size_bf) { 1063 dev_warn(rdev->dev, "limiting VRAM\n"); 1064 mc->real_vram_size = size_bf; 1065 mc->mc_vram_size = size_bf; 1066 } 1067 mc->vram_start = mc->gtt_start - mc->mc_vram_size; 1068 } else { 1069 if (mc->mc_vram_size > size_af) { 1070 dev_warn(rdev->dev, "limiting VRAM\n"); 1071 mc->real_vram_size = size_af; 1072 mc->mc_vram_size = size_af; 1073 } 1074 mc->vram_start = mc->gtt_end; 1075 } 1076 mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; 1077 dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n", 1078 mc->mc_vram_size >> 20, mc->vram_start, 1079 mc->vram_end, mc->real_vram_size >> 20); 1080 } else { 1081 radeon_vram_location(rdev, &rdev->mc, 0); 1082 
rdev->mc.gtt_base_align = 0; 1083 radeon_gtt_location(rdev, mc); 1084 } 1085 } 1086 1087 int rv770_mc_init(struct radeon_device *rdev) 1088 { 1089 u32 tmp; 1090 int chansize, numchan; 1091 1092 /* Get VRAM informations */ 1093 rdev->mc.vram_is_ddr = true; 1094 tmp = RREG32(MC_ARB_RAMCFG); 1095 if (tmp & CHANSIZE_OVERRIDE) { 1096 chansize = 16; 1097 } else if (tmp & CHANSIZE_MASK) { 1098 chansize = 64; 1099 } else { 1100 chansize = 32; 1101 } 1102 tmp = RREG32(MC_SHARED_CHMAP); 1103 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { 1104 case 0: 1105 default: 1106 numchan = 1; 1107 break; 1108 case 1: 1109 numchan = 2; 1110 break; 1111 case 2: 1112 numchan = 4; 1113 break; 1114 case 3: 1115 numchan = 8; 1116 break; 1117 } 1118 rdev->mc.vram_width = numchan * chansize; 1119 /* Could aper size report 0 ? */ 1120 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 1121 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 1122 /* Setup GPU memory space */ 1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1125 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1126 r700_vram_gtt_location(rdev, &rdev->mc); 1127 radeon_update_bandwidth_info(rdev); 1128 1129 return 0; 1130 } 1131 1132 static int rv770_startup(struct radeon_device *rdev) 1133 { 1134 int r; 1135 1136 /* enable pcie gen2 link */ 1137 rv770_pcie_gen2_enable(rdev); 1138 1139 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { 1140 r = r600_init_microcode(rdev); 1141 if (r) { 1142 DRM_ERROR("Failed to load firmware!\n"); 1143 return r; 1144 } 1145 } 1146 1147 rv770_mc_program(rdev); 1148 if (rdev->flags & RADEON_IS_AGP) { 1149 rv770_agp_enable(rdev); 1150 } else { 1151 r = rv770_pcie_gart_enable(rdev); 1152 if (r) 1153 return r; 1154 } 1155 r = rv770_vram_scratch_init(rdev); 1156 if (r) 1157 return r; 1158 rv770_gpu_init(rdev); 1159 r = r600_blit_init(rdev); 1160 if (r) { 1161 r600_blit_fini(rdev); 1162 rdev->asic->copy = NULL; 1163 
dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); 1164 } 1165 1166 /* allocate wb buffer */ 1167 r = radeon_wb_init(rdev); 1168 if (r) 1169 return r; 1170 1171 /* Enable IRQ */ 1172 r = r600_irq_init(rdev); 1173 if (r) { 1174 DRM_ERROR("radeon: IH init failed (%d).\n", r); 1175 radeon_irq_kms_fini(rdev); 1176 return r; 1177 } 1178 r600_irq_set(rdev); 1179 1180 r = radeon_ring_init(rdev, rdev->cp.ring_size); 1181 if (r) 1182 return r; 1183 r = rv770_cp_load_microcode(rdev); 1184 if (r) 1185 return r; 1186 r = r600_cp_resume(rdev); 1187 if (r) 1188 return r; 1189 1190 return 0; 1191 } 1192 1193 int rv770_resume(struct radeon_device *rdev) 1194 { 1195 int r; 1196 1197 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw, 1198 * posting will perform necessary task to bring back GPU into good 1199 * shape. 1200 */ 1201 /* post card */ 1202 atom_asic_init(rdev->mode_info.atom_context); 1203 1204 r = rv770_startup(rdev); 1205 if (r) { 1206 DRM_ERROR("r600 startup failed on resume\n"); 1207 return r; 1208 } 1209 1210 r = r600_ib_test(rdev); 1211 if (r) { 1212 DRM_ERROR("radeon: failed testing IB (%d).\n", r); 1213 return r; 1214 } 1215 1216 r = r600_audio_init(rdev); 1217 if (r) { 1218 dev_err(rdev->dev, "radeon: audio init failed\n"); 1219 return r; 1220 } 1221 1222 return r; 1223 1224 } 1225 1226 int rv770_suspend(struct radeon_device *rdev) 1227 { 1228 int r; 1229 1230 r600_audio_fini(rdev); 1231 /* FIXME: we should wait for ring to be empty */ 1232 r700_cp_stop(rdev); 1233 rdev->cp.ready = false; 1234 r600_irq_suspend(rdev); 1235 radeon_wb_disable(rdev); 1236 rv770_pcie_gart_disable(rdev); 1237 /* unpin shaders bo */ 1238 if (rdev->r600_blit.shader_obj) { 1239 r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); 1240 if (likely(r == 0)) { 1241 radeon_bo_unpin(rdev->r600_blit.shader_obj); 1242 radeon_bo_unreserve(rdev->r600_blit.shader_obj); 1243 } 1244 } 1245 return 0; 1246 } 1247 1248 /* Plan is to move initialization in that 
function and use 1249 * helper function so that radeon_device_init pretty much 1250 * do nothing more than calling asic specific function. This 1251 * should also allow to remove a bunch of callback function 1252 * like vram_info. 1253 */ 1254 int rv770_init(struct radeon_device *rdev) 1255 { 1256 int r; 1257 1258 r = radeon_dummy_page_init(rdev); 1259 if (r) 1260 return r; 1261 /* This don't do much */ 1262 r = radeon_gem_init(rdev); 1263 if (r) 1264 return r; 1265 /* Read BIOS */ 1266 if (!radeon_get_bios(rdev)) { 1267 if (ASIC_IS_AVIVO(rdev)) 1268 return -EINVAL; 1269 } 1270 /* Must be an ATOMBIOS */ 1271 if (!rdev->is_atom_bios) { 1272 dev_err(rdev->dev, "Expecting atombios for R600 GPU\n"); 1273 return -EINVAL; 1274 } 1275 r = radeon_atombios_init(rdev); 1276 if (r) 1277 return r; 1278 /* Post card if necessary */ 1279 if (!radeon_card_posted(rdev)) { 1280 if (!rdev->bios) { 1281 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); 1282 return -EINVAL; 1283 } 1284 DRM_INFO("GPU not posted. 
posting now...\n"); 1285 atom_asic_init(rdev->mode_info.atom_context); 1286 } 1287 /* Initialize scratch registers */ 1288 r600_scratch_init(rdev); 1289 /* Initialize surface registers */ 1290 radeon_surface_init(rdev); 1291 /* Initialize clocks */ 1292 radeon_get_clock_info(rdev->ddev); 1293 /* Fence driver */ 1294 r = radeon_fence_driver_init(rdev); 1295 if (r) 1296 return r; 1297 /* initialize AGP */ 1298 if (rdev->flags & RADEON_IS_AGP) { 1299 r = radeon_agp_init(rdev); 1300 if (r) 1301 radeon_agp_disable(rdev); 1302 } 1303 r = rv770_mc_init(rdev); 1304 if (r) 1305 return r; 1306 /* Memory manager */ 1307 r = radeon_bo_init(rdev); 1308 if (r) 1309 return r; 1310 1311 r = radeon_irq_kms_init(rdev); 1312 if (r) 1313 return r; 1314 1315 rdev->cp.ring_obj = NULL; 1316 r600_ring_init(rdev, 1024 * 1024); 1317 1318 rdev->ih.ring_obj = NULL; 1319 r600_ih_ring_init(rdev, 64 * 1024); 1320 1321 r = r600_pcie_gart_init(rdev); 1322 if (r) 1323 return r; 1324 1325 rdev->accel_working = true; 1326 r = rv770_startup(rdev); 1327 if (r) { 1328 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1329 r700_cp_fini(rdev); 1330 r600_irq_fini(rdev); 1331 radeon_wb_fini(rdev); 1332 radeon_irq_kms_fini(rdev); 1333 rv770_pcie_gart_fini(rdev); 1334 rdev->accel_working = false; 1335 } 1336 if (rdev->accel_working) { 1337 r = radeon_ib_pool_init(rdev); 1338 if (r) { 1339 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 1340 rdev->accel_working = false; 1341 } else { 1342 r = r600_ib_test(rdev); 1343 if (r) { 1344 dev_err(rdev->dev, "IB test failed (%d).\n", r); 1345 rdev->accel_working = false; 1346 } 1347 } 1348 } 1349 1350 r = r600_audio_init(rdev); 1351 if (r) { 1352 dev_err(rdev->dev, "radeon: audio init failed\n"); 1353 return r; 1354 } 1355 1356 return 0; 1357 } 1358 1359 void rv770_fini(struct radeon_device *rdev) 1360 { 1361 r600_blit_fini(rdev); 1362 r700_cp_fini(rdev); 1363 r600_irq_fini(rdev); 1364 radeon_wb_fini(rdev); 1365 radeon_irq_kms_fini(rdev); 1366 
rv770_pcie_gart_fini(rdev); 1367 rv770_vram_scratch_fini(rdev); 1368 radeon_gem_fini(rdev); 1369 radeon_fence_driver_fini(rdev); 1370 radeon_agp_fini(rdev); 1371 radeon_bo_fini(rdev); 1372 radeon_atombios_fini(rdev); 1373 kfree(rdev->bios); 1374 rdev->bios = NULL; 1375 radeon_dummy_page_fini(rdev); 1376 } 1377 1378 static void rv770_pcie_gen2_enable(struct radeon_device *rdev) 1379 { 1380 u32 link_width_cntl, lanes, speed_cntl, tmp; 1381 u16 link_cntl2; 1382 1383 if (radeon_pcie_gen2 == 0) 1384 return; 1385 1386 if (rdev->flags & RADEON_IS_IGP) 1387 return; 1388 1389 if (!(rdev->flags & RADEON_IS_PCIE)) 1390 return; 1391 1392 /* x2 cards have a special sequence */ 1393 if (ASIC_IS_X2(rdev)) 1394 return; 1395 1396 /* advertise upconfig capability */ 1397 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1398 link_width_cntl &= ~LC_UPCONFIGURE_DIS; 1399 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1400 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1401 if (link_width_cntl & LC_RENEGOTIATION_SUPPORT) { 1402 lanes = (link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT; 1403 link_width_cntl &= ~(LC_LINK_WIDTH_MASK | 1404 LC_RECONFIG_ARC_MISSING_ESCAPE); 1405 link_width_cntl |= lanes | LC_RECONFIG_NOW | 1406 LC_RENEGOTIATE_EN | LC_UPCONFIGURE_SUPPORT; 1407 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1408 } else { 1409 link_width_cntl |= LC_UPCONFIGURE_DIS; 1410 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1411 } 1412 1413 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1414 if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) && 1415 (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) { 1416 1417 tmp = RREG32(0x541c); 1418 WREG32(0x541c, tmp | 0x8); 1419 WREG32(MM_CFGREGS_CNTL, MM_WR_TO_CFG_EN); 1420 link_cntl2 = RREG16(0x4088); 1421 link_cntl2 &= ~TARGET_LINK_SPEED_MASK; 1422 link_cntl2 |= 0x2; 1423 WREG16(0x4088, link_cntl2); 1424 WREG32(MM_CFGREGS_CNTL, 0); 1425 1426 speed_cntl = 
RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1427 speed_cntl &= ~LC_TARGET_LINK_SPEED_OVERRIDE_EN; 1428 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1429 1430 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1431 speed_cntl |= LC_CLR_FAILED_SPD_CHANGE_CNT; 1432 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1433 1434 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1435 speed_cntl &= ~LC_CLR_FAILED_SPD_CHANGE_CNT; 1436 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1437 1438 speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); 1439 speed_cntl |= LC_GEN2_EN_STRAP; 1440 WREG32_PCIE_P(PCIE_LC_SPEED_CNTL, speed_cntl); 1441 1442 } else { 1443 link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL); 1444 /* XXX: only disable it if gen1 bridge vendor == 0x111d or 0x1106 */ 1445 if (1) 1446 link_width_cntl |= LC_UPCONFIGURE_DIS; 1447 else 1448 link_width_cntl &= ~LC_UPCONFIGURE_DIS; 1449 WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); 1450 } 1451 } 1452